77 MemoryBufferRef Bitcode,
HashT HashValue,
const std::string &KernelName,
78 std::string &Suffix, dim3 BlockDim, dim3 GridDim,
79 const SmallVector<RuntimeConstant> &RCVec,
80 const SmallVector<std::pair<std::string, StringRef>> &LambdaCalleeInfo,
81 const std::unordered_map<std::string, const void *> &VarNameToDevPtr,
82 const SmallPtrSet<void *, 8> &GlobalLinkedBinaries,
84 bool DumpIR,
bool RelinkGlobalsByCopy)
86 Suffix(Suffix), BlockDim(BlockDim), GridDim(GridDim), RCVec(RCVec),
87 LambdaCalleeInfo(LambdaCalleeInfo), VarNameToDevPtr(VarNameToDevPtr),
88 GlobalLinkedBinaries(GlobalLinkedBinaries), DeviceArch(DeviceArch),
89 CGOption(CGConfig.codeGenOption()), DumpIR(DumpIR),
90 RelinkGlobalsByCopy(RelinkGlobalsByCopy),
92 CGConfig.minBlocksPerSM(BlockDim.x * BlockDim.y * BlockDim.z)),
93 SpecializeArgs(CGConfig.specializeArgs()),
94 SpecializeDims(CGConfig.specializeDims()),
95 SpecializeDimsAssume(CGConfig.specializeDimsAssume()),
96 SpecializeLaunchBounds(CGConfig.specializeLaunchBounds()),
97 OptLevel(CGConfig.optLevel()),
98 CodegenOptLevel(CGConfig.codeGenOptLevel()),
99 PassPipeline(CGConfig.optPipeline()) {
101 llvm::SmallString<128> S;
102 llvm::raw_svector_ostream OS(S);
103 OS <<
"[KernelConfig] ID:" <<
KernelName <<
" ";
// Fragment of the TimerRAII helper. Only some of its lines are visible here
// (the embedded line numbers skip), so the enclosing declaration, the
// closing braces, and the definition of `Milliseconds` are not shown.
//
// Time points for the measured interval. `Start` is assigned in the
// constructor below. NOTE(review): the member `End` is never assigned in the
// visible lines; the later `auto End` declares a separate local with the same
// name -- confirm whether the member is still needed.
122 std::chrono::high_resolution_clock::time_point Start, End;
// Constructor: stores the hash value and, only when
// Config::get().ProteusDebugOutput is set, records the current time in Start.
124 TimerRAII(
HashT HashValue) : HashValue(HashValue) {
125 if (Config::get().ProteusDebugOutput) {
126 Start = std::chrono::high_resolution_clock::now();
// Presumably the destructor body (its header is not visible -- TODO confirm):
// when debug output is enabled, take the current time and compute the elapsed
// duration since Start.
131 if (Config::get().ProteusDebugOutput) {
132 auto End = std::chrono::high_resolution_clock::now();
133 auto Duration = End - Start;
// Converts the elapsed duration to whole milliseconds. The left-hand side of
// this statement is not visible; presumably it defines the `Milliseconds`
// value streamed below.
135 std::chrono::duration_cast<std::chrono::milliseconds>(Duration)
// Writes the hash value (via HashValue.toString()) and the elapsed
// milliseconds to the stream returned by Logger::logs("proteus").
137 Logger::logs(
"proteus")
138 <<
"Compiled HashValue " << HashValue.
toString() <<
" for "
139 << Milliseconds <<
"ms\n";
// Fragment of a compilation routine. The function header and several
// intermediate lines are not visible (the embedded line numbers skip), so any
// guards around the statements below are unknown from this excerpt.
//
// Obtain the module to work on from cloneKernelModule(Ctx); M owns it.
145 std::unique_ptr<Module> M = cloneKernelModule(Ctx);
// The specialized kernel's name is the original kernel name with Suffix
// appended.
147 std::string KernelMangled = (
KernelName + Suffix);
// Specialize the IR in M, passing the kernel name/suffix, launch dimensions,
// runtime constants, lambda callee info and the Specialize* flags. The
// remaining arguments and the end of this call are not visible here.
151 proteus::specializeIR(*M,
KernelName, Suffix, BlockDim, GridDim, RCVec,
152 LambdaCalleeInfo, SpecializeArgs, SpecializeDims,
153 SpecializeDimsAssume, SpecializeLaunchBounds,
// Presumably rewrites global variables in M to use the device pointers in
// VarNameToDevPtr; whether this call is conditional is not visible
// -- TODO confirm.
158 replaceGlobalVariablesWithPointers(*M, VarNameToDevPtr);
// Run the optimization step on the module.
160 invokeOptimizeIR(*M);
// Print the module to llvm::outs() after optimization. NOTE(review): the
// condition guarding this dump (if any) is not visible in this excerpt.
162 llvm::outs() <<
"LLVM IR module post optimization " << *M <<
"\n";
// Helper lambda: creates the ".proteus-dump" directory (a relative path) and
// returns its name. Its closing brace is not visible here.
165 const auto CreateDumpDirectory = []() {
166 const std::string DumpDirectory =
".proteus-dump";
167 std::filesystem::create_directory(DumpDirectory);
168 return DumpDirectory;
// Declared static, so CreateDumpDirectory() runs only when this variable is
// first initialized rather than every time this statement is reached.
171 static const std::string DumpDirectory = CreateDumpDirectory();
// When globals are not relinked by copy, relink them in the object buffer
// using VarNameToDevPtr. ObjBuf is created in lines not visible here.
180 if (!RelinkGlobalsByCopy)
181 proteus::relinkGlobalsObject(ObjBuf->getMemBufferRef(), VarNameToDevPtr);