// Constructor inputs: kernel bitcode and hash, kernel name and suffix, launch
// dimensions, runtime constants, lambda callee info, global-variable info,
// and the dump/relink flags.
78 MemoryBufferRef Bitcode,
HashT HashValue,
const std::string &KernelName,
79 std::string &Suffix, dim3 BlockDim, dim3 GridDim,
80 const SmallVector<RuntimeConstant> &RCVec,
81 const SmallVector<std::pair<std::string, StringRef>> &LambdaCalleeInfo,
82 const std::unordered_map<std::string, GlobalVarInfo> &VarNameToGlobalInfo,
83 const SmallPtrSet<void *, 8> &GlobalLinkedBinaries,
85 bool DumpIR,
bool RelinkGlobalsByCopy)
// Member initializers: each member is initialized from the constructor
// argument of the same name.
87 Suffix(Suffix), BlockDim(BlockDim), GridDim(GridDim), RCVec(RCVec),
88 LambdaCalleeInfo(LambdaCalleeInfo),
89 VarNameToGlobalInfo(VarNameToGlobalInfo),
90 GlobalLinkedBinaries(GlobalLinkedBinaries), DeviceArch(DeviceArch),
// The remaining settings are read from CGConfig.
91 CGOption(CGConfig.codeGenOption()), DumpIR(DumpIR),
92 RelinkGlobalsByCopy(RelinkGlobalsByCopy),
// minBlocksPerSM receives the product of the three block dimensions
// (presumably the number of threads per block -- confirm against CGConfig).
94 CGConfig.minBlocksPerSM(BlockDim.x * BlockDim.y * BlockDim.z)),
95 SpecializeArgs(CGConfig.specializeArgs()),
96 SpecializeDims(CGConfig.specializeDims()),
97 SpecializeDimsRange(CGConfig.specializeDimsRange()),
98 SpecializeLaunchBounds(CGConfig.specializeLaunchBounds()),
99 OptLevel(CGConfig.optLevel()),
100 CodegenOptLevel(CGConfig.codeGenOptLevel()),
101 PassPipeline(CGConfig.optPipeline()) {
// Format a "[KernelConfig] ID:<KernelName> " message prefix into the
// SmallString S through a raw_svector_ostream.
103 llvm::SmallString<128> S;
104 llvm::raw_svector_ostream OS(S);
105 OS <<
"[KernelConfig] ID:" <<
KernelName <<
" ";
// Time points used to measure the elapsed interval.
124 std::chrono::high_resolution_clock::time_point Start, End;
// Stores the hash value and, only when debug output is enabled, records the
// start time.
126 TimerRAII(
HashT HashValue) : HashValue(HashValue) {
127 if (Config::get().ProteusDebugOutput) {
128 Start = std::chrono::high_resolution_clock::now();
// When debug output is enabled, take the current time, compute the duration
// since Start, and log it in milliseconds together with the hash value.
133 if (Config::get().ProteusDebugOutput) {
// NOTE(review): this local `End` has the same name as the `End` member
// declared above and appears to shadow it, leaving the member unassigned
// here -- confirm whether the member is needed.
134 auto End = std::chrono::high_resolution_clock::now();
135 auto Duration = End - Start;
137 std::chrono::duration_cast<std::chrono::milliseconds>(Duration)
139 Logger::logs(
"proteus")
140 <<
"Compiled HashValue " << HashValue.
toString() <<
" for "
141 << Milliseconds <<
"ms\n";
// Obtain the module to work on from cloneKernelModule (a clone, per the
// helper's name).
147 std::unique_ptr<Module> M = cloneKernelModule(Ctx);
// Kernel name with Suffix appended.
149 std::string KernelMangled = (
KernelName + Suffix);
// Specialize the module's IR using the launch dimensions, runtime constants,
// lambda callee info, and the Specialize* settings.
153 proteus::specializeIR(*M,
KernelName, Suffix, BlockDim, GridDim, RCVec,
154 LambdaCalleeInfo, SpecializeArgs, SpecializeDims,
155 SpecializeDimsRange, SpecializeLaunchBounds,
// Rewrite global variables as pointers (per the helper's name), driven by
// VarNameToGlobalInfo.
160 replaceGlobalVariablesWithPointers(*M, VarNameToGlobalInfo);
// Run the IR optimization step on the module.
162 invokeOptimizeIR(*M);
// Print the optimized module's IR to llvm::outs().
164 llvm::outs() <<
"LLVM IR module post optimization " << *M <<
"\n";
// Helper lambda: creates the ".proteus-dump" directory and returns its name.
// std::filesystem::create_directory does not treat an already existing
// directory as an error.
// NOTE(review): the throwing overload is used, so an OS-level failure would
// propagate as std::filesystem::filesystem_error -- confirm this is intended.
167 const auto CreateDumpDirectory = []() {
168 const std::string DumpDirectory =
".proteus-dump";
169 std::filesystem::create_directory(DumpDirectory);
170 return DumpDirectory;
// Static initializer: CreateDumpDirectory() is invoked once to initialize
// DumpDirectory, so the directory is created a single time.
173 static const std::string DumpDirectory = CreateDumpDirectory();
// Relink globals in the object buffer only when copy-based relinking
// (RelinkGlobalsByCopy) is disabled.
182 if (!RelinkGlobalsByCopy)
183 proteus::relinkGlobalsObject(ObjBuf->getMemBufferRef(),
184 VarNameToGlobalInfo);