77 MemoryBufferRef Bitcode,
HashT HashValue,
const std::string &KernelName,
78 std::string &Suffix, dim3 BlockDim, dim3 GridDim,
79 const SmallVector<RuntimeConstant> &RCVec,
80 const SmallVector<std::pair<std::string, StringRef>> &LambdaCalleeInfo,
81 const std::unordered_map<std::string, GlobalVarInfo> &VarNameToGlobalInfo,
82 const SmallPtrSet<void *, 8> &GlobalLinkedBinaries,
84 bool DumpIR,
bool RelinkGlobalsByCopy)
86 Suffix(Suffix), BlockDim(BlockDim), GridDim(GridDim), RCVec(RCVec),
87 LambdaCalleeInfo(LambdaCalleeInfo),
88 VarNameToGlobalInfo(VarNameToGlobalInfo),
89 GlobalLinkedBinaries(GlobalLinkedBinaries), DeviceArch(DeviceArch),
90 CGOption(CGConfig.codeGenOption()), DumpIR(DumpIR),
91 RelinkGlobalsByCopy(RelinkGlobalsByCopy),
93 CGConfig.minBlocksPerSM(BlockDim.x * BlockDim.y * BlockDim.z)),
94 SpecializeArgs(CGConfig.specializeArgs()),
95 SpecializeDims(CGConfig.specializeDims()),
96 SpecializeDimsAssume(CGConfig.specializeDimsAssume()),
97 SpecializeLaunchBounds(CGConfig.specializeLaunchBounds()),
98 OptLevel(CGConfig.optLevel()),
99 CodegenOptLevel(CGConfig.codeGenOptLevel()),
100 PassPipeline(CGConfig.optPipeline()) {
102 llvm::SmallString<128> S;
103 llvm::raw_svector_ostream OS(S);
104 OS <<
"[KernelConfig] ID:" <<
KernelName <<
" ";
// Fragment of the TimerRAII timing helper. This listing omits several of its
// lines (struct header, closing braces, destructor header).
// Time points intended to bracket the timed region.
123 std::chrono::high_resolution_clock::time_point Start, End;
// Keeps the hash value so the log line emitted below can identify what was
// compiled.
125 TimerRAII(
HashT HashValue) : HashValue(HashValue) {
// The start time is sampled only when Proteus debug output is enabled.
126 if (Config::get().ProteusDebugOutput) {
127 Start = std::chrono::high_resolution_clock::now();
// NOTE(review): the statements from here on presumably belong to the
// destructor, whose header is not visible in this listing -- confirm.
132 if (Config::get().ProteusDebugOutput) {
// NOTE(review): this local `End` appears to shadow the `End` declared next to
// `Start` above, leaving that one unused -- confirm.
133 auto End = std::chrono::high_resolution_clock::now();
134 auto Duration = End - Start;
// The elapsed interval is converted to whole milliseconds.
136 std::chrono::duration_cast<std::chrono::milliseconds>(Duration)
// Emits "Compiled HashValue <hash> for <N>ms" on the "proteus" log stream.
138 Logger::logs(
"proteus")
139 <<
"Compiled HashValue " << HashValue.
toString() <<
" for "
140 << Milliseconds <<
"ms\n";
146 std::unique_ptr<Module> M = cloneKernelModule(Ctx);
148 std::string KernelMangled = (
KernelName + Suffix);
152 proteus::specializeIR(*M,
KernelName, Suffix, BlockDim, GridDim, RCVec,
153 LambdaCalleeInfo, SpecializeArgs, SpecializeDims,
154 SpecializeDimsAssume, SpecializeLaunchBounds,
159 replaceGlobalVariablesWithPointers(*M, VarNameToGlobalInfo);
161 invokeOptimizeIR(*M);
163 llvm::outs() <<
"LLVM IR module post optimization " << *M <<
"\n";
// Helper that creates the ".proteus-dump" directory and returns its name.
// The name is a relative path, so it resolves against the process's current
// working directory.
166 const auto CreateDumpDirectory = []() {
167 const std::string DumpDirectory =
".proteus-dump";
// This create_directory overload (no std::error_code argument) reports OS
// failures by throwing std::filesystem::filesystem_error; an already
// existing directory is not treated as an error.
168 std::filesystem::create_directory(DumpDirectory);
169 return DumpDirectory;
// Function-local static: the helper runs once, the first time control reaches
// this declaration, and the resulting name is reused afterwards.
172 static const std::string DumpDirectory = CreateDumpDirectory();
181 if (!RelinkGlobalsByCopy)
182 proteus::relinkGlobalsObject(ObjBuf->getMemBufferRef(),
183 VarNameToGlobalInfo);