1#ifndef PROTEUS_COMPILATION_TASK_H
2#define PROTEUS_COMPILATION_TASK_H
12#include <llvm/Bitcode/BitcodeReader.h>
13#include <llvm/Bitcode/BitcodeWriter.h>
// Kernel bitcode; cloneKernelModule() parses it into a Module on demand.
// NOTE(review): MemoryBufferRef is a non-owning view, so the backing buffer
// presumably has to outlive this task - confirm against the caller.
21 MemoryBufferRef Bitcode;
// Kernel identifier: concatenated with Suffix to form the mangled name and
// printed as the ID in the KernelConfig log line built by the constructor.
23 std::string KernelName;
// Copied from the constructor argument of the same name and forwarded to
// proteus::specializeIR.
27 SmallVector<RuntimeConstant> RCVec;
// Copied from the constructor argument and forwarded to proteus::specializeIR.
// NOTE(review): the StringRef half of each pair is non-owning - verify that
// the referenced strings outlive the task.
28 SmallVector<std::pair<std::string, StringRef>> LambdaCalleeInfo;
// Copied from the constructor argument; later handed to
// replaceGlobalVariablesWithPointers and proteus::relinkGlobalsObject.
29 std::unordered_map<std::string, GlobalVarInfo> VarNameToGlobalInfo;
// Copied from the constructor argument of the same name.
30 SmallPtrSet<void *, 8> GlobalLinkedBinaries;
// Copied from the constructor argument of the same name.
31 std::string DeviceArch;
// When false, the compile path calls proteus::relinkGlobalsObject on the
// generated object buffer.
34 bool RelinkGlobalsByCopy;
// Initialised from CGConfig.specializeDimsRange(); forwarded to specializeIR.
38 bool SpecializeDimsRange;
// Initialised from CGConfig.specializeLaunchBounds(); forwarded to
// specializeIR.
39 bool SpecializeLaunchBounds;
// Builds a fresh Module inside Ctx by parsing the task's Bitcode buffer.
// Each call parses the bitcode again, so every caller gets its own Module.
42 std::unique_ptr<Module> cloneKernelModule(LLVMContext &Ctx) {
// parseBitcodeFile yields an llvm::Expected wrapper around the module.
44 auto ClonedModule = parseBitcodeFile(Bitcode, Ctx);
// Error branch: entered when parsing failed. The handling code itself is not
// visible in this listing.
45 if (
auto E = ClonedModule.takeError()) {
// Success path: move the owned module out of the Expected wrapper.
49 return std::move(*ClonedModule);
// Runs the IR optimisation step on M. The implementation is chosen at
// preprocessing time: one branch for CUDA builds, one for HIP builds. The
// branch bodies are not visible in this listing.
52 void invokeOptimizeIR(Module &M) {
54#if PROTEUS_ENABLE_CUDA
57#elif PROTEUS_ENABLE_HIP
// Neither backend macro is set: fail the build with an explicit message.
65#error "JitEngineDevice requires PROTEUS_ENABLE_CUDA or PROTEUS_ENABLE_HIP"
// Constructor parameter list and member-initializer list (the line carrying
// the constructor name is not part of this listing).
// Plain parameters are copied into the same-named members; the Specialize*
// flags and CGOption are read from CGConfig instead.
71 MemoryBufferRef Bitcode,
HashT HashValue,
const std::string &KernelName,
// NOTE(review): Suffix is taken by non-const reference while the other string
// parameters are const references; the visible code only copies it into a
// member - confirm whether the non-const reference is intentional.
72 std::string &Suffix, dim3 BlockDim, dim3 GridDim,
73 const SmallVector<RuntimeConstant> &RCVec,
74 const SmallVector<std::pair<std::string, StringRef>> &LambdaCalleeInfo,
75 const std::unordered_map<std::string, GlobalVarInfo> &VarNameToGlobalInfo,
76 const SmallPtrSet<void *, 8> &GlobalLinkedBinaries,
78 bool DumpIR,
bool RelinkGlobalsByCopy)
80 Suffix(Suffix), BlockDim(BlockDim), GridDim(GridDim), RCVec(RCVec),
81 LambdaCalleeInfo(LambdaCalleeInfo),
82 VarNameToGlobalInfo(VarNameToGlobalInfo),
83 GlobalLinkedBinaries(GlobalLinkedBinaries), DeviceArch(DeviceArch),
84 CGOption(CGConfig.codeGenOption()), DumpIR(DumpIR),
85 RelinkGlobalsByCopy(RelinkGlobalsByCopy),
// minBlocksPerSM receives the product of the three block dimensions.
87 CGConfig.minBlocksPerSM(BlockDim.x * BlockDim.y * BlockDim.z)),
88 SpecializeArgs(CGConfig.specializeArgs()),
89 SpecializeDims(CGConfig.specializeDims()),
90 SpecializeDimsRange(CGConfig.specializeDimsRange()),
91 SpecializeLaunchBounds(CGConfig.specializeLaunchBounds()),
// Constructor body: formats a KernelConfig line into a stack-backed string,
// starting with the kernel name. The remainder of the message and its sink
// are not visible here.
94 llvm::SmallString<128> S;
95 llvm::raw_svector_ostream OS(S);
96 OS <<
"[KernelConfig] ID:" <<
KernelName <<
" ";
// Scope timer: records a start time on construction and, in the second
// fragment below, logs the elapsed milliseconds for HashValue. Both halves
// are gated on Config::get().ProteusDebugOutput, so Start is only read when
// it was also written (assuming the flag does not change in between).
// NOTE(review): the End declared here is never assigned in the visible code;
// the later fragment declares its own local End that shadows it.
116 std::chrono::high_resolution_clock::time_point Start, End;
118 TimerRAII(
HashT HashValue) : HashValue(HashValue) {
119 if (Config::get().ProteusDebugOutput) {
120 Start = std::chrono::high_resolution_clock::now();
// Reporting half (presumably the destructor - its signature is not visible).
125 if (Config::get().ProteusDebugOutput) {
// Local variable; shadows the End declared next to Start.
126 auto End = std::chrono::high_resolution_clock::now();
127 auto Duration = End - Start;
// Elapsed time converted to whole milliseconds for the log message.
129 std::chrono::duration_cast<std::chrono::milliseconds>(Duration)
131 Logger::logs(
"proteus")
132 <<
"Compiled HashValue " << HashValue.
toString() <<
" for "
133 << Milliseconds <<
"ms\n";
// Body fragment of the compile path (signature and several guards are not
// visible in this listing). Visible order: clone module, specialise,
// rewrite globals, optimise, optional dumps, backend codegen, optional relink.
// Work on a private Module parsed from the stored bitcode.
139 std::unique_ptr<Module> M = cloneKernelModule(Ctx);
// Mangled kernel name is the plain name followed by Suffix.
141 std::string KernelMangled = (
KernelName + Suffix);
// Apply specialisation using the launch geometry, runtime constants and the
// Specialize* flags captured at construction.
145 proteus::specializeIR(*M,
KernelName, Suffix, BlockDim, GridDim, RCVec,
146 LambdaCalleeInfo, SpecializeArgs, SpecializeDims,
147 SpecializeDimsRange, SpecializeLaunchBounds,
// NOTE(review): any condition guarding this call is not visible here.
152 replaceGlobalVariablesWithPointers(*M, VarNameToGlobalInfo);
154 invokeOptimizeIR(*M);
// Prints the optimised IR to stdout. NOTE(review): the guard for this print,
// if any, is not visible in this listing.
156 llvm::outs() <<
"LLVM IR module post optimization " << *M <<
"\n";
// Helper that creates the dump directory and returns its name.
// NOTE(review): this create_directory overload throws
// std::filesystem::filesystem_error on failure.
159 const auto CreateDumpDirectory = []() {
160 const std::string DumpDirectory =
".proteus-dump";
161 std::filesystem::create_directory(DumpDirectory);
162 return DumpDirectory;
// Function-local static: the helper above runs only on first execution.
165 static const std::string DumpDirectory = CreateDumpDirectory();
// Backend-specific code generation, selected at preprocessing time; only the
// tail of the HIP call is visible.
171#if PROTEUS_ENABLE_CUDA
174#elif PROTEUS_ENABLE_HIP
176 CGOption, OptConfig);
// Unless globals are relinked by copy, patch the generated object using the
// global-variable table.
179 if (!RelinkGlobalsByCopy)
180 proteus::relinkGlobalsObject(ObjBuf->getMemBufferRef(),
181 VarNameToGlobalInfo);
void char * KernelName
Definition CompilerInterfaceDevice.cpp:55
#define PROTEUS_DBG(x)
Definition Debug.h:9
#define TIMESCOPE(...)
Definition TimeTracing.h:66
void saveToFile(llvm::StringRef Filepath, T &&Data)
Definition Utils.h:26
void dump(T &OS) const
Definition Config.h:280
Definition CompilationTask.h:19
CompilationTask & operator=(const CompilationTask &)=delete
HashT getHashValue() const
Definition CompilationTask.h:111
CompilationTask(CompilationTask &&) noexcept=default
CompilationTask(const CompilationTask &)=delete
std::unique_ptr< MemoryBuffer > compile()
Definition CompilationTask.h:113
CompilationTask(MemoryBufferRef Bitcode, HashT HashValue, const std::string &KernelName, std::string &Suffix, dim3 BlockDim, dim3 GridDim, const SmallVector< RuntimeConstant > &RCVec, const SmallVector< std::pair< std::string, StringRef > > &LambdaCalleeInfo, const std::unordered_map< std::string, GlobalVarInfo > &VarNameToGlobalInfo, const SmallPtrSet< void *, 8 > &GlobalLinkedBinaries, const std::string &DeviceArch, const CodeGenerationConfig &CGConfig, bool DumpIR, bool RelinkGlobalsByCopy)
Definition CompilationTask.h:70
static Config & get()
Definition Config.h:334
std::string toString() const
Definition Hashing.h:30
static void trace(llvm::StringRef Msg)
Definition Logger.h:30
static void logfile(const std::string &Filename, T &&Data)
Definition Logger.h:33
Definition TimeTracing.h:33
Definition CompiledLibrary.h:7
Definition MemoryCache.h:27
void optimizeIR(Module &M, StringRef Arch, const OptimizationPipelineConfig &OptConfig)
Definition CoreLLVM.h:205
void reportFatalError(const llvm::Twine &Reason, const char *FILE, unsigned Line)
Definition Error.cpp:14
CodegenOption
Definition Config.h:16
std::unique_ptr< MemoryBuffer > codegenObject(Module &M, StringRef DeviceArch, SmallPtrSetImpl< void * > &GlobalLinkedBinaries, CodegenOption CGOption=CodegenOption::RTC)
Definition CoreLLVMCUDA.h:176
std::string toString(CodegenOption Option)
Definition Config.h:28
Definition CoreLLVM.h:187