1#ifndef PROTEUS_COMPILATION_TASK_HPP
2#define PROTEUS_COMPILATION_TASK_HPP
4#include <llvm/Bitcode/BitcodeReader.h>
5#include <llvm/Bitcode/BitcodeWriter.h>
// Data members of the compilation task. NOTE(review): this listing is
// fragmentary; the constructor also initializes members (Suffix, BlockDim,
// GridDim, UseRTC, DumpIR, SpecializeArgs, SpecializeDims) whose declarations
// are not shown here.

// Non-owning reference to the kernel's module; cloneKernelModule() serializes
// it to bitcode.
19 std::reference_wrapper<const Module> KernelModule;
// Kernel name; concatenated with Suffix to build the mangled kernel name.
21 std::string KernelName;
// NOTE(review): presumably the indices of the runtime-constant arguments --
// confirm against the caller.
25 SmallVector<int32_t> RCIndices;
// Runtime constant values; forwarded together with LambdaCalleeInfo and the
// Specialize* flags to a call whose callee is not visible in this listing.
26 SmallVector<RuntimeConstant> RCVec;
// Pairs of (std::string, StringRef). NOTE(review): the StringRef does not own
// its characters, so the referenced storage must outlive this task -- confirm.
27 SmallVector<std::pair<std::string, StringRef>> LambdaCalleeInfo;
// Maps a name to a pointer; passed to replaceGlobalVariablesWithPointers() and
// relinkGlobalsObject().
28 std::unordered_map<std::string, const void *> VarNameToDevPtr;
// Set of opaque pointers. NOTE(review): presumably forwarded to
// codegenObject() -- that call is not visible here.
29 SmallPtrSet<void *, 8> GlobalLinkedBinaries;
// NOTE(review): presumably the target device architecture string used for
// optimization/codegen -- confirm.
30 std::string DeviceArch;
// When false, relinkGlobalsObject() is applied to the generated object buffer.
33 bool RelinkGlobalsByCopy;
// Forwarded as the last argument of the (not fully visible) specialization
// call.
36 bool SpecializeLaunchBounds;
// Creates a copy of KernelModule by writing it to an in-memory bitcode buffer
// and parsing that buffer back using the supplied context.
//
// Ctx: the LLVMContext handed to parseBitcodeFile for the parsed module.
// Returns: the std::unique_ptr<Module> produced by parseBitcodeFile.
//
// NOTE(review): the body of the error branch and the closing braces are
// missing from this listing.
38 std::unique_ptr<Module> cloneKernelModule(LLVMContext &Ctx) {
// Byte buffer (4096 bytes of inline storage) that receives the bitcode.
39 SmallVector<char, 4096> ModuleStr;
40 raw_svector_ostream OS(ModuleStr);
41 WriteBitcodeToFile(KernelModule, OS);
// Non-owning view over the serialized bytes; ModuleStr must stay alive while
// the view is in use.
42 StringRef ModuleStrRef = StringRef{ModuleStr.data(), ModuleStr.size()};
// Wrap the bytes as a memory buffer reference with an empty identifier.
43 auto BufferRef = MemoryBufferRef{ModuleStrRef,
""};
44 auto ClonedModule = parseBitcodeFile(BufferRef, Ctx);
// Error path: taken when takeError() reports a failure; how the error is
// handled is not visible in this listing.
45 if (
auto E = ClonedModule.takeError()) {
// Move the parsed module out of the parse result and hand it to the caller.
49 return std::move(*ClonedModule);
// Constructor parameter list and member-initializer list.
// NOTE(review): the line carrying the constructor name and some initializers
// (e.g. for KernelModule, KernelName, RCIndices, RCVec, LambdaCalleeInfo) are
// missing from this listing.
//
// Container and string arguments are taken by const reference, except Suffix,
// which is taken by non-const reference. BlockDim, GridDim and the boolean
// flags are taken by value.
54 const Module &Mod,
HashT HashValue,
const std::string &KernelName,
55 std::string &Suffix, dim3 BlockDim, dim3 GridDim,
56 const SmallVector<int32_t> &RCIndices,
57 const SmallVector<RuntimeConstant> &RCVec,
58 const SmallVector<std::pair<std::string, StringRef>> &LambdaCalleeInfo,
59 const std::unordered_map<std::string, const void *> &VarNameToDevPtr,
60 const SmallPtrSet<void *, 8> &GlobalLinkedBinaries,
61 const std::string &DeviceArch,
bool UseRTC,
bool DumpIR,
62 bool RelinkGlobalsByCopy,
bool SpecializeArgs,
bool SpecializeDims,
63 bool SpecializeLaunchBounds)
// Each visible initializer sets a member from the parameter of the same name;
// the constructor body is empty.
65 Suffix(Suffix), BlockDim(BlockDim), GridDim(GridDim),
67 VarNameToDevPtr(VarNameToDevPtr),
68 GlobalLinkedBinaries(GlobalLinkedBinaries), DeviceArch(DeviceArch),
69 UseRTC(UseRTC), DumpIR(DumpIR),
70 RelinkGlobalsByCopy(RelinkGlobalsByCopy),
71 SpecializeArgs(SpecializeArgs), SpecializeDims(SpecializeDims),
72 SpecializeLaunchBounds(SpecializeLaunchBounds) {}
// NOTE(review): fragment of a function body whose signature is not part of
// this listing -- presumably compile(). Several statements and the matching
// #endif/#else lines are missing.

// Debug builds only: record the start time for the timing report below.
85#if PROTEUS_ENABLE_DEBUG
86 auto Start = std::chrono::high_resolution_clock::now();
// Obtain a separate copy of the kernel module via cloneKernelModule().
90 std::unique_ptr<Module> M = cloneKernelModule(Ctx);
// The mangled kernel name is the kernel name followed by the suffix.
92 std::string KernelMangled = (
KernelName + Suffix);
// Trailing arguments of a call whose beginning is not visible here.
95 RCVec, LambdaCalleeInfo, SpecializeArgs,
96 SpecializeDims, SpecializeLaunchBounds);
98 replaceGlobalVariablesWithPointers(*M, VarNameToDevPtr);
// Backend-specific section; the code for each backend is not visible.
// NOTE(review): the #error is presumably under an #else that is not shown, so
// the build fails when neither CUDA nor HIP is enabled.
105#if PROTEUS_ENABLE_CUDA
107#elif PROTEUS_ENABLE_HIP
111#error "JitEngineDevice requires PROTEUS_ENABLE_CUDA or PROTEUS_ENABLE_HIP"
// Helper lambda: creates the ".proteus-dump" directory and returns its name.
115 const auto CreateDumpDirectory = []() {
116 const std::string DumpDirectory =
".proteus-dump";
117 std::filesystem::create_directory(DumpDirectory);
118 return DumpDirectory;
// Static initialization: the lambda is invoked once, not on every pass through
// this code.
121 static const std::string DumpDirectory = CreateDumpDirectory();
// When globals are not relinked by copy, relinkGlobalsObject() is applied to
// the object buffer with the name-to-pointer map.
130 if (!RelinkGlobalsByCopy)
131 proteus::relinkGlobalsObject(ObjBuf->getMemBufferRef(), VarNameToDevPtr);
// Debug builds only: compute the elapsed time since Start, convert it to
// milliseconds and emit it (the start of the output statement is not visible).
133#if PROTEUS_ENABLE_DEBUG
134 auto End = std::chrono::high_resolution_clock::now();
135 auto Duration = End - Start;
137 std::chrono::duration_cast<std::chrono::milliseconds>(Duration).count();
139 <<
" for " << Milliseconds <<
"ms\n";
void char int32_t * RCIndices
Definition CompilerInterfaceDevice.cpp:51
void char * KernelName
Definition CompilerInterfaceDevice.cpp:50
#define PROTEUS_FATAL_ERROR(x)
Definition Error.h:4
void saveToFile(llvm::StringRef Filepath, T &&Data)
Definition Utils.h:23
Definition CompilationTask.hpp:17
CompilationTask & operator=(const CompilationTask &)=delete
CompilationTask(const Module &Mod, HashT HashValue, const std::string &KernelName, std::string &Suffix, dim3 BlockDim, dim3 GridDim, const SmallVector< int32_t > &RCIndices, const SmallVector< RuntimeConstant > &RCVec, const SmallVector< std::pair< std::string, StringRef > > &LambdaCalleeInfo, const std::unordered_map< std::string, const void * > &VarNameToDevPtr, const SmallPtrSet< void *, 8 > &GlobalLinkedBinaries, const std::string &DeviceArch, bool UseRTC, bool DumpIR, bool RelinkGlobalsByCopy, bool SpecializeArgs, bool SpecializeDims, bool SpecializeLaunchBounds)
Definition CompilationTask.hpp:53
HashT getHashValue() const
Definition CompilationTask.hpp:82
CompilationTask(CompilationTask &&) noexcept=default
CompilationTask(const CompilationTask &)=delete
std::unique_ptr< MemoryBuffer > compile()
Definition CompilationTask.hpp:84
Definition Hashing.hpp:19
std::string toString() const
Definition Hashing.hpp:27
static llvm::raw_ostream & logs(const std::string &Name)
Definition Logger.hpp:18
Definition JitEngine.cpp:20
void optimizeIR(Module &M, StringRef Arch, char OptLevel, unsigned CodegenOptLevel)
Definition CoreLLVM.hpp:147
std::unique_ptr< MemoryBuffer > codegenObject(Module &M, StringRef DeviceArch, SmallPtrSetImpl< void * > &GlobalLinkedBinaries, bool UseRTC=true)
Definition CoreLLVMCUDA.hpp:148