1#ifndef PROTEUS_COMPILATION_TASK_HPP
2#define PROTEUS_COMPILATION_TASK_HPP
4#include <llvm/Bitcode/BitcodeReader.h>
5#include <llvm/Bitcode/BitcodeWriter.h>
// Data members of the compilation task (only some of the declarations are
// visible in this listing). Notes below describe how each member is used by
// the code that is visible in this file.

// Bitcode buffer that cloneKernelModule() hands to parseBitcodeFile().
19 MemoryBufferRef Bitcode;
// Kernel name; concatenated with Suffix to build KernelMangled and also
// passed to specializeIR().
21 std::string KernelName;
// Runtime constants forwarded to specializeIR().
25 SmallVector<RuntimeConstant> RCVec;
// (string, StringRef) pairs forwarded to specializeIR().
// NOTE(review): the meaning of each pair element is not shown here.
26 SmallVector<std::pair<std::string, StringRef>> LambdaCalleeInfo;
// Map from name to pointer, passed to replaceGlobalVariablesWithPointers()
// and to relinkGlobalsObject().
// NOTE(review): presumably global-variable name -> device address; confirm.
27 std::unordered_map<std::string, const void *> VarNameToDevPtr;
// Set of opaque pointers copied in by the constructor; its use is not visible
// in this listing.
28 SmallPtrSet<void *, 8> GlobalLinkedBinaries;
// Architecture string passed to optimizeIR().
29 std::string DeviceArch;
// When false, compile() calls relinkGlobalsObject() on the object buffer.
32 bool RelinkGlobalsByCopy;
// Forwarded to specializeIR() as its last argument.
35 bool SpecializeLaunchBounds;
// Codegen optimization level passed to optimizeIR().
37 unsigned CodegenOptLevel;
// Optional pass pipeline string; its value() is passed to optimizeIR() in the
// pipeline-based call form.
38 std::optional<std::string> PassPipeline;
// Parses the stored Bitcode buffer into a new Module owned by the caller.
//
// Ctx is the LLVMContext handed to parseBitcodeFile() for the parsed module.
// Returns the module moved out of the parse result.
// NOTE(review): the body of the error branch and the closing braces are not
// visible in this listing, so how a parse failure is reported cannot be
// confirmed from here.
40 std::unique_ptr<Module> cloneKernelModule(LLVMContext &Ctx) {
41 auto ClonedModule = parseBitcodeFile(Bitcode, Ctx);
// Take the error (if any) out of the parse result before dereferencing it.
42 if (
auto E = ClonedModule.takeError()) {
// The result is dereferenced to an lvalue, so std::move is needed to transfer
// the unique_ptr out of it.
46 return std::move(*ClonedModule);
// Runs optimizeIR() on M for DeviceArch, with the code selected at compile
// time by PROTEUS_ENABLE_CUDA / PROTEUS_ENABLE_HIP.
//
// Two call forms appear under each backend: one driven by OptLevel and one
// driven by PassPipeline.value(). Both also pass CodegenOptLevel.
// NOTE(review): the conditions that choose between the two forms (and any
// extra preprocessor guards in the HIP branch) are not visible in this
// listing; presumably the PassPipeline form is used only when PassPipeline
// holds a value -- confirm against the full source.
49 void invokeOptimizeIR(Module &M) {
50#if PROTEUS_ENABLE_CUDA
// OptLevel-based optimization.
53 optimizeIR(M, DeviceArch, OptLevel, CodegenOptLevel);
// Explicit pass-pipeline optimization; value() requires PassPipeline to be set.
55 optimizeIR(M, DeviceArch, PassPipeline.value(), CodegenOptLevel);
56#elif PROTEUS_ENABLE_HIP
// Same two call forms for the HIP build.
62 optimizeIR(M, DeviceArch, OptLevel, CodegenOptLevel);
64 optimizeIR(M, DeviceArch, PassPipeline.value(), CodegenOptLevel);
// Build-time failure; presumably reached when neither backend macro is
// enabled (the enclosing #else is not visible here).
68#error "JitEngineDevice requires PROTEUS_ENABLE_CUDA or PROTEUS_ENABLE_HIP"
// Constructor parameter list (the line carrying the constructor name is not
// visible in this listing). Containers, strings and the optional are taken by
// reference and copied into the members; Suffix is taken by non-const
// reference. Each parameter visible in the initializer list below initializes
// the member of the same name.
74 MemoryBufferRef Bitcode,
HashT HashValue,
const std::string &KernelName,
75 std::string &Suffix, dim3 BlockDim, dim3 GridDim,
76 const SmallVector<RuntimeConstant> &RCVec,
77 const SmallVector<std::pair<std::string, StringRef>> &LambdaCalleeInfo,
78 const std::unordered_map<std::string, const void *> &VarNameToDevPtr,
79 const SmallPtrSet<void *, 8> &GlobalLinkedBinaries,
80 const std::string &DeviceArch,
CodegenOption CGOption,
bool DumpIR,
81 bool RelinkGlobalsByCopy,
bool SpecializeArgs,
bool SpecializeDims,
82 bool SpecializeLaunchBounds,
char OptLevel,
unsigned CodegenOptLevel,
83 const std::optional<std::string> &PassPipeline)
// Member initializer list (its first line, which presumably initializes
// Bitcode, HashValue and KernelName, is not visible here). The constructor
// body is empty.
85 Suffix(Suffix), BlockDim(BlockDim), GridDim(GridDim), RCVec(RCVec),
86 LambdaCalleeInfo(LambdaCalleeInfo), VarNameToDevPtr(VarNameToDevPtr),
87 GlobalLinkedBinaries(GlobalLinkedBinaries), DeviceArch(DeviceArch),
88 CGOption(CGOption), DumpIR(DumpIR),
89 RelinkGlobalsByCopy(RelinkGlobalsByCopy),
90 SpecializeArgs(SpecializeArgs), SpecializeDims(SpecializeDims),
91 SpecializeLaunchBounds(SpecializeLaunchBounds), OptLevel(OptLevel),
92 CodegenOptLevel(CodegenOptLevel), PassPipeline(PassPipeline) {}
// Body of the compile step (the function signature and several lines are not
// visible in this listing). Visible sequence: clone the module, specialize
// the IR, replace globals with pointers, optimize, optionally print/dump, and
// relink globals in the produced object buffer.
// NOTE(review): the declarations of Ctx and ObjBuf, the code generation call
// and the return statement are not visible here.

// Debug builds record a start time so the elapsed time can be logged below.
105#if PROTEUS_ENABLE_DEBUG
106 auto Start = std::chrono::high_resolution_clock::now();
// Fresh copy of the kernel module parsed from the stored bitcode.
110 std::unique_ptr<Module> M = cloneKernelModule(Ctx);
// Name formed by appending Suffix to KernelName.
112 std::string KernelMangled = (
KernelName + Suffix);
// Specialize the module using the launch dimensions, runtime constants and
// the Specialize* flags.
114 proteus::specializeIR(*M,
KernelName, Suffix, BlockDim, GridDim, RCVec,
115 LambdaCalleeInfo, SpecializeArgs, SpecializeDims,
116 SpecializeLaunchBounds);
// Rewrite global variables in the module using the VarNameToDevPtr map.
118 replaceGlobalVariablesWithPointers(*M, VarNameToDevPtr);
// Backend-specific optimization (see invokeOptimizeIR).
120 invokeOptimizeIR(*M);
// Prints the optimized module to llvm::outs().
// NOTE(review): the guard around this print is not visible in this listing.
122 llvm::outs() <<
"LLVM IR module post optimization " << *M <<
"\n";
// Helper that creates the ".proteus-dump" directory and returns its name.
125 const auto CreateDumpDirectory = []() {
126 const std::string DumpDirectory =
".proteus-dump";
127 std::filesystem::create_directory(DumpDirectory);
128 return DumpDirectory;
// Function-local static: the helper above runs only the first time execution
// reaches this declaration, and the directory name is reused afterwards.
131 static const std::string DumpDirectory = CreateDumpDirectory();
// When globals are not relinked by copy, relink them in the object buffer
// using the VarNameToDevPtr map.
140 if (!RelinkGlobalsByCopy)
141 proteus::relinkGlobalsObject(ObjBuf->getMemBufferRef(), VarNameToDevPtr);
// Debug builds compute the elapsed time since Start and log it.
143#if PROTEUS_ENABLE_DEBUG
144 auto End = std::chrono::high_resolution_clock::now();
145 auto Duration = End - Start;
// Elapsed time converted to a millisecond count (the left-hand side of this
// statement, presumably the Milliseconds declaration, is not visible here).
147 std::chrono::duration_cast<std::chrono::milliseconds>(Duration).count();
// Tail of the log statement that reports the elapsed milliseconds.
149 <<
" for " << Milliseconds <<
"ms\n";
void char * KernelName
Definition CompilerInterfaceDevice.cpp:50
#define PROTEUS_FATAL_ERROR(x)
Definition Error.h:7
void saveToFile(llvm::StringRef Filepath, T &&Data)
Definition Utils.h:23
Definition CompilationTask.hpp:17
CompilationTask & operator=(const CompilationTask &)=delete
HashT getHashValue() const
Definition CompilationTask.hpp:102
CompilationTask(CompilationTask &&) noexcept=default
CompilationTask(MemoryBufferRef Bitcode, HashT HashValue, const std::string &KernelName, std::string &Suffix, dim3 BlockDim, dim3 GridDim, const SmallVector< RuntimeConstant > &RCVec, const SmallVector< std::pair< std::string, StringRef > > &LambdaCalleeInfo, const std::unordered_map< std::string, const void * > &VarNameToDevPtr, const SmallPtrSet< void *, 8 > &GlobalLinkedBinaries, const std::string &DeviceArch, CodegenOption CGOption, bool DumpIR, bool RelinkGlobalsByCopy, bool SpecializeArgs, bool SpecializeDims, bool SpecializeLaunchBounds, char OptLevel, unsigned CodegenOptLevel, const std::optional< std::string > &PassPipeline)
Definition CompilationTask.hpp:73
CompilationTask(const CompilationTask &)=delete
std::unique_ptr< MemoryBuffer > compile()
Definition CompilationTask.hpp:104
static Config & get()
Definition Config.hpp:114
Definition Hashing.hpp:20
std::string toString() const
Definition Hashing.hpp:28
static llvm::raw_ostream & logs(const std::string &Name)
Definition Logger.hpp:19
Definition BuiltinsCUDA.cpp:4
void optimizeIR(Module &M, StringRef Arch, char OptLevel, unsigned CodegenOptLevel)
Definition CoreLLVM.hpp:182
CodegenOption
Definition Config.hpp:11
std::unique_ptr< MemoryBuffer > codegenObject(Module &M, StringRef DeviceArch, SmallPtrSetImpl< void * > &GlobalLinkedBinaries, CodegenOption CGOption=CodegenOption::RTC)
Definition CoreLLVMCUDA.hpp:160
std::string toString(CodegenOption Option)
Definition Config.hpp:23