Proteus
Programmable JIT compilation and optimization for C/C++ using LLVM
Loading...
Searching...
No Matches
CompilationTask.h
Go to the documentation of this file.
1#ifndef PROTEUS_COMPILATION_TASK_H
2#define PROTEUS_COMPILATION_TASK_H
3
10#include "proteus/impl/Utils.h"
11
12#include <llvm/Bitcode/BitcodeReader.h>
13#include <llvm/Bitcode/BitcodeWriter.h>
14
15namespace proteus {
16
17using namespace llvm;
18
// Private state captured by the constructor; compile() reads it to
// specialize, optimize and lower one kernel.
20private:
// Bitcode that cloneKernelModule() parses into a fresh Module.
21 MemoryBufferRef Bitcode;
// Returned by getHashValue(); compile() also uses it to name log/dump files.
22 HashT HashValue;
// Kernel name and suffix, both forwarded to specializeIR().
23 std::string KernelName;
24 std::string Suffix;
// Launch dimensions forwarded to specializeIR().
25 dim3 BlockDim;
26 dim3 GridDim;
// Runtime constants and lambda callee info forwarded to specializeIR().
27 SmallVector<RuntimeConstant> RCVec;
28 SmallVector<std::pair<std::string, StringRef>> LambdaCalleeInfo;
// Passed to replaceGlobalVariablesWithPointers() and relinkGlobalsObject().
29 std::unordered_map<std::string, GlobalVarInfo> VarNameToGlobalInfo;
// Passed to codegenObject().
30 SmallPtrSet<void *, 8> GlobalLinkedBinaries;
// Passed to optimizeIR() and codegenObject().
31 std::string DeviceArch;
// Initialized from CGConfig.codeGenOption(); passed to codegenObject() and,
// in the HIP build, decides whether invokeOptimizeIR() calls optimizeIR().
32 CodegenOption CGOption;
// When true, compile() saves the module IR (after invokeOptimizeIR()) under
// the ".proteus-dump" directory.
33 bool DumpIR;
// When false, compile() calls relinkGlobalsObject() on the generated object.
34 bool RelinkGlobalsByCopy;
// Initialized from CGConfig.minBlocksPerSM() using the product of the
// BlockDim components; forwarded to specializeIR().
35 int MinBlocksPerSM;
// Specialization switches read from CGConfig and forwarded to specializeIR().
36 bool SpecializeArgs;
37 bool SpecializeDims;
38 bool SpecializeDimsRange;
39 bool SpecializeLaunchBounds;
41
42 std::unique_ptr<Module> cloneKernelModule(LLVMContext &Ctx) {
43 TIMESCOPE(CompilationTask, cloneKernelModule);
44 auto ClonedModule = parseBitcodeFile(Bitcode, Ctx);
45 if (auto E = ClonedModule.takeError()) {
46 reportFatalError("Failed to parse bitcode" + toString(std::move(E)));
47 }
48
49 return std::move(*ClonedModule);
50 }
51
52 void invokeOptimizeIR(Module &M) {
53 TIMESCOPE(CompilationTask, invokeOptimizeIR);
54#if PROTEUS_ENABLE_CUDA
55 // For CUDA we always run the optimization pipeline.
56 optimizeIR(M, DeviceArch, OptConfig);
57#elif PROTEUS_ENABLE_HIP
58 // For HIP we run the optimization pipeline here only for Serial codegen.
59 // Parallel codegen forwards custom pipelines to LTO; HIP RTC invokes
60 // optimization internally.
61 // TODO: Move optimizeIR inside the codegen routines?
62 if (CGOption == CodegenOption::Serial)
63 optimizeIR(M, DeviceArch, OptConfig);
64#else
65#error "JitEngineDevice requires PROTEUS_ENABLE_CUDA or PROTEUS_ENABLE_HIP"
66#endif
67 }
68
69public:
// Constructor parameters and body: stores the arguments in the members of
// the same name and derives CGOption, MinBlocksPerSM, the Specialize* flags
// and OptConfig from CGConfig.
// NOTE(review): Suffix is taken by non-const reference but is only copied
// into the member below — confirm whether `const std::string &` was intended.
71 MemoryBufferRef Bitcode, HashT HashValue, const std::string &KernelName,
72 std::string &Suffix, dim3 BlockDim, dim3 GridDim,
73 const SmallVector<RuntimeConstant> &RCVec,
74 const SmallVector<std::pair<std::string, StringRef>> &LambdaCalleeInfo,
75 const std::unordered_map<std::string, GlobalVarInfo> &VarNameToGlobalInfo,
76 const SmallPtrSet<void *, 8> &GlobalLinkedBinaries,
77 const std::string &DeviceArch, const CodeGenerationConfig &CGConfig,
78 bool DumpIR, bool RelinkGlobalsByCopy)
79 : Bitcode(Bitcode), HashValue(HashValue), KernelName(KernelName),
80 Suffix(Suffix), BlockDim(BlockDim), GridDim(GridDim), RCVec(RCVec),
81 LambdaCalleeInfo(LambdaCalleeInfo),
82 VarNameToGlobalInfo(VarNameToGlobalInfo),
83 GlobalLinkedBinaries(GlobalLinkedBinaries), DeviceArch(DeviceArch),
84 CGOption(CGConfig.codeGenOption()), DumpIR(DumpIR),
85 RelinkGlobalsByCopy(RelinkGlobalsByCopy),
// The product of the three block dimensions is the argument passed to
// CGConfig.minBlocksPerSM().
86 MinBlocksPerSM(
87 CGConfig.minBlocksPerSM(BlockDim.x * BlockDim.y * BlockDim.z)),
88 SpecializeArgs(CGConfig.specializeArgs()),
89 SpecializeDims(CGConfig.specializeDims()),
90 SpecializeDimsRange(CGConfig.specializeDimsRange()),
91 SpecializeLaunchBounds(CGConfig.specializeLaunchBounds()),
92 OptConfig(CGConfig) {
// When Config::get().traceSpecializations() is true, emit one trace line:
// the kernel name followed by whatever CGConfig.dump() writes.
93 if (Config::get().traceSpecializations()) {
94 llvm::SmallString<128> S;
95 llvm::raw_svector_ostream OS(S);
96 OS << "[KernelConfig] ID:" << KernelName << " ";
97 CGConfig.dump(OS);
98 OS << "\n";
99 Logger::trace(OS.str());
100 }
101 }
102
103 // Delete copy operations.
106
107 // Use default move operations.
// Move construction and move assignment are compiler-generated (= default)
// and declared noexcept.
108 CompilationTask(CompilationTask &&) noexcept = default;
109 CompilationTask &operator=(CompilationTask &&) noexcept = default;
110
/// Returns the HashValue member, i.e. the hash this task was constructed with.
111 HashT getHashValue() const { return HashValue; }
112
/// Runs the JIT pipeline for this task, in this order: parse the bitcode into
/// a module, specializeIR(), replaceGlobalVariablesWithPointers(),
/// invokeOptimizeIR(), optional IR tracing/dumping, codegenObject(), and —
/// when RelinkGlobalsByCopy is false — relinkGlobalsObject().
///
/// \returns The buffer returned by codegenObject().
113 std::unique_ptr<MemoryBuffer> compile() {
// Local timer: when Config::get().ProteusDebugOutput is set, the constructor
// records a start time and the destructor logs the elapsed milliseconds.
// Timer is a local of compile(), so the destructor runs when compile() exits.
115 struct TimerRAII {
// NOTE(review): the End member is never written; the destructor declares a
// local `End` that shadows it.
116 std::chrono::high_resolution_clock::time_point Start, End;
117 HashT HashValue;
118 TimerRAII(HashT HashValue) : HashValue(HashValue) {
119 if (Config::get().ProteusDebugOutput) {
120 Start = std::chrono::high_resolution_clock::now();
121 }
122 }
123
124 ~TimerRAII() {
125 if (Config::get().ProteusDebugOutput) {
126 auto End = std::chrono::high_resolution_clock::now();
127 auto Duration = End - Start;
128 auto Milliseconds =
129 std::chrono::duration_cast<std::chrono::milliseconds>(Duration)
130 .count();
131 Logger::logs("proteus")
132 << "Compiled HashValue " << HashValue.toString() << " for "
133 << Milliseconds << "ms\n";
134 }
135 }
136 } Timer{HashValue};
137
// Each call parses the bitcode into a module that lives in this local context.
138 LLVMContext Ctx;
139 std::unique_ptr<Module> M = cloneKernelModule(Ctx);
140
// NOTE(review): KernelMangled is not referenced again in the visible body of
// this function — candidate for removal; confirm.
141 std::string KernelMangled = (KernelName + Suffix);
142
143 PROTEUS_DBG(Logger::logfile(HashValue.toString() + ".input.ll", *M));
144
145 proteus::specializeIR(*M, KernelName, Suffix, BlockDim, GridDim, RCVec,
146 LambdaCalleeInfo, SpecializeArgs, SpecializeDims,
147 SpecializeDimsRange, SpecializeLaunchBounds,
148 MinBlocksPerSM);
149
150 PROTEUS_DBG(Logger::logfile(HashValue.toString() + ".specialized.ll", *M));
151
152 replaceGlobalVariablesWithPointers(*M, VarNameToGlobalInfo);
153
154 invokeOptimizeIR(*M);
// Optional tracing: print the module to stdout after invokeOptimizeIR().
155 if (Config::get().traceIRDump()) {
156 llvm::outs() << "LLVM IR module post optimization " << *M << "\n";
157 }
// Optional dump: write the module to
// ".proteus-dump/device-jit-<hash>.ll".
158 if (DumpIR) {
159 const auto CreateDumpDirectory = []() {
160 const std::string DumpDirectory = ".proteus-dump";
161 std::filesystem::create_directory(DumpDirectory);
162 return DumpDirectory;
163 };
164
// Function-local static: CreateDumpDirectory() runs only the first time
// control reaches this declaration.
165 static const std::string DumpDirectory = CreateDumpDirectory();
166
167 saveToFile(DumpDirectory + "/device-jit-" + HashValue.toString() + ".ll",
168 *M);
169 }
170
// The CUDA call passes no OptConfig; the HIP call additionally passes
// OptConfig to codegenObject().
171#if PROTEUS_ENABLE_CUDA
172 auto ObjBuf =
173 proteus::codegenObject(*M, DeviceArch, GlobalLinkedBinaries, CGOption);
174#elif PROTEUS_ENABLE_HIP
175 auto ObjBuf = proteus::codegenObject(*M, DeviceArch, GlobalLinkedBinaries,
176 CGOption, OptConfig);
177#endif
178
// relinkGlobalsObject() is called on the generated object only when
// RelinkGlobalsByCopy is false.
179 if (!RelinkGlobalsByCopy)
180 proteus::relinkGlobalsObject(ObjBuf->getMemBufferRef(),
181 VarNameToGlobalInfo);
182
183 return ObjBuf;
184 }
185};
186
187} // namespace proteus
188
189#endif
void char * KernelName
Definition CompilerInterfaceDevice.cpp:55
#define PROTEUS_DBG(x)
Definition Debug.h:9
#define TIMESCOPE(...)
Definition TimeTracing.h:66
void saveToFile(llvm::StringRef Filepath, T &&Data)
Definition Utils.h:26
Definition Config.h:164
void dump(T &OS) const
Definition Config.h:280
Definition CompilationTask.h:19
CompilationTask & operator=(const CompilationTask &)=delete
HashT getHashValue() const
Definition CompilationTask.h:111
CompilationTask(CompilationTask &&) noexcept=default
CompilationTask(const CompilationTask &)=delete
std::unique_ptr< MemoryBuffer > compile()
Definition CompilationTask.h:113
CompilationTask(MemoryBufferRef Bitcode, HashT HashValue, const std::string &KernelName, std::string &Suffix, dim3 BlockDim, dim3 GridDim, const SmallVector< RuntimeConstant > &RCVec, const SmallVector< std::pair< std::string, StringRef > > &LambdaCalleeInfo, const std::unordered_map< std::string, GlobalVarInfo > &VarNameToGlobalInfo, const SmallPtrSet< void *, 8 > &GlobalLinkedBinaries, const std::string &DeviceArch, const CodeGenerationConfig &CGConfig, bool DumpIR, bool RelinkGlobalsByCopy)
Definition CompilationTask.h:70
static Config & get()
Definition Config.h:334
Definition Hashing.h:22
std::string toString() const
Definition Hashing.h:30
static void trace(llvm::StringRef Msg)
Definition Logger.h:30
static void logfile(const std::string &Filename, T &&Data)
Definition Logger.h:33
Definition TimeTracing.h:33
Definition CompiledLibrary.h:7
Definition MemoryCache.h:27
void optimizeIR(Module &M, StringRef Arch, const OptimizationPipelineConfig &OptConfig)
Definition CoreLLVM.h:205
void reportFatalError(const llvm::Twine &Reason, const char *FILE, unsigned Line)
Definition Error.cpp:14
CodegenOption
Definition Config.h:16
std::unique_ptr< MemoryBuffer > codegenObject(Module &M, StringRef DeviceArch, SmallPtrSetImpl< void * > &GlobalLinkedBinaries, CodegenOption CGOption=CodegenOption::RTC)
Definition CoreLLVMCUDA.h:176
std::string toString(CodegenOption Option)
Definition Config.h:28
Definition CoreLLVM.h:187