Proteus
Programmable JIT compilation and optimization for C/C++ using LLVM
Loading...
Searching...
No Matches
CompilationTask.hpp
Go to the documentation of this file.
1#ifndef PROTEUS_COMPILATION_TASK_HPP
2#define PROTEUS_COMPILATION_TASK_HPP
3
4#include <llvm/Bitcode/BitcodeReader.h>
5#include <llvm/Bitcode/BitcodeWriter.h>
6
8#include "proteus/Config.hpp"
10#include "proteus/Debug.h"
11#include "proteus/Hashing.hpp"
12#include "proteus/Utils.h"
13
14namespace proteus {
15
16using namespace llvm;
17
19private:
20 MemoryBufferRef Bitcode;
21 HashT HashValue;
22 std::string KernelName;
23 std::string Suffix;
24 dim3 BlockDim;
25 dim3 GridDim;
26 SmallVector<RuntimeConstant> RCVec;
27 SmallVector<std::pair<std::string, StringRef>> LambdaCalleeInfo;
28 std::unordered_map<std::string, GlobalVarInfo> VarNameToGlobalInfo;
29 SmallPtrSet<void *, 8> GlobalLinkedBinaries;
30 std::string DeviceArch;
31 CodegenOption CGOption;
32 bool DumpIR;
33 bool RelinkGlobalsByCopy;
34 int MinBlocksPerSM;
35 bool SpecializeArgs;
36 bool SpecializeDims;
37 bool SpecializeDimsAssume;
38 bool SpecializeLaunchBounds;
39 char OptLevel;
40 unsigned CodegenOptLevel;
41 std::optional<std::string> PassPipeline;
42
43 std::unique_ptr<Module> cloneKernelModule(LLVMContext &Ctx) {
44 auto ClonedModule = parseBitcodeFile(Bitcode, Ctx);
45 if (auto E = ClonedModule.takeError()) {
46 PROTEUS_FATAL_ERROR("Failed to parse bitcode" + toString(std::move(E)));
47 }
48
49 return std::move(*ClonedModule);
50 }
51
52 void invokeOptimizeIR(Module &M) {
53#if PROTEUS_ENABLE_CUDA
54 // For CUDA we always run the optimization pipeline.
55 if (!PassPipeline)
56 optimizeIR(M, DeviceArch, OptLevel, CodegenOptLevel);
57 else
58 optimizeIR(M, DeviceArch, PassPipeline.value(), CodegenOptLevel);
59#elif PROTEUS_ENABLE_HIP
60 // For HIP we run the optimization pipeline only for Serial codegen. HIP RTC
61 // and Parallel codegen, which uses LTO, invoke optimization internally.
62 // TODO: Move optimizeIR inside the codegen routines?
63 if (CGOption == CodegenOption::Serial) {
64 if (!PassPipeline) {
65 optimizeIR(M, DeviceArch, OptLevel, CodegenOptLevel);
66 } else {
67 optimizeIR(M, DeviceArch, PassPipeline.value(), CodegenOptLevel);
68 }
69 }
70#else
71#error "JitEngineDevice requires PROTEUS_ENABLE_CUDA or PROTEUS_ENABLE_HIP"
72#endif
73 }
74
75public:
77 MemoryBufferRef Bitcode, HashT HashValue, const std::string &KernelName,
78 std::string &Suffix, dim3 BlockDim, dim3 GridDim,
79 const SmallVector<RuntimeConstant> &RCVec,
80 const SmallVector<std::pair<std::string, StringRef>> &LambdaCalleeInfo,
81 const std::unordered_map<std::string, GlobalVarInfo> &VarNameToGlobalInfo,
82 const SmallPtrSet<void *, 8> &GlobalLinkedBinaries,
83 const std::string &DeviceArch, const CodeGenerationConfig &CGConfig,
84 bool DumpIR, bool RelinkGlobalsByCopy)
// Each argument is stored in the member of the same name; the remaining
// members (codegen option, specialization flags, optimization levels and the
// optional pass pipeline) are read from CGConfig.
85 : Bitcode(Bitcode), HashValue(HashValue), KernelName(KernelName),
86 Suffix(Suffix), BlockDim(BlockDim), GridDim(GridDim), RCVec(RCVec),
87 LambdaCalleeInfo(LambdaCalleeInfo),
88 VarNameToGlobalInfo(VarNameToGlobalInfo),
89 GlobalLinkedBinaries(GlobalLinkedBinaries), DeviceArch(DeviceArch),
90 CGOption(CGConfig.codeGenOption()), DumpIR(DumpIR),
91 RelinkGlobalsByCopy(RelinkGlobalsByCopy),
// The product of the three block dimensions is what gets passed to
// CGConfig.minBlocksPerSM().
92 MinBlocksPerSM(
93 CGConfig.minBlocksPerSM(BlockDim.x * BlockDim.y * BlockDim.z)),
94 SpecializeArgs(CGConfig.specializeArgs()),
95 SpecializeDims(CGConfig.specializeDims()),
96 SpecializeDimsAssume(CGConfig.specializeDimsAssume()),
97 SpecializeLaunchBounds(CGConfig.specializeLaunchBounds()),
98 OptLevel(CGConfig.optLevel()),
99 CodegenOptLevel(CGConfig.codeGenOptLevel()),
100 PassPipeline(CGConfig.optPipeline()) {
// When Config's ProteusTraceOutput is at least 1, emit a single trace line
// holding the kernel name followed by the CGConfig dump.
101 if (Config::get().ProteusTraceOutput >= 1) {
102 llvm::SmallString<128> S;
103 llvm::raw_svector_ostream OS(S);
104 OS << "[KernelConfig] ID:" << KernelName << " ";
105 CGConfig.dump(OS);
106 OS << "\n";
107 Logger::trace(OS.str());
108 }
109 }
110
111 // Delete copy operations.
114
115 // Use default move operations.
// Both are declared noexcept and defaulted, so a move is performed member by
// member.
116 CompilationTask(CompilationTask &&) noexcept = default;
117 CompilationTask &operator=(CompilationTask &&) noexcept = default;
118
119 HashT getHashValue() const { return HashValue; }
120
121 std::unique_ptr<MemoryBuffer> compile() {
122 struct TimerRAII {
123 std::chrono::high_resolution_clock::time_point Start, End;
124 HashT HashValue;
125 TimerRAII(HashT HashValue) : HashValue(HashValue) {
126 if (Config::get().ProteusDebugOutput) {
127 Start = std::chrono::high_resolution_clock::now();
128 }
129 }
130
131 ~TimerRAII() {
132 if (Config::get().ProteusDebugOutput) {
133 auto End = std::chrono::high_resolution_clock::now();
134 auto Duration = End - Start;
135 auto Milliseconds =
136 std::chrono::duration_cast<std::chrono::milliseconds>(Duration)
137 .count();
138 Logger::logs("proteus")
139 << "Compiled HashValue " << HashValue.toString() << " for "
140 << Milliseconds << "ms\n";
141 }
142 }
143 } Timer{HashValue};
144
145 LLVMContext Ctx;
146 std::unique_ptr<Module> M = cloneKernelModule(Ctx);
147
148 std::string KernelMangled = (KernelName + Suffix);
149
150 PROTEUS_DBG(Logger::logfile(HashValue.toString() + ".input.ll", *M));
151
152 proteus::specializeIR(*M, KernelName, Suffix, BlockDim, GridDim, RCVec,
153 LambdaCalleeInfo, SpecializeArgs, SpecializeDims,
154 SpecializeDimsAssume, SpecializeLaunchBounds,
155 MinBlocksPerSM);
156
157 PROTEUS_DBG(Logger::logfile(HashValue.toString() + ".specialized.ll", *M));
158
159 replaceGlobalVariablesWithPointers(*M, VarNameToGlobalInfo);
160
161 invokeOptimizeIR(*M);
162 if (Config::get().ProteusTraceOutput == 2) {
163 llvm::outs() << "LLVM IR module post optimization " << *M << "\n";
164 }
165 if (DumpIR) {
166 const auto CreateDumpDirectory = []() {
167 const std::string DumpDirectory = ".proteus-dump";
168 std::filesystem::create_directory(DumpDirectory);
169 return DumpDirectory;
170 };
171
172 static const std::string DumpDirectory = CreateDumpDirectory();
173
174 saveToFile(DumpDirectory + "/device-jit-" + HashValue.toString() + ".ll",
175 *M);
176 }
177
178 auto ObjBuf =
179 proteus::codegenObject(*M, DeviceArch, GlobalLinkedBinaries, CGOption);
180
181 if (!RelinkGlobalsByCopy)
182 proteus::relinkGlobalsObject(ObjBuf->getMemBufferRef(),
183 VarNameToGlobalInfo);
184
185 return ObjBuf;
186 }
187};
188
189} // namespace proteus
190
191#endif
void char * KernelName
Definition CompilerInterfaceDevice.cpp:52
#define PROTEUS_DBG(x)
Definition Debug.h:9
#define PROTEUS_FATAL_ERROR(x)
Definition Error.h:7
void saveToFile(llvm::StringRef Filepath, T &&Data)
Definition Utils.h:23
Definition Config.hpp:130
void dump(T &OS) const
Definition Config.hpp:250
Definition CompilationTask.hpp:18
CompilationTask & operator=(const CompilationTask &)=delete
HashT getHashValue() const
Definition CompilationTask.hpp:119
CompilationTask(CompilationTask &&) noexcept=default
CompilationTask(const CompilationTask &)=delete
std::unique_ptr< MemoryBuffer > compile()
Definition CompilationTask.hpp:121
CompilationTask(MemoryBufferRef Bitcode, HashT HashValue, const std::string &KernelName, std::string &Suffix, dim3 BlockDim, dim3 GridDim, const SmallVector< RuntimeConstant > &RCVec, const SmallVector< std::pair< std::string, StringRef > > &LambdaCalleeInfo, const std::unordered_map< std::string, GlobalVarInfo > &VarNameToGlobalInfo, const SmallPtrSet< void *, 8 > &GlobalLinkedBinaries, const std::string &DeviceArch, const CodeGenerationConfig &CGConfig, bool DumpIR, bool RelinkGlobalsByCopy)
Definition CompilationTask.hpp:76
static Config & get()
Definition Config.hpp:304
Definition Hashing.hpp:20
std::string toString() const
Definition Hashing.hpp:28
static void trace(llvm::StringRef Msg)
Definition Logger.hpp:30
static void logfile(const std::string &Filename, T &&Data)
Definition Logger.hpp:33
Definition TimeTracing.hpp:36
Definition Helpers.h:138
Definition StorageCache.cpp:24
void optimizeIR(Module &M, StringRef Arch, char OptLevel, unsigned CodegenOptLevel)
Definition CoreLLVM.hpp:182
CodegenOption
Definition Config.hpp:14
std::unique_ptr< MemoryBuffer > codegenObject(Module &M, StringRef DeviceArch, SmallPtrSetImpl< void * > &GlobalLinkedBinaries, CodegenOption CGOption=CodegenOption::RTC)
Definition CoreLLVMCUDA.hpp:160
std::string toString(CodegenOption Option)
Definition Config.hpp:26