Proteus
Programmable JIT compilation and optimization for C/C++ using LLVM
Loading...
Searching...
No Matches
CompilationTask.hpp
Go to the documentation of this file.
1#ifndef PROTEUS_COMPILATION_TASK_HPP
2#define PROTEUS_COMPILATION_TASK_HPP
3
4#include <llvm/Bitcode/BitcodeReader.h>
5#include <llvm/Bitcode/BitcodeWriter.h>
6
9#include "proteus/Debug.h"
10#include "proteus/Hashing.hpp"
11#include "proteus/Utils.h"
12
13namespace proteus {
14
15using namespace llvm;
16
18private:
19 MemoryBufferRef Bitcode;
20 HashT HashValue;
21 std::string KernelName;
22 std::string Suffix;
23 dim3 BlockDim;
24 dim3 GridDim;
25 SmallVector<RuntimeConstant> RCVec;
26 SmallVector<std::pair<std::string, StringRef>> LambdaCalleeInfo;
27 std::unordered_map<std::string, const void *> VarNameToDevPtr;
28 SmallPtrSet<void *, 8> GlobalLinkedBinaries;
29 std::string DeviceArch;
30 CodegenOption CGOption;
31 bool DumpIR;
32 bool RelinkGlobalsByCopy;
33 bool SpecializeArgs;
34 bool SpecializeDims;
35 bool SpecializeLaunchBounds;
36 char OptLevel;
37 unsigned CodegenOptLevel;
38 std::optional<std::string> PassPipeline;
39
40 std::unique_ptr<Module> cloneKernelModule(LLVMContext &Ctx) {
41 auto ClonedModule = parseBitcodeFile(Bitcode, Ctx);
42 if (auto E = ClonedModule.takeError()) {
43 PROTEUS_FATAL_ERROR("Failed to parse bitcode" + toString(std::move(E)));
44 }
45
46 return std::move(*ClonedModule);
47 }
48
49 void invokeOptimizeIR(Module &M) {
50#if PROTEUS_ENABLE_CUDA
51 // For CUDA we always run the optimization pipeline.
52 if (!PassPipeline)
53 optimizeIR(M, DeviceArch, OptLevel, CodegenOptLevel);
54 else
55 optimizeIR(M, DeviceArch, PassPipeline.value(), CodegenOptLevel);
56#elif PROTEUS_ENABLE_HIP
57 // For HIP we run the optimization pipeline only for Serial codegen. HIP RTC
58 // and Parallel codegen, which uses LTO, invoke optimization internally.
59 // TODO: Move optimizeIR inside the codegen routines?
60 if (CGOption == CodegenOption::Serial) {
61 if (!PassPipeline) {
62 optimizeIR(M, DeviceArch, OptLevel, CodegenOptLevel);
63 } else {
64 optimizeIR(M, DeviceArch, PassPipeline.value(), CodegenOptLevel);
65 }
66 }
67#else
68#error "JitEngineDevice requires PROTEUS_ENABLE_CUDA or PROTEUS_ENABLE_HIP"
69#endif
70 }
71
72public:
74 MemoryBufferRef Bitcode, HashT HashValue, const std::string &KernelName,
75 std::string &Suffix, dim3 BlockDim, dim3 GridDim,
76 const SmallVector<RuntimeConstant> &RCVec,
77 const SmallVector<std::pair<std::string, StringRef>> &LambdaCalleeInfo,
78 const std::unordered_map<std::string, const void *> &VarNameToDevPtr,
79 const SmallPtrSet<void *, 8> &GlobalLinkedBinaries,
80 const std::string &DeviceArch, CodegenOption CGOption, bool DumpIR,
81 bool RelinkGlobalsByCopy, bool SpecializeArgs, bool SpecializeDims,
82 bool SpecializeLaunchBounds, char OptLevel, unsigned CodegenOptLevel,
83 const std::optional<std::string> &PassPipeline)
84 : Bitcode(Bitcode), HashValue(HashValue), KernelName(KernelName),
85 Suffix(Suffix), BlockDim(BlockDim), GridDim(GridDim), RCVec(RCVec),
86 LambdaCalleeInfo(LambdaCalleeInfo), VarNameToDevPtr(VarNameToDevPtr),
87 GlobalLinkedBinaries(GlobalLinkedBinaries), DeviceArch(DeviceArch),
88 CGOption(CGOption), DumpIR(DumpIR),
89 RelinkGlobalsByCopy(RelinkGlobalsByCopy),
90 SpecializeArgs(SpecializeArgs), SpecializeDims(SpecializeDims),
91 SpecializeLaunchBounds(SpecializeLaunchBounds), OptLevel(OptLevel),
92 CodegenOptLevel(CodegenOptLevel), PassPipeline(PassPipeline) {}
93
94 // Delete copy operations.
97
98 // Use default move operations.
99 CompilationTask(CompilationTask &&) noexcept = default;
100 CompilationTask &operator=(CompilationTask &&) noexcept = default;
101
102 HashT getHashValue() const { return HashValue; }
103
104 std::unique_ptr<MemoryBuffer> compile() {
105#if PROTEUS_ENABLE_DEBUG
106 auto Start = std::chrono::high_resolution_clock::now();
107#endif
108
109 LLVMContext Ctx;
110 std::unique_ptr<Module> M = cloneKernelModule(Ctx);
111
112 std::string KernelMangled = (KernelName + Suffix);
113
114 proteus::specializeIR(*M, KernelName, Suffix, BlockDim, GridDim, RCVec,
115 LambdaCalleeInfo, SpecializeArgs, SpecializeDims,
116 SpecializeLaunchBounds);
117
118 replaceGlobalVariablesWithPointers(*M, VarNameToDevPtr);
119
120 invokeOptimizeIR(*M);
121 if (Config::get().ProteusTraceOutput == 2) {
122 llvm::outs() << "LLVM IR module post optimization " << *M << "\n";
123 }
124 if (DumpIR) {
125 const auto CreateDumpDirectory = []() {
126 const std::string DumpDirectory = ".proteus-dump";
127 std::filesystem::create_directory(DumpDirectory);
128 return DumpDirectory;
129 };
130
131 static const std::string DumpDirectory = CreateDumpDirectory();
132
133 saveToFile(DumpDirectory + "/device-jit-" + HashValue.toString() + ".ll",
134 *M);
135 }
136
137 auto ObjBuf =
138 proteus::codegenObject(*M, DeviceArch, GlobalLinkedBinaries, CGOption);
139
140 if (!RelinkGlobalsByCopy)
141 proteus::relinkGlobalsObject(ObjBuf->getMemBufferRef(), VarNameToDevPtr);
142
143#if PROTEUS_ENABLE_DEBUG
144 auto End = std::chrono::high_resolution_clock::now();
145 auto Duration = End - Start;
146 auto Milliseconds =
147 std::chrono::duration_cast<std::chrono::milliseconds>(Duration).count();
148 Logger::logs("proteus") << "Compiled HashValue " << HashValue.toString()
149 << " for " << Milliseconds << "ms\n";
150#endif
151
152 return ObjBuf;
153 }
154};
155
156} // namespace proteus
157
158#endif
void char * KernelName
Definition CompilerInterfaceDevice.cpp:50
#define PROTEUS_FATAL_ERROR(x)
Definition Error.h:7
void saveToFile(llvm::StringRef Filepath, T &&Data)
Definition Utils.h:23
Definition CompilationTask.hpp:17
CompilationTask & operator=(const CompilationTask &)=delete
HashT getHashValue() const
Definition CompilationTask.hpp:102
CompilationTask(CompilationTask &&) noexcept=default
CompilationTask(MemoryBufferRef Bitcode, HashT HashValue, const std::string &KernelName, std::string &Suffix, dim3 BlockDim, dim3 GridDim, const SmallVector< RuntimeConstant > &RCVec, const SmallVector< std::pair< std::string, StringRef > > &LambdaCalleeInfo, const std::unordered_map< std::string, const void * > &VarNameToDevPtr, const SmallPtrSet< void *, 8 > &GlobalLinkedBinaries, const std::string &DeviceArch, CodegenOption CGOption, bool DumpIR, bool RelinkGlobalsByCopy, bool SpecializeArgs, bool SpecializeDims, bool SpecializeLaunchBounds, char OptLevel, unsigned CodegenOptLevel, const std::optional< std::string > &PassPipeline)
Definition CompilationTask.hpp:73
CompilationTask(const CompilationTask &)=delete
std::unique_ptr< MemoryBuffer > compile()
Definition CompilationTask.hpp:104
static Config & get()
Definition Config.hpp:114
Definition Hashing.hpp:20
std::string toString() const
Definition Hashing.hpp:28
static llvm::raw_ostream & logs(const std::string &Name)
Definition Logger.hpp:19
Definition Helpers.h:76
Definition BuiltinsCUDA.cpp:4
void optimizeIR(Module &M, StringRef Arch, char OptLevel, unsigned CodegenOptLevel)
Definition CoreLLVM.hpp:182
CodegenOption
Definition Config.hpp:11
std::unique_ptr< MemoryBuffer > codegenObject(Module &M, StringRef DeviceArch, SmallPtrSetImpl< void * > &GlobalLinkedBinaries, CodegenOption CGOption=CodegenOption::RTC)
Definition CoreLLVMCUDA.hpp:160
std::string toString(CodegenOption Option)
Definition Config.hpp:23