Proteus
Programmable JIT compilation and optimization for C/C++ using LLVM
Loading...
Searching...
No Matches
CompilationTask.hpp
Go to the documentation of this file.
1#ifndef PROTEUS_COMPILATION_TASK_HPP
2#define PROTEUS_COMPILATION_TASK_HPP
3
4#include <llvm/Bitcode/BitcodeReader.h>
5#include <llvm/Bitcode/BitcodeWriter.h>
6
9#include "proteus/Debug.h"
10#include "proteus/Hashing.hpp"
11#include "proteus/Utils.h"
12
13namespace proteus {
14
15using namespace llvm;
16
18private:
19 MemoryBufferRef Bitcode;
20 HashT HashValue;
21 std::string KernelName;
22 std::string Suffix;
23 dim3 BlockDim;
24 dim3 GridDim;
25 SmallVector<RuntimeConstant> RCVec;
26 SmallVector<std::pair<std::string, StringRef>> LambdaCalleeInfo;
27 std::unordered_map<std::string, const void *> VarNameToDevPtr;
28 SmallPtrSet<void *, 8> GlobalLinkedBinaries;
29 std::string DeviceArch;
30 CodegenOption CGOption;
31 bool DumpIR;
32 bool RelinkGlobalsByCopy;
33 bool SpecializeArgs;
34 bool SpecializeDims;
35 bool SpecializeLaunchBounds;
36
37 std::unique_ptr<Module> cloneKernelModule(LLVMContext &Ctx) {
38 auto ClonedModule = parseBitcodeFile(Bitcode, Ctx);
39 if (auto E = ClonedModule.takeError()) {
40 PROTEUS_FATAL_ERROR("Failed to parse bitcode" + toString(std::move(E)));
41 }
42
43 return std::move(*ClonedModule);
44 }
45
46public:
48 MemoryBufferRef Bitcode, HashT HashValue, const std::string &KernelName,
49 std::string &Suffix, dim3 BlockDim, dim3 GridDim,
50 const SmallVector<RuntimeConstant> &RCVec,
51 const SmallVector<std::pair<std::string, StringRef>> &LambdaCalleeInfo,
52 const std::unordered_map<std::string, const void *> &VarNameToDevPtr,
53 const SmallPtrSet<void *, 8> &GlobalLinkedBinaries,
54 const std::string &DeviceArch, CodegenOption CGOption, bool DumpIR,
55 bool RelinkGlobalsByCopy, bool SpecializeArgs, bool SpecializeDims,
56 bool SpecializeLaunchBounds)
57 : Bitcode(Bitcode), HashValue(HashValue), KernelName(KernelName),
58 Suffix(Suffix), BlockDim(BlockDim), GridDim(GridDim), RCVec(RCVec),
59 LambdaCalleeInfo(LambdaCalleeInfo), VarNameToDevPtr(VarNameToDevPtr),
60 GlobalLinkedBinaries(GlobalLinkedBinaries), DeviceArch(DeviceArch),
61 CGOption(CGOption), DumpIR(DumpIR),
62 RelinkGlobalsByCopy(RelinkGlobalsByCopy),
63 SpecializeArgs(SpecializeArgs), SpecializeDims(SpecializeDims),
64 SpecializeLaunchBounds(SpecializeLaunchBounds) {}
65
66 // Delete copy operations.
69
70 // Use default move operations.
71 CompilationTask(CompilationTask &&) noexcept = default;
72 CompilationTask &operator=(CompilationTask &&) noexcept = default;
73
74 HashT getHashValue() const { return HashValue; }
75
76 std::unique_ptr<MemoryBuffer> compile() {
77#if PROTEUS_ENABLE_DEBUG
78 auto Start = std::chrono::high_resolution_clock::now();
79#endif
80
81 LLVMContext Ctx;
82 std::unique_ptr<Module> M = cloneKernelModule(Ctx);
83
84 std::string KernelMangled = (KernelName + Suffix);
85
86 proteus::specializeIR(*M, KernelName, Suffix, BlockDim, GridDim, RCVec,
87 LambdaCalleeInfo, SpecializeArgs, SpecializeDims,
88 SpecializeLaunchBounds);
89
90 replaceGlobalVariablesWithPointers(*M, VarNameToDevPtr);
91
92#if PROTEUS_ENABLE_CUDA
93 // For CUDA we always run the optimization pipeline.
94 optimizeIR(*M, DeviceArch, '3', 3);
95#elif PROTEUS_ENABLE_HIP
96 // For HIP RTC codegen we run the optimization pipeline only for Serial and
97 // Parallel codegen since those do not run it internally. HIP RTC and
98 // Parallel ThinLTO invoke optimization internally.
99 // TODO: Move optimizeIR inside the codegen routines?
100 if (CGOption == CodegenOption::Serial ||
101 CGOption == CodegenOption::Parallel)
102 optimizeIR(*M, DeviceArch, '3', 3);
103#else
104#error "JitEngineDevice requires PROTEUS_ENABLE_CUDA or PROTEUS_ENABLE_HIP"
105#endif
106
107 if (DumpIR) {
108 const auto CreateDumpDirectory = []() {
109 const std::string DumpDirectory = ".proteus-dump";
110 std::filesystem::create_directory(DumpDirectory);
111 return DumpDirectory;
112 };
113
114 static const std::string DumpDirectory = CreateDumpDirectory();
115
116 saveToFile(DumpDirectory + "/device-jit-" + HashValue.toString() + ".ll",
117 *M);
118 }
119
120 auto ObjBuf =
121 proteus::codegenObject(*M, DeviceArch, GlobalLinkedBinaries, CGOption);
122
123 if (!RelinkGlobalsByCopy)
124 proteus::relinkGlobalsObject(ObjBuf->getMemBufferRef(), VarNameToDevPtr);
125
126#if PROTEUS_ENABLE_DEBUG
127 auto End = std::chrono::high_resolution_clock::now();
128 auto Duration = End - Start;
129 auto Milliseconds =
130 std::chrono::duration_cast<std::chrono::milliseconds>(Duration).count();
131 Logger::logs("proteus") << "Compiled HashValue " << HashValue.toString()
132 << " for " << Milliseconds << "ms\n";
133#endif
134
135 return ObjBuf;
136 }
137};
138
139} // namespace proteus
140
141#endif
void char * KernelName
Definition CompilerInterfaceDevice.cpp:50
#define PROTEUS_FATAL_ERROR(x)
Definition Error.h:4
void saveToFile(llvm::StringRef Filepath, T &&Data)
Definition Utils.h:23
Definition CompilationTask.hpp:17
CompilationTask & operator=(const CompilationTask &)=delete
CompilationTask(MemoryBufferRef Bitcode, HashT HashValue, const std::string &KernelName, std::string &Suffix, dim3 BlockDim, dim3 GridDim, const SmallVector< RuntimeConstant > &RCVec, const SmallVector< std::pair< std::string, StringRef > > &LambdaCalleeInfo, const std::unordered_map< std::string, const void * > &VarNameToDevPtr, const SmallPtrSet< void *, 8 > &GlobalLinkedBinaries, const std::string &DeviceArch, CodegenOption CGOption, bool DumpIR, bool RelinkGlobalsByCopy, bool SpecializeArgs, bool SpecializeDims, bool SpecializeLaunchBounds)
Definition CompilationTask.hpp:47
HashT getHashValue() const
Definition CompilationTask.hpp:74
CompilationTask(CompilationTask &&) noexcept=default
CompilationTask(const CompilationTask &)=delete
std::unique_ptr< MemoryBuffer > compile()
Definition CompilationTask.hpp:76
Definition Hashing.hpp:19
std::string toString() const
Definition Hashing.hpp:27
static llvm::raw_ostream & logs(const std::string &Name)
Definition Logger.hpp:19
Definition Dispatcher.cpp:14
void optimizeIR(Module &M, StringRef Arch, char OptLevel, unsigned CodegenOptLevel)
Definition CoreLLVM.hpp:151
CodegenOption
Definition Config.hpp:10
std::unique_ptr< MemoryBuffer > codegenObject(Module &M, StringRef DeviceArch, SmallPtrSetImpl< void * > &GlobalLinkedBinaries, CodegenOption CGOption=CodegenOption::RTC)
Definition CoreLLVMCUDA.hpp:155
std::string toString(CodegenOption Option)
Definition Config.hpp:23