Proteus
Programmable JIT compilation and optimization for C/C++ using LLVM
Loading...
Searching...
No Matches
CoreLLVM.hpp
Go to the documentation of this file.
1#ifndef PROTEUS_CORE_LLVM_HPP
2#define PROTEUS_CORE_LLVM_HPP
3
4static_assert(__cplusplus >= 201703L,
5 "This header requires C++17 or later due to LLVM.");
6
7#include <llvm/CodeGen/CommandFlags.h>
8#include <llvm/IR/DebugInfo.h>
9#include <llvm/IR/Module.h>
10#include <llvm/IR/PassManager.h>
11#include <llvm/Linker/Linker.h>
12#include <llvm/MC/TargetRegistry.h>
13#include <llvm/Passes/PassBuilder.h>
14#include <llvm/Support/TargetSelect.h>
15#include <llvm/Target/TargetMachine.h>
16#include <llvm/Transforms/IPO/MergeFunctions.h>
17
18#if LLVM_VERSION_MAJOR >= 18
19#include <llvm/TargetParser/SubtargetFeature.h>
20// The convoluted logic below exists because AMD ROCm 5.7.1 identifies as LLVM 17
21// but ships the header SubtargetFeature.h in a different directory than
22// upstream LLVM 17. We detect whether this is the HIP toolchain and, if so,
23// include the header from the MC directory; otherwise from TargetParser.
24#elif LLVM_VERSION_MAJOR == 17
25#if defined(__HIP_PLATFORM_HCC__) || defined(HIP_VERSION_MAJOR)
26#include <llvm/MC/SubtargetFeature.h>
27#else
28#include <llvm/TargetParser/SubtargetFeature.h>
29#endif
30#else
31#define STRINGIFY_HELPER(x) #x
32#define STRINGIFY(x) STRINGIFY_HELPER(x)
33#error "Unsupported LLVM version " STRINGIFY(LLVM_VERSION_MAJOR)
34#endif
35#include <llvm/Transforms/IPO/GlobalDCE.h>
36#include <llvm/Transforms/IPO/Internalize.h>
37#include <llvm/Transforms/IPO/StripDeadPrototypes.h>
38#include <llvm/Transforms/IPO/StripSymbols.h>
39#include <llvm/Transforms/Utils/ModuleUtils.h>
40
41#include "proteus/Debug.h"
42#include "proteus/Error.h"
43#include "proteus/Logger.hpp"
45
46namespace proteus {
47using namespace llvm;
48
49namespace detail {
50
51inline Expected<std::unique_ptr<TargetMachine>>
52createTargetMachine(Module &M, StringRef Arch, unsigned OptLevel = 3) {
53 Triple TT(M.getTargetTriple());
54 auto CGOptLevel = CodeGenOpt::getLevel(OptLevel);
55 if (CGOptLevel == std::nullopt)
56 PROTEUS_FATAL_ERROR("Invalid opt level");
57
58 std::string Msg;
59 const Target *T = TargetRegistry::lookupTarget(M.getTargetTriple(), Msg);
60 if (!T)
61 return make_error<StringError>(Msg, inconvertibleErrorCode());
62
63 SubtargetFeatures Features;
64 Features.getDefaultSubtargetFeatures(TT);
65
66 std::optional<Reloc::Model> RelocModel;
67 if (M.getModuleFlag("PIC Level"))
68 RelocModel =
69 M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
70
71 std::optional<CodeModel::Model> CodeModel = M.getCodeModel();
72
73 // Use default target options.
74 // TODO: Customize based on AOT compilation flags or by creating a
75 // constructor that sets target options based on the triple.
76 TargetOptions Options;
77 std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
78 M.getTargetTriple(), Arch, Features.getString(), Options, RelocModel,
79 CodeModel, CGOptLevel.value()));
80 if (!TM)
81 return make_error<StringError>("Failed to create target machine",
82 inconvertibleErrorCode());
83 return TM;
84}
85
86inline void runOptimizationPassPipeline(Module &M, StringRef Arch,
87 const std::string &PassPipeline,
88 unsigned CodegenOptLevel = 3) {
89 PipelineTuningOptions PTO;
90
91 std::optional<PGOOptions> PGOOpt;
92 auto TM = createTargetMachine(M, Arch, CodegenOptLevel);
93 if (auto Err = TM.takeError())
94 report_fatal_error(std::move(Err));
95 TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
96
97 PassBuilder PB(TM->get(), PTO, PGOOpt, nullptr);
98 LoopAnalysisManager LAM;
99 FunctionAnalysisManager FAM;
100 CGSCCAnalysisManager CGAM;
101 ModuleAnalysisManager MAM;
102
103 FAM.registerPass([&] { return TargetLibraryAnalysis(TLII); });
104
105 PB.registerModuleAnalyses(MAM);
106 PB.registerCGSCCAnalyses(CGAM);
107 PB.registerFunctionAnalyses(FAM);
108 PB.registerLoopAnalyses(LAM);
109 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
110 ModulePassManager Passes;
111 if (auto E = PB.parsePassPipeline(Passes, PassPipeline))
112 PROTEUS_FATAL_ERROR("Error: " + toString(std::move(E)));
113
114 Passes.run(M, MAM);
115}
116
117inline void runOptimizationPassPipeline(Module &M, StringRef Arch,
118 char OptLevel = '3',
119 unsigned CodegenOptLevel = 3) {
120 PipelineTuningOptions PTO;
121
122 std::optional<PGOOptions> PGOOpt;
123 auto TM = createTargetMachine(M, Arch, CodegenOptLevel);
124 if (auto Err = TM.takeError())
125 report_fatal_error(std::move(Err));
126 TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
127
128 PassBuilder PB(TM->get(), PTO, PGOOpt, nullptr);
129 LoopAnalysisManager LAM;
130 FunctionAnalysisManager FAM;
131 CGSCCAnalysisManager CGAM;
132 ModuleAnalysisManager MAM;
133
134 FAM.registerPass([&] { return TargetLibraryAnalysis(TLII); });
135
136 PB.registerModuleAnalyses(MAM);
137 PB.registerCGSCCAnalyses(CGAM);
138 PB.registerFunctionAnalyses(FAM);
139 PB.registerLoopAnalyses(LAM);
140 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
141
142 OptimizationLevel OptSetting;
143 switch (OptLevel) {
144 case '0':
145 OptSetting = OptimizationLevel::O0;
146 break;
147 case '1':
148 OptSetting = OptimizationLevel::O1;
149 break;
150 case '2':
151 OptSetting = OptimizationLevel::O2;
152 break;
153 case '3':
154 OptSetting = OptimizationLevel::O3;
155 break;
156 case 's':
157 OptSetting = OptimizationLevel::Os;
158 break;
159 case 'z':
160 OptSetting = OptimizationLevel::Oz;
161 break;
162 default:
163 PROTEUS_FATAL_ERROR("Unsupported optimization level " + OptLevel);
164 };
165
166 ModulePassManager Passes = PB.buildPerModuleDefaultPipeline(OptSetting);
167 Passes.run(M, MAM);
168}
169
170} // namespace detail
171
174 InitializeAllTargetInfos();
175 InitializeAllTargets();
176 InitializeAllTargetMCs();
177 InitializeAllAsmParsers();
178 InitializeAllAsmPrinters();
179 }
180};
181
/// Optimize \p M by forwarding to detail::runOptimizationPassPipeline with the
/// given \p OptLevel and \p CodegenOptLevel.
///
/// A Timer is created before the call, and the trailing stream expression
/// emits the opt level, the codegen opt level and T.elapsed() followed by
/// " ms".
// NOTE(review): the opening of the timing-output statement (between the
// pipeline call and the first `<<`) is not visible in this listing; confirm
// against the real header.
182inline void optimizeIR(Module &M, StringRef Arch, char OptLevel,
183 unsigned CodegenOptLevel) {
184 Timer T;
185 detail::runOptimizationPassPipeline(M, Arch, OptLevel, CodegenOptLevel);
187 << "optimizeIR optlevel " << OptLevel << " codegenopt "
188 << CodegenOptLevel << " " << T.elapsed() << " ms\n");
189}
190
/// Optimize \p M with a custom textual pass pipeline by forwarding to
/// detail::runOptimizationPassPipeline with \p PassPipeline and
/// \p CodegenOptLevel.
///
/// When Config::get().ProteusTraceOutput is at least 1, the pipeline string is
/// first sent to Logger::trace prefixed with "[CustomPipeline] ". A Timer is
/// created on entry, and the trailing stream expression emits the pipeline,
/// the codegen opt level and T.elapsed() followed by " ms".
// NOTE(review): the opening of the timing-output statement (between the
// pipeline call and the first `<<`) is not visible in this listing; confirm
// against the real header.
191inline void optimizeIR(Module &M, StringRef Arch,
192 const std::string &PassPipeline,
193 unsigned CodegenOptLevel) {
194 Timer T;
// Formats the trace line into a SmallString and returns it by value.
195 auto TraceOut = [](const std::string &PassPipeline) {
196 SmallString<128> S;
197 raw_svector_ostream OS(S);
198 OS << "[CustomPipeline] " << PassPipeline << "\n";
199 return S;
200 };
201
202 if (Config::get().ProteusTraceOutput >= 1)
203 Logger::trace(TraceOut(PassPipeline));
204
205 detail::runOptimizationPassPipeline(M, Arch, PassPipeline, CodegenOptLevel);
207 << "optimizeIR optlevel " << PassPipeline
208 << " codegenopt " << CodegenOptLevel << " "
209 << T.elapsed() << " ms\n");
210}
211
212inline std::unique_ptr<Module>
213linkModules(LLVMContext &Ctx,
214 SmallVector<std::unique_ptr<Module>> LinkedModules) {
215 if (LinkedModules.empty())
216 PROTEUS_FATAL_ERROR("Expected jit module");
217
218 auto LinkedModule = std::make_unique<llvm::Module>("JitModule", Ctx);
219 Linker IRLinker(*LinkedModule);
220 // Link in all the proteus-enabled extracted modules.
221 for (auto &LinkedM : LinkedModules) {
222 // Returns true if linking failed.
223 if (IRLinker.linkInModule(std::move(LinkedM)))
224 PROTEUS_FATAL_ERROR("Linking failed");
225 }
226
227 return LinkedModule;
228}
229
230inline void runCleanupPassPipeline(Module &M) {
231 PassBuilder PB;
232 LoopAnalysisManager LAM;
233 FunctionAnalysisManager FAM;
234 CGSCCAnalysisManager CGAM;
235 ModuleAnalysisManager MAM;
236
237 PB.registerModuleAnalyses(MAM);
238 PB.registerCGSCCAnalyses(CGAM);
239 PB.registerFunctionAnalyses(FAM);
240 PB.registerLoopAnalyses(LAM);
241 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
242
243 ModulePassManager Passes;
244 Passes.addPass(MergeFunctionsPass());
245 Passes.addPass(GlobalDCEPass());
246 // Passes.addPass(StripDeadDebugInfoPass());
247 Passes.addPass(StripDeadPrototypesPass());
248
249 Passes.run(M, MAM);
250
251 StripDebugInfo(M);
252}
253
254inline void pruneIR(Module &M, bool UnsetExternallyInitialized = true) {
255 // Remove llvm.global.annotations now that we have read them.
256 if (auto *GlobalAnnotations = M.getGlobalVariable("llvm.global.annotations"))
257 M.eraseGlobalVariable(GlobalAnnotations);
258
259 // Remove llvm.compiler.used
260 if (auto *CompilerUsed = M.getGlobalVariable("llvm.compiler.used"))
261 M.eraseGlobalVariable(CompilerUsed);
262
263 // Remove the __clang_gpu_used_external used in HIP RDC compilation and its
264 // uses in llvm.used, llvm.compiler.used.
265 SmallVector<GlobalVariable *> GlobalsToErase;
266 for (auto &GV : M.globals()) {
267 auto Name = GV.getName();
268 if (Name.starts_with("__clang_gpu_used_external") ||
269 Name.starts_with("_jit_bitcode") || Name.starts_with("__hip_cuid")) {
270 GlobalsToErase.push_back(&GV);
271 removeFromUsedLists(M, [&GV](Constant *C) {
272 if (auto *Global = dyn_cast<GlobalVariable>(C))
273 return Global == &GV;
274 return false;
275 });
276 }
277 }
278 for (auto *GV : GlobalsToErase) {
279 M.eraseGlobalVariable(GV);
280 }
281
282 // Remove externaly_initialized attributes.
283 if (UnsetExternallyInitialized)
284 for (auto &GV : M.globals())
285 if (GV.isExternallyInitialized())
286 GV.setExternallyInitialized(false);
287}
288
289inline void internalize(Module &M, StringRef PreserveFunctionName) {
290 auto *F = M.getFunction(PreserveFunctionName);
291 // Internalize others besides the kernel function.
292 internalizeModule(M, [&F](const GlobalValue &GV) {
293 // Do not internalize the kernel function.
294 if (&GV == F)
295 return true;
296
297 // Internalize everything else.
298 return false;
299 });
300}
301
302} // namespace proteus
303
305
306#endif
#define PROTEUS_FATAL_ERROR(x)
Definition Error.h:7
#define PROTEUS_TIMER_OUTPUT(x)
Definition TimeTracing.hpp:57
static Config & get()
Definition Config.hpp:114
static llvm::raw_ostream & outs(const std::string &Name)
Definition Logger.hpp:25
static void trace(llvm::StringRef Msg)
Definition Logger.hpp:30
Definition TimeTracing.hpp:36
uint64_t elapsed()
Definition TimeTracing.hpp:45
Definition Helpers.h:76
void runOptimizationPassPipeline(Module &M, StringRef Arch, const std::string &PassPipeline, unsigned CodegenOptLevel=3)
Definition CoreLLVM.hpp:86
Expected< std::unique_ptr< TargetMachine > > createTargetMachine(Module &M, StringRef Arch, unsigned OptLevel=3)
Definition CoreLLVM.hpp:52
Definition BuiltinsCUDA.cpp:4
void optimizeIR(Module &M, StringRef Arch, char OptLevel, unsigned CodegenOptLevel)
Definition CoreLLVM.hpp:182
void pruneIR(Module &M, bool UnsetExternallyInitialized=true)
Definition CoreLLVM.hpp:254
std::string toString(CodegenOption Option)
Definition Config.hpp:23
void internalize(Module &M, StringRef PreserveFunctionName)
Definition CoreLLVM.hpp:289
void runCleanupPassPipeline(Module &M)
Definition CoreLLVM.hpp:230
std::unique_ptr< Module > linkModules(LLVMContext &Ctx, SmallVector< std::unique_ptr< Module > > LinkedModules)
Definition CoreLLVM.hpp:213
Definition CoreLLVM.hpp:172
InitLLVMTargets()
Definition CoreLLVM.hpp:173