Proteus
Programmable JIT compilation and optimization for C/C++ using LLVM
Loading...
Searching...
No Matches
CoreLLVM.h
Go to the documentation of this file.
1#ifndef PROTEUS_CORE_LLVM_H
2#define PROTEUS_CORE_LLVM_H
3
4static_assert(__cplusplus >= 201703L,
5 "This header requires C++17 or later due to LLVM.");
6
7#include "proteus/Error.h"
10#include "proteus/impl/Debug.h"
11#include "proteus/impl/Logger.h"
12
13#include <llvm/CodeGen/CommandFlags.h>
14#include <llvm/IR/DebugInfo.h>
15#include <llvm/IR/Module.h>
16#include <llvm/IR/PassManager.h>
17#include <llvm/Linker/Linker.h>
18#include <llvm/MC/TargetRegistry.h>
19#include <llvm/Passes/PassBuilder.h>
20#include <llvm/Support/TargetSelect.h>
21#include <llvm/Target/TargetMachine.h>
22#include <llvm/Transforms/IPO/MergeFunctions.h>
23
24#if LLVM_VERSION_MAJOR >= 18
25#include <llvm/TargetParser/SubtargetFeature.h>
26// The convoluted logic below is needed because AMD ROCm 5.7.1 identifies as
27// LLVM 17 but ships the header SubtargetFeature.h in a different directory than
28// upstream LLVM 17. We detect whether this is the HIP toolchain and, if so,
29// include the header from the MC directory; otherwise from TargetParser.
30#elif LLVM_VERSION_MAJOR == 17
31#if defined(__HIP_PLATFORM_HCC__) || defined(HIP_VERSION_MAJOR)
32#include <llvm/MC/SubtargetFeature.h>
33#else
34#include <llvm/TargetParser/SubtargetFeature.h>
35#endif
36#else
37#define STRINGIFY_HELPER(x) #x
38#define STRINGIFY(x) STRINGIFY_HELPER(x)
39#error "Unsupported LLVM version " STRINGIFY(LLVM_VERSION_MAJOR)
40#endif
41#include <llvm/Transforms/IPO/GlobalDCE.h>
42#include <llvm/Transforms/IPO/Internalize.h>
43#include <llvm/Transforms/IPO/StripDeadPrototypes.h>
44#include <llvm/Transforms/IPO/StripSymbols.h>
45#include <llvm/Transforms/Utils/ModuleUtils.h>
46
47#include <optional>
48#include <string>
49#include <utility>
50
51namespace proteus {
52using namespace llvm;
53
54namespace detail {
55
56inline Expected<std::unique_ptr<TargetMachine>>
57createTargetMachine(Module &M, StringRef Arch, unsigned OptLevel = 3) {
58 Triple TT(M.getTargetTriple());
59 auto CGOptLevel = CodeGenOpt::getLevel(OptLevel);
60 if (CGOptLevel == std::nullopt)
61 reportFatalError("Invalid opt level");
62
63 std::string Msg;
64 const Target *T = TargetRegistry::lookupTarget(M.getTargetTriple(), Msg);
65 if (!T)
66 return make_error<StringError>(Msg, inconvertibleErrorCode());
67
68 SubtargetFeatures Features;
69 Features.getDefaultSubtargetFeatures(TT);
70
71 std::optional<Reloc::Model> RelocModel;
72 if (M.getModuleFlag("PIC Level"))
73 RelocModel =
74 M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
75
76 std::optional<CodeModel::Model> CodeModel = M.getCodeModel();
77
78 // Use default target options.
79 // TODO: Customize based on AOT compilation flags or by creating a
80 // constructor that sets target options based on the triple.
81 TargetOptions Options;
82 std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
83 M.getTargetTriple(), Arch, Features.getString(), Options, RelocModel,
84 CodeModel, CGOptLevel.value()));
85 if (!TM)
86 return make_error<StringError>("Failed to create target machine",
87 inconvertibleErrorCode());
88 return TM;
89}
90
91inline void runOptimizationPassPipeline(Module &M, StringRef Arch,
92 const std::string &PassPipeline,
93 unsigned CodegenOptLevel = 3) {
94 PipelineTuningOptions PTO;
95
96 std::optional<PGOOptions> PGOOpt;
97 auto TM = createTargetMachine(M, Arch, CodegenOptLevel);
98 if (auto Err = TM.takeError())
99 report_fatal_error(std::move(Err));
100 TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
101
102 PassBuilder PB(TM->get(), PTO, PGOOpt, nullptr);
103 LoopAnalysisManager LAM;
104 FunctionAnalysisManager FAM;
105 CGSCCAnalysisManager CGAM;
106 ModuleAnalysisManager MAM;
107
108 FAM.registerPass([&] { return TargetLibraryAnalysis(TLII); });
109
110 PB.registerModuleAnalyses(MAM);
111 PB.registerCGSCCAnalyses(CGAM);
112 PB.registerFunctionAnalyses(FAM);
113 PB.registerLoopAnalyses(LAM);
114 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
115 ModulePassManager Passes;
116 if (auto E = PB.parsePassPipeline(Passes, PassPipeline))
117 reportFatalError("Error: " + toString(std::move(E)));
118
119 Passes.run(M, MAM);
120}
121
122inline void runOptimizationPassPipeline(Module &M, StringRef Arch,
123 char OptLevel = '3',
124 unsigned CodegenOptLevel = 3) {
125 PipelineTuningOptions PTO;
126
127 std::optional<PGOOptions> PGOOpt;
128 auto TM = createTargetMachine(M, Arch, CodegenOptLevel);
129 if (auto Err = TM.takeError())
130 report_fatal_error(std::move(Err));
131 TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
132
133 PassBuilder PB(TM->get(), PTO, PGOOpt, nullptr);
134 LoopAnalysisManager LAM;
135 FunctionAnalysisManager FAM;
136 CGSCCAnalysisManager CGAM;
137 ModuleAnalysisManager MAM;
138
139 FAM.registerPass([&] { return TargetLibraryAnalysis(TLII); });
140
141 PB.registerModuleAnalyses(MAM);
142 PB.registerCGSCCAnalyses(CGAM);
143 PB.registerFunctionAnalyses(FAM);
144 PB.registerLoopAnalyses(LAM);
145 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
146
147 OptimizationLevel OptSetting;
148 switch (OptLevel) {
149 case '0':
150 OptSetting = OptimizationLevel::O0;
151 break;
152 case '1':
153 OptSetting = OptimizationLevel::O1;
154 break;
155 case '2':
156 OptSetting = OptimizationLevel::O2;
157 break;
158 case '3':
159 OptSetting = OptimizationLevel::O3;
160 break;
161 case 's':
162 OptSetting = OptimizationLevel::Os;
163 break;
164 case 'z':
165 OptSetting = OptimizationLevel::Oz;
166 break;
167 default:
168 reportFatalError(std::string("Unsupported optimization level ") + OptLevel);
169 };
170
171 ModulePassManager Passes = PB.buildPerModuleDefaultPipeline(OptSetting);
172 Passes.run(M, MAM);
173}
174
175} // namespace detail
176
179 InitializeAllTargetInfos();
180 InitializeAllTargets();
181 InitializeAllTargetMCs();
182 InitializeAllAsmParsers();
183 InitializeAllAsmPrinters();
184 }
185};
186
188 std::optional<std::string> PassPipeline;
191
193 : OptLevel(CGConfig.optLevel()),
194 CodegenOptLevel(CGConfig.codeGenOptLevel()) {
195 if (auto Pipeline = CGConfig.optPipeline())
196 PassPipeline = Pipeline.value();
197 }
198
203};
204
205inline void optimizeIR(Module &M, StringRef Arch,
206 const OptimizationPipelineConfig &OptConfig) {
207 TIMESCOPE("proteus::optimizeIR");
208 Timer T(Config::get().ProteusEnableTimers);
209
210 if (OptConfig.PassPipeline) {
211 auto TraceOut = [](const std::string &PassPipeline) {
212 SmallString<128> S;
213 raw_svector_ostream OS(S);
214 OS << "[CustomPipeline] " << PassPipeline << "\n";
215 return S;
216 };
217
218 if (Config::get().traceSpecializations())
219 Logger::trace(TraceOut(OptConfig.PassPipeline.value()));
220
221 detail::runOptimizationPassPipeline(M, Arch, OptConfig.PassPipeline.value(),
222 OptConfig.CodegenOptLevel);
223 } else {
225 OptConfig.CodegenOptLevel);
226 }
227
229 << "optimizeIR optlevel "
230 << (OptConfig.PassPipeline
231 ? StringRef(OptConfig.PassPipeline.value())
232 : StringRef(&OptConfig.OptLevel, 1))
233 << " codegenopt " << OptConfig.CodegenOptLevel << " "
234 << T.elapsed() << " ms\n");
235}
236
237inline std::unique_ptr<Module>
238linkModules(LLVMContext &Ctx,
239 SmallVector<std::unique_ptr<Module>> LinkedModules) {
240 TIMESCOPE("proteus::linkModules");
241 if (LinkedModules.empty())
242 reportFatalError("Expected jit module");
243
244 auto LinkedModule = std::make_unique<llvm::Module>("JitModule", Ctx);
245 Linker IRLinker(*LinkedModule);
246 // Link in all the proteus-enabled extracted modules.
247 for (auto &LinkedM : LinkedModules) {
248 // Returns true if linking failed.
249 if (IRLinker.linkInModule(std::move(LinkedM)))
250 reportFatalError("Linking failed");
251 }
252
253 return LinkedModule;
254}
255
256inline void runCleanupPassPipeline(Module &M) {
257 TIMESCOPE("proteus::runCleanupPassPipeline");
258 PassBuilder PB;
259 LoopAnalysisManager LAM;
260 FunctionAnalysisManager FAM;
261 CGSCCAnalysisManager CGAM;
262 ModuleAnalysisManager MAM;
263
264 PB.registerModuleAnalyses(MAM);
265 PB.registerCGSCCAnalyses(CGAM);
266 PB.registerFunctionAnalyses(FAM);
267 PB.registerLoopAnalyses(LAM);
268 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
269
270 ModulePassManager Passes;
271 Passes.addPass(GlobalDCEPass());
272 // Passes.addPass(StripDeadDebugInfoPass());
273 Passes.addPass(StripDeadPrototypesPass());
274
275 Passes.run(M, MAM);
276
277 StripDebugInfo(M);
278}
279
280inline void pruneIR(Module &M, bool UnsetExternallyInitialized = true) {
281 // Remove llvm.global.annotations now that we have read them.
282 if (auto *GlobalAnnotations = M.getGlobalVariable("llvm.global.annotations"))
283 M.eraseGlobalVariable(GlobalAnnotations);
284
285 // Remove llvm.compiler.used
286 if (auto *CompilerUsed = M.getGlobalVariable("llvm.compiler.used"))
287 M.eraseGlobalVariable(CompilerUsed);
288
289 // Remove the __clang_gpu_used_external used in HIP RDC compilation and its
290 // uses in llvm.used, llvm.compiler.used.
291 SmallVector<GlobalVariable *> GlobalsToErase;
292 for (auto &GV : M.globals()) {
293 auto Name = GV.getName();
294 if (Name.starts_with("__clang_gpu_used_external") ||
295 Name.starts_with("_jit_bitcode") || Name.starts_with("__hip_cuid")) {
296 GlobalsToErase.push_back(&GV);
297 removeFromUsedLists(M, [&GV](Constant *C) {
298 if (auto *Global = dyn_cast<GlobalVariable>(C))
299 return Global == &GV;
300 return false;
301 });
302 }
303 }
304 for (auto *GV : GlobalsToErase) {
305 M.eraseGlobalVariable(GV);
306 }
307
308 // Remove externaly_initialized attributes.
309 if (UnsetExternallyInitialized)
310 for (auto &GV : M.globals())
311 if (GV.isExternallyInitialized())
312 GV.setExternallyInitialized(false);
313}
314
315inline void internalize(Module &M, StringRef PreserveFunctionName) {
316 auto *F = M.getFunction(PreserveFunctionName);
317 // Internalize others besides the kernel function.
318 internalizeModule(M, [&F](const GlobalValue &GV) {
319 // Do not internalize the kernel function.
320 if (&GV == F)
321 return true;
322
323 // Internalize everything else.
324 return false;
325 });
326}
327
328} // namespace proteus
329
330#endif
#define PROTEUS_TIMER_OUTPUT(x)
Definition Config.h:440
#define TIMESCOPE(...)
Definition TimeTracing.h:66
Definition Config.h:164
std::optional< const std::string > optPipeline() const
Definition Config.h:266
static Config & get()
Definition Config.h:334
static llvm::raw_ostream & outs(const std::string &Name)
Definition Logger.h:25
static void trace(llvm::StringRef Msg)
Definition Logger.h:30
Definition TimeTracing.h:33
uint64_t elapsed()
Definition TimeTracing.cpp:66
Definition CompiledLibrary.h:7
void runOptimizationPassPipeline(Module &M, StringRef Arch, const std::string &PassPipeline, unsigned CodegenOptLevel=3)
Definition CoreLLVM.h:91
Expected< std::unique_ptr< TargetMachine > > createTargetMachine(Module &M, StringRef Arch, unsigned OptLevel=3)
Definition CoreLLVM.h:57
Definition MemoryCache.h:27
void optimizeIR(Module &M, StringRef Arch, const OptimizationPipelineConfig &OptConfig)
Definition CoreLLVM.h:205
void reportFatalError(const llvm::Twine &Reason, const char *FILE, unsigned Line)
Definition Error.cpp:14
void pruneIR(Module &M, bool UnsetExternallyInitialized=true)
Definition CoreLLVM.h:280
std::string toString(CodegenOption Option)
Definition Config.h:28
void internalize(Module &M, StringRef PreserveFunctionName)
Definition CoreLLVM.h:315
void runCleanupPassPipeline(Module &M)
Definition CoreLLVM.h:256
std::unique_ptr< Module > linkModules(LLVMContext &Ctx, SmallVector< std::unique_ptr< Module > > LinkedModules)
Definition CoreLLVM.h:238
Definition Hashing.h:184
Definition CoreLLVM.h:177
InitLLVMTargets()
Definition CoreLLVM.h:178
Definition CoreLLVM.h:187
std::optional< std::string > PassPipeline
Definition CoreLLVM.h:188
OptimizationPipelineConfig(const CodeGenerationConfig &CGConfig)
Definition CoreLLVM.h:192
unsigned CodegenOptLevel
Definition CoreLLVM.h:190
OptimizationPipelineConfig(std::optional< std::string > PassPipeline, char OptLevel, unsigned CodegenOptLevel)
Definition CoreLLVM.h:199
char OptLevel
Definition CoreLLVM.h:189