Proteus
Programmable JIT compilation and optimization for C/C++ using LLVM
Loading...
Searching...
No Matches
CoreLLVM.hpp
Go to the documentation of this file.
1#ifndef PROTEUS_CORE_LLVM_HPP
2#define PROTEUS_CORE_LLVM_HPP
3
4static_assert(__cplusplus >= 201703L,
5 "This header requires C++17 or later due to LLVM.");
6
7#include <llvm/CodeGen/CommandFlags.h>
8#include <llvm/IR/DebugInfo.h>
9#include <llvm/IR/Module.h>
10#include <llvm/IR/PassManager.h>
11#include <llvm/Linker/Linker.h>
12#include <llvm/MC/TargetRegistry.h>
13#include <llvm/Passes/PassBuilder.h>
14#include <llvm/Support/TargetSelect.h>
15#include <llvm/Target/TargetMachine.h>
16#include <llvm/Transforms/IPO/MergeFunctions.h>
17
18#if LLVM_VERSION_MAJOR >= 18
19#include <llvm/TargetParser/SubtargetFeature.h>
20// The convoluted logic below exists because AMD ROCm 5.7.1 identifies as LLVM
21// 17 but places the header SubtargetFeature.h in a different directory than
22// upstream LLVM 17. We detect whether this is the HIP toolchain and, if so,
23// include the header from the MC directory; otherwise from TargetParser.
24#elif LLVM_VERSION_MAJOR == 17
25#if defined(__HIP_PLATFORM_HCC__) || defined(HIP_VERSION_MAJOR)
26#include <llvm/MC/SubtargetFeature.h>
27#else
28#include <llvm/TargetParser/SubtargetFeature.h>
29#endif
30#else
31#define STRINGIFY_HELPER(x) #x
32#define STRINGIFY(x) STRINGIFY_HELPER(x)
33#error "Unsupported LLVM version " STRINGIFY(LLVM_VERSION_MAJOR)
34#endif
35#include <llvm/Transforms/IPO/GlobalDCE.h>
36#include <llvm/Transforms/IPO/Internalize.h>
37#include <llvm/Transforms/IPO/StripDeadPrototypes.h>
38#include <llvm/Transforms/IPO/StripSymbols.h>
39#include <llvm/Transforms/Utils/ModuleUtils.h>
40
41#include "proteus/Debug.h"
42#include "proteus/Error.h"
43#include "proteus/Logger.hpp"
45
46namespace proteus {
47using namespace llvm;
48
49namespace detail {
50
52createTargetMachine(Module &M, StringRef Arch, unsigned OptLevel = 3) {
53 Triple TT(M.getTargetTriple());
54 auto CGOptLevel = CodeGenOpt::getLevel(OptLevel);
55 if (CGOptLevel == std::nullopt)
56 PROTEUS_FATAL_ERROR("Invalid opt level");
57
58 std::string Msg;
59 const Target *T = TargetRegistry::lookupTarget(M.getTargetTriple(), Msg);
60 if (!T)
62
64 Features.getDefaultSubtargetFeatures(TT);
65
66 std::optional<Reloc::Model> RelocModel;
67 if (M.getModuleFlag("PIC Level"))
69 M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
70
71 std::optional<CodeModel::Model> CodeModel = M.getCodeModel();
72
73 // Use default target options.
74 // TODO: Customize based on AOT compilation flags or by creating a
75 // constructor that sets target options based on the triple.
77 std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
78 M.getTargetTriple(), Arch, Features.getString(), Options, RelocModel,
79 CodeModel, CGOptLevel.value()));
80 if (!TM)
81 return make_error<StringError>("Failed to create target machine",
83 return TM;
84}
85
87 const std::string &PassPipeline,
88 unsigned CodegenOptLevel = 3) {
90
91 std::optional<PGOOptions> PGOOpt;
92 auto TM = createTargetMachine(M, Arch, CodegenOptLevel);
93 if (auto Err = TM.takeError())
94 report_fatal_error(std::move(Err));
95 TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
96
97 PassBuilder PB(TM->get(), PTO, PGOOpt, nullptr);
102
103 FAM.registerPass([&] { return TargetLibraryAnalysis(TLII); });
104
105 PB.registerModuleAnalyses(MAM);
106 PB.registerCGSCCAnalyses(CGAM);
107 PB.registerFunctionAnalyses(FAM);
108 PB.registerLoopAnalyses(LAM);
109 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
111 if (auto E = PB.parsePassPipeline(Passes, PassPipeline))
112 PROTEUS_FATAL_ERROR("Error: " + toString(std::move(E)));
113
114 Passes.run(M, MAM);
115}
116
118 char OptLevel = '3',
119 unsigned CodegenOptLevel = 3) {
121
122 std::optional<PGOOptions> PGOOpt;
123 auto TM = createTargetMachine(M, Arch, CodegenOptLevel);
124 if (auto Err = TM.takeError())
125 report_fatal_error(std::move(Err));
126 TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
127
128 PassBuilder PB(TM->get(), PTO, PGOOpt, nullptr);
133
134 FAM.registerPass([&] { return TargetLibraryAnalysis(TLII); });
135
136 PB.registerModuleAnalyses(MAM);
137 PB.registerCGSCCAnalyses(CGAM);
138 PB.registerFunctionAnalyses(FAM);
139 PB.registerLoopAnalyses(LAM);
140 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
141
143 switch (OptLevel) {
144 case '0':
145 OptSetting = OptimizationLevel::O0;
146 break;
147 case '1':
148 OptSetting = OptimizationLevel::O1;
149 break;
150 case '2':
151 OptSetting = OptimizationLevel::O2;
152 break;
153 case '3':
154 OptSetting = OptimizationLevel::O3;
155 break;
156 case 's':
157 OptSetting = OptimizationLevel::Os;
158 break;
159 case 'z':
160 OptSetting = OptimizationLevel::Oz;
161 break;
162 default:
163 PROTEUS_FATAL_ERROR("Unsupported optimization level " + OptLevel);
164 };
165
166 ModulePassManager Passes = PB.buildPerModuleDefaultPipeline(OptSetting);
167 Passes.run(M, MAM);
168}
169
170} // namespace detail
171
181
182inline void optimizeIR(Module &M, StringRef Arch, char OptLevel,
183 unsigned CodegenOptLevel) {
184 Timer T;
185 detail::runOptimizationPassPipeline(M, Arch, OptLevel, CodegenOptLevel);
187 << "optimizeIR optlevel " << OptLevel << " codegenopt "
188 << CodegenOptLevel << " " << T.elapsed() << " ms\n");
189}
190
192 const std::string &PassPipeline,
193 unsigned CodegenOptLevel) {
194 Timer T;
195 auto TraceOut = [](const std::string &PassPipeline) {
198 OS << "[CustomPipeline] " << PassPipeline << "\n";
199 return S;
200 };
201
202 if (Config::get().ProteusTraceOutput >= 1)
203 Logger::trace(TraceOut(PassPipeline));
204
205 detail::runOptimizationPassPipeline(M, Arch, PassPipeline, CodegenOptLevel);
207 << "optimizeIR optlevel " << PassPipeline
208 << " codegenopt " << CodegenOptLevel << " "
209 << T.elapsed() << " ms\n");
210}
211
212inline std::unique_ptr<Module>
214 SmallVector<std::unique_ptr<Module>> LinkedModules) {
215 if (LinkedModules.empty())
216 PROTEUS_FATAL_ERROR("Expected jit module");
217
218 auto LinkedModule = std::make_unique<llvm::Module>("JitModule", Ctx);
219 Linker IRLinker(*LinkedModule);
220 // Link in all the proteus-enabled extracted modules.
221 for (auto &LinkedM : LinkedModules) {
222 // Returns true if linking failed.
223 if (IRLinker.linkInModule(std::move(LinkedM)))
224 PROTEUS_FATAL_ERROR("Linking failed");
225 }
226
227 return LinkedModule;
228}
229
236
237 PB.registerModuleAnalyses(MAM);
238 PB.registerCGSCCAnalyses(CGAM);
239 PB.registerFunctionAnalyses(FAM);
240 PB.registerLoopAnalyses(LAM);
241 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
242
244 Passes.addPass(GlobalDCEPass());
245 // Passes.addPass(StripDeadDebugInfoPass());
247
248 Passes.run(M, MAM);
249
251}
252
253inline void pruneIR(Module &M, bool UnsetExternallyInitialized = true) {
254 // Remove llvm.global.annotations now that we have read them.
255 if (auto *GlobalAnnotations = M.getGlobalVariable("llvm.global.annotations"))
256 M.eraseGlobalVariable(GlobalAnnotations);
257
258 // Remove llvm.compiler.used
259 if (auto *CompilerUsed = M.getGlobalVariable("llvm.compiler.used"))
260 M.eraseGlobalVariable(CompilerUsed);
261
262 // Remove the __clang_gpu_used_external used in HIP RDC compilation and its
263 // uses in llvm.used, llvm.compiler.used.
265 for (auto &GV : M.globals()) {
266 auto Name = GV.getName();
267 if (Name.starts_with("__clang_gpu_used_external") ||
268 Name.starts_with("_jit_bitcode") || Name.starts_with("__hip_cuid")) {
269 GlobalsToErase.push_back(&GV);
271 if (auto *Global = dyn_cast<GlobalVariable>(C))
272 return Global == &GV;
273 return false;
274 });
275 }
276 }
277 for (auto *GV : GlobalsToErase) {
278 M.eraseGlobalVariable(GV);
279 }
280
281 // Remove externally_initialized attributes.
283 for (auto &GV : M.globals())
284 if (GV.isExternallyInitialized())
285 GV.setExternallyInitialized(false);
286}
287
289 auto *F = M.getFunction(PreserveFunctionName);
290 // Internalize others besides the kernel function.
291 internalizeModule(M, [&F](const GlobalValue &GV) {
292 // Do not internalize the kernel function.
293 if (&GV == F)
294 return true;
295
296 // Internalize everything else.
297 return false;
298 });
299}
300
301} // namespace proteus
302
304
305#endif
#define PROTEUS_FATAL_ERROR(x)
Definition Error.h:7
#define PROTEUS_TIMER_OUTPUT(x)
Definition TimeTracing.hpp:57
static Config & get()
Definition Config.hpp:284
static llvm::raw_ostream & outs(const std::string &Name)
Definition Logger.hpp:25
static void trace(llvm::StringRef Msg)
Definition Logger.hpp:30
Definition TimeTracing.hpp:36
Definition Helpers.h:138
void runOptimizationPassPipeline(Module &M, StringRef Arch, const std::string &PassPipeline, unsigned CodegenOptLevel=3)
Definition CoreLLVM.hpp:86
Expected< std::unique_ptr< TargetMachine > > createTargetMachine(Module &M, StringRef Arch, unsigned OptLevel=3)
Definition CoreLLVM.hpp:52
Definition BuiltinsCUDA.cpp:4
void optimizeIR(Module &M, StringRef Arch, char OptLevel, unsigned CodegenOptLevel)
Definition CoreLLVM.hpp:182
T getRuntimeConstantValue(void *Arg)
Definition CompilerInterfaceRuntimeConstantInfo.h:114
void pruneIR(Module &M, bool UnsetExternallyInitialized=true)
Definition CoreLLVM.hpp:253
std::string toString(CodegenOption Option)
Definition Config.hpp:26
void internalize(Module &M, StringRef PreserveFunctionName)
Definition CoreLLVM.hpp:288
void runCleanupPassPipeline(Module &M)
Definition CoreLLVM.hpp:230
std::unique_ptr< Module > linkModules(LLVMContext &Ctx, SmallVector< std::unique_ptr< Module > > LinkedModules)
Definition CoreLLVM.hpp:213
Definition CoreLLVM.hpp:172
InitLLVMTargets()
Definition CoreLLVM.hpp:173