Proteus
Programmable JIT compilation and optimization for C/C++ using LLVM
Loading...
Searching...
No Matches
CoreLLVMHIP.h
Go to the documentation of this file.
1#ifndef PROTEUS_CORE_LLVM_HIP_H
2#define PROTEUS_CORE_LLVM_HIP_H
3
4#include "proteus/Error.h"
11
12#include <llvm/Bitcode/BitcodeWriter.h>
13#include <llvm/CodeGen/MachineModuleInfo.h>
14#include <llvm/IR/DiagnosticPrinter.h>
15#include <llvm/IR/Function.h>
16#include <llvm/IR/LegacyPassManager.h>
17#include <llvm/IR/Module.h>
18#include <llvm/IR/Verifier.h>
19#include <llvm/LTO/LTO.h>
20#include <llvm/MC/MCSubtargetInfo.h>
21#include <llvm/Support/CodeGen.h>
22#include <llvm/Support/FileSystem.h>
23#include <llvm/Support/MemoryBuffer.h>
24#include <llvm/Support/Path.h>
25#include <llvm/Support/Signals.h>
26#include <llvm/Support/TargetSelect.h>
27#include <llvm/Support/WithColor.h>
28#include <llvm/Target/TargetMachine.h>
29
30#if LLVM_VERSION_MAJOR >= 18
31#include <lld/Common/Driver.h>
32LLD_HAS_DRIVER(elf)
33#endif
34
35namespace proteus {
36
37using namespace llvm;
38
39namespace detail {
40
41inline const SmallVector<StringRef> &gridDimXFnName() {
42 static SmallVector<StringRef> Names = {
43 "_ZNK17__HIP_CoordinatesI13__HIP_GridDimE3__XcvjEv",
44 "llvm.amdgcn.num.workgroups.x", "_ZL20__hip_get_grid_dim_xv"};
45 return Names;
46}
47
48inline const SmallVector<StringRef> &gridDimYFnName() {
49 static SmallVector<StringRef> Names = {
50 "_ZNK17__HIP_CoordinatesI13__HIP_GridDimE3__YcvjEv",
51 "llvm.amdgcn.num.workgroups.y", "_ZL20__hip_get_grid_dim_yv"};
52 return Names;
53}
54
55inline const SmallVector<StringRef> &gridDimZFnName() {
56 static SmallVector<StringRef> Names = {
57 "_ZNK17__HIP_CoordinatesI13__HIP_GridDimE3__ZcvjEv",
58 "llvm.amdgcn.num.workgroups.z", "_ZL20__hip_get_grid_dim_zv"};
59 return Names;
60}
61
62inline const SmallVector<StringRef> &blockDimXFnName() {
63 static SmallVector<StringRef> Names = {
64 "_ZNK17__HIP_CoordinatesI14__HIP_BlockDimE3__XcvjEv",
65 "llvm.amdgcn.workgroup.size.x", "_ZL21__hip_get_block_dim_xv"};
66 return Names;
67}
68
69inline const SmallVector<StringRef> &blockDimYFnName() {
70 static SmallVector<StringRef> Names = {
71 "_ZNK17__HIP_CoordinatesI14__HIP_BlockDimE3__YcvjEv",
72 "llvm.amdgcn.workgroup.size.y", "_ZL21__hip_get_block_dim_yv"};
73 return Names;
74}
75
76inline const SmallVector<StringRef> &blockDimZFnName() {
77 static SmallVector<StringRef> Names = {
78 "_ZNK17__HIP_CoordinatesI14__HIP_BlockDimE3__ZcvjEv",
79 "llvm.amdgcn.workgroup.size.z", "_ZL21__hip_get_block_dim_zv"};
80 return Names;
81}
82
83inline const SmallVector<StringRef> &blockIdxXFnName() {
84 static SmallVector<StringRef> Names = {
85 "_ZNK17__HIP_CoordinatesI14__HIP_BlockIdxE3__XcvjEv",
86 "llvm.amdgcn.workgroup.id.x"};
87 return Names;
88};
89
90inline const SmallVector<StringRef> &blockIdxYFnName() {
91 static SmallVector<StringRef> Names = {
92 "_ZNK17__HIP_CoordinatesI14__HIP_BlockIdxE3__YcvjEv",
93 "llvm.amdgcn.workgroup.id.y"};
94 return Names;
95};
96
97inline const SmallVector<StringRef> &blockIdxZFnName() {
98 static SmallVector<StringRef> Names = {
99 "_ZNK17__HIP_CoordinatesI14__HIP_BlockIdxE3__ZcvjEv",
100 "llvm.amdgcn.workgroup.id.z"};
101 return Names;
102}
103
104inline const SmallVector<StringRef> &threadIdxXFnName() {
105 static SmallVector<StringRef> Names = {
106 "_ZNK17__HIP_CoordinatesI15__HIP_ThreadIdxE3__XcvjEv",
107 "llvm.amdgcn.workitem.id.x"};
108 return Names;
109};
110
111inline const SmallVector<StringRef> &threadIdxYFnName() {
112 static SmallVector<StringRef> Names = {
113 "_ZNK17__HIP_CoordinatesI15__HIP_ThreadIdxE3__YcvjEv",
114 "llvm.amdgcn.workitem.id.y"};
115 return Names;
116};
117
118inline const SmallVector<StringRef> &threadIdxZFnName() {
119 static SmallVector<StringRef> Names = {
120 "_ZNK17__HIP_CoordinatesI15__HIP_ThreadIdxE3__ZcvjEv",
121 "llvm.amdgcn.workitem.id.z"};
122 return Names;
123};
124
125inline Expected<sys::fs::TempFile> createTempFile(StringRef Prefix,
126 StringRef Suffix) {
127 SmallString<128> TmpDir;
128 sys::path::system_temp_directory(true, TmpDir);
129
130 SmallString<64> FileName;
131 FileName.append(Prefix);
132 FileName.append(Suffix.empty() ? "-%%%%%%%" : "-%%%%%%%.");
133 FileName.append(Suffix);
134 sys::path::append(TmpDir, FileName);
135 return sys::fs::TempFile::create(TmpDir);
136}
137
138#if LLVM_VERSION_MAJOR >= 18
139inline SmallVector<std::unique_ptr<sys::fs::TempFile>>
140codegenSerial(Module &M, StringRef DeviceArch,
141 [[maybe_unused]] char OptLevel = '3', int CodegenOptLevel = 3) {
142 SmallVector<std::unique_ptr<sys::fs::TempFile>> ObjectFiles;
143
144 auto ExpectedTM =
145 proteus::detail::createTargetMachine(M, DeviceArch, CodegenOptLevel);
146 if (!ExpectedTM)
147 reportFatalError(toString(ExpectedTM.takeError()));
148
149 std::unique_ptr<TargetMachine> TM = std::move(*ExpectedTM);
150 TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
151 M.setDataLayout(TM->createDataLayout());
152
153 legacy::PassManager PM;
154 PM.add(new TargetLibraryInfoWrapperPass(TLII));
155 MachineModuleInfoWrapperPass *MMIWP =
156#if LLVM_VERSION_MAJOR >= 20
157 new MachineModuleInfoWrapperPass(TM.get());
158#else
159 new MachineModuleInfoWrapperPass(
160 reinterpret_cast<LLVMTargetMachine *>(TM.get()));
161#endif
162
163 SmallVector<char, 4096> ObjectCode;
164 raw_svector_ostream OS(ObjectCode);
165 auto ExpectedF = createTempFile("object", "o");
166 if (auto E = ExpectedF.takeError())
167 reportFatalError("Error creating object tmp file " +
168 toString(std::move(E)));
169 auto ObjectFile = std::move(*ExpectedF);
170 auto FileStream = std::make_unique<CachedFileStream>(
171 std::make_unique<llvm::raw_fd_ostream>(ObjectFile.FD, false));
172 TM->addPassesToEmitFile(PM, *FileStream->OS, nullptr,
173 CodeGenFileType::ObjectFile,
174 /* DisableVerify */ true, MMIWP);
175
176 std::unique_ptr<sys::fs::TempFile> ObjectFilePtr =
177 std::make_unique<sys::fs::TempFile>(std::move(ObjectFile));
178 ObjectFiles.emplace_back(std::move(ObjectFilePtr));
179
180 PM.run(M);
181
182 return ObjectFiles;
183}
184
185inline SmallVector<std::unique_ptr<sys::fs::TempFile>>
186codegenParallel(Module &M, StringRef DeviceArch, unsigned int OptLevel = 3,
187 int CodegenOptLevel = 3) {
188 // Use regular LTO with parallelism enabled to parallelize codegen.
189 std::atomic<bool> LTOError = false;
190
191 auto DiagnosticHandler = [&](const DiagnosticInfo &DI) {
192 std::string ErrStorage;
193 raw_string_ostream OS(ErrStorage);
194 DiagnosticPrinterRawOStream DP(OS);
195 DI.print(DP);
196
197 switch (DI.getSeverity()) {
198 case DS_Error:
199 WithColor::error(errs(), "[proteus codegen]") << ErrStorage << "\n";
200 LTOError = true;
201 break;
202 case DS_Warning:
203 WithColor::warning(errs(), "[proteus codegen]") << ErrStorage << "\n";
204 break;
205 case DS_Note:
206 WithColor::note(errs(), "[proteus codegen]") << ErrStorage << "\n";
207 break;
208 case DS_Remark:
209 WithColor::remark(errs()) << ErrStorage << "\n";
210 break;
211 }
212 };
213
214 // Create TargetMachine and extract options/features.
215 auto ExpectedTM =
216 proteus::detail::createTargetMachine(M, DeviceArch, CodegenOptLevel);
217 if (!ExpectedTM)
218 reportFatalError(toString(ExpectedTM.takeError()));
219 std::unique_ptr<TargetMachine> TM = std::move(*ExpectedTM);
220
221 lto::Config Conf;
222 Conf.CPU = DeviceArch;
223
224 // Propagate attributes from TargetMachine to LTO Config.
225 std::string FeatureStr = TM->getMCSubtargetInfo()->getFeatureString().str();
226 if (!FeatureStr.empty()) {
227 SmallVector<StringRef> Features;
228 StringRef(FeatureStr).split(Features, ',');
229 for (auto &F : Features)
230 Conf.MAttrs.push_back(F.str());
231 } else {
232 Conf.MAttrs = {};
233 }
234
235 // FIX: Propagate TargetOptions (e.g. UnsafeFPMath, etc.)
236 Conf.Options = TM->Options;
237
238 Conf.DisableVerify = true;
239 Conf.TimeTraceEnabled = false;
240 Conf.DebugPassManager = false;
241 Conf.VerifyEach = false;
242 Conf.DiagHandler = DiagnosticHandler;
243 Conf.OptLevel = OptLevel;
244 Conf.CGOptLevel = static_cast<CodeGenOptLevel>(CodegenOptLevel);
245
246 unsigned ParallelCodeGenParallelismLevel =
247 std::max(1u, std::thread::hardware_concurrency());
248 lto::LTO L(std::move(Conf), {}, ParallelCodeGenParallelismLevel);
249
250 // Ensure module has the correct DataLayout prior to emitting bitcode.
251 M.setDataLayout(TM->createDataLayout());
252
253 SmallString<0> BitcodeBuf;
254 raw_svector_ostream BitcodeOS(BitcodeBuf);
255 WriteBitcodeToFile(M, BitcodeOS);
256
257 // TODO: Module identifier can be empty because you always have on module to
258 // link. However, in the general case, with multiple modules, each one must
259 // have a unique identifier for LTO to work correctly.
260 auto IF = cantFail(lto::InputFile::create(
261 MemoryBufferRef{BitcodeBuf, M.getModuleIdentifier()}));
262
263 std::set<std::string> PrevailingSymbols;
264 auto BuildResolutions = [&]() {
265 // Save the input file and the buffer associated with its memory.
266 const auto Symbols = IF->symbols();
267 SmallVector<lto::SymbolResolution, 16> Resolutions(Symbols.size());
268 size_t SymbolIdx = 0;
269 for (auto &Sym : Symbols) {
270 lto::SymbolResolution &Res = Resolutions[SymbolIdx];
271 SymbolIdx++;
272
273 // All defined symbols are prevailing.
274 Res.Prevailing = !Sym.isUndefined() &&
275 PrevailingSymbols.insert(Sym.getName().str()).second;
276
277 Res.VisibleToRegularObj =
278 Res.Prevailing &&
279 Sym.getVisibility() != GlobalValue::HiddenVisibility &&
280 !Sym.canBeOmittedFromSymbolTable();
281
282 Res.ExportDynamic =
283 Sym.getVisibility() != GlobalValue::HiddenVisibility &&
284 (!Sym.canBeOmittedFromSymbolTable());
285
286 Res.FinalDefinitionInLinkageUnit =
287 Sym.getVisibility() != GlobalValue::DefaultVisibility &&
288 (!Sym.isUndefined() && !Sym.isCommon());
289
290 // Device linking does not support linker redefined symbols (e.g.
291 // --wrap).
292 Res.LinkerRedefined = false;
293
295 auto PrintSymbol = [](const lto::InputFile::Symbol &Sym,
296 lto::SymbolResolution &Res) {
297 auto &OutStream = Logger::logs("proteus");
298 OutStream << "Vis: ";
299 switch (Sym.getVisibility()) {
300 case GlobalValue::HiddenVisibility:
301 OutStream << 'H';
302 break;
303 case GlobalValue::ProtectedVisibility:
304 OutStream << 'P';
305 break;
306 case GlobalValue::DefaultVisibility:
307 OutStream << 'D';
308 break;
309 }
310
311 OutStream << " Sym: ";
312 auto PrintBool = [&](char C, bool B) { OutStream << (B ? C : '-'); };
313 PrintBool('U', Sym.isUndefined());
314 PrintBool('C', Sym.isCommon());
315 PrintBool('W', Sym.isWeak());
316 PrintBool('I', Sym.isIndirect());
317 PrintBool('O', Sym.canBeOmittedFromSymbolTable());
318 PrintBool('T', Sym.isTLS());
319 PrintBool('X', Sym.isExecutable());
320 OutStream << ' ' << Sym.getName();
321 OutStream << "| P " << Res.Prevailing;
322 OutStream << " V " << Res.VisibleToRegularObj;
323 OutStream << " E " << Res.ExportDynamic;
324 OutStream << " F " << Res.FinalDefinitionInLinkageUnit;
325 OutStream << "\n";
326 };
327
328 PrintSymbol(Sym, Res);
329 }
330 }
331
332 // Add the bitcode file with its resolved symbols to the LTO job.
333 cantFail(L.add(std::move(IF), Resolutions));
334 };
335
336 BuildResolutions();
337
338 // Run the LTO job to compile the bitcode.
339 size_t MaxTasks = L.getMaxTasks();
340 SmallVector<std::unique_ptr<sys::fs::TempFile>> ObjectFiles{MaxTasks};
341
342 auto AddStream =
343 [&](size_t Task,
344 const Twine & /*ModuleName*/) -> std::unique_ptr<CachedFileStream> {
345 std::string TaskStr = Task ? "." + std::to_string(Task) : "";
346 auto ExpectedF = createTempFile("lto-shard" + TaskStr, "o");
347 if (auto E = ExpectedF.takeError())
348 reportFatalError("Error creating tmp file " + toString(std::move(E)));
349 ObjectFiles[Task] =
350 std::make_unique<sys::fs::TempFile>(std::move(*ExpectedF));
351 auto Ret = std::make_unique<CachedFileStream>(
352 std::make_unique<llvm::raw_fd_ostream>(ObjectFiles[Task]->FD, false));
353 if (!Ret)
354 reportFatalError("Error creating CachedFileStream");
355 return Ret;
356 };
357
358 if (Error E = L.run(AddStream))
359 reportFatalError("Error: " + toString(std::move(E)));
360
361 if (LTOError)
363 createStringError(inconvertibleErrorCode(),
364 "Errors encountered inside the LTO pipeline.")));
365
366 return ObjectFiles;
367}
368#endif
369
370inline std::unique_ptr<MemoryBuffer> codegenRTC(Module &M,
371 StringRef DeviceArch) {
372 char *BinOut;
373 size_t BinSize;
374
375 SmallString<4096> ModuleBuf;
376 raw_svector_ostream ModuleBufOS(ModuleBuf);
377 WriteBitcodeToFile(M, ModuleBufOS);
378
379 hiprtcLinkState HipLinkStatePtr;
380
381 // NOTE: This code is an example of passing custom, AMD-specific
382 // options to the compiler/linker.
383 // NOTE: Unrolling can have a dramatic (time-consuming) effect on JIT
384 // compilation time and on the resulting optimization, better or worse
385 // depending on code specifics.
386 std::string MArchOpt = ("-march=" + DeviceArch).str();
387
388 // NOTE: We used to pass these options as well. "-mllvm",
389 // "-unroll-threshold=1000",
390 // We removed them cause we saw on bezier they cause slowdowns
391 const char *OptArgs[] = {MArchOpt.c_str()};
392 std::vector<hiprtcJIT_option> JITOptions = {
393 HIPRTC_JIT_IR_TO_ISA_OPT_EXT, HIPRTC_JIT_IR_TO_ISA_OPT_COUNT_EXT};
394 size_t OptArgsSize = 1;
395 const void *JITOptionsValues[] = {(void *)OptArgs, (void *)(OptArgsSize)};
396 proteusHiprtcErrCheck(hiprtcLinkCreate(JITOptions.size(), JITOptions.data(),
397 (void **)JITOptionsValues,
398 &HipLinkStatePtr));
399 // NOTE: the following version of te code does not set options.
400 // proteusHiprtcErrCheck(hiprtcLinkCreate(0, nullptr, nullptr,
401 // &hip_link_state_ptr));
402
403 proteusHiprtcErrCheck(hiprtcLinkAddData(
404 HipLinkStatePtr, HIPRTC_JIT_INPUT_LLVM_BITCODE, (void *)ModuleBuf.data(),
405 ModuleBuf.size(), "", 0, nullptr, nullptr));
407 hiprtcLinkComplete(HipLinkStatePtr, (void **)&BinOut, &BinSize));
408
409 return MemoryBuffer::getMemBuffer(StringRef{BinOut, BinSize});
410}
411
412} // namespace detail
413
414inline void setLaunchBoundsForKernel(Function &F, int MaxNumWorkGroups,
415 int MinBlocksPerSM = 0) {
416 // TODO: fix calculation of launch bounds.
417 // TODO: find maximum (hardcoded 1024) from device info.
418 // TODO: Setting as 1, BlockSize to replicate launch bounds settings
419 F.addFnAttr("amdgpu-flat-work-group-size",
420 "1," + std::to_string(std::min(1024, MaxNumWorkGroups)));
421 // F->addFnAttr("amdgpu-waves-per-eu", std::to_string(WavesPerEU));
422 if (MinBlocksPerSM != 0) {
423 // NOTE: We are missing a heuristic to define the `WavesPerEU`, as such we
424 // still need to study it. I restrict the waves by setting min equal to max
425 // and disallowing any heuristics that HIP will use internally.
426 // For more information please check:
427 // https://clang.llvm.org/docs/AttributeReference.html#amdgpu-waves-per-eu
428 F.addFnAttr("amdgpu-waves-per-eu", std::to_string(MinBlocksPerSM) + "," +
429 std::to_string(MinBlocksPerSM));
430 }
431
432 PROTEUS_DBG(Logger::logs("proteus")
433 << " => Set Workgroup size " << MaxNumWorkGroups
434 << " WavesPerEU (unused) " << MinBlocksPerSM << "\n");
435}
436
437inline std::unique_ptr<MemoryBuffer>
438codegenObject(Module &M, StringRef DeviceArch,
439 [[maybe_unused]] SmallPtrSetImpl<void *> &GlobalLinkedBinaries,
441 assert(GlobalLinkedBinaries.empty() &&
442 "Expected empty linked binaries for HIP");
443 Timer T;
444 SmallVector<std::unique_ptr<sys::fs::TempFile>> ObjectFiles;
445 switch (CGOption) {
446 case CodegenOption::RTC: {
447 auto Ret = detail::codegenRTC(M, DeviceArch);
449 << "Codegen RTC " << T.elapsed() << " ms\n");
450 return Ret;
451 }
452#if LLVM_VERSION_MAJOR >= 18
454 ObjectFiles = detail::codegenSerial(M, DeviceArch);
455 break;
457 ObjectFiles = detail::codegenParallel(M, DeviceArch);
458 break;
459#endif
460 default:
461 reportFatalError("Unknown Codegen Option");
462 }
463
464 if (ObjectFiles.empty())
465 reportFatalError("Expected non-empty vector of object files");
466
467#if LLVM_VERSION_MAJOR >= 18
468 auto ExpectedF = detail::createTempFile("proteus-jit", "o");
469 if (auto E = ExpectedF.takeError())
470 reportFatalError("Error creating shared object file " +
471 toString(std::move(E)));
472
473 auto SharedObject = std::move(*ExpectedF);
474
475 std::vector<const char *> Args{"ld.lld", "--no-undefined", "-shared", "-o",
476 SharedObject.TmpName.c_str()};
477 for (auto &File : ObjectFiles) {
478 if (!File)
479 continue;
480 Args.push_back(File->TmpName.c_str());
481 }
482
483 if (Config::get().ProteusDebugOutput) {
484 for (auto &Arg : Args) {
485 Logger::logs("proteus") << Arg << " ";
486 }
487 Logger::logs("proteus") << "\n";
488 }
489
491 << "Codegen object " << toString(CGOption) << "["
492 << ObjectFiles.size() << "] " << T.elapsed() << " ms\n");
493
494 T.reset();
495 // The LLD linker interface is not thread-safe, so we use a mutex.
496 static std::mutex Mutex;
497 {
498 std::lock_guard LockGuard{Mutex};
499 lld::Result S = lld::lldMain(Args, llvm::outs(), llvm::errs(),
500 {{lld::Gnu, &lld::elf::link}});
501 if (S.retCode)
502 reportFatalError("Error: lld failed");
503 }
504
505 ErrorOr<std::unique_ptr<MemoryBuffer>> Buffer =
506 MemoryBuffer::getFileAsStream(SharedObject.TmpName);
507 if (!Buffer)
508 reportFatalError("Error reading file: " + Buffer.getError().message());
509
510 // Remove temporary files.
511 for (auto &File : ObjectFiles) {
512 if (!File)
513 continue;
514 if (auto E = File->discard())
515 reportFatalError("Error removing object tmp file " +
516 toString(std::move(E)));
517 }
518 if (auto E = SharedObject.discard())
519 reportFatalError("Error removing shared object tmp file " +
520 toString(std::move(E)));
521
523 << "Codegen linking " << T.elapsed() << " ms\n");
524
525 return std::move(*Buffer);
526#else
527 reportFatalError("Expected LLVM18 for non-RTC codegen");
528#endif
529}
530
531} // namespace proteus
532
533#endif
char int void ** Args
Definition CompilerInterfaceHost.cpp:22
#define PROTEUS_DBG(x)
Definition Debug.h:9
#define PROTEUS_TIMER_OUTPUT(x)
Definition TimeTracing.h:54
#define proteusHiprtcErrCheck(CALL)
Definition UtilsHIP.h:28
static Config & get()
Definition Config.h:334
bool ProteusDebugOutput
Definition Config.h:350
static llvm::raw_ostream & outs(const std::string &Name)
Definition Logger.h:25
static llvm::raw_ostream & logs(const std::string &Name)
Definition Logger.h:19
Definition TimeTracing.h:40
void reset()
Definition TimeTracing.cpp:57
uint64_t elapsed()
Definition TimeTracing.cpp:51
Definition CompiledLibrary.h:7
const SmallVector< StringRef > & threadIdxXFnName()
Definition CoreLLVMCUDA.h:70
const SmallVector< StringRef > & gridDimYFnName()
Definition CoreLLVMCUDA.h:30
const SmallVector< StringRef > & threadIdxZFnName()
Definition CoreLLVMCUDA.h:80
const SmallVector< StringRef > & blockIdxZFnName()
Definition CoreLLVMCUDA.h:65
const SmallVector< StringRef > & gridDimZFnName()
Definition CoreLLVMCUDA.h:35
std::unique_ptr< MemoryBuffer > codegenRTC(Module &M, StringRef DeviceArch)
Definition CoreLLVMHIP.h:370
const SmallVector< StringRef > & gridDimXFnName()
Definition CoreLLVMCUDA.h:25
const SmallVector< StringRef > & blockIdxXFnName()
Definition CoreLLVMCUDA.h:55
Expected< std::unique_ptr< TargetMachine > > createTargetMachine(Module &M, StringRef Arch, unsigned OptLevel=3)
Definition CoreLLVM.h:52
const SmallVector< StringRef > & threadIdxYFnName()
Definition CoreLLVMCUDA.h:75
Expected< sys::fs::TempFile > createTempFile(StringRef Prefix, StringRef Suffix)
Definition CoreLLVMHIP.h:125
const SmallVector< StringRef > & blockIdxYFnName()
Definition CoreLLVMCUDA.h:60
const SmallVector< StringRef > & blockDimYFnName()
Definition CoreLLVMCUDA.h:45
const SmallVector< StringRef > & blockDimZFnName()
Definition CoreLLVMCUDA.h:50
const SmallVector< StringRef > & blockDimXFnName()
Definition CoreLLVMCUDA.h:40
Definition MemoryCache.h:26
void setLaunchBoundsForKernel(Function &F, int MaxThreadsPerSM, int MinBlocksPerSM=0)
Definition CoreLLVMCUDA.h:87
void reportFatalError(const llvm::Twine &Reason, const char *FILE, unsigned Line)
Definition Error.cpp:14
CodegenOption
Definition Config.h:16
std::unique_ptr< MemoryBuffer > codegenObject(Module &M, StringRef DeviceArch, SmallPtrSetImpl< void * > &GlobalLinkedBinaries, CodegenOption CGOption=CodegenOption::RTC)
Definition CoreLLVMCUDA.h:165
std::string toString(CodegenOption Option)
Definition Config.h:28