1#ifndef PROTEUS_CORE_LLVM_HIP_HPP
2#define PROTEUS_CORE_LLVM_HIP_HPP
4#include <llvm/Bitcode/BitcodeWriter.h>
5#include <llvm/CodeGen/MachineModuleInfo.h>
6#include <llvm/IR/DiagnosticPrinter.h>
7#include <llvm/IR/Function.h>
8#include <llvm/IR/LegacyPassManager.h>
9#include <llvm/IR/Module.h>
10#include <llvm/IR/Verifier.h>
11#include <llvm/LTO/LTO.h>
12#include <llvm/Support/CodeGen.h>
13#include <llvm/Support/FileSystem.h>
14#include <llvm/Support/MemoryBuffer.h>
15#include <llvm/Support/Path.h>
16#include <llvm/Support/Signals.h>
17#include <llvm/Support/TargetSelect.h>
18#include <llvm/Support/WithColor.h>
19#include <llvm/Target/TargetMachine.h>
21#if LLVM_VERSION_MAJOR >= 18
22#include <lld/Common/Driver.h>
42 "_ZNK17__HIP_CoordinatesI13__HIP_GridDimE3__XcvjEv",
43 "llvm.amdgcn.num.workgroups.x",
"_ZL20__hip_get_grid_dim_xv"};
49 "_ZNK17__HIP_CoordinatesI13__HIP_GridDimE3__YcvjEv",
50 "llvm.amdgcn.num.workgroups.y",
"_ZL20__hip_get_grid_dim_yv"};
56 "_ZNK17__HIP_CoordinatesI13__HIP_GridDimE3__ZcvjEv",
57 "llvm.amdgcn.num.workgroups.z",
"_ZL20__hip_get_grid_dim_zv"};
63 "_ZNK17__HIP_CoordinatesI14__HIP_BlockDimE3__XcvjEv",
64 "llvm.amdgcn.workgroup.size.x",
"_ZL21__hip_get_block_dim_xv"};
70 "_ZNK17__HIP_CoordinatesI14__HIP_BlockDimE3__YcvjEv",
71 "llvm.amdgcn.workgroup.size.y",
"_ZL21__hip_get_block_dim_yv"};
77 "_ZNK17__HIP_CoordinatesI14__HIP_BlockDimE3__ZcvjEv",
78 "llvm.amdgcn.workgroup.size.z",
"_ZL21__hip_get_block_dim_zv"};
84 "_ZNK17__HIP_CoordinatesI14__HIP_BlockIdxE3__XcvjEv",
85 "llvm.amdgcn.workgroup.id.x"};
91 "_ZNK17__HIP_CoordinatesI14__HIP_BlockIdxE3__YcvjEv",
92 "llvm.amdgcn.workgroup.id.y"};
98 "_ZNK17__HIP_CoordinatesI14__HIP_BlockIdxE3__ZcvjEv",
99 "llvm.amdgcn.workgroup.id.z"};
105 "_ZNK17__HIP_CoordinatesI15__HIP_ThreadIdxE3__XcvjEv",
106 "llvm.amdgcn.workitem.id.x"};
112 "_ZNK17__HIP_CoordinatesI15__HIP_ThreadIdxE3__YcvjEv",
113 "llvm.amdgcn.workitem.id.y"};
119 "_ZNK17__HIP_CoordinatesI15__HIP_ThreadIdxE3__ZcvjEv",
120 "llvm.amdgcn.workitem.id.z"};
127 sys::path::system_temp_directory(
true,
TmpDir);
131 FileName.append(Suffix.empty() ?
"-%%%%%%%" :
"-%%%%%%%.");
134 return sys::fs::TempFile::create(
TmpDir);
137#if LLVM_VERSION_MAJOR >= 18
140 [[
maybe_unused]]
char OptLevel =
'3',
int CodegenOptLevel = 3) {
148 std::unique_ptr<TargetMachine>
TM = std::move(*
ExpectedTM);
150 M.setDataLayout(
TM->createDataLayout());
152 legacy::PassManager
PM;
164 auto FileStream = std::make_unique<CachedFileStream>(
165 std::make_unique<llvm::raw_fd_ostream>(
ObjectFile.FD,
false));
167 CodeGenFileType::ObjectFile,
171 std::make_unique<sys::fs::TempFile>(std::move(
ObjectFile));
181 int CodegenOptLevel = 3) {
191 switch (
DI.getSeverity()) {
193 WithColor::error(
errs(),
"[proteus codegen]") <<
ErrStorage <<
"\n";
197 WithColor::warning(
errs(),
"[proteus codegen]") <<
ErrStorage <<
"\n";
200 WithColor::note(
errs(),
"[proteus codegen]") <<
ErrStorage <<
"\n";
209 Conf.CPU = DeviceArch;
212 Conf.DisableVerify =
true;
213 Conf.TimeTraceEnabled =
false;
214 Conf.DebugPassManager =
false;
215 Conf.VerifyEach =
false;
217 Conf.OptLevel = OptLevel;
221 std::max(1u, std::thread::hardware_concurrency());
229 std::unique_ptr<TargetMachine>
TM = std::move(*
ExpectedTM);
230 M.setDataLayout(
TM->createDataLayout());
239 auto IF =
cantFail(lto::InputFile::create(
245 const auto Symbols = IF->symbols();
253 Res.Prevailing = !
Sym.isUndefined() &&
256 Res.VisibleToRegularObj =
258 Sym.getVisibility() != GlobalValue::HiddenVisibility &&
259 !
Sym.canBeOmittedFromSymbolTable();
262 Sym.getVisibility() != GlobalValue::HiddenVisibility &&
263 (!
Sym.canBeOmittedFromSymbolTable());
265 Res.FinalDefinitionInLinkageUnit =
266 Sym.getVisibility() != GlobalValue::DefaultVisibility &&
267 (!
Sym.isUndefined() && !
Sym.isCommon());
271 Res.LinkerRedefined =
false;
275 lto::SymbolResolution &
Res) {
277 OutStream <<
"Vis: ";
278 switch (
Sym.getVisibility()) {
279 case GlobalValue::HiddenVisibility:
282 case GlobalValue::ProtectedVisibility:
285 case GlobalValue::DefaultVisibility:
290 OutStream <<
" Sym: ";
291 auto PrintBool = [&](
char C,
bool B) { OutStream << (
B ?
C :
'-'); };
299 OutStream <<
' ' <<
Sym.getName();
300 OutStream <<
"| P " <<
Res.Prevailing;
301 OutStream <<
" V " <<
Res.VisibleToRegularObj;
302 OutStream <<
" E " <<
Res.ExportDynamic;
303 OutStream <<
" F " <<
Res.FinalDefinitionInLinkageUnit;
323 const Twine & ) -> std::unique_ptr<CachedFileStream> {
329 std::make_unique<sys::fs::TempFile>(std::move(*
ExpectedF));
330 auto Ret = std::make_unique<CachedFileStream>(
343 "Errors encountered inside the LTO pipeline.")));
365 std::string
MArchOpt = (
"-march=" + DeviceArch).
str();
384 ModuleBuf.size(),
"", 0,
nullptr,
nullptr));
398 F.addFnAttr(
"amdgpu-flat-work-group-size",
407 F.addFnAttr(
"amdgpu-waves-per-eu",
413 <<
" WavesPerEU (unused) " <<
WavesPerEU <<
"\n");
416inline std::unique_ptr<MemoryBuffer>
420 assert(GlobalLinkedBinaries.empty() &&
421 "Expected empty linked binaries for HIP");
428 <<
"Codegen RTC " <<
T.elapsed() <<
" ms\n");
431#if LLVM_VERSION_MAJOR >= 18
433 ObjectFiles = detail::codegenSerial(M, DeviceArch);
436 ObjectFiles = detail::codegenParallel(M, DeviceArch);
446#if LLVM_VERSION_MAJOR >= 18
454 std::vector<const char *>
Args{
"ld.lld",
"--no-undefined",
"-shared",
"-o",
459 Args.push_back(File->TmpName.c_str());
470 <<
"Codegen object " <<
toString(CGOption) <<
"["
471 <<
ObjectFiles.size() <<
"] " <<
T.elapsed() <<
" ms\n");
475 static std::mutex Mutex;
479 {{lld::Gnu, &lld::elf::link}});
493 if (
auto E = File->discard())
502 <<
"Codegen linking " <<
T.elapsed() <<
" ms\n");
504 return std::move(*
Buffer);
char int void ** Args
Definition CompilerInterfaceHost.cpp:21
#define PROTEUS_DBG(x)
Definition Debug.h:9
#define PROTEUS_FATAL_ERROR(x)
Definition Error.h:7
#define PROTEUS_TIMER_OUTPUT(x)
Definition TimeTracing.hpp:57
#define proteusHiprtcErrCheck(CALL)
Definition UtilsHIP.h:28
static Config & get()
Definition Config.hpp:284
bool ProteusDebugOutput
Definition Config.hpp:300
static llvm::raw_ostream & outs(const std::string &Name)
Definition Logger.hpp:25
static llvm::raw_ostream & logs(const std::string &Name)
Definition Logger.hpp:19
Definition TimeTracing.hpp:36
const SmallVector< StringRef > & threadIdxXFnName()
Definition CoreLLVMCUDA.hpp:70
const SmallVector< StringRef > & gridDimYFnName()
Definition CoreLLVMCUDA.hpp:30
const SmallVector< StringRef > & threadIdxZFnName()
Definition CoreLLVMCUDA.hpp:80
const SmallVector< StringRef > & blockIdxZFnName()
Definition CoreLLVMCUDA.hpp:65
const SmallVector< StringRef > & gridDimZFnName()
Definition CoreLLVMCUDA.hpp:35
std::unique_ptr< MemoryBuffer > codegenRTC(Module &M, StringRef DeviceArch)
Definition CoreLLVMHIP.hpp:349
const SmallVector< StringRef > & gridDimXFnName()
Definition CoreLLVMCUDA.hpp:25
const SmallVector< StringRef > & blockIdxXFnName()
Definition CoreLLVMCUDA.hpp:55
Expected< std::unique_ptr< TargetMachine > > createTargetMachine(Module &M, StringRef Arch, unsigned OptLevel=3)
Definition CoreLLVM.hpp:52
const SmallVector< StringRef > & threadIdxYFnName()
Definition CoreLLVMCUDA.hpp:75
Expected< sys::fs::TempFile > createTempFile(StringRef Prefix, StringRef Suffix)
Definition CoreLLVMHIP.hpp:124
const SmallVector< StringRef > & blockIdxYFnName()
Definition CoreLLVMCUDA.hpp:60
const SmallVector< StringRef > & blockDimYFnName()
Definition CoreLLVMCUDA.hpp:45
const SmallVector< StringRef > & blockDimZFnName()
Definition CoreLLVMCUDA.hpp:50
const SmallVector< StringRef > & blockDimXFnName()
Definition CoreLLVMCUDA.hpp:40
Definition BuiltinsCUDA.cpp:4
void setLaunchBoundsForKernel(Function &F, int MaxThreadsPerSM, int MinBlocksPerSM=0)
Definition CoreLLVMCUDA.hpp:87
T getRuntimeConstantValue(void *Arg)
Definition CompilerInterfaceRuntimeConstantInfo.h:114
CodegenOption
Definition Config.hpp:14
std::unique_ptr< MemoryBuffer > codegenObject(Module &M, StringRef DeviceArch, SmallPtrSetImpl< void * > &GlobalLinkedBinaries, CodegenOption CGOption=CodegenOption::RTC)
Definition CoreLLVMCUDA.hpp:160
std::string toString(CodegenOption Option)
Definition Config.hpp:26