1#ifndef PROTEUS_CORE_LLVM_HIP_HPP
2#define PROTEUS_CORE_LLVM_HIP_HPP
12#include <llvm/Bitcode/BitcodeWriter.h>
13#include <llvm/CodeGen/MachineModuleInfo.h>
14#include <llvm/IR/DiagnosticPrinter.h>
15#include <llvm/IR/Function.h>
16#include <llvm/IR/LegacyPassManager.h>
17#include <llvm/IR/Module.h>
18#include <llvm/IR/Verifier.h>
19#include <llvm/LTO/LTO.h>
20#include <llvm/MC/MCSubtargetInfo.h>
21#include <llvm/Support/CodeGen.h>
22#include <llvm/Support/FileSystem.h>
23#include <llvm/Support/MemoryBuffer.h>
24#include <llvm/Support/Path.h>
25#include <llvm/Support/Signals.h>
26#include <llvm/Support/TargetSelect.h>
27#include <llvm/Support/WithColor.h>
28#include <llvm/Target/TargetMachine.h>
30#if LLVM_VERSION_MAJOR >= 18
31#include <lld/Common/Driver.h>
43 "_ZNK17__HIP_CoordinatesI13__HIP_GridDimE3__XcvjEv",
44 "llvm.amdgcn.num.workgroups.x",
"_ZL20__hip_get_grid_dim_xv"};
50 "_ZNK17__HIP_CoordinatesI13__HIP_GridDimE3__YcvjEv",
51 "llvm.amdgcn.num.workgroups.y",
"_ZL20__hip_get_grid_dim_yv"};
57 "_ZNK17__HIP_CoordinatesI13__HIP_GridDimE3__ZcvjEv",
58 "llvm.amdgcn.num.workgroups.z",
"_ZL20__hip_get_grid_dim_zv"};
64 "_ZNK17__HIP_CoordinatesI14__HIP_BlockDimE3__XcvjEv",
65 "llvm.amdgcn.workgroup.size.x",
"_ZL21__hip_get_block_dim_xv"};
71 "_ZNK17__HIP_CoordinatesI14__HIP_BlockDimE3__YcvjEv",
72 "llvm.amdgcn.workgroup.size.y",
"_ZL21__hip_get_block_dim_yv"};
78 "_ZNK17__HIP_CoordinatesI14__HIP_BlockDimE3__ZcvjEv",
79 "llvm.amdgcn.workgroup.size.z",
"_ZL21__hip_get_block_dim_zv"};
85 "_ZNK17__HIP_CoordinatesI14__HIP_BlockIdxE3__XcvjEv",
86 "llvm.amdgcn.workgroup.id.x"};
92 "_ZNK17__HIP_CoordinatesI14__HIP_BlockIdxE3__YcvjEv",
93 "llvm.amdgcn.workgroup.id.y"};
99 "_ZNK17__HIP_CoordinatesI14__HIP_BlockIdxE3__ZcvjEv",
100 "llvm.amdgcn.workgroup.id.z"};
106 "_ZNK17__HIP_CoordinatesI15__HIP_ThreadIdxE3__XcvjEv",
107 "llvm.amdgcn.workitem.id.x"};
113 "_ZNK17__HIP_CoordinatesI15__HIP_ThreadIdxE3__YcvjEv",
114 "llvm.amdgcn.workitem.id.y"};
120 "_ZNK17__HIP_CoordinatesI15__HIP_ThreadIdxE3__ZcvjEv",
121 "llvm.amdgcn.workitem.id.z"};
128 sys::path::system_temp_directory(
true,
TmpDir);
132 FileName.append(Suffix.empty() ?
"-%%%%%%%" :
"-%%%%%%%.");
135 return sys::fs::TempFile::create(
TmpDir);
138#if LLVM_VERSION_MAJOR >= 18
141 [[
maybe_unused]]
char OptLevel =
'3',
int CodegenOptLevel = 3) {
149 std::unique_ptr<TargetMachine>
TM = std::move(*
ExpectedTM);
151 M.setDataLayout(
TM->createDataLayout());
153 legacy::PassManager
PM;
156#if LLVM_VERSION_MAJOR >= 20
170 auto FileStream = std::make_unique<CachedFileStream>(
171 std::make_unique<llvm::raw_fd_ostream>(
ObjectFile.FD,
false));
173 CodeGenFileType::ObjectFile,
177 std::make_unique<sys::fs::TempFile>(std::move(
ObjectFile));
187 int CodegenOptLevel = 3) {
197 switch (
DI.getSeverity()) {
199 WithColor::error(
errs(),
"[proteus codegen]") <<
ErrStorage <<
"\n";
203 WithColor::warning(
errs(),
"[proteus codegen]") <<
ErrStorage <<
"\n";
206 WithColor::note(
errs(),
"[proteus codegen]") <<
ErrStorage <<
"\n";
219 std::unique_ptr<TargetMachine>
TM = std::move(*
ExpectedTM);
222 Conf.CPU = DeviceArch;
225 std::string
FeatureStr =
TM->getMCSubtargetInfo()->getFeatureString().str();
236 Conf.Options =
TM->Options;
238 Conf.DisableVerify =
true;
239 Conf.TimeTraceEnabled =
false;
240 Conf.DebugPassManager =
false;
241 Conf.VerifyEach =
false;
243 Conf.OptLevel = OptLevel;
247 std::max(1u, std::thread::hardware_concurrency());
251 M.setDataLayout(
TM->createDataLayout());
260 auto IF =
cantFail(lto::InputFile::create(
266 const auto Symbols = IF->symbols();
274 Res.Prevailing = !
Sym.isUndefined() &&
277 Res.VisibleToRegularObj =
279 Sym.getVisibility() != GlobalValue::HiddenVisibility &&
280 !
Sym.canBeOmittedFromSymbolTable();
283 Sym.getVisibility() != GlobalValue::HiddenVisibility &&
284 (!
Sym.canBeOmittedFromSymbolTable());
286 Res.FinalDefinitionInLinkageUnit =
287 Sym.getVisibility() != GlobalValue::DefaultVisibility &&
288 (!
Sym.isUndefined() && !
Sym.isCommon());
292 Res.LinkerRedefined =
false;
296 lto::SymbolResolution &
Res) {
298 OutStream <<
"Vis: ";
299 switch (
Sym.getVisibility()) {
300 case GlobalValue::HiddenVisibility:
303 case GlobalValue::ProtectedVisibility:
306 case GlobalValue::DefaultVisibility:
311 OutStream <<
" Sym: ";
312 auto PrintBool = [&](
char C,
bool B) { OutStream << (
B ?
C :
'-'); };
320 OutStream <<
' ' <<
Sym.getName();
321 OutStream <<
"| P " <<
Res.Prevailing;
322 OutStream <<
" V " <<
Res.VisibleToRegularObj;
323 OutStream <<
" E " <<
Res.ExportDynamic;
324 OutStream <<
" F " <<
Res.FinalDefinitionInLinkageUnit;
344 const Twine & ) -> std::unique_ptr<CachedFileStream> {
350 std::make_unique<sys::fs::TempFile>(std::move(*
ExpectedF));
351 auto Ret = std::make_unique<CachedFileStream>(
364 "Errors encountered inside the LTO pipeline.")));
386 std::string
MArchOpt = (
"-march=" + DeviceArch).
str();
405 ModuleBuf.size(),
"", 0,
nullptr,
nullptr));
415 int MinBlocksPerSM = 0) {
419 F.addFnAttr(
"amdgpu-flat-work-group-size",
422 if (MinBlocksPerSM != 0) {
428 F.addFnAttr(
"amdgpu-waves-per-eu", std::to_string(MinBlocksPerSM) +
"," +
429 std::to_string(MinBlocksPerSM));
434 <<
" WavesPerEU (unused) " << MinBlocksPerSM <<
"\n");
437inline std::unique_ptr<MemoryBuffer>
441 assert(GlobalLinkedBinaries.empty() &&
442 "Expected empty linked binaries for HIP");
449 <<
"Codegen RTC " <<
T.elapsed() <<
" ms\n");
452#if LLVM_VERSION_MAJOR >= 18
454 ObjectFiles = detail::codegenSerial(M, DeviceArch);
457 ObjectFiles = detail::codegenParallel(M, DeviceArch);
467#if LLVM_VERSION_MAJOR >= 18
475 std::vector<const char *>
Args{
"ld.lld",
"--no-undefined",
"-shared",
"-o",
480 Args.push_back(File->TmpName.c_str());
491 <<
"Codegen object " <<
toString(CGOption) <<
"["
492 <<
ObjectFiles.size() <<
"] " <<
T.elapsed() <<
" ms\n");
496 static std::mutex Mutex;
500 {{lld::Gnu, &lld::elf::link}});
514 if (
auto E = File->discard())
523 <<
"Codegen linking " <<
T.elapsed() <<
" ms\n");
525 return std::move(*Buffer);
char int void ** Args
Definition CompilerInterfaceHost.cpp:20
#define PROTEUS_DBG(x)
Definition Debug.h:9
#define PROTEUS_TIMER_OUTPUT(x)
Definition TimeTracing.hpp:54
#define proteusHiprtcErrCheck(CALL)
Definition UtilsHIP.h:28
static Config & get()
Definition Config.hpp:300
bool ProteusDebugOutput
Definition Config.hpp:316
static llvm::raw_ostream & outs(const std::string &Name)
Definition Logger.hpp:25
static llvm::raw_ostream & logs(const std::string &Name)
Definition Logger.hpp:19
Definition TimeTracing.hpp:40
const SmallVector< StringRef > & threadIdxXFnName()
Definition CoreLLVMCUDA.hpp:70
const SmallVector< StringRef > & gridDimYFnName()
Definition CoreLLVMCUDA.hpp:30
const SmallVector< StringRef > & threadIdxZFnName()
Definition CoreLLVMCUDA.hpp:80
const SmallVector< StringRef > & blockIdxZFnName()
Definition CoreLLVMCUDA.hpp:65
const SmallVector< StringRef > & gridDimZFnName()
Definition CoreLLVMCUDA.hpp:35
std::unique_ptr< MemoryBuffer > codegenRTC(Module &M, StringRef DeviceArch)
Definition CoreLLVMHIP.hpp:370
const SmallVector< StringRef > & gridDimXFnName()
Definition CoreLLVMCUDA.hpp:25
const SmallVector< StringRef > & blockIdxXFnName()
Definition CoreLLVMCUDA.hpp:55
Expected< std::unique_ptr< TargetMachine > > createTargetMachine(Module &M, StringRef Arch, unsigned OptLevel=3)
Definition CoreLLVM.hpp:52
const SmallVector< StringRef > & threadIdxYFnName()
Definition CoreLLVMCUDA.hpp:75
Expected< sys::fs::TempFile > createTempFile(StringRef Prefix, StringRef Suffix)
Definition CoreLLVMHIP.hpp:125
const SmallVector< StringRef > & blockIdxYFnName()
Definition CoreLLVMCUDA.hpp:60
const SmallVector< StringRef > & blockDimYFnName()
Definition CoreLLVMCUDA.hpp:45
const SmallVector< StringRef > & blockDimZFnName()
Definition CoreLLVMCUDA.hpp:50
const SmallVector< StringRef > & blockDimXFnName()
Definition CoreLLVMCUDA.hpp:40
Definition ObjectCacheChain.cpp:26
void setLaunchBoundsForKernel(Function &F, int MaxThreadsPerSM, int MinBlocksPerSM=0)
Definition CoreLLVMCUDA.hpp:87
void reportFatalError(const llvm::Twine &Reason, const char *FILE, unsigned Line)
Definition Error.cpp:14
T getRuntimeConstantValue(void *Arg)
Definition CompilerInterfaceRuntimeConstantInfo.h:113
CodegenOption
Definition Config.hpp:14
std::unique_ptr< MemoryBuffer > codegenObject(Module &M, StringRef DeviceArch, SmallPtrSetImpl< void * > &GlobalLinkedBinaries, CodegenOption CGOption=CodegenOption::RTC)
Definition CoreLLVMCUDA.hpp:165
std::string toString(CodegenOption Option)
Definition Config.hpp:26