1#ifndef PROTEUS_CORE_LLVM_HIP_HPP
2#define PROTEUS_CORE_LLVM_HIP_HPP
4#include <llvm/Bitcode/BitcodeWriter.h>
5#include <llvm/CodeGen/MachineModuleInfo.h>
6#include <llvm/IR/Function.h>
7#include <llvm/IR/LegacyPassManager.h>
8#include <llvm/IR/Module.h>
9#include <llvm/Support/CodeGen.h>
10#include <llvm/Support/FileSystem.h>
11#include <llvm/Support/MemoryBuffer.h>
12#include <llvm/Support/Path.h>
13#include <llvm/Support/TargetSelect.h>
14#include <llvm/Target/TargetMachine.h>
16#if LLVM_VERSION_MAJOR == 18
17#include <lld/Common/Driver.h>
33 static SmallVector<StringRef> Names = {
34 "_ZNK17__HIP_CoordinatesI13__HIP_GridDimE3__XcvjEv",
35 "llvm.amdgcn.num.workgroups.x"};
40 static SmallVector<StringRef> Names = {
41 "_ZNK17__HIP_CoordinatesI13__HIP_GridDimE3__YcvjEv",
42 "llvm.amdgcn.num.workgroups.y"};
47 static SmallVector<StringRef> Names = {
48 "_ZNK17__HIP_CoordinatesI13__HIP_GridDimE3__ZcvjEv",
49 "llvm.amdgcn.num.workgroups.z"};
54 static SmallVector<StringRef> Names = {
55 "_ZNK17__HIP_CoordinatesI14__HIP_BlockDimE3__XcvjEv",
56 "llvm.amdgcn.workgroup.size.x"};
61 static SmallVector<StringRef> Names = {
62 "_ZNK17__HIP_CoordinatesI14__HIP_BlockDimE3__YcvjEv",
63 "llvm.amdgcn.workgroup.size.y"};
68 static SmallVector<StringRef> Names = {
69 "_ZNK17__HIP_CoordinatesI14__HIP_BlockDimE3__ZcvjEv",
70 "llvm.amdgcn.workgroup.size.z"};
75 static SmallVector<StringRef> Names = {
76 "_ZNK17__HIP_CoordinatesI14__HIP_BlockIdxE3__XcvjEv",
77 "llvm.amdgcn.workgroup.id.x"};
82 static SmallVector<StringRef> Names = {
83 "_ZNK17__HIP_CoordinatesI14__HIP_BlockIdxE3__YcvjEv",
84 "llvm.amdgcn.workgroup.id.y"};
89 static SmallVector<StringRef> Names = {
90 "_ZNK17__HIP_CoordinatesI14__HIP_BlockIdxE3__ZcvjEv",
91 "llvm.amdgcn.workgroup.id.z"};
96 static SmallVector<StringRef> Names = {
97 "_ZNK17__HIP_CoordinatesI15__HIP_ThreadIdxE3__XcvjEv",
98 "llvm.amdgcn.workitem.id.x"};
103 static SmallVector<StringRef> Names = {
104 "_ZNK17__HIP_CoordinatesI15__HIP_ThreadIdxE3__YcvjEv",
105 "llvm.amdgcn.workitem.id.y"};
110 static SmallVector<StringRef> Names = {
111 "_ZNK17__HIP_CoordinatesI15__HIP_ThreadIdxE3__ZcvjEv",
112 "llvm.amdgcn.workitem.id.z"};
125 F.addFnAttr(
"amdgpu-flat-work-group-size",
126 "1," + std::to_string(std::min(1024, BlockSize)));
129 [[maybe_unused]]
int WavesPerEU = 0;
132 <<
"BlockSize " << BlockSize <<
" GridSize " << GridSize
133 <<
" => Set Wokgroup size " << BlockSize
134 <<
" WavesPerEU (unused) " << WavesPerEU <<
"\n");
137inline std::unique_ptr<MemoryBuffer>
139 SmallPtrSetImpl<void *> &GlobalLinkedBinaries,
140 bool UseRTC =
true) {
141 assert(GlobalLinkedBinaries.empty() &&
142 "Expected empty linked binaries for HIP");
147 SmallString<4096> ModuleBuf;
148 raw_svector_ostream ModuleBufOS(ModuleBuf);
149 WriteBitcodeToFile(M, ModuleBufOS);
151 hiprtcLinkState HipLinkStatePtr;
158 std::string MArchOpt = (
"-march=" + DeviceArch).str();
159 const char *OptArgs[] = {
"-mllvm",
"-unroll-threshold=1000",
161 std::vector<hiprtcJIT_option> JITOptions = {
162 HIPRTC_JIT_IR_TO_ISA_OPT_EXT, HIPRTC_JIT_IR_TO_ISA_OPT_COUNT_EXT};
163 size_t OptArgsSize = 3;
164 const void *JITOptionsValues[] = {(
void *)OptArgs, (
void *)(OptArgsSize)};
166 (
void **)JITOptionsValues,
173 HipLinkStatePtr, HIPRTC_JIT_INPUT_LLVM_BITCODE,
174 (
void *)ModuleBuf.data(), ModuleBuf.size(),
"", 0,
nullptr,
nullptr));
176 hiprtcLinkComplete(HipLinkStatePtr, (
void **)&BinOut, &BinSize));
178 return MemoryBuffer::getMemBuffer(StringRef{BinOut, BinSize});
181#if LLVM_VERSION_MAJOR == 18
186 std::unique_ptr<TargetMachine> TM = std::move(*TMExpected);
187 TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
189 legacy::PassManager PM;
190 PM.add(
new TargetLibraryInfoWrapperPass(TLII));
191 MachineModuleInfoWrapperPass *MMIWP =
new MachineModuleInfoWrapperPass(
192 reinterpret_cast<LLVMTargetMachine *
>(TM.get()));
194 SmallVector<char, 4096> ObjectCode;
195 raw_svector_ostream OS(ObjectCode);
196 TM->addPassesToEmitFile(PM, OS,
nullptr, CodeGenFileType::ObjectFile,
201 SmallString<64> TempDir;
202 SmallString<64> ObjectPath;
203 SmallString<64> SharedObjectPath;
205 static std::mutex Mutex;
207 sys::path::system_temp_directory(
true, TempDir);
209 if (
auto EC = sys::fs::createUniqueFile(TempDir +
"/proteus-jit-%%%%%%%.o",
210 ObjectFD, ObjectPath))
213 raw_fd_ostream OS(ObjectFD,
true);
214 OS << StringRef{ObjectCode.data(), ObjectCode.size()};
217 if (
auto EC = sys::fs::createUniqueFile(TempDir +
"/proteus-jit-%%%%%%%.so",
221 std::vector<const char *>
Args{
"ld.lld",
"--no-undefined",
222 "-shared", ObjectPath.c_str(),
223 "-o", SharedObjectPath.c_str()};
226 std::lock_guard LockGuard{Mutex};
227 lld::Result S = lld::lldMain(
Args, llvm::outs(), llvm::errs(),
228 {{lld::Gnu, &lld::elf::link}});
234 ErrorOr<std::unique_ptr<MemoryBuffer>> Buffer =
235 MemoryBuffer::getFileAsStream(SharedObjectPath);
239 sys::fs::remove(ObjectPath);
240 sys::fs::remove(SharedObjectPath);
242 return std::move(*Buffer);
char int void ** Args
Definition CompilerInterfaceHost.cpp:20
#define PROTEUS_DBG(x)
Definition Debug.h:7
#define PROTEUS_FATAL_ERROR(x)
Definition Error.h:4
#define proteusHiprtcErrCheck(CALL)
Definition UtilsHIP.h:28
static llvm::raw_ostream & logs(const std::string &Name)
Definition Logger.hpp:18
const SmallVector< StringRef > & threadIdxXFnName()
Definition CoreLLVMCUDA.hpp:68
const SmallVector< StringRef > & gridDimYFnName()
Definition CoreLLVMCUDA.hpp:28
const SmallVector< StringRef > & threadIdxZFnName()
Definition CoreLLVMCUDA.hpp:78
const SmallVector< StringRef > & blockIdxZFnName()
Definition CoreLLVMCUDA.hpp:63
const SmallVector< StringRef > & gridDimZFnName()
Definition CoreLLVMCUDA.hpp:33
const SmallVector< StringRef > & gridDimXFnName()
Definition CoreLLVMCUDA.hpp:23
const SmallVector< StringRef > & blockIdxXFnName()
Definition CoreLLVMCUDA.hpp:53
Expected< std::unique_ptr< TargetMachine > > createTargetMachine(Module &M, StringRef Arch, unsigned OptLevel=3)
Definition CoreLLVM.hpp:48
const SmallVector< StringRef > & threadIdxYFnName()
Definition CoreLLVMCUDA.hpp:73
const SmallVector< StringRef > & blockIdxYFnName()
Definition CoreLLVMCUDA.hpp:58
const SmallVector< StringRef > & blockDimYFnName()
Definition CoreLLVMCUDA.hpp:43
const SmallVector< StringRef > & blockDimZFnName()
Definition CoreLLVMCUDA.hpp:48
const SmallVector< StringRef > & blockDimXFnName()
Definition CoreLLVMCUDA.hpp:38
Definition JitEngine.cpp:20
void setLaunchBoundsForKernel(Module &M, Function &F, size_t GridSize, int BlockSize)
Definition CoreLLVMCUDA.hpp:85
std::unique_ptr< MemoryBuffer > codegenObject(Module &M, StringRef DeviceArch, SmallPtrSetImpl< void * > &GlobalLinkedBinaries, bool UseRTC=true)
Definition CoreLLVMCUDA.hpp:148