1#ifndef PROTEUS_CORE_LLVM_CUDA_HPP
2#define PROTEUS_CORE_LLVM_CUDA_HPP
4#include <llvm/ADT/SmallVector.h>
5#include <llvm/ADT/StringRef.h>
6#include <llvm/CodeGen/MachineModuleInfo.h>
7#include <llvm/IR/LegacyPassManager.h>
8#include <llvm/IR/Module.h>
9#include <llvm/Support/MemoryBufferRef.h>
10#include <llvm/Support/TargetSelect.h>
11#include <llvm/Target/TargetMachine.h>
88 int MinBlocksPerSM = 0) {
89 auto *M = F.getParent();
97 ConstantInt::get(Type::getInt32Ty(M->getContext()),
MDValue));
117 if (MinBlocksPerSM != 0)
135 std::unique_ptr<TargetMachine>
TM = std::move(*
TMExpected);
137 M.setDataLayout(
TM->createDataLayout());
139 legacy::PassManager
PM;
142#if LLVM_VERSION_MAJOR >= 20
150#if LLVM_VERSION_MAJOR >= 18
151 TM->addPassesToEmitFile(
PM,
PTXOS,
nullptr, CodeGenFileType::AssemblyFile,
161 <<
"Codegen ptx " <<
T.elapsed() <<
" ms\n");
164inline std::unique_ptr<MemoryBuffer>
180 std::string
ArchOpt = (
"--gpu-name=" + DeviceArch).
str();
182 if (!GlobalLinkedBinaries.empty())
200 auto ObjBuf = WritableMemoryBuffer::getNewUninitMemBuffer(
BinSize);
211 Logger::logs(
"proteus") <<
"=== nvPTXCompiler Log\n" <<
Log.get() <<
"\n";
217 if (!GlobalLinkedBinaries.empty()) {
235 for (
auto *
Ptr : GlobalLinkedBinaries) {
246 static_cast<void *
>(
ObjBuf->getBufferStart()), 1,
"", 0, 0, 0));
258 <<
"Codegen CUDA RTC " <<
T.elapsed() <<
" ms\n");
#define PROTEUS_FATAL_ERROR(x)
Definition Error.h:7
#define PROTEUS_TIMER_OUTPUT(x)
Definition TimeTracing.hpp:67
#define proteusNvPTXCompilerErrCheck(CALL)
Definition UtilsCUDA.h:39
#define proteusCuErrCheck(CALL)
Definition UtilsCUDA.h:28
static Config & get()
Definition Config.hpp:298
bool ProteusDebugOutput
Definition Config.hpp:314
static llvm::raw_ostream & outs(const std::string &Name)
Definition Logger.hpp:25
static llvm::raw_ostream & logs(const std::string &Name)
Definition Logger.hpp:19
Definition TimeTracing.hpp:46
const SmallVector< StringRef > & threadIdxXFnName()
Definition CoreLLVMCUDA.hpp:70
const SmallVector< StringRef > & gridDimYFnName()
Definition CoreLLVMCUDA.hpp:30
const SmallVector< StringRef > & threadIdxZFnName()
Definition CoreLLVMCUDA.hpp:80
const SmallVector< StringRef > & blockIdxZFnName()
Definition CoreLLVMCUDA.hpp:65
const SmallVector< StringRef > & gridDimZFnName()
Definition CoreLLVMCUDA.hpp:35
const SmallVector< StringRef > & gridDimXFnName()
Definition CoreLLVMCUDA.hpp:25
const SmallVector< StringRef > & blockIdxXFnName()
Definition CoreLLVMCUDA.hpp:55
Expected< std::unique_ptr< TargetMachine > > createTargetMachine(Module &M, StringRef Arch, unsigned OptLevel=3)
Definition CoreLLVM.hpp:52
const SmallVector< StringRef > & threadIdxYFnName()
Definition CoreLLVMCUDA.hpp:75
const SmallVector< StringRef > & blockIdxYFnName()
Definition CoreLLVMCUDA.hpp:60
const SmallVector< StringRef > & blockDimYFnName()
Definition CoreLLVMCUDA.hpp:45
const SmallVector< StringRef > & blockDimZFnName()
Definition CoreLLVMCUDA.hpp:50
const SmallVector< StringRef > & blockDimXFnName()
Definition CoreLLVMCUDA.hpp:40
Definition ObjectCacheChain.cpp:25
void codegenPTX(Module &M, StringRef DeviceArch, SmallVectorImpl< char > &PTXStr)
Definition CoreLLVMCUDA.hpp:121
void setLaunchBoundsForKernel(Function &F, int MaxThreadsPerSM, int MinBlocksPerSM=0)
Definition CoreLLVMCUDA.hpp:87
T getRuntimeConstantValue(void *Arg)
Definition CompilerInterfaceRuntimeConstantInfo.h:114
CodegenOption
Definition Config.hpp:14
std::unique_ptr< MemoryBuffer > codegenObject(Module &M, StringRef DeviceArch, SmallPtrSetImpl< void * > &GlobalLinkedBinaries, CodegenOption CGOption=CodegenOption::RTC)
Definition CoreLLVMCUDA.hpp:165
std::string toString(CodegenOption Option)
Definition Config.hpp:26