Proteus
Programmable JIT compilation and optimization for C/C++ using LLVM
Loading...
Searching...
No Matches
CoreDeviceCUDA.hpp
Go to the documentation of this file.
1#ifndef PROTEUS_CORE_CUDA_HPP
2#define PROTEUS_CORE_CUDA_HPP
3
5#include "proteus/UtilsCUDA.h"
6
7#include <llvm/ADT/StringRef.h>
8
9#include <unordered_map>
10
11namespace proteus {
12
13inline void *resolveDeviceGlobalAddr(const void *Addr) {
14 void *DevPtr = nullptr;
16 assert(DevPtr && "Expected non-null device pointer for global");
17
18 return DevPtr;
19}
20
22 dim3 BlockDim, void **KernelArgs,
24 return cudaLaunchKernel(KernelFunc, GridDim, BlockDim, KernelArgs, ShmemSize,
25 Stream);
26}
27
29 StringRef KernelName, const void *Image, bool RelinkGlobalsByCopy,
30 const std::unordered_map<std::string, GlobalVarInfo> &VarNameToGlobalInfo) {
32 CUmodule Mod;
33
35 if (RelinkGlobalsByCopy) {
36 for (auto &[GlobalName, GVI] : VarNameToGlobalInfo) {
37 if (!GVI.DevAddr)
38 reportFatalError("Cannot copy to Global Var " + GlobalName +
39 " without a concrete device address");
40
42 size_t Bytes;
44 cuModuleGetGlobal(&Dptr, &Bytes, Mod, (GlobalName + "$ptr").c_str()));
45
46 uint64_t PtrVal = (uint64_t)GVI.DevAddr;
48 }
49 }
51 cuModuleGetFunction(&KernelFunc, Mod, KernelName.str().c_str()));
52
53 return KernelFunc;
54}
55
57 dim3 BlockDim, void **KernelArgs,
59 cuLaunchKernel(KernelFunc, GridDim.x, GridDim.y, GridDim.z, BlockDim.x,
60 BlockDim.y, BlockDim.z, ShmemSize, Stream, KernelArgs,
61 nullptr);
62 return cudaGetLastError();
63}
64
65} // namespace proteus
66
67#endif
void char * KernelName
Definition CompilerInterfaceDevice.cpp:52
#define proteusCudaErrCheck(CALL)
Definition UtilsCUDA.h:18
#define proteusCuErrCheck(CALL)
Definition UtilsCUDA.h:28
Definition ObjectCacheChain.cpp:26
void reportFatalError(const llvm::Twine &Reason, const char *FILE, unsigned Line)
Definition Error.cpp:14
cudaError_t launchKernelDirect(void *KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.hpp:21
T getRuntimeConstantValue(void *Arg)
Definition CompilerInterfaceRuntimeConstantInfo.h:113
cudaError_t launchKernelFunction(CUfunction KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.hpp:56
void * resolveDeviceGlobalAddr(const void *Addr)
Definition CoreDeviceCUDA.hpp:13
CUfunction getKernelFunctionFromImage(StringRef KernelName, const void *Image, bool RelinkGlobalsByCopy, const std::unordered_map< std::string, GlobalVarInfo > &VarNameToGlobalInfo)
Definition CoreDeviceCUDA.hpp:28