1#ifndef PROTEUS_CORE_CUDA_HPP
2#define PROTEUS_CORE_CUDA_HPP
4#include <unordered_map>
6#include <llvm/ADT/StringRef.h>
13 void *DevPtr =
nullptr;
15 assert(DevPtr &&
"Expected non-null device pointer for global");
21 dim3 BlockDim,
void **KernelArgs,
22 uint64_t ShmemSize, CUstream Stream) {
23 return cudaLaunchKernel(KernelFunc, GridDim, BlockDim, KernelArgs, ShmemSize,
28 StringRef
KernelName,
const void *Image,
bool RelinkGlobalsByCopy,
29 const std::unordered_map<std::string, const void *> &VarNameToDevPtr) {
30 CUfunction KernelFunc;
34 if (RelinkGlobalsByCopy) {
35 for (
auto &[GlobalName, DevPtr] : VarNameToDevPtr) {
39 cuModuleGetGlobal(&Dptr, &Bytes, Mod, (GlobalName +
"$ptr").c_str()));
41 uint64_t PtrVal = (uint64_t)DevPtr;
46 cuModuleGetFunction(&KernelFunc, Mod,
KernelName.str().c_str()));
52 dim3 BlockDim,
void **KernelArgs,
53 uint64_t ShmemSize, CUstream Stream) {
54 cuLaunchKernel(KernelFunc, GridDim.x, GridDim.y, GridDim.z, BlockDim.x,
55 BlockDim.y, BlockDim.z, ShmemSize, Stream, KernelArgs,
57 return cudaGetLastError();
void char * KernelName
Definition CompilerInterfaceDevice.cpp:50
#define proteusCudaErrCheck(CALL)
Definition UtilsCUDA.h:18
#define proteusCuErrCheck(CALL)
Definition UtilsCUDA.h:28
Definition JitEngine.cpp:20
cudaError_t launchKernelDirect(void *KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.hpp:20
cudaError_t launchKernelFunction(CUfunction KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.hpp:51
CUfunction getKernelFunctionFromImage(StringRef KernelName, const void *Image, bool RelinkGlobalsByCopy, const std::unordered_map< std::string, const void * > &VarNameToDevPtr)
Definition CoreDeviceCUDA.hpp:27
void * resolveDeviceGlobalAddr(const void *Addr)
Definition CoreDeviceCUDA.hpp:12