Proteus
Programmable JIT compilation and optimization for C/C++ using LLVM
Loading...
Searching...
No Matches
CoreDeviceCUDA.hpp
Go to the documentation of this file.
1#ifndef PROTEUS_CORE_CUDA_HPP
2#define PROTEUS_CORE_CUDA_HPP
3
4#include <unordered_map>
5
6#include <llvm/ADT/StringRef.h>
7
8#include "proteus/UtilsCUDA.h"
9
10namespace proteus {
11
12inline void *resolveDeviceGlobalAddr(const void *Addr) {
13 void *DevPtr = nullptr;
14 proteusCudaErrCheck(cudaGetSymbolAddress(&DevPtr, Addr));
15 assert(DevPtr && "Expected non-null device pointer for global");
16
17 return DevPtr;
18}
19
20inline cudaError_t launchKernelDirect(void *KernelFunc, dim3 GridDim,
21 dim3 BlockDim, void **KernelArgs,
22 uint64_t ShmemSize, CUstream Stream) {
23 return cudaLaunchKernel(KernelFunc, GridDim, BlockDim, KernelArgs, ShmemSize,
24 Stream);
25}
26
28 StringRef KernelName, const void *Image, bool RelinkGlobalsByCopy,
29 const std::unordered_map<std::string, const void *> &VarNameToDevPtr) {
30 CUfunction KernelFunc;
31 CUmodule Mod;
32
33 proteusCuErrCheck(cuModuleLoadData(&Mod, Image));
34 if (RelinkGlobalsByCopy) {
35 for (auto &[GlobalName, DevPtr] : VarNameToDevPtr) {
36 CUdeviceptr Dptr;
37 size_t Bytes;
39 cuModuleGetGlobal(&Dptr, &Bytes, Mod, (GlobalName + "$ptr").c_str()));
40
41 uint64_t PtrVal = (uint64_t)DevPtr;
42 proteusCuErrCheck(cuMemcpyHtoD(Dptr, &PtrVal, Bytes));
43 }
44 }
46 cuModuleGetFunction(&KernelFunc, Mod, KernelName.str().c_str()));
47
48 return KernelFunc;
49}
50
51inline cudaError_t launchKernelFunction(CUfunction KernelFunc, dim3 GridDim,
52 dim3 BlockDim, void **KernelArgs,
53 uint64_t ShmemSize, CUstream Stream) {
54 cuLaunchKernel(KernelFunc, GridDim.x, GridDim.y, GridDim.z, BlockDim.x,
55 BlockDim.y, BlockDim.z, ShmemSize, Stream, KernelArgs,
56 nullptr);
57 return cudaGetLastError();
58}
59
60} // namespace proteus
61
62#endif
void char * KernelName
Definition CompilerInterfaceDevice.cpp:50
#define proteusCudaErrCheck(CALL)
Definition UtilsCUDA.h:18
#define proteusCuErrCheck(CALL)
Definition UtilsCUDA.h:28
Definition JitEngine.cpp:20
cudaError_t launchKernelDirect(void *KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.hpp:20
cudaError_t launchKernelFunction(CUfunction KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.hpp:51
CUfunction getKernelFunctionFromImage(StringRef KernelName, const void *Image, bool RelinkGlobalsByCopy, const std::unordered_map< std::string, const void * > &VarNameToDevPtr)
Definition CoreDeviceCUDA.hpp:27
void * resolveDeviceGlobalAddr(const void *Addr)
Definition CoreDeviceCUDA.hpp:12