Proteus
Programmable JIT compilation and optimization for C/C++ using LLVM
Loading...
Searching...
No Matches
CoreDeviceCUDA.h
Go to the documentation of this file.
1#ifndef PROTEUS_CORE_CUDA_H
2#define PROTEUS_CORE_CUDA_H
3
4#include "proteus/Error.h"
7
8#include <llvm/ADT/StringRef.h>
9
10#include <unordered_map>
11
12namespace proteus {
13
14extern "C" {
15// Definitions for function pointers to CUDA APIs that we will resolve at
16// runtime using builtins.
17// NOLINTBEGIN(readability-identifier-naming)
19 void **, const void *) = nullptr;
20inline cudaError_t (*__proteus_cudaLaunchKernel_ptr)(const void *, dim3, dim3,
21 void **, size_t,
22 cudaStream_t) = nullptr;
23}
24// NOLINTEND(readability-identifier-naming)
25
26inline void *resolveDeviceGlobalAddr(const void *Addr) {
27 void *DevPtr = nullptr;
30 assert(DevPtr && "Expected non-null device pointer for global");
31 return DevPtr;
32 }
33
34 reportFatalError("__proteus_cudaGetSymbolAddress_ptr is not initialized. "
35 "Ensure the CUDA runtime is properly linked.");
36}
37
38inline cudaError_t launchKernelDirect(void *KernelFunc, dim3 GridDim,
39 dim3 BlockDim, void **KernelArgs,
40 uint64_t ShmemSize, CUstream Stream) {
42 return __proteus_cudaLaunchKernel_ptr(KernelFunc, GridDim, BlockDim,
43 KernelArgs, ShmemSize, Stream);
44 }
45
46 reportFatalError("__proteus_cudaLaunchKernel_ptr is not initialized. Ensure "
47 "the CUDA runtime is properly linked.");
48}
49
51 StringRef KernelName, const void *Image, bool RelinkGlobalsByCopy,
52 const std::unordered_map<std::string, GlobalVarInfo> &VarNameToGlobalInfo) {
53 CUfunction KernelFunc;
54 CUmodule Mod;
55
56 proteusCuErrCheck(cuModuleLoadData(&Mod, Image));
57 if (RelinkGlobalsByCopy) {
58 for (auto &[GlobalName, GVI] : VarNameToGlobalInfo) {
59 if (!GVI.DevAddr)
60 reportFatalError("Cannot copy to Global Var " + GlobalName +
61 " without a concrete device address");
62
63 CUdeviceptr Dptr;
64 size_t Bytes;
66 cuModuleGetGlobal(&Dptr, &Bytes, Mod, (GlobalName + "$ptr").c_str()));
67
68 uint64_t PtrVal = (uint64_t)GVI.DevAddr;
69 proteusCuErrCheck(cuMemcpyHtoD(Dptr, &PtrVal, Bytes));
70 }
71 }
73 cuModuleGetFunction(&KernelFunc, Mod, KernelName.str().c_str()));
74
75 return KernelFunc;
76}
77
78inline cudaError_t launchKernelFunction(CUfunction KernelFunc, dim3 GridDim,
79 dim3 BlockDim, void **KernelArgs,
80 uint64_t ShmemSize, CUstream Stream) {
81 // Convert CUresult to cudaError_t for the caller, where we replace a
82 // cudaLaunchKernel call with cuLaunchKernel for the JIT module.
83 auto CUresultToCudaError = [](CUresult Res) -> cudaError_t {
84 switch (Res) {
85 case CUDA_SUCCESS:
86 return cudaSuccess;
87 case CUDA_ERROR_INVALID_VALUE:
88 return cudaErrorInvalidValue;
89 case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
90 return cudaErrorLaunchOutOfResources;
91 case CUDA_ERROR_LAUNCH_TIMEOUT:
92 return cudaErrorLaunchTimeout;
93 case CUDA_ERROR_LAUNCH_FAILED:
94 return cudaErrorLaunchFailure;
95 case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
96 return cudaErrorSharedObjectInitFailed;
97 case CUDA_ERROR_INVALID_HANDLE:
98 return cudaErrorInvalidResourceHandle;
99 case CUDA_ERROR_NOT_READY:
100 return cudaErrorNotReady;
101 case CUDA_ERROR_ILLEGAL_ADDRESS:
102 return cudaErrorIllegalAddress;
103 default:
104 return cudaErrorUnknown;
105 }
106 };
107
108 CUresult Res = cuLaunchKernel(KernelFunc, GridDim.x, GridDim.y, GridDim.z,
109 BlockDim.x, BlockDim.y, BlockDim.z, ShmemSize,
110 Stream, KernelArgs, nullptr);
111 return static_cast<cudaError_t>(CUresultToCudaError(Res));
112}
113
114} // namespace proteus
115
116#endif
void char * KernelName
Definition CompilerInterfaceDevice.cpp:54
#define proteusCuErrCheck(CALL)
Definition UtilsCUDA.h:28
Definition MemoryCache.h:26
cudaError_t(* __proteus_cudaGetSymbolAddress_ptr)(void **, const void *)
Definition CoreDeviceCUDA.h:18
void reportFatalError(const llvm::Twine &Reason, const char *FILE, unsigned Line)
Definition Error.cpp:14
cudaError_t launchKernelDirect(void *KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.h:38
cudaError_t launchKernelFunction(CUfunction KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.h:78
void * resolveDeviceGlobalAddr(const void *Addr)
Definition CoreDeviceCUDA.h:26
cudaError_t(* __proteus_cudaLaunchKernel_ptr)(const void *, dim3, dim3, void **, size_t, cudaStream_t)
Definition CoreDeviceCUDA.h:20
CUfunction getKernelFunctionFromImage(StringRef KernelName, const void *Image, bool RelinkGlobalsByCopy, const std::unordered_map< std::string, GlobalVarInfo > &VarNameToGlobalInfo)
Definition CoreDeviceCUDA.h:50