39 dim3 BlockDim,
void **KernelArgs,
40 uint64_t ShmemSize, CUstream Stream) {
43 KernelArgs, ShmemSize, Stream);
46 reportFatalError(
"__proteus_cudaLaunchKernel_ptr is not initialized. Ensure "
47 "the CUDA runtime is properly linked.");
51 StringRef
KernelName,
const void *Image,
bool RelinkGlobalsByCopy,
52 const std::unordered_map<std::string, GlobalVarInfo> &VarNameToGlobalInfo) {
53 CUfunction KernelFunc;
57 if (RelinkGlobalsByCopy) {
58 for (
auto &[GlobalName, GVI] : VarNameToGlobalInfo) {
61 " without a concrete device address");
66 cuModuleGetGlobal(&Dptr, &Bytes, Mod, (GlobalName +
"$ptr").c_str()));
68 uint64_t PtrVal = (uint64_t)GVI.DevAddr;
73 cuModuleGetFunction(&KernelFunc, Mod,
KernelName.str().c_str()));
79 dim3 BlockDim,
void **KernelArgs,
80 uint64_t ShmemSize, CUstream Stream) {
83 auto CUresultToCudaError = [](CUresult Res) -> cudaError_t {
87 case CUDA_ERROR_INVALID_VALUE:
88 return cudaErrorInvalidValue;
89 case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
90 return cudaErrorLaunchOutOfResources;
91 case CUDA_ERROR_LAUNCH_TIMEOUT:
92 return cudaErrorLaunchTimeout;
93 case CUDA_ERROR_LAUNCH_FAILED:
94 return cudaErrorLaunchFailure;
95 case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
96 return cudaErrorSharedObjectInitFailed;
97 case CUDA_ERROR_INVALID_HANDLE:
98 return cudaErrorInvalidResourceHandle;
99 case CUDA_ERROR_NOT_READY:
100 return cudaErrorNotReady;
101 case CUDA_ERROR_ILLEGAL_ADDRESS:
102 return cudaErrorIllegalAddress;
104 return cudaErrorUnknown;
109 constexpr size_t DefaultShmemSize = 48 * 1024;
111 if (ShmemSize >= DefaultShmemSize) {
113 KernelFunc, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
117 CUresult Res = cuLaunchKernel(KernelFunc, GridDim.x, GridDim.y, GridDim.z,
118 BlockDim.x, BlockDim.y, BlockDim.z, ShmemSize,
119 Stream, KernelArgs,
nullptr);
120 return static_cast<cudaError_t
>(CUresultToCudaError(Res));
cudaError_t launchKernelDirect(void *KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.h:38
cudaError_t launchKernelFunction(CUfunction KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.h:78
cudaError_t(* __proteus_cudaLaunchKernel_ptr)(const void *, dim3, dim3, void **, size_t, cudaStream_t)
Definition CoreDeviceCUDA.h:20
CUfunction getKernelFunctionFromImage(StringRef KernelName, const void *Image, bool RelinkGlobalsByCopy, const std::unordered_map< std::string, GlobalVarInfo > &VarNameToGlobalInfo)
Definition CoreDeviceCUDA.h:50