Proteus
Programmable JIT compilation and optimization for C/C++ using LLVM
Loading...
Searching...
No Matches
CoreDeviceHIP.h
Go to the documentation of this file.
1#ifndef CORE_HIP_H
2#define CORE_HIP_H
3
4#include "proteus/Error.h"
7
8#include <llvm/ADT/StringRef.h>
9
10#include <unordered_map>
11// NOTE: HIP_SYMBOL is defined only if HIP compilation is enabled (-x hip),
12// although it shouldn't be necessary since HIP RTC can JIT compile code. Also,
13// HIP_SYMBOL is defined differently depending on whether ROCm compiles for AMD
14// or NVIDIA. We repeat the AMD definition here for non-HIP compilation.
15#ifndef HIP_SYMBOL
16#define HIP_SYMBOL(x) x
17#endif
18
19namespace proteus {
20
21using namespace llvm;
22
// Returns the device pointer held in DevPtr for the global identified by Addr.
// NOTE(review): this extracted listing omits original lines 25-26, so the
// statement that fills DevPtr is not visible here; presumably it is a checked
// call to the getSymbolAddress wrapper -- confirm against the real header.
23inline void *resolveDeviceGlobalAddr(const void *Addr) {
24 void *DevPtr = nullptr;
// A DevPtr that is still null at this point is treated as a programming error.
27 assert(DevPtr && "Expected non-null device pointer for global");
28
29 return DevPtr;
30}
31
// Launches a kernel given its function address by forwarding every argument,
// unchanged and in the same order, to proteus::hipdyn::launchKernel.
//
// KernelFunc: address of the kernel function to launch.
// GridDim / BlockDim: launch dimensions, passed through as-is.
// KernelArgs: array of pointers to the kernel arguments.
// ShmemSize: shared memory size in bytes handed to the launch call.
// Stream: stream handed to the launch call.
// Returns the hipError_t produced by proteus::hipdyn::launchKernel; no error
// checking is done here, so the caller must inspect the result.
32inline hipError_t launchKernelDirect(void *KernelFunc, dim3 GridDim,
33 dim3 BlockDim, void **KernelArgs,
34 uint64_t ShmemSize, hipStream_t Stream) {
35 return proteus::hipdyn::launchKernel(KernelFunc, GridDim, BlockDim,
36 KernelArgs, ShmemSize, Stream);
37}
38
// Fetches the hipFunction_t handle for the kernel named KernelName from a HIP
// module and returns it.
// NOTE(review): this extracted listing omits original lines 45, 54, 58 and 61,
// i.e. the opening lines of several calls. Judging by the argument lists that
// remain, they are presumably checked calls to the moduleLoadData,
// moduleGetGlobal, memcpyHtoD and moduleGetFunction wrappers -- confirm
// against the real header before relying on this description.
//
// KernelName: name of the kernel to look up in the module.
// Image: device image. NOTE(review): not referenced on any visible line;
//        presumably consumed by the missing module-load call.
// RelinkGlobalsByCopy: when true, every entry of VarNameToGlobalInfo is
//        processed by the loop below.
// VarNameToGlobalInfo: maps a global variable name to its GlobalVarInfo; the
//        visible code reads only the DevAddr member.
39inline hipFunction_t getKernelFunctionFromImage(
40 StringRef KernelName, const void *Image, bool RelinkGlobalsByCopy,
41 const std::unordered_map<std::string, GlobalVarInfo> &VarNameToGlobalInfo) {
42 hipModule_t HipModule;
43 hipFunction_t KernelFunc;
44
46 if (RelinkGlobalsByCopy) {
47 for (auto &[GlobalName, GVI] : VarNameToGlobalInfo) {
// A global without a device address cannot be relinked; abort with a fatal
// error rather than continue.
48 if (!GVI.DevAddr)
49 reportFatalError("Cannot copy to Global Var " + GlobalName +
50 " without a concrete device address");
51
52 hipDeviceptr_t Dptr;
53 size_t Bytes;
// Queries the module symbol named "<GlobalName>$ptr" for its device address
// (Dptr) and size (Bytes).
55 &Dptr, &Bytes, HipModule, (GlobalName + "$ptr").c_str()));
56
// The device address of the global, reinterpreted as a 64-bit integer.
// NOTE(review): presumably written into the "$ptr" symbol by the missing
// line 58 -- confirm.
57 uint64_t PtrVal = (uint64_t)GVI.DevAddr;
59 }
60 }
// Resolves KernelName inside HipModule and stores the handle in KernelFunc.
62 &KernelFunc, HipModule, KernelName.str().c_str()));
63
64 return KernelFunc;
65}
66
// Launches a kernel through its hipFunction_t handle, unpacking the dim3 grid
// and block dimensions into their individual x/y/z components.
// NOTE(review): this extracted listing omits original line 70, which holds the
// callee and the return statement; the argument order matches the
// moduleLaunchKernel wrapper, so presumably its hipError_t is returned
// directly -- confirm against the real header.
//
// KernelFunc: handle of the kernel to launch.
// GridDim / BlockDim: launch dimensions, passed as separate x, y, z values.
// KernelArgs: array of pointers to the kernel arguments.
// ShmemSize: shared memory size in bytes.
// Stream: stream passed to the launch call.
67inline hipError_t launchKernelFunction(hipFunction_t KernelFunc, dim3 GridDim,
68 dim3 BlockDim, void **KernelArgs,
69 uint64_t ShmemSize, hipStream_t Stream) {
// The final nullptr is the trailing argument of the launch call; KernelArgs is
// the only argument-passing mechanism used here.
71 KernelFunc, GridDim.x, GridDim.y, GridDim.z, BlockDim.x, BlockDim.y,
72 BlockDim.z, ShmemSize, Stream, KernelArgs, nullptr);
73}
74
75} // namespace proteus
76
77#endif
void char * KernelName
Definition CompilerInterfaceDevice.cpp:59
#define HIP_SYMBOL(x)
Definition CoreDeviceHIP.h:16
#define proteusHipErrCheck(CALL)
Definition UtilsHIP.h:20
Definition CompiledLibrary.h:7
hipError_t moduleLaunchKernel(hipFunction_t Function, unsigned int GridDimX, unsigned int GridDimY, unsigned int GridDimZ, unsigned int BlockDimX, unsigned int BlockDimY, unsigned int BlockDimZ, unsigned int SharedMemBytes, hipStream_t Stream, void **KernelParams, void **Extra)
Definition HIPRuntimeAPI.cpp:156
hipError_t getSymbolAddress(void **DevPtr, const void *Symbol)
Definition HIPRuntimeAPI.cpp:124
hipError_t memcpyHtoD(hipDeviceptr_t Dst, const void *Src, size_t SizeBytes)
Definition HIPRuntimeAPI.cpp:130
hipError_t moduleGetGlobal(hipDeviceptr_t *Dptr, size_t *Bytes, hipModule_t Module, const char *Name)
Definition HIPRuntimeAPI.cpp:142
hipError_t moduleGetFunction(hipFunction_t *Function, hipModule_t Module, const char *Name)
Definition HIPRuntimeAPI.cpp:149
hipError_t launchKernel(const void *FunctionAddress, dim3 NumBlocks, dim3 DimBlocks, void **Args, size_t SharedMemBytes, hipStream_t Stream)
Definition HIPRuntimeAPI.cpp:168
hipError_t moduleLoadData(hipModule_t *Module, const void *Image)
Definition HIPRuntimeAPI.cpp:136
Definition MemoryCache.h:27
void reportFatalError(const llvm::Twine &Reason, const char *FILE, unsigned Line)
Definition Error.cpp:14
cudaError_t launchKernelDirect(void *KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.h:38
cudaError_t launchKernelFunction(CUfunction KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.h:78
void * resolveDeviceGlobalAddr(const void *Addr)
Definition CoreDeviceCUDA.h:26
CUfunction getKernelFunctionFromImage(StringRef KernelName, const void *Image, bool RelinkGlobalsByCopy, const std::unordered_map< std::string, GlobalVarInfo > &VarNameToGlobalInfo)
Definition CoreDeviceCUDA.h:50