Proteus
Programmable JIT compilation and optimization for C/C++ using LLVM
Loading...
Searching...
No Matches
CoreDeviceHIP.hpp
Go to the documentation of this file.
1#ifndef CORE_HIP_HPP
2#define CORE_HIP_HPP
3
4#include <unordered_map>
5
6#include <llvm/ADT/StringRef.h>
7
8#include "proteus/UtilsHIP.h"
9
10// NOTE: HIP_SYMBOL is defined only if HIP compilation is enabled (-x hip),
11// although it shouldn't be necessary since HIP RTC can JIT compile code. Also,
12// HIP_SYMBOL is defined differently depending on whether ROCm compiles for AMD
13// or NVIDIA. We repeat the AMD definition here for non-HIP compilation.
14#ifndef HIP_SYMBOL
15#define HIP_SYMBOL(x) x
16#endif
17
18namespace proteus {
19
20using namespace llvm;
21
22inline void *resolveDeviceGlobalAddr(const void *Addr) {
23 void *DevPtr = nullptr;
24 proteusHipErrCheck(hipGetSymbolAddress(&DevPtr, HIP_SYMBOL(Addr)));
25 assert(DevPtr && "Expected non-null device pointer for global");
26
27 return DevPtr;
28}
29
30inline hipError_t launchKernelDirect(void *KernelFunc, dim3 GridDim,
31 dim3 BlockDim, void **KernelArgs,
32 uint64_t ShmemSize, hipStream_t Stream) {
33 return hipLaunchKernel(KernelFunc, GridDim, BlockDim, KernelArgs, ShmemSize,
34 Stream);
35}
36
37inline hipFunction_t getKernelFunctionFromImage(
38 StringRef KernelName, const void *Image, bool RelinkGlobalsByCopy,
39 const std::unordered_map<std::string, const void *> &VarNameToDevPtr) {
40 hipModule_t HipModule;
41 hipFunction_t KernelFunc;
42
43 proteusHipErrCheck(hipModuleLoadData(&HipModule, Image));
44 if (RelinkGlobalsByCopy) {
45 for (auto &[GlobalName, DevPtr] : VarNameToDevPtr) {
46 hipDeviceptr_t Dptr;
47 size_t Bytes;
48 proteusHipErrCheck(hipModuleGetGlobal(&Dptr, &Bytes, HipModule,
49 (GlobalName + "$ptr").c_str()));
50
51 uint64_t PtrVal = (uint64_t)DevPtr;
52 proteusHipErrCheck(hipMemcpyHtoD(Dptr, &PtrVal, Bytes));
53 }
54 }
56 hipModuleGetFunction(&KernelFunc, HipModule, KernelName.str().c_str()));
57
58 return KernelFunc;
59}
60
61inline hipError_t launchKernelFunction(hipFunction_t KernelFunc, dim3 GridDim,
62 dim3 BlockDim, void **KernelArgs,
63 uint64_t ShmemSize, hipStream_t Stream) {
64 return hipModuleLaunchKernel(KernelFunc, GridDim.x, GridDim.y, GridDim.z,
65 BlockDim.x, BlockDim.y, BlockDim.z, ShmemSize,
66 Stream, KernelArgs, nullptr);
67}
68
69} // namespace proteus
70
71#endif
void char * KernelName
Definition CompilerInterfaceDevice.cpp:50
#define HIP_SYMBOL(x)
Definition CoreDeviceHIP.hpp:15
#define proteusHipErrCheck(CALL)
Definition UtilsHIP.h:18
Definition JitEngine.cpp:20
cudaError_t launchKernelDirect(void *KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.hpp:20
cudaError_t launchKernelFunction(CUfunction KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.hpp:51
CUfunction getKernelFunctionFromImage(StringRef KernelName, const void *Image, bool RelinkGlobalsByCopy, const std::unordered_map< std::string, const void * > &VarNameToDevPtr)
Definition CoreDeviceCUDA.hpp:27
void * resolveDeviceGlobalAddr(const void *Addr)
Definition CoreDeviceCUDA.hpp:12