Proteus
Programmable JIT compilation and optimization for C/C++ using LLVM
Loading...
Searching...
No Matches
CoreDeviceHIP.hpp
Go to the documentation of this file.
1#ifndef CORE_HIP_HPP
2#define CORE_HIP_HPP
3
4#include <unordered_map>
5
6#include <llvm/ADT/StringRef.h>
7
8#include "proteus/UtilsHIP.h"
9
10// NOTE: HIP_SYMBOL is defined only if HIP compilation is enabled (-x hip),
11// although it shouldn't be necessary since HIP RTC can JIT compile code. Also,
12// HIP_SYMBOL is defined differently depending on whether ROCm compiles for AMD
13// or NVIDIA. We repeat the AMD definition here for non-HIP compilation.
14#ifndef HIP_SYMBOL
15#define HIP_SYMBOL(x) x
16#endif
17
18namespace proteus {
19
20using namespace llvm;
21
22inline void *resolveDeviceGlobalAddr(const void *Addr) {
23 void *DevPtr = nullptr;
25 assert(DevPtr && "Expected non-null device pointer for global");
26
27 return DevPtr;
28}
29
31 dim3 BlockDim, void **KernelArgs,
33 return hipLaunchKernel(KernelFunc, GridDim, BlockDim, KernelArgs, ShmemSize,
34 Stream);
35}
36
38 StringRef KernelName, const void *Image, bool RelinkGlobalsByCopy,
39 const std::unordered_map<std::string, const void *> &VarNameToDevPtr) {
42
44 if (RelinkGlobalsByCopy) {
45 for (auto &[GlobalName, DevPtr] : VarNameToDevPtr) {
47 size_t Bytes;
49 (GlobalName + "$ptr").c_str()));
50
51 uint64_t PtrVal = (uint64_t)DevPtr;
53 }
54 }
57
58 return KernelFunc;
59}
60
62 dim3 BlockDim, void **KernelArgs,
64 return hipModuleLaunchKernel(KernelFunc, GridDim.x, GridDim.y, GridDim.z,
65 BlockDim.x, BlockDim.y, BlockDim.z, ShmemSize,
66 Stream, KernelArgs, nullptr);
67}
68
69} // namespace proteus
70
71#endif
void char * KernelName
Definition CompilerInterfaceDevice.cpp:50
#define HIP_SYMBOL(x)
Definition CoreDeviceHIP.hpp:15
#define proteusHipErrCheck(CALL)
Definition UtilsHIP.h:18
Definition Helpers.h:138
Definition BuiltinsCUDA.cpp:4
cudaError_t launchKernelDirect(void *KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.hpp:20
T getRuntimeConstantValue(void *Arg)
Definition CompilerInterfaceRuntimeConstantInfo.h:114
cudaError_t launchKernelFunction(CUfunction KernelFunc, dim3 GridDim, dim3 BlockDim, void **KernelArgs, uint64_t ShmemSize, CUstream Stream)
Definition CoreDeviceCUDA.hpp:51
CUfunction getKernelFunctionFromImage(StringRef KernelName, const void *Image, bool RelinkGlobalsByCopy, const std::unordered_map< std::string, const void * > &VarNameToDevPtr)
Definition CoreDeviceCUDA.hpp:27
void * resolveDeviceGlobalAddr(const void *Addr)
Definition CoreDeviceCUDA.hpp:12