Proteus
Programmable JIT compilation and optimization for C/C++ using LLVM
Loading...
Searching...
No Matches
Builtins.hpp
Go to the documentation of this file.
1#ifndef PROTEUS_FRONTEND_BUILTINS_HPP
2#define PROTEUS_FRONTEND_BUILTINS_HPP
3
5
6namespace proteus {
7namespace builtins {
8
9#if PROTEUS_ENABLE_HIP
10namespace hip {
11
12namespace detail {
13
14// Offsets in implicit arg pts in i32 step.
15constexpr unsigned OffsetGridDimX = 0;
16constexpr unsigned OffsetGridDimY = 1;
17constexpr unsigned OffsetGridDimZ = 2;
18
19inline Value *getGridDim(Func &Fn, unsigned Offset) {
20 // An alternative way is by using __ockl_get_num_groups but needs to link with
21 // hip bitcode libraries.
22 constexpr int ConstantAddressSpace = 4;
23 auto &Ctx = Fn.getFunction()->getContext();
24 auto &M = *Fn.getFunction()->getParent();
25
26 auto &IRB = Fn.getIRB();
27 FunctionCallee Callee =
28 M.getOrInsertFunction("llvm.amdgcn.implicitarg.ptr",
29 PointerType::get(Ctx, ConstantAddressSpace));
30 auto *Call = IRB.CreateCall(Callee);
31 auto *GEP = IRB.CreateInBoundsGEP(
32 IRB.getInt32Ty(), Call, {ConstantInt::get(IRB.getInt64Ty(), Offset)});
33 auto *Load = IRB.CreateLoad(IRB.getInt32Ty(), GEP);
34
35 return Load;
36}
37
38// Offsets in implicit arg pts in i16 step.
39constexpr unsigned OffsetBlockDimX = 6;
40constexpr unsigned OffsetBlockDimY = 7;
41constexpr unsigned OffsetBlockDimZ = 8;
42
43inline Value *getBlockDim(Func &Fn, unsigned Offset) {
44 // An alternative way is by using __ockl_get_local_size but needs to link with
45 // hip bitcode libraries.
46 constexpr int ConstantAddressSpace = 4;
47 auto &Ctx = Fn.getFunction()->getContext();
48 auto &M = *Fn.getFunction()->getParent();
49
50 auto &IRB = Fn.getIRB();
51 FunctionCallee Callee =
52 M.getOrInsertFunction("llvm.amdgcn.implicitarg.ptr",
53 PointerType::get(Ctx, ConstantAddressSpace));
54 auto *Call = IRB.CreateCall(Callee);
55 auto *GEP = IRB.CreateInBoundsGEP(
56 IRB.getInt16Ty(), Call, {ConstantInt::get(IRB.getInt64Ty(), Offset)});
57 auto *Load = IRB.CreateLoad(IRB.getInt16Ty(), GEP);
58 auto *Conv = IRB.CreateZExt(Load, IRB.getInt32Ty());
59
60 return Conv;
61}
62
63} // namespace detail
64
65inline Var &getThreadIdX(Func &Fn) {
66 auto &Ctx = Fn.getFunction()->getContext();
67 auto &M = *Fn.getFunction()->getParent();
68
69 Var &Ret = Fn.declVarInternal("threadIdx.x", TypeMap<int>::get(Ctx));
70
71 auto &IRB = Fn.getIRB();
72 FunctionCallee Callee = M.getOrInsertFunction("llvm.amdgcn.workitem.id.x",
74 auto *Call = IRB.CreateCall(Callee);
75 Ret.storeValue(Call);
76
77 return Ret;
78}
79
80inline Var &getBlockIdX(Func &Fn) {
81 auto &Ctx = Fn.getFunction()->getContext();
82 auto &M = *Fn.getFunction()->getParent();
83
84 Var &Ret = Fn.declVarInternal("blockIdx.x", TypeMap<int>::get(Ctx));
85
86 auto &IRB = Fn.getIRB();
87 FunctionCallee Callee = M.getOrInsertFunction("llvm.amdgcn.workgroup.id.x",
89 auto *Call = IRB.CreateCall(Callee);
90 Ret.storeValue(Call);
91
92 return Ret;
93}
94
95inline Var &getBlockDimX(Func &Fn) {
96 auto &Ctx = Fn.getFunction()->getContext();
97 // TODO: Return an "int" variable, could be a different type.
98 Var &Ret = Fn.declVarInternal("blockDim.x", TypeMap<int>::get(Ctx));
99
100 Value *Conv = detail::getBlockDim(Fn, detail::OffsetBlockDimX);
101 Ret.storeValue(Conv);
102
103 return Ret;
104}
105
106inline Var &getGridDimX(Func &Fn) {
107 auto &Ctx = Fn.getFunction()->getContext();
108 Var &Ret = Fn.declVarInternal("gridDim.x", TypeMap<int>::get(Ctx));
109
110 Value *Conv = detail::getGridDim(Fn, detail::OffsetGridDimX);
111 Ret.storeValue(Conv);
112
113 return Ret;
114}
115
116} // namespace hip
117#endif
118
119#if PROTEUS_ENABLE_CUDA
120namespace cuda {
121inline Var &getThreadIdX(Func &Fn) {
122 auto &Ctx = Fn.getFunction()->getContext();
123 auto &M = *Fn.getFunction()->getParent();
124
125 Var &Ret = Fn.declVarInternal("threadIdx.x", TypeMap<int>::get(Ctx));
126
127 auto &IRB = Fn.getIRB();
128 FunctionCallee Callee = M.getOrInsertFunction("llvm.nvvm.read.ptx.sreg.tid.x",
129 TypeMap<int>::get(Ctx));
130 auto *Call = IRB.CreateCall(Callee);
131 Ret.storeValue(Call);
132
133 return Ret;
134}
135
136inline Var &getBlockIdX(Func &Fn) {
137 auto &Ctx = Fn.getFunction()->getContext();
138 auto &M = *Fn.getFunction()->getParent();
139
140 Var &Ret = Fn.declVarInternal("blockIdx.x", TypeMap<int>::get(Ctx));
141
142 auto &IRB = Fn.getIRB();
143 FunctionCallee Callee = M.getOrInsertFunction(
144 "llvm.nvvm.read.ptx.sreg.ctaid.x", TypeMap<int>::get(Ctx));
145 auto *Call = IRB.CreateCall(Callee);
146 Ret.storeValue(Call);
147
148 return Ret;
149}
150
151inline Var &getBlockDimX(Func &Fn) {
152 auto &Ctx = Fn.getFunction()->getContext();
153 auto &M = *Fn.getFunction()->getParent();
154
155 Var &Ret = Fn.declVarInternal("blockDim.x", TypeMap<int>::get(Ctx));
156
157 auto &IRB = Fn.getIRB();
158 FunctionCallee Callee = M.getOrInsertFunction(
159 "llvm.nvvm.read.ptx.sreg.ntid.x", TypeMap<int>::get(Ctx));
160 auto *Call = IRB.CreateCall(Callee);
161 Ret.storeValue(Call);
162
163 return Ret;
164}
165
166inline Var &getGridDimX(Func &Fn) {
167 auto &Ctx = Fn.getFunction()->getContext();
168 auto &M = *Fn.getFunction()->getParent();
169
170 Var &Ret = Fn.declVarInternal("blockDim.x", TypeMap<int>::get(Ctx));
171
172 auto &IRB = Fn.getIRB();
173 FunctionCallee Callee = M.getOrInsertFunction(
174 "llvm.nvvm.read.ptx.sreg.nctaid.x", TypeMap<int>::get(Ctx));
175 auto *Call = IRB.CreateCall(Callee);
176 Ret.storeValue(Call);
177
178 return Ret;
179}
180
181} // namespace cuda
182#endif
183} // namespace builtins
184} // namespace proteus
185
186#endif
Definition Func.hpp:19
Function * getFunction()
Definition Func.cpp:58
Var & declVarInternal(StringRef Name, Type *Ty, Type *PointerElemType=nullptr)
Definition Func.cpp:17
IRBuilderBase & getIRB()
Definition Func.cpp:11
Definition Dispatcher.cpp:14
Definition TypeMap.hpp:13
Definition Var.hpp:13
void storeValue(Value *Val)
Definition Var.cpp:102