]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm-project/clang/include/clang/Basic/BuiltinsNVPTX.def
Merge llvm-project main llvmorg-16-init-18548-gb0daacf58f41
[FreeBSD/FreeBSD.git] / contrib / llvm-project / clang / include / clang / Basic / BuiltinsNVPTX.def
1 //===--- BuiltinsNVPTX.def - PTX Builtin function database --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the PTX-specific builtin function database.  Users of
10 // this file must define the BUILTIN macro to make use of this information.
11 //
12 //===----------------------------------------------------------------------===//
13
14 // The format of this database matches clang/Basic/Builtins.def.
15
// Fallback for includers that only define BUILTIN: TARGET_BUILTIN is mapped
// onto BUILTIN and its FEATURE argument is dropped, so every entry in this
// file is still emitted as BUILTIN(ID, TYPE, ATTRS).
16 #if defined(BUILTIN) && !defined(TARGET_BUILTIN)
17 #   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
18 #endif
19
// SM_xx feature strings.
//
// Any existing definition of each name is saved with push_macro before the
// name is redefined here. The definitions chain from the highest level down:
// SM_90 is just "sm_90", and each lower SM_xx is its own name followed by '|'
// and the next macro up. Adjacent string literals concatenate, so each macro
// expands to a '|'-separated list of its own level and every later one
// (e.g. SM_87 expands to "sm_87|sm_89|sm_90"). SM_60 also lists sm_61 and
// sm_62.
// NOTE(review): the matching pop_macro directives are not visible in this part
// of the file -- confirm they exist further down.
20 #pragma push_macro("SM_53")
21 #pragma push_macro("SM_70")
22 #pragma push_macro("SM_72")
23 #pragma push_macro("SM_75")
24 #pragma push_macro("SM_80")
25 #pragma push_macro("SM_86")
26 #pragma push_macro("SM_87")
27 #pragma push_macro("SM_89")
28 #pragma push_macro("SM_90")
29 #define SM_90 "sm_90"
30 #define SM_89 "sm_89|" SM_90
31 #define SM_87 "sm_87|" SM_89
32 #define SM_86 "sm_86|" SM_87
33 #define SM_80 "sm_80|" SM_86
34 #define SM_75 "sm_75|" SM_80
35 #define SM_72 "sm_72|" SM_75
36 #define SM_70 "sm_70|" SM_72
37
38 #pragma push_macro("SM_60")
39 #define SM_60 "sm_60|sm_61|sm_62|" SM_70
40 #define SM_53 "sm_53|" SM_60
41
// PTXxx feature strings.
//
// Same scheme as the SM_xx macros: each name is pushed, then redefined as its
// own level followed by '|' and the next macro up, ending at PTX78 ("ptx78").
// After string-literal concatenation a macro therefore lists its own level
// and every later one (e.g. PTX76 expands to "ptx76|ptx77|ptx78").
// NOTE(review): the matching pop_macro directives are not visible in this part
// of the file -- confirm they exist further down.
42 #pragma push_macro("PTX42")
43 #pragma push_macro("PTX60")
44 #pragma push_macro("PTX61")
45 #pragma push_macro("PTX63")
46 #pragma push_macro("PTX64")
47 #pragma push_macro("PTX65")
48 #pragma push_macro("PTX70")
49 #pragma push_macro("PTX71")
50 #pragma push_macro("PTX72")
51 #pragma push_macro("PTX73")
52 #pragma push_macro("PTX74")
53 #pragma push_macro("PTX75")
54 #pragma push_macro("PTX76")
55 #pragma push_macro("PTX77")
56 #pragma push_macro("PTX78")
57 #define PTX78 "ptx78"
58 #define PTX77 "ptx77|" PTX78
59 #define PTX76 "ptx76|" PTX77
60 #define PTX75 "ptx75|" PTX76
61 #define PTX74 "ptx74|" PTX75
62 #define PTX73 "ptx73|" PTX74
63 #define PTX72 "ptx72|" PTX73
64 #define PTX71 "ptx71|" PTX72
65 #define PTX70 "ptx70|" PTX71
66 #define PTX65 "ptx65|" PTX70
67 #define PTX64 "ptx64|" PTX65
68 #define PTX63 "ptx63|" PTX64
69 #define PTX61 "ptx61|" PTX63
70 #define PTX60 "ptx60|" PTX61
71 #define PTX42 "ptx42|" PTX60
72
// AND(a, b) pastes two feature strings into one literal of the form
// "(" a "),(" b ")", i.e. each operand is parenthesised and the two groups
// are separated by a comma. Both arguments must be string literals (or macros
// expanding to them) for the concatenation to work.
// NOTE(review): presumably the comma means "both groups required", as the
// macro name suggests -- confirm against the consumer of the FEATURE string.
73 #pragma push_macro("AND")
74 #define AND(a, b) "(" a "),(" b ")"
75
76 // Special Registers
//
// One argument-less reader per register, all named __nvvm_read_ptx_sreg_*.
// tid, ntid, ctaid and nctaid each come in x, y, z and w flavours. Every
// entry's type string is "i" except clock64, which is "LLi". The attribute
// string is "nc" for everything up to the lanemask readers; the clock and
// pm0..pm3 readers use "n" only.
77
78 BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc")
79 BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc")
80 BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc")
81 BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc")
82
83 BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc")
84 BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc")
85 BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc")
86 BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc")
87
88 BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc")
89 BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc")
90 BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc")
91 BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc")
92
93 BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc")
94 BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc")
95 BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc")
96 BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc")
97
98 BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc")
99 BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc")
100 BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc")
101
102 BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc")
103 BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc")
104 BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc")
105
106 BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc")
107 BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc")
108 BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc")
109 BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc")
110 BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc")
111
112 BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n")
113 BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n")
114
115 BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n")
116 BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n")
117 BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n")
118 BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")
119
120 // MISC
//
// __nvvm_prmt: unconditional, no attributes; the type string lists four
// "Ui" components (result plus three operands).
// NOTE(review): presumably the PTX prmt (byte permute) instruction -- confirm.
121
122 BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
123
124 // Min Max
//
// fmin family. Name modifiers appear in the fixed order _ftz, _nan,
// _xorsign_abs, followed by the element type:
//   - f16, f16x2, bf16 and bf16x2 forms are all TARGET_BUILTINs; the bf16
//     forms have no _ftz variants.
//   - for float (_f) only the plain and _ftz forms are unconditional BUILTINs.
//   - the single double (_d) entry is unconditional.
// Gating: entries without _xorsign_abs use AND(SM_80, PTX70); every
// _xorsign_abs entry uses AND(SM_86, PTX72).
125
126 TARGET_BUILTIN(__nvvm_fmin_f16, "hhh", "", AND(SM_80, PTX70))
127 TARGET_BUILTIN(__nvvm_fmin_ftz_f16, "hhh", "", AND(SM_80, PTX70))
128 TARGET_BUILTIN(__nvvm_fmin_nan_f16, "hhh", "", AND(SM_80, PTX70))
129 TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
130 TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
131 TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
132 TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
133 TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16, "hhh", "",
134                AND(SM_86, PTX72))
135 TARGET_BUILTIN(__nvvm_fmin_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
136 TARGET_BUILTIN(__nvvm_fmin_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
137 TARGET_BUILTIN(__nvvm_fmin_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
138 TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
139 TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16x2, "V2hV2hV2h", "",
140                AND(SM_86, PTX72))
141 TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
142                AND(SM_86, PTX72))
143 TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
144                AND(SM_86, PTX72))
145 TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
146                AND(SM_86, PTX72))
147 TARGET_BUILTIN(__nvvm_fmin_bf16, "UsUsUs", "", AND(SM_80, PTX70))
148 TARGET_BUILTIN(__nvvm_fmin_nan_bf16, "UsUsUs", "", AND(SM_80, PTX70))
149 TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, "UsUsUs", "", AND(SM_86, PTX72))
150 TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, "UsUsUs", "",
151                AND(SM_86, PTX72))
152 TARGET_BUILTIN(__nvvm_fmin_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
153 TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
154 TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
155                AND(SM_86, PTX72))
156 TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
157                AND(SM_86, PTX72))
158 BUILTIN(__nvvm_fmin_f, "fff", "")
159 BUILTIN(__nvvm_fmin_ftz_f, "fff", "")
160 TARGET_BUILTIN(__nvvm_fmin_nan_f, "fff", "", AND(SM_80, PTX70))
161 TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
162 TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
163 TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
164 TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
165 TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
166 BUILTIN(__nvvm_fmin_d, "ddd", "")
167
// fmax family. Same set of element types, name modifiers (_ftz, _nan,
// _xorsign_abs), type strings and feature gates as the fmin entries, in the
// same order: AND(SM_80, PTX70) without _xorsign_abs, AND(SM_86, PTX72) with
// it; plain/_ftz float and the double form are unconditional.
168 TARGET_BUILTIN(__nvvm_fmax_f16, "hhh", "", AND(SM_80, PTX70))
169 TARGET_BUILTIN(__nvvm_fmax_ftz_f16, "hhh", "", AND(SM_80, PTX70))
170 TARGET_BUILTIN(__nvvm_fmax_nan_f16, "hhh", "", AND(SM_80, PTX70))
171 TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
172 TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
173 TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
174 TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
175 TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16, "hhh", "",
176                AND(SM_86, PTX72))
177 TARGET_BUILTIN(__nvvm_fmax_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
178 TARGET_BUILTIN(__nvvm_fmax_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
179 TARGET_BUILTIN(__nvvm_fmax_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
180 TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
181 TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16x2, "V2hV2hV2h", "",
182                AND(SM_86, PTX72))
183 TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
184                AND(SM_86, PTX72))
185 TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
186                AND(SM_86, PTX72))
187 TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
188                AND(SM_86, PTX72))
189 TARGET_BUILTIN(__nvvm_fmax_bf16, "UsUsUs", "", AND(SM_80, PTX70))
190 TARGET_BUILTIN(__nvvm_fmax_nan_bf16, "UsUsUs", "", AND(SM_80, PTX70))
191 TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, "UsUsUs", "", AND(SM_86, PTX72))
192 TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, "UsUsUs", "",
193                AND(SM_86, PTX72))
194 TARGET_BUILTIN(__nvvm_fmax_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
195 TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
196 TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
197                AND(SM_86, PTX72))
198 TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
199                AND(SM_86, PTX72))
200 BUILTIN(__nvvm_fmax_f, "fff", "")
201 BUILTIN(__nvvm_fmax_ftz_f, "fff", "")
202 TARGET_BUILTIN(__nvvm_fmax_nan_f, "fff", "", AND(SM_80, PTX70))
203 TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
204 TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
205 TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
206 TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
207 TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
208 BUILTIN(__nvvm_fmax_d, "ddd", "")
209
210 // Multiplication
//
// All entries are unconditional two-operand builtins with no attributes:
//   - mulhi for int, unsigned int, long long and unsigned long long;
//   - mul_<rn|rz|rm|rp> for float (with and without _ftz) and for double;
//   - mul24 for int and unsigned int.
211
212 BUILTIN(__nvvm_mulhi_i, "iii", "")
213 BUILTIN(__nvvm_mulhi_ui, "UiUiUi", "")
214 BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi", "")
215 BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi", "")
216
217 BUILTIN(__nvvm_mul_rn_ftz_f,  "fff", "")
218 BUILTIN(__nvvm_mul_rn_f,  "fff", "")
219 BUILTIN(__nvvm_mul_rz_ftz_f,  "fff", "")
220 BUILTIN(__nvvm_mul_rz_f,  "fff", "")
221 BUILTIN(__nvvm_mul_rm_ftz_f,  "fff", "")
222 BUILTIN(__nvvm_mul_rm_f,  "fff", "")
223 BUILTIN(__nvvm_mul_rp_ftz_f,  "fff", "")
224 BUILTIN(__nvvm_mul_rp_f,  "fff", "")
225
226 BUILTIN(__nvvm_mul_rn_d,  "ddd", "")
227 BUILTIN(__nvvm_mul_rz_d,  "ddd", "")
228 BUILTIN(__nvvm_mul_rm_d,  "ddd", "")
229 BUILTIN(__nvvm_mul_rp_d,  "ddd", "")
230
231 BUILTIN(__nvvm_mul24_i,  "iii", "")
232 BUILTIN(__nvvm_mul24_ui,  "UiUiUi", "")
233
234 // Div
//
// Unconditional two-operand builtins: an "approx" float form, then
// div_<rn|rz|rm|rp> for float (each with and without _ftz) and for double.
// There is no approx double entry in this list.
235
236 BUILTIN(__nvvm_div_approx_ftz_f,  "fff", "")
237 BUILTIN(__nvvm_div_approx_f,  "fff", "")
238
239 BUILTIN(__nvvm_div_rn_ftz_f,  "fff", "")
240 BUILTIN(__nvvm_div_rn_f,  "fff", "")
241 BUILTIN(__nvvm_div_rz_ftz_f,  "fff", "")
242 BUILTIN(__nvvm_div_rz_f,  "fff", "")
243 BUILTIN(__nvvm_div_rm_ftz_f,  "fff", "")
244 BUILTIN(__nvvm_div_rm_f,  "fff", "")
245 BUILTIN(__nvvm_div_rp_ftz_f,  "fff", "")
246 BUILTIN(__nvvm_div_rp_f,  "fff", "")
247
248 BUILTIN(__nvvm_div_rn_d,  "ddd", "")
249 BUILTIN(__nvvm_div_rz_d,  "ddd", "")
250 BUILTIN(__nvvm_div_rm_d,  "ddd", "")
251 BUILTIN(__nvvm_div_rp_d,  "ddd", "")
252
253 // Sad
//
// Three-operand builtins for int and unsigned int, unconditional.
// NOTE(review): presumably PTX "sad" (sum of absolute differences) -- confirm.
254
255 BUILTIN(__nvvm_sad_i, "iiii", "")
256 BUILTIN(__nvvm_sad_ui, "UiUiUiUi", "")
257
258 // Floor, Ceil
//
// One-operand, unconditional. Each operation has three forms: float with
// _ftz, float, and double.
259
260 BUILTIN(__nvvm_floor_ftz_f, "ff", "")
261 BUILTIN(__nvvm_floor_f, "ff", "")
262 BUILTIN(__nvvm_floor_d, "dd", "")
263
264 BUILTIN(__nvvm_ceil_ftz_f, "ff", "")
265 BUILTIN(__nvvm_ceil_f, "ff", "")
266 BUILTIN(__nvvm_ceil_d, "dd", "")
267
268 // Abs
//
// One-operand, unconditional: float with _ftz, float, and double.
269
270 BUILTIN(__nvvm_fabs_ftz_f, "ff", "")
271 BUILTIN(__nvvm_fabs_f, "ff", "")
272 BUILTIN(__nvvm_fabs_d, "dd", "")
273
274 // Round
//
// One-operand, unconditional: float with _ftz, float, and double.
275
276 BUILTIN(__nvvm_round_ftz_f, "ff", "")
277 BUILTIN(__nvvm_round_f, "ff", "")
278 BUILTIN(__nvvm_round_d, "dd", "")
279
280 // Trunc
//
// One-operand, unconditional: float with _ftz, float, and double.
281
282 BUILTIN(__nvvm_trunc_ftz_f, "ff", "")
283 BUILTIN(__nvvm_trunc_f, "ff", "")
284 BUILTIN(__nvvm_trunc_d, "dd", "")
285
286 // Saturate
//
// One-operand, unconditional: float with _ftz, float, and double.
287
288 BUILTIN(__nvvm_saturate_ftz_f, "ff", "")
289 BUILTIN(__nvvm_saturate_f, "ff", "")
290 BUILTIN(__nvvm_saturate_d, "dd", "")
291
292 // Exp2, Log2
//
// Only "approx" forms are listed. ex2 and lg2 each have float (_ftz and
// plain) and double entries, all unconditional. ex2 additionally has f16 and
// f16x2 forms gated on AND(SM_75, PTX70); lg2 has no half-precision entries.
293
294 BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "")
295 BUILTIN(__nvvm_ex2_approx_f, "ff", "")
296 BUILTIN(__nvvm_ex2_approx_d, "dd", "")
297 TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh", "", AND(SM_75, PTX70))
298 TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))
299
300 BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "")
301 BUILTIN(__nvvm_lg2_approx_f, "ff", "")
302 BUILTIN(__nvvm_lg2_approx_d, "dd", "")
303
304 // Sin, Cos
//
// Float-only "approx" forms, each with and without _ftz; unconditional.
305
306 BUILTIN(__nvvm_sin_approx_ftz_f, "ff", "")
307 BUILTIN(__nvvm_sin_approx_f, "ff", "")
308
309 BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "")
310 BUILTIN(__nvvm_cos_approx_f, "ff", "")
311
312 // Fma
//
// Three-operand builtins.
//   - f16 / f16x2: only the rn rounding form, with optional _ftz, _sat and
//     _relu modifiers. Plain/_ftz/_sat entries are gated on
//     AND(SM_53, PTX42); _relu entries on AND(SM_80, PTX70).
//   - bf16 / bf16x2: rn and rn_relu only, gated on AND(SM_80, PTX70).
//   - float: rn/rz/rm/rp, each with and without _ftz, unconditional.
//   - double: rn/rz/rm/rp, unconditional.
313
314 TARGET_BUILTIN(__nvvm_fma_rn_f16, "hhhh", "", AND(SM_53, PTX42))
315 TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16, "hhhh", "", AND(SM_53, PTX42))
316 TARGET_BUILTIN(__nvvm_fma_rn_sat_f16, "hhhh", "", AND(SM_53, PTX42))
317 TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16, "hhhh", "", AND(SM_53, PTX42))
318 TARGET_BUILTIN(__nvvm_fma_rn_relu_f16, "hhhh", "", AND(SM_80, PTX70))
319 TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16, "hhhh", "", AND(SM_80, PTX70))
320 TARGET_BUILTIN(__nvvm_fma_rn_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
321 TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
322 TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
323 TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
324 TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
325 TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
326 TARGET_BUILTIN(__nvvm_fma_rn_bf16, "UsUsUsUs", "", AND(SM_80, PTX70))
327 TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, "UsUsUsUs", "", AND(SM_80, PTX70))
328 TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, "ZUiZUiZUiZUi", "", AND(SM_80, PTX70))
329 TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, "ZUiZUiZUiZUi", "", AND(SM_80, PTX70))
330 BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "")
331 BUILTIN(__nvvm_fma_rn_f, "ffff", "")
332 BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "")
333 BUILTIN(__nvvm_fma_rz_f, "ffff", "")
334 BUILTIN(__nvvm_fma_rm_ftz_f, "ffff", "")
335 BUILTIN(__nvvm_fma_rm_f, "ffff", "")
336 BUILTIN(__nvvm_fma_rp_ftz_f, "ffff", "")
337 BUILTIN(__nvvm_fma_rp_f, "ffff", "")
338 BUILTIN(__nvvm_fma_rn_d, "dddd", "")
339 BUILTIN(__nvvm_fma_rz_d, "dddd", "")
340 BUILTIN(__nvvm_fma_rm_d, "dddd", "")
341 BUILTIN(__nvvm_fma_rp_d, "dddd", "")
342
343 // Rcp
//
// One-operand, unconditional. rcp_<rn|rz|rm|rp> for float (with and without
// _ftz) and for double. The approx forms exist only as _ftz, for both float
// and double.
344
345 BUILTIN(__nvvm_rcp_rn_ftz_f, "ff", "")
346 BUILTIN(__nvvm_rcp_rn_f, "ff", "")
347 BUILTIN(__nvvm_rcp_rz_ftz_f, "ff", "")
348 BUILTIN(__nvvm_rcp_rz_f, "ff", "")
349 BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "")
350 BUILTIN(__nvvm_rcp_rm_f, "ff", "")
351 BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "")
352 BUILTIN(__nvvm_rcp_rp_f, "ff", "")
353
354 BUILTIN(__nvvm_rcp_rn_d, "dd", "")
355 BUILTIN(__nvvm_rcp_rz_d, "dd", "")
356 BUILTIN(__nvvm_rcp_rm_d, "dd", "")
357 BUILTIN(__nvvm_rcp_rp_d, "dd", "")
358
359 BUILTIN(__nvvm_rcp_approx_ftz_f, "ff", "")
360 BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "")
361
362 // Sqrt
//
// One-operand, unconditional. Float has rn/rz/rm/rp and approx forms, each
// with and without _ftz; double has rn/rz/rm/rp only (no approx entry).
363
364 BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff", "")
365 BUILTIN(__nvvm_sqrt_rn_f, "ff", "")
366 BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff", "")
367 BUILTIN(__nvvm_sqrt_rz_f, "ff", "")
368 BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff", "")
369 BUILTIN(__nvvm_sqrt_rm_f, "ff", "")
370 BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff", "")
371 BUILTIN(__nvvm_sqrt_rp_f, "ff", "")
372 BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff", "")
373 BUILTIN(__nvvm_sqrt_approx_f, "ff", "")
374
375 BUILTIN(__nvvm_sqrt_rn_d, "dd", "")
376 BUILTIN(__nvvm_sqrt_rz_d, "dd", "")
377 BUILTIN(__nvvm_sqrt_rm_d, "dd", "")
378 BUILTIN(__nvvm_sqrt_rp_d, "dd", "")
379
380 // Rsqrt
//
// Only approx forms: float with _ftz, float, and double; unconditional.
381
382 BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff", "")
383 BUILTIN(__nvvm_rsqrt_approx_f, "ff", "")
384 BUILTIN(__nvvm_rsqrt_approx_d, "dd", "")
385
386 // Add
//
// Two-operand, unconditional: add_<rn|rz|rm|rp> for float (with and without
// _ftz) and for double.
387
388 BUILTIN(__nvvm_add_rn_ftz_f, "fff", "")
389 BUILTIN(__nvvm_add_rn_f, "fff", "")
390 BUILTIN(__nvvm_add_rz_ftz_f, "fff", "")
391 BUILTIN(__nvvm_add_rz_f, "fff", "")
392 BUILTIN(__nvvm_add_rm_ftz_f, "fff", "")
393 BUILTIN(__nvvm_add_rm_f, "fff", "")
394 BUILTIN(__nvvm_add_rp_ftz_f, "fff", "")
395 BUILTIN(__nvvm_add_rp_f, "fff", "")
396
397 BUILTIN(__nvvm_add_rn_d, "ddd", "")
398 BUILTIN(__nvvm_add_rz_d, "ddd", "")
399 BUILTIN(__nvvm_add_rm_d, "ddd", "")
400 BUILTIN(__nvvm_add_rp_d, "ddd", "")
401
402 // Convert
//
// Names follow <src>2<dst>_<rn|rz|rm|rp>[_ftz], where the type tags are
// d (double), f (float), i (int) and ui (unsigned int); the type string of
// each entry is "<dst><src>". _ftz variants exist only where float is the
// source or destination of a float<->other conversion listed with them
// (d2f, f2i, f2ui). All entries are unconditional.
// __nvvm_lohi_i2d takes two ints and returns a double; __nvvm_d2i_lo and
// __nvvm_d2i_hi each take a double and return an int.
403
404 BUILTIN(__nvvm_d2f_rn_ftz, "fd", "")
405 BUILTIN(__nvvm_d2f_rn, "fd", "")
406 BUILTIN(__nvvm_d2f_rz_ftz, "fd", "")
407 BUILTIN(__nvvm_d2f_rz, "fd", "")
408 BUILTIN(__nvvm_d2f_rm_ftz, "fd", "")
409 BUILTIN(__nvvm_d2f_rm, "fd", "")
410 BUILTIN(__nvvm_d2f_rp_ftz, "fd", "")
411 BUILTIN(__nvvm_d2f_rp, "fd", "")
412
413 BUILTIN(__nvvm_d2i_rn, "id", "")
414 BUILTIN(__nvvm_d2i_rz, "id", "")
415 BUILTIN(__nvvm_d2i_rm, "id", "")
416 BUILTIN(__nvvm_d2i_rp, "id", "")
417
418 BUILTIN(__nvvm_d2ui_rn, "Uid", "")
419 BUILTIN(__nvvm_d2ui_rz, "Uid", "")
420 BUILTIN(__nvvm_d2ui_rm, "Uid", "")
421 BUILTIN(__nvvm_d2ui_rp, "Uid", "")
422
423 BUILTIN(__nvvm_i2d_rn, "di", "")
424 BUILTIN(__nvvm_i2d_rz, "di", "")
425 BUILTIN(__nvvm_i2d_rm, "di", "")
426 BUILTIN(__nvvm_i2d_rp, "di", "")
427
428 BUILTIN(__nvvm_ui2d_rn, "dUi", "")
429 BUILTIN(__nvvm_ui2d_rz, "dUi", "")
430 BUILTIN(__nvvm_ui2d_rm, "dUi", "")
431 BUILTIN(__nvvm_ui2d_rp, "dUi", "")
432
433 BUILTIN(__nvvm_f2i_rn_ftz, "if", "")
434 BUILTIN(__nvvm_f2i_rn, "if", "")
435 BUILTIN(__nvvm_f2i_rz_ftz, "if", "")
436 BUILTIN(__nvvm_f2i_rz, "if", "")
437 BUILTIN(__nvvm_f2i_rm_ftz, "if", "")
438 BUILTIN(__nvvm_f2i_rm, "if", "")
439 BUILTIN(__nvvm_f2i_rp_ftz, "if", "")
440 BUILTIN(__nvvm_f2i_rp, "if", "")
441
442 BUILTIN(__nvvm_f2ui_rn_ftz, "Uif", "")
443 BUILTIN(__nvvm_f2ui_rn, "Uif", "")
444 BUILTIN(__nvvm_f2ui_rz_ftz, "Uif", "")
445 BUILTIN(__nvvm_f2ui_rz, "Uif", "")
446 BUILTIN(__nvvm_f2ui_rm_ftz, "Uif", "")
447 BUILTIN(__nvvm_f2ui_rm, "Uif", "")
448 BUILTIN(__nvvm_f2ui_rp_ftz, "Uif", "")
449 BUILTIN(__nvvm_f2ui_rp, "Uif", "")
450
451 BUILTIN(__nvvm_i2f_rn, "fi", "")
452 BUILTIN(__nvvm_i2f_rz, "fi", "")
453 BUILTIN(__nvvm_i2f_rm, "fi", "")
454 BUILTIN(__nvvm_i2f_rp, "fi", "")
455
456 BUILTIN(__nvvm_ui2f_rn, "fUi", "")
457 BUILTIN(__nvvm_ui2f_rz, "fUi", "")
458 BUILTIN(__nvvm_ui2f_rm, "fUi", "")
459 BUILTIN(__nvvm_ui2f_rp, "fUi", "")
460
461 BUILTIN(__nvvm_lohi_i2d, "dii", "")
462
463 BUILTIN(__nvvm_d2i_lo, "id", "")
464 BUILTIN(__nvvm_d2i_hi, "id", "")
465
// Conversions involving 64-bit integers: ll (long long) and ull (unsigned
// long long) to and from float and double, one entry per rounding mode
// (rn/rz/rm/rp). Only the float-source forms (f2ll, f2ull) have _ftz
// variants. All entries are unconditional.
466 BUILTIN(__nvvm_f2ll_rn_ftz, "LLif", "")
467 BUILTIN(__nvvm_f2ll_rn, "LLif", "")
468 BUILTIN(__nvvm_f2ll_rz_ftz, "LLif", "")
469 BUILTIN(__nvvm_f2ll_rz, "LLif", "")
470 BUILTIN(__nvvm_f2ll_rm_ftz, "LLif", "")
471 BUILTIN(__nvvm_f2ll_rm, "LLif", "")
472 BUILTIN(__nvvm_f2ll_rp_ftz, "LLif", "")
473 BUILTIN(__nvvm_f2ll_rp, "LLif", "")
474
475 BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif", "")
476 BUILTIN(__nvvm_f2ull_rn, "ULLif", "")
477 BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif", "")
478 BUILTIN(__nvvm_f2ull_rz, "ULLif", "")
479 BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif", "")
480 BUILTIN(__nvvm_f2ull_rm, "ULLif", "")
481 BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif", "")
482 BUILTIN(__nvvm_f2ull_rp, "ULLif", "")
483
484 BUILTIN(__nvvm_d2ll_rn, "LLid", "")
485 BUILTIN(__nvvm_d2ll_rz, "LLid", "")
486 BUILTIN(__nvvm_d2ll_rm, "LLid", "")
487 BUILTIN(__nvvm_d2ll_rp, "LLid", "")
488
489 BUILTIN(__nvvm_d2ull_rn, "ULLid", "")
490 BUILTIN(__nvvm_d2ull_rz, "ULLid", "")
491 BUILTIN(__nvvm_d2ull_rm, "ULLid", "")
492 BUILTIN(__nvvm_d2ull_rp, "ULLid", "")
493
494 BUILTIN(__nvvm_ll2f_rn, "fLLi", "")
495 BUILTIN(__nvvm_ll2f_rz, "fLLi", "")
496 BUILTIN(__nvvm_ll2f_rm, "fLLi", "")
497 BUILTIN(__nvvm_ll2f_rp, "fLLi", "")
498
499 BUILTIN(__nvvm_ull2f_rn, "fULLi", "")
500 BUILTIN(__nvvm_ull2f_rz, "fULLi", "")
501 BUILTIN(__nvvm_ull2f_rm, "fULLi", "")
502 BUILTIN(__nvvm_ull2f_rp, "fULLi", "")
503
504 BUILTIN(__nvvm_ll2d_rn, "dLLi", "")
505 BUILTIN(__nvvm_ll2d_rz, "dLLi", "")
506 BUILTIN(__nvvm_ll2d_rm, "dLLi", "")
507 BUILTIN(__nvvm_ll2d_rp, "dLLi", "")
508
509 BUILTIN(__nvvm_ull2d_rn, "dULLi", "")
510 BUILTIN(__nvvm_ull2d_rz, "dULLi", "")
511 BUILTIN(__nvvm_ull2d_rm, "dULLi", "")
512 BUILTIN(__nvvm_ull2d_rp, "dULLi", "")
513
// Float to reduced-precision conversions.
//   - f2h: unconditional, rn only (with and without _ftz); the result is
//     declared as "Us" rather than "h".
//   - ff2bf16x2 / ff2f16x2: take two floats; rn and rz, each with an optional
//     _relu suffix.
//   - f2bf16: single float; rn and rz, each with an optional _relu suffix.
//   - f2tf32: a single rna form.
// Everything except f2h is gated on AND(SM_80,PTX70).
514 BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "")
515 BUILTIN(__nvvm_f2h_rn, "Usf", "")
516
517 TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, "ZUiff", "", AND(SM_80,PTX70))
518 TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, "ZUiff", "", AND(SM_80,PTX70))
519 TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, "ZUiff", "", AND(SM_80,PTX70))
520 TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, "ZUiff", "", AND(SM_80,PTX70))
521
522 TARGET_BUILTIN(__nvvm_ff2f16x2_rn, "V2hff", "", AND(SM_80,PTX70))
523 TARGET_BUILTIN(__nvvm_ff2f16x2_rn_relu, "V2hff", "", AND(SM_80,PTX70))
524 TARGET_BUILTIN(__nvvm_ff2f16x2_rz, "V2hff", "", AND(SM_80,PTX70))
525 TARGET_BUILTIN(__nvvm_ff2f16x2_rz_relu, "V2hff", "", AND(SM_80,PTX70))
526
527 TARGET_BUILTIN(__nvvm_f2bf16_rn, "ZUsf", "", AND(SM_80,PTX70))
528 TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, "ZUsf", "", AND(SM_80,PTX70))
529 TARGET_BUILTIN(__nvvm_f2bf16_rz, "ZUsf", "", AND(SM_80,PTX70))
530 TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, "ZUsf", "", AND(SM_80,PTX70))
531
532 TARGET_BUILTIN(__nvvm_f2tf32_rna, "ZUif", "", AND(SM_80,PTX70))
533
534 // Bitcast
//
// Unconditional one-operand builtins pairing same-width types:
// float <-> int and double <-> long long, one entry per direction.
535
536 BUILTIN(__nvvm_bitcast_f2i, "if", "")
537 BUILTIN(__nvvm_bitcast_i2f, "fi", "")
538
539 BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "")
540 BUILTIN(__nvvm_bitcast_d2ll, "LLid", "")
541
542 // FNS
// Single entry, gated on PTX60 only (no SM requirement), with attribute "n".
// Operands per the type string: unsigned, unsigned, int; result unsigned.
// NOTE(review): presumably PTX "fns" (find n-th set bit) -- confirm.
543 TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", PTX60)
544
545 // Sync
//
// __syncthreads is the only entry here without the __nvvm_ prefix. The bar0_*
// forms take an int and return an int. bar_sync and the three PTX60-gated
// entries (bar_warp_sync, barrier_sync, barrier_sync_cnt) return void and
// carry the "n" attribute; the others have an empty attribute string.
546
547 BUILTIN(__syncthreads, "v", "")
548 BUILTIN(__nvvm_bar0_popc, "ii", "")
549 BUILTIN(__nvvm_bar0_and, "ii", "")
550 BUILTIN(__nvvm_bar0_or, "ii", "")
551 BUILTIN(__nvvm_bar_sync, "vi", "n")
552 TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", PTX60)
553 TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", PTX60)
554 TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", PTX60)
555
556 // Shuffle
//
// Four modes (down, up, bfly, idx), each for i32 and f32. The first group is
// unconditional; the _sync group is gated on PTX60 and its type strings have
// one extra leading "Ui" operand compared with the non-sync form.
// NOTE(review): that extra operand is presumably the member mask -- confirm.
557
558 BUILTIN(__nvvm_shfl_down_i32, "iiii", "")
559 BUILTIN(__nvvm_shfl_down_f32, "ffii", "")
560 BUILTIN(__nvvm_shfl_up_i32, "iiii", "")
561 BUILTIN(__nvvm_shfl_up_f32, "ffii", "")
562 BUILTIN(__nvvm_shfl_bfly_i32, "iiii", "")
563 BUILTIN(__nvvm_shfl_bfly_f32, "ffii", "")
564 BUILTIN(__nvvm_shfl_idx_i32, "iiii", "")
565 BUILTIN(__nvvm_shfl_idx_f32, "ffii", "")
566
567 TARGET_BUILTIN(__nvvm_shfl_sync_down_i32, "iUiiii", "", PTX60)
568 TARGET_BUILTIN(__nvvm_shfl_sync_down_f32, "fUifii", "", PTX60)
569 TARGET_BUILTIN(__nvvm_shfl_sync_up_i32, "iUiiii", "", PTX60)
570 TARGET_BUILTIN(__nvvm_shfl_sync_up_f32, "fUifii", "", PTX60)
571 TARGET_BUILTIN(__nvvm_shfl_sync_bfly_i32, "iUiiii", "", PTX60)
572 TARGET_BUILTIN(__nvvm_shfl_sync_bfly_f32, "fUifii", "", PTX60)
573 TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii", "", PTX60)
574 TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii", "", PTX60)
575
576 // Vote
// all/any/uni take and return a "b" value; ballot takes "b" and returns "Ui".
// The _sync forms are gated on PTX60 and take an extra leading "Ui" operand.
// NOTE(review): that extra operand is presumably the member mask -- confirm.
577 BUILTIN(__nvvm_vote_all, "bb", "")
578 BUILTIN(__nvvm_vote_any, "bb", "")
579 BUILTIN(__nvvm_vote_uni, "bb", "")
580 BUILTIN(__nvvm_vote_ballot, "Uib", "")
581
582 TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib", "", PTX60)
583 TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib", "", PTX60)
584 TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", PTX60)
585 TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60)
586
587 // Match
// All four entries are gated on AND(SM_70,PTX60). The i32 forms take a "Ui"
// value and the i64 forms a "Wi" value; every form returns "Ui".
588 TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", AND(SM_70,PTX60))
589 TARGET_BUILTIN(__nvvm_match_any_sync_i64, "UiUiWi", "", AND(SM_70,PTX60))
590 // These return a pair {value, predicate}, which requires custom lowering.
// In the builtin signature the second element is passed back through the
// trailing "i*" pointer operand.
591 TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", AND(SM_70,PTX60))
592 TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "UiUiWii*", "", AND(SM_70,PTX60))
593
594 // Redux
// Two-operand builtins, all gated on AND(SM_80,PTX70). umin and umax use
// "Ui" for the result and first operand; the other operations use "i"
// throughout. The second operand is "i" in every entry.
595 TARGET_BUILTIN(__nvvm_redux_sync_add, "iii", "", AND(SM_80,PTX70))
596 TARGET_BUILTIN(__nvvm_redux_sync_min, "iii", "", AND(SM_80,PTX70))
597 TARGET_BUILTIN(__nvvm_redux_sync_max, "iii", "", AND(SM_80,PTX70))
598 TARGET_BUILTIN(__nvvm_redux_sync_umin, "UiUii", "", AND(SM_80,PTX70))
599 TARGET_BUILTIN(__nvvm_redux_sync_umax, "UiUii", "", AND(SM_80,PTX70))
600 TARGET_BUILTIN(__nvvm_redux_sync_and, "iii", "", AND(SM_80,PTX70))
601 TARGET_BUILTIN(__nvvm_redux_sync_xor, "iii", "", AND(SM_80,PTX70))
602 TARGET_BUILTIN(__nvvm_redux_sync_or, "iii", "", AND(SM_80,PTX70))
603
604 // Membar
//
// Three argument-less void builtins, unconditional, suffixed cta, gl and sys.
// NOTE(review): the suffixes presumably name the PTX membar scope -- confirm.
605
606 BUILTIN(__nvvm_membar_cta, "v", "")
607 BUILTIN(__nvvm_membar_gl, "v", "")
608 BUILTIN(__nvvm_membar_sys, "v", "")
609
610 // mbarrier
//
// All entries are gated on AND(SM_80,PTX70). Each operation except
// pending_count comes as a pair: a plain form taking a "Wi*" pointer and a
// _shared form whose pointer is spelled "Wi*3".
// NOTE(review): the trailing 3 is presumably an address-space qualifier on
// the pointer -- confirm against the Builtins.def type-string key.
// pending_count takes a "Wi" value (not a pointer) and returns an int.
611
612 TARGET_BUILTIN(__nvvm_mbarrier_init, "vWi*i", "", AND(SM_80,PTX70))
613 TARGET_BUILTIN(__nvvm_mbarrier_init_shared, "vWi*3i", "", AND(SM_80,PTX70))
614
615 TARGET_BUILTIN(__nvvm_mbarrier_inval, "vWi*", "", AND(SM_80,PTX70))
616 TARGET_BUILTIN(__nvvm_mbarrier_inval_shared, "vWi*3", "", AND(SM_80,PTX70))
617
618 TARGET_BUILTIN(__nvvm_mbarrier_arrive, "WiWi*", "", AND(SM_80,PTX70))
619 TARGET_BUILTIN(__nvvm_mbarrier_arrive_shared, "WiWi*3", "", AND(SM_80,PTX70))
620 TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
621 TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))
622
623 TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop, "WiWi*", "", AND(SM_80,PTX70))
624 TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_shared, "WiWi*3", "", AND(SM_80,PTX70))
625 TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
626 TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))
627
628 TARGET_BUILTIN(__nvvm_mbarrier_test_wait, "bWi*Wi", "", AND(SM_80,PTX70))
629 TARGET_BUILTIN(__nvvm_mbarrier_test_wait_shared, "bWi*3Wi", "", AND(SM_80,PTX70))
630
631 TARGET_BUILTIN(__nvvm_mbarrier_pending_count, "iWi", "", AND(SM_80,PTX70))
632
633 // Memcpy, Memset
//
// Unconditional void builtins. memcpy takes two "Uc*" pointers, memset takes
// a "Uc*" pointer and a "Uc" value; both end with "z" and "i" operands.
// NOTE(review): the trailing "z"/"i" pair is presumably a byte count and an
// alignment -- confirm against the lowering code.
634
635 BUILTIN(__nvvm_memcpy, "vUc*Uc*zi","")
636 BUILTIN(__nvvm_memset, "vUc*Uczi","")
637
638 // Image
//
// These use the __builtin_ptx_ prefix rather than __nvvm_ and are all
// unconditional. read_image2D/3D return a 4-element vector ("V4f" or "V4i");
// the two-letter suffix before the trailing underscore varies the result
// element type (first letter) and the coordinate type (second letter), as
// the type strings show. write_image2D returns void; the get_image_* queries
// take an int and return an int.
639
640 BUILTIN(__builtin_ptx_read_image2Dfi_, "V4fiiii", "")
641 BUILTIN(__builtin_ptx_read_image2Dff_, "V4fiiff", "")
642 BUILTIN(__builtin_ptx_read_image2Dii_, "V4iiiii", "")
643 BUILTIN(__builtin_ptx_read_image2Dif_, "V4iiiff", "")
644
645 BUILTIN(__builtin_ptx_read_image3Dfi_, "V4fiiiiii", "")
646 BUILTIN(__builtin_ptx_read_image3Dff_, "V4fiiffff", "")
647 BUILTIN(__builtin_ptx_read_image3Dii_, "V4iiiiiii", "")
648 BUILTIN(__builtin_ptx_read_image3Dif_, "V4iiiffff", "")
649
650 BUILTIN(__builtin_ptx_write_image2Df_, "viiiffff", "")
651 BUILTIN(__builtin_ptx_write_image2Di_, "viiiiiii", "")
652 BUILTIN(__builtin_ptx_write_image2Dui_, "viiiUiUiUiUi", "")
653 BUILTIN(__builtin_ptx_get_image_depthi_, "ii", "")
654 BUILTIN(__builtin_ptx_get_image_heighti_, "ii", "")
655 BUILTIN(__builtin_ptx_get_image_widthi_, "ii", "")
656 BUILTIN(__builtin_ptx_get_image_channel_data_typei_, "ii", "")
657 BUILTIN(__builtin_ptx_get_image_channel_orderi_, "ii", "")
658
659 // Atomic
660 //
661 // We need the atom intrinsics because
662 // - they are used in converging analysis
663 // - they are used in address space analysis and optimization
664 // So it does not hurt to expose them as builtins.
665 //
666 BUILTIN(__nvvm_atom_add_gen_i, "iiD*i", "n")
667 TARGET_BUILTIN(__nvvm_atom_cta_add_gen_i, "iiD*i", "n", SM_60)
668 TARGET_BUILTIN(__nvvm_atom_sys_add_gen_i, "iiD*i", "n", SM_60)
669 BUILTIN(__nvvm_atom_add_gen_l, "LiLiD*Li", "n")
670 TARGET_BUILTIN(__nvvm_atom_cta_add_gen_l, "LiLiD*Li", "n", SM_60)
671 TARGET_BUILTIN(__nvvm_atom_sys_add_gen_l, "LiLiD*Li", "n", SM_60)
672 BUILTIN(__nvvm_atom_add_gen_ll, "LLiLLiD*LLi", "n")
673 TARGET_BUILTIN(__nvvm_atom_cta_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
674 TARGET_BUILTIN(__nvvm_atom_sys_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
675 BUILTIN(__nvvm_atom_add_gen_f, "ffD*f", "n")
676 TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f", "n", SM_60)
677 TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", SM_60)
678 TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", SM_60)
679 TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", SM_60)
680 TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", SM_60)
681
682 BUILTIN(__nvvm_atom_sub_gen_i, "iiD*i", "n")
683 BUILTIN(__nvvm_atom_sub_gen_l, "LiLiD*Li", "n")
684 BUILTIN(__nvvm_atom_sub_gen_ll, "LLiLLiD*LLi", "n")
685
686 BUILTIN(__nvvm_atom_xchg_gen_i, "iiD*i", "n")
687 TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_i, "iiD*i", "n", SM_60)
688 TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_i, "iiD*i", "n", SM_60)
689 BUILTIN(__nvvm_atom_xchg_gen_l, "LiLiD*Li", "n")
690 TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_l, "LiLiD*Li", "n", SM_60)
691 TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_l, "LiLiD*Li", "n", SM_60)
692 BUILTIN(__nvvm_atom_xchg_gen_ll, "LLiLLiD*LLi", "n")
693 TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)
694 TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)
695
696 BUILTIN(__nvvm_atom_max_gen_i, "iiD*i", "n")
697 TARGET_BUILTIN(__nvvm_atom_cta_max_gen_i, "iiD*i", "n", SM_60)
698 TARGET_BUILTIN(__nvvm_atom_sys_max_gen_i, "iiD*i", "n", SM_60)
699 BUILTIN(__nvvm_atom_max_gen_ui, "UiUiD*Ui", "n")
700 TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ui, "UiUiD*Ui", "n", SM_60)
701 TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ui, "UiUiD*Ui", "n", SM_60)
702 BUILTIN(__nvvm_atom_max_gen_l, "LiLiD*Li", "n")
703 TARGET_BUILTIN(__nvvm_atom_cta_max_gen_l, "LiLiD*Li", "n", SM_60)
704 TARGET_BUILTIN(__nvvm_atom_sys_max_gen_l, "LiLiD*Li", "n", SM_60)
705 BUILTIN(__nvvm_atom_max_gen_ul, "ULiULiD*ULi", "n")
706 TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
707 TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
708 BUILTIN(__nvvm_atom_max_gen_ll, "LLiLLiD*LLi", "n")
709 TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
710 TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
711 BUILTIN(__nvvm_atom_max_gen_ull, "ULLiULLiD*ULLi", "n")
712 TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
713 TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
714
715 BUILTIN(__nvvm_atom_min_gen_i, "iiD*i", "n")
716 TARGET_BUILTIN(__nvvm_atom_cta_min_gen_i, "iiD*i", "n", SM_60)
717 TARGET_BUILTIN(__nvvm_atom_sys_min_gen_i, "iiD*i", "n", SM_60)
718 BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui", "n")
719 TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ui, "UiUiD*Ui", "n", SM_60)
720 TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ui, "UiUiD*Ui", "n", SM_60)
721 BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li", "n")
722 TARGET_BUILTIN(__nvvm_atom_cta_min_gen_l, "LiLiD*Li", "n", SM_60)
723 TARGET_BUILTIN(__nvvm_atom_sys_min_gen_l, "LiLiD*Li", "n", SM_60)
724 BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi", "n")
725 TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
726 TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
727 BUILTIN(__nvvm_atom_min_gen_ll, "LLiLLiD*LLi", "n")
728 TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
729 TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
730 BUILTIN(__nvvm_atom_min_gen_ull, "ULLiULLiD*ULLi", "n")
731 TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
732 TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
733
734 BUILTIN(__nvvm_atom_inc_gen_ui, "UiUiD*Ui", "n")
735 TARGET_BUILTIN(__nvvm_atom_cta_inc_gen_ui, "UiUiD*Ui", "n", SM_60)
736 TARGET_BUILTIN(__nvvm_atom_sys_inc_gen_ui, "UiUiD*Ui", "n", SM_60)
737 BUILTIN(__nvvm_atom_dec_gen_ui, "UiUiD*Ui", "n")
738 TARGET_BUILTIN(__nvvm_atom_cta_dec_gen_ui, "UiUiD*Ui", "n", SM_60)
739 TARGET_BUILTIN(__nvvm_atom_sys_dec_gen_ui, "UiUiD*Ui", "n", SM_60)
740
// Atomic bitwise-AND builtins. Only signed int / long / long long forms are
// declared here (no unsigned entries); each signature reads
// T(volatile T *, T). The _cta_ and _sys_ variants require SM_60.
741 BUILTIN(__nvvm_atom_and_gen_i, "iiD*i", "n")
742 TARGET_BUILTIN(__nvvm_atom_cta_and_gen_i, "iiD*i", "n", SM_60)
743 TARGET_BUILTIN(__nvvm_atom_sys_and_gen_i, "iiD*i", "n", SM_60)
744 BUILTIN(__nvvm_atom_and_gen_l, "LiLiD*Li", "n")
745 TARGET_BUILTIN(__nvvm_atom_cta_and_gen_l, "LiLiD*Li", "n", SM_60)
746 TARGET_BUILTIN(__nvvm_atom_sys_and_gen_l, "LiLiD*Li", "n", SM_60)
747 BUILTIN(__nvvm_atom_and_gen_ll, "LLiLLiD*LLi", "n")
748 TARGET_BUILTIN(__nvvm_atom_cta_and_gen_ll, "LLiLLiD*LLi", "n", SM_60)
749 TARGET_BUILTIN(__nvvm_atom_sys_and_gen_ll, "LLiLLiD*LLi", "n", SM_60)
750
// Atomic bitwise-OR builtins. Only signed int / long / long long forms are
// declared here (no unsigned entries); each signature reads
// T(volatile T *, T). The _cta_ and _sys_ variants require SM_60.
751 BUILTIN(__nvvm_atom_or_gen_i, "iiD*i", "n")
752 TARGET_BUILTIN(__nvvm_atom_cta_or_gen_i, "iiD*i", "n", SM_60)
753 TARGET_BUILTIN(__nvvm_atom_sys_or_gen_i, "iiD*i", "n", SM_60)
754 BUILTIN(__nvvm_atom_or_gen_l, "LiLiD*Li", "n")
755 TARGET_BUILTIN(__nvvm_atom_cta_or_gen_l, "LiLiD*Li", "n", SM_60)
756 TARGET_BUILTIN(__nvvm_atom_sys_or_gen_l, "LiLiD*Li", "n", SM_60)
757 BUILTIN(__nvvm_atom_or_gen_ll, "LLiLLiD*LLi", "n")
758 TARGET_BUILTIN(__nvvm_atom_cta_or_gen_ll, "LLiLLiD*LLi", "n", SM_60)
759 TARGET_BUILTIN(__nvvm_atom_sys_or_gen_ll, "LLiLLiD*LLi", "n", SM_60)
760
// Atomic bitwise-XOR builtins. Only signed int / long / long long forms are
// declared here (no unsigned entries); each signature reads
// T(volatile T *, T). The _cta_ and _sys_ variants require SM_60.
761 BUILTIN(__nvvm_atom_xor_gen_i, "iiD*i", "n")
762 TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_i, "iiD*i", "n", SM_60)
763 TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_i, "iiD*i", "n", SM_60)
764 BUILTIN(__nvvm_atom_xor_gen_l, "LiLiD*Li", "n")
765 TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_l, "LiLiD*Li", "n", SM_60)
766 TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_l, "LiLiD*Li", "n", SM_60)
767 BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n")
768 TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
769 TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
770
// Atomic compare-and-swap builtins. Unlike the two-operand atomics in this
// table these take two value operands after the pointer: the signature reads
// T(volatile T *, T, T) for T in int / long / long long.
// NOTE(review): the order of the compare and swap operands is not visible in
// this table -- check the CodeGen lowering before relying on it.
// The _cta_ and _sys_ variants require SM_60.
771 BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n")
772 TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii", "n", SM_60)
773 TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii", "n", SM_60)
774 BUILTIN(__nvvm_atom_cas_gen_l, "LiLiD*LiLi", "n")
775 TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_l, "LiLiD*LiLi", "n", SM_60)
776 TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_l, "LiLiD*LiLi", "n", SM_60)
777 BUILTIN(__nvvm_atom_cas_gen_ll, "LLiLLiD*LLiLLi", "n")
778 TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60)
779 TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60)
780
781 // Compiler error / warning builtins.
// Per the Builtins.def encoding, "vcC*4" is void(const char *) with the
// pointer placed in address space 4, and the "n" attribute marks the builtin
// nothrow. NOTE(review): address space 4 is presumably NVPTX constant memory
// -- confirm against the NVPTX address-space numbering.
782 BUILTIN(__nvvm_compiler_error, "vcC*4", "n")
783 BUILTIN(__nvvm_compiler_warn, "vcC*4", "n")
784
785 // __ldg.  This is not implemented as a builtin by nvcc.
// Every entry has the shape T(const T *) with an empty attribute string and no
// target-feature requirement. The suffix names the element type; a trailing 2
// or 4 selects a two- or four-element ext_vector type ("E2"/"E4" prefix in the
// Builtins.def encoding).
// Scalar signed integer forms.
786 BUILTIN(__nvvm_ldg_c, "ccC*", "")
787 BUILTIN(__nvvm_ldg_s, "ssC*", "")
788 BUILTIN(__nvvm_ldg_i, "iiC*", "")
789 BUILTIN(__nvvm_ldg_l, "LiLiC*", "")
790 BUILTIN(__nvvm_ldg_ll, "LLiLLiC*", "")
791
// Scalar unsigned integer forms.
792 BUILTIN(__nvvm_ldg_uc, "UcUcC*", "")
793 BUILTIN(__nvvm_ldg_us, "UsUsC*", "")
794 BUILTIN(__nvvm_ldg_ui, "UiUiC*", "")
795 BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "")
796 BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "")
797
// Scalar floating-point forms.
798 BUILTIN(__nvvm_ldg_f, "ffC*", "")
799 BUILTIN(__nvvm_ldg_d, "ddC*", "")
800
// Signed integer vector forms (no `long` vectors and no ll4 are declared).
801 BUILTIN(__nvvm_ldg_c2, "E2cE2cC*", "")
802 BUILTIN(__nvvm_ldg_c4, "E4cE4cC*", "")
803 BUILTIN(__nvvm_ldg_s2, "E2sE2sC*", "")
804 BUILTIN(__nvvm_ldg_s4, "E4sE4sC*", "")
805 BUILTIN(__nvvm_ldg_i2, "E2iE2iC*", "")
806 BUILTIN(__nvvm_ldg_i4, "E4iE4iC*", "")
807 BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*", "")
808
// Unsigned integer vector forms, mirroring the signed list above.
809 BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*", "")
810 BUILTIN(__nvvm_ldg_uc4, "E4UcE4UcC*", "")
811 BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*", "")
812 BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*", "")
813 BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "")
814 BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "")
815 BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "")
816
// Floating-point vector forms (no d4 is declared).
817 BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "")
818 BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "")
819 BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "")
820
821 // Address space predicates.
// "bvC*" reads bool(const void *) in the Builtins.def encoding; the "nc"
// attributes mark each predicate nothrow and const. One predicate is declared
// per address space named in the suffix (const, global, local, shared).
822 BUILTIN(__nvvm_isspacep_const, "bvC*", "nc")
823 BUILTIN(__nvvm_isspacep_global, "bvC*", "nc")
824 BUILTIN(__nvvm_isspacep_local, "bvC*", "nc")
825 BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc")
826
827 // Builtins to support WMMA instructions on sm_70
// Signature shapes, per the Builtins.def encoding:
//   ld_* : void(T *, const T *, unsigned, <constant int>)
//   st_c : void(T *, T *, unsigned, <constant int>)   (source is non-const)
//   mma  : void(D *, const int *, const int *, const C *,
//               <constant int>, <constant int>)
// where T / C / D are int for the f16 forms and float for the f32 forms, and
// "Ii" arguments must fold to integer constant expressions.
// NOTE(review): the meaning of the unsigned and constant-int operands is not
// visible in this table -- see the CodeGen lowering.
// All entries require SM_70; the m16n16k16 shape needs PTX60 while the
// m32n8k16 and m8n32k16 shapes need PTX61. AND(...) is a macro defined earlier
// in this file (outside this view) that combines the two requirements.
828 TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
829 TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60))
830 TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
831 TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
832 TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX60))
833 TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX60))
834
835 TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
836 TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
837 TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
838 TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
839 TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
840 TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
841
842 TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
843 TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
844 TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
845 TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
846 TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
847 TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
848
// mma entries: the name suffix is <result type><accumulator type>, matching
// the first (destination) and fourth (accumulator) pointer element types.
849 TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
850 TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
851 TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
852 TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
853
854 TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
855 TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
856 TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
857 TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
858
859 TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
860 TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
861 TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
862 TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
863
864 // Builtins to support integer and sub-integer WMMA instructions on sm_72/sm_75
// Feature gating as written below:
//   - 8-bit __imma_* shapes (m16n16k16, m32n8k16, m8n32k16): SM_72 + PTX63
//   - 4-bit __imma_m8n8k32_* and 1-bit __bmma_m8n8k128_*:    SM_75 + PTX63
//   - __bmma_m8n8k128_mma_and_popc_b1 alone:                 SM_80 + PTX71
// All fragments are passed as int pointers. Unlike the __hmma_* stores, the
// st_c_i32 entries here declare their source pointer const ("iC*").
// The two __bmma_*_mma_* entries take a single trailing constant-int operand
// ("Ii"), whereas the __imma_*_mma_* entries take two ("IiIi").
865 TARGET_BUILTIN(__bmma_m8n8k128_ld_a_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
866 TARGET_BUILTIN(__bmma_m8n8k128_ld_b_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
867 TARGET_BUILTIN(__bmma_m8n8k128_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
868 TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_80,PTX71))
869 TARGET_BUILTIN(__bmma_m8n8k128_mma_xor_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX63))
870 TARGET_BUILTIN(__bmma_m8n8k128_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
871 TARGET_BUILTIN(__imma_m16n16k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
872 TARGET_BUILTIN(__imma_m16n16k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
873 TARGET_BUILTIN(__imma_m16n16k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
874 TARGET_BUILTIN(__imma_m16n16k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
875 TARGET_BUILTIN(__imma_m16n16k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
876 TARGET_BUILTIN(__imma_m16n16k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
877 TARGET_BUILTIN(__imma_m16n16k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
878 TARGET_BUILTIN(__imma_m16n16k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
879 TARGET_BUILTIN(__imma_m32n8k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
880 TARGET_BUILTIN(__imma_m32n8k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
881 TARGET_BUILTIN(__imma_m32n8k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
882 TARGET_BUILTIN(__imma_m32n8k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
883 TARGET_BUILTIN(__imma_m32n8k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
884 TARGET_BUILTIN(__imma_m32n8k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
885 TARGET_BUILTIN(__imma_m32n8k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
886 TARGET_BUILTIN(__imma_m32n8k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
887 TARGET_BUILTIN(__imma_m8n32k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
888 TARGET_BUILTIN(__imma_m8n32k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
889 TARGET_BUILTIN(__imma_m8n32k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
890 TARGET_BUILTIN(__imma_m8n32k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
891 TARGET_BUILTIN(__imma_m8n32k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
892 TARGET_BUILTIN(__imma_m8n32k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
893 TARGET_BUILTIN(__imma_m8n32k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
894 TARGET_BUILTIN(__imma_m8n32k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
895 TARGET_BUILTIN(__imma_m8n8k32_ld_a_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
896 TARGET_BUILTIN(__imma_m8n8k32_ld_a_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
897 TARGET_BUILTIN(__imma_m8n8k32_ld_b_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
898 TARGET_BUILTIN(__imma_m8n8k32_ld_b_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
899 TARGET_BUILTIN(__imma_m8n8k32_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
900 TARGET_BUILTIN(__imma_m8n8k32_mma_s4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
901 TARGET_BUILTIN(__imma_m8n8k32_mma_u4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
902 TARGET_BUILTIN(__imma_m8n8k32_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
903
904 // Builtins to support double and alternate float WMMA instructions on sm_80
// Every entry in this group requires SM_80 and PTX70.
// __dmma_*: double-precision fragments, passed as double pointers.
905 TARGET_BUILTIN(__dmma_m8n8k4_ld_a, "vd*dC*UiIi", "", AND(SM_80,PTX70))
906 TARGET_BUILTIN(__dmma_m8n8k4_ld_b, "vd*dC*UiIi", "", AND(SM_80,PTX70))
907 TARGET_BUILTIN(__dmma_m8n8k4_ld_c, "vd*dC*UiIi", "", AND(SM_80,PTX70))
908 TARGET_BUILTIN(__dmma_m8n8k4_st_c_f64, "vd*dC*UiIi", "", AND(SM_80,PTX70))
909 TARGET_BUILTIN(__dmma_m8n8k4_mma_f64, "vd*dC*dC*dC*IiIi", "", AND(SM_80,PTX70))
910
// __mma_bf16_*: the a/b fragments are passed as int pointers; the mma result
// and accumulator are float. Only ld_a, ld_b and mma_f32 are declared per
// shape -- no ld_c / st_c entries appear in this group.
911 TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
912 TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
913 TARGET_BUILTIN(__mma_bf16_m16n16k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
914 TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
915 TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
916 TARGET_BUILTIN(__mma_bf16_m8n32k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
917 TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
918 TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
919 TARGET_BUILTIN(__mma_bf16_m32n8k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
920
// __mma_tf32_*: a/b fragments as int pointers, c fragment and result as float.
// Note the store entry is named __mma_m16n16k8_st_c_f32, without the "tf32"
// infix used by the other entries of this shape.
921 TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
922 TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
923 TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_c, "vf*fC*UiIi", "", AND(SM_80,PTX70))
924 TARGET_BUILTIN(__mma_m16n16k8_st_c_f32, "vf*fC*UiIi", "", AND(SM_80,PTX70))
925 TARGET_BUILTIN(__mma_tf32_m16n16k8_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
926
927 // Async Copy
// Every entry in this group requires SM_80 and PTX70.
// mbarrier arrive: "vWi*" reads void(int64_t *) in the Builtins.def encoding;
// the _shared forms take the same pointer qualified with address space 3.
928 TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive, "vWi*", "", AND(SM_80,PTX70))
929 TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_shared, "vWi*3", "", AND(SM_80,PTX70))
930 TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc, "vWi*", "", AND(SM_80,PTX70))
931 TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc_shared, "vWi*3", "", AND(SM_80,PTX70))
932
// Copies: "vv*3vC*1" reads void(void AS(3) *, const void AS(1) *), i.e. a
// destination in address space 3 and a const source in address space 1,
// matching the shared_global naming. The numeric suffix (4 / 8 / 16) is
// presumably the copy size in bytes -- confirm against the PTX cp.async docs.
// The cg form is declared only for size 16.
933 TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_4, "vv*3vC*1", "", AND(SM_80,PTX70))
934 TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_8, "vv*3vC*1", "", AND(SM_80,PTX70))
935 TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_16, "vv*3vC*1", "", AND(SM_80,PTX70))
936 TARGET_BUILTIN(__nvvm_cp_async_cg_shared_global_16, "vv*3vC*1", "", AND(SM_80,PTX70))
937
// Group management: commit_group and wait_all take no arguments; wait_group
// takes one int that must be an integer constant expression ("Ii").
938 TARGET_BUILTIN(__nvvm_cp_async_commit_group, "v", "", AND(SM_80,PTX70))
939 TARGET_BUILTIN(__nvvm_cp_async_wait_group, "vIi", "", AND(SM_80,PTX70))
940 TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))
941
942
943 // bf16, bf16x2 abs, neg
// The scalar forms take and return unsigned short ("UsUs"); the x2 forms take
// and return a 32-bit unsigned integer ("ZUi", where the Z prefix selects
// int32_t width in the Builtins.def encoding). In other words the bf16 values
// are passed as raw integer bit patterns, not as a floating-point type.
// All four entries require SM_80 and PTX70.
944 TARGET_BUILTIN(__nvvm_abs_bf16, "UsUs", "", AND(SM_80,PTX70))
945 TARGET_BUILTIN(__nvvm_abs_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70))
946 TARGET_BUILTIN(__nvvm_neg_bf16, "UsUs", "", AND(SM_80,PTX70))
947 TARGET_BUILTIN(__nvvm_neg_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70))
948
// End of table. Drop the X-macro hooks so the next includer of this file must
// define BUILTIN / TARGET_BUILTIN afresh.
949 #undef BUILTIN
950 #undef TARGET_BUILTIN
// Restore the previous definitions of the helper macros this file uses; the
// SM_* and PTX42..PTX71 pushes are visible at the top of the file, and the
// remaining names are expected to have a matching push_macro there too.
// NOTE(review): keep this list in sync with the push_macro list whenever an
// SM_* or PTX* level is added.
951 #pragma pop_macro("AND")
952 #pragma pop_macro("SM_53")
953 #pragma pop_macro("SM_60")
954 #pragma pop_macro("SM_70")
955 #pragma pop_macro("SM_72")
956 #pragma pop_macro("SM_75")
957 #pragma pop_macro("SM_80")
958 #pragma pop_macro("SM_86")
959 #pragma pop_macro("SM_87")
960 #pragma pop_macro("SM_89")
961 #pragma pop_macro("SM_90")
962 #pragma pop_macro("PTX42")
963 #pragma pop_macro("PTX60")
964 #pragma pop_macro("PTX61")
965 #pragma pop_macro("PTX63")
966 #pragma pop_macro("PTX64")
967 #pragma pop_macro("PTX65")
968 #pragma pop_macro("PTX70")
969 #pragma pop_macro("PTX71")
970 #pragma pop_macro("PTX72")
971 #pragma pop_macro("PTX73")
972 #pragma pop_macro("PTX74")
973 #pragma pop_macro("PTX75")
974 #pragma pop_macro("PTX76")
975 #pragma pop_macro("PTX77")
976 #pragma pop_macro("PTX78")