/*
 * Source: FreeBSD contrib/llvm-project/clang/lib/Headers/avx512erintrin.h
 * (MFC r355940) — web-viewer header lines replaced with this comment.
 */
1 /*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 #ifndef __IMMINTRIN_H
10 #error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
11 #endif
12
13 #ifndef __AVX512ERINTRIN_H
14 #define __AVX512ERINTRIN_H
15
/* exp2a23: approximate 2^x per double element, accurate to about 23 bits
 * (VEXP2PD).  R is an _MM_FROUND_* rounding/SAE constant; it must be a
 * compile-time constant, hence the macro (not function) form.  The mask
 * variants take elements from S where the corresponding mask bit is clear;
 * the maskz variants zero those elements instead. */
#define _mm512_exp2a23_round_pd(A, R) \
  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                      (__v8df)_mm512_setzero_pd(), \
                                      (__mmask8)-1, (int)(R))

#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(S), (__mmask8)(M), \
                                      (int)(R))

#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                      (__v8df)_mm512_setzero_pd(), \
                                      (__mmask8)(M), (int)(R))

/* Non-_round forms use the current MXCSR rounding mode. */
#define _mm512_exp2a23_pd(A) \
  _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(S, M, A) \
  _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(M, A) \
  _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
/* exp2a23, single-precision (VEXP2PS): approximate 2^x per float element.
 * Same mask/maskz/rounding conventions as the _pd forms above. */
#define _mm512_exp2a23_round_ps(A, R) \
  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                     (__v16sf)_mm512_setzero_ps(), \
                                     (__mmask16)-1, (int)(R))

#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(S), (__mmask16)(M), \
                                     (int)(R))

#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                     (__v16sf)_mm512_setzero_ps(), \
                                     (__mmask16)(M), (int)(R))

#define _mm512_exp2a23_ps(A) \
  _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(S, M, A) \
  _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(M, A) \
  _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
/* rsqrt28: approximate 1/sqrt(x) per double element with a maximum relative
 * error of about 2^-28 (VRSQRT28PD).  Mask variants blend from S where the
 * mask bit is clear; maskz variants zero those lanes. */
#define _mm512_rsqrt28_round_pd(A, R) \
  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)-1, (int)(R))

#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(S), (__mmask8)(M), \
                                         (int)(R))

#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(M), (int)(R))

#define _mm512_rsqrt28_pd(A) \
  _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(S, M, A) \
  _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(M, A) \
  _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
/* rsqrt28, single-precision (VRSQRT28PS): approximate 1/sqrt(x) per float
 * element, max relative error about 2^-28. */
#define _mm512_rsqrt28_round_ps(A, R) \
  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)-1, (int)(R))

#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(S), (__mmask16)(M), \
                                        (int)(R))

#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(M), (int)(R))

#define _mm512_rsqrt28_ps(A) \
  _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

/* Fix: parenthesize (A) for macro hygiene — the original passed bare A,
 * unlike every sibling macro; a comma-expression argument would mis-expand. */
#define _mm512_mask_rsqrt28_ps(S, M, A) \
  _mm512_mask_rsqrt28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(M, A) \
  _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
/* rsqrt28, scalar single (VRSQRT28SS): compute ~1/sqrt(x) of the low float
 * of B; the upper three lanes of the result come from A.  The mask forms
 * select the low lane from S (mask) or zero (maskz) when the mask bit is 0. */
#define _mm_rsqrt28_round_ss(A, B, R) \
  (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R))

#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
  (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)(__m128)(S), \
                                              (__mmask8)(M), (int)(R))

#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
  (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(M), (int)(R))

#define _mm_rsqrt28_ss(A, B) \
  _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(S, M, A, B) \
  _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(M, A, B) \
  _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
/* rsqrt28, scalar double (VRSQRT28SD): compute ~1/sqrt(x) of the low double
 * of B; the upper lane of the result comes from A. */
#define _mm_rsqrt28_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R))

#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
  (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)(__m128d)(S), \
                                               (__mmask8)(M), (int)(R))

#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
  (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(M), (int)(R))

#define _mm_rsqrt28_sd(A, B) \
  _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(S, M, A, B) \
  _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(M, A, B) \
  _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
/* rcp28: approximate 1/x per double element with a maximum relative error
 * of about 2^-28 (VRCP28PD).  Same mask/maskz/rounding conventions as the
 * exp2a23 and rsqrt28 families above. */
#define _mm512_rcp28_round_pd(A, R) \
  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)-1, (int)(R))

#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(S), (__mmask8)(M), \
                                       (int)(R))

#define _mm512_maskz_rcp28_round_pd(M, A, R) \
  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)(M), (int)(R))

#define _mm512_rcp28_pd(A) \
  _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(S, M, A) \
  _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(M, A) \
  _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
/* rcp28, single-precision (VRCP28PS): approximate 1/x per float element,
 * max relative error about 2^-28. */
#define _mm512_rcp28_round_ps(A, R) \
  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)-1, (int)(R))

#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(S), (__mmask16)(M), \
                                      (int)(R))

#define _mm512_maskz_rcp28_round_ps(M, A, R) \
  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)(M), (int)(R))

#define _mm512_rcp28_ps(A) \
  _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(S, M, A) \
  _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(M, A) \
  _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
/* rcp28, scalar single (VRCP28SS): compute ~1/x of the low float of B; the
 * upper three lanes of the result come from A. */
#define _mm_rcp28_round_ss(A, B, R) \
  (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1, (int)(R))

#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
  (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)(__m128)(S), \
                                            (__mmask8)(M), (int)(R))

#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
  (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(M), (int)(R))

#define _mm_rcp28_ss(A, B) \
  _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(S, M, A, B) \
  _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(M, A, B) \
  _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
244 #define _mm_rcp28_round_sd(A, B, R) \
245   (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
246                                              (__v2df)(__m128d)(B), \
247                                              (__v2df)_mm_setzero_pd(), \
248                                              (__mmask8)-1, (int)(R))
249
250 #define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
251   (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
252                                              (__v2df)(__m128d)(B), \
253                                              (__v2df)(__m128d)(S), \
254                                              (__mmask8)(M), (int)(R))
255
256 #define _mm_maskz_rcp28_round_sd(M, A, B, R) \
257   (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
258                                              (__v2df)(__m128d)(B), \
259                                              (__v2df)_mm_setzero_pd(), \
260                                              (__mmask8)(M), (int)(R))
261
262 #define _mm_rcp28_sd(A, B) \
263   _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
264
265 #define _mm_mask_rcp28_sd(S, M, A, B) \
266   _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
267
268 #define _mm_maskz_rcp28_sd(M, A, B) \
269   _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
270
271 #endif /* __AVX512ERINTRIN_H */