/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
25 #error "Never use <avx512vlvnniintrin.h> directly; include <immintrin.h> instead."
28 #ifndef __AVX512VLVNNIINTRIN_H
29 #define __AVX512VLVNNIINTRIN_H
31 /* Define the default attributes for the functions in this file. */
32 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128)))
33 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256)))
36 static __inline__ __m256i __DEFAULT_FN_ATTRS256
37 _mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
39 return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A,
43 static __inline__ __m256i __DEFAULT_FN_ATTRS256
44 _mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
46 return (__m256i)__builtin_ia32_selectd_256(__U,
47 (__v8si)_mm256_dpbusd_epi32(__S, __A, __B),
51 static __inline__ __m256i __DEFAULT_FN_ATTRS256
52 _mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
54 return (__m256i)__builtin_ia32_selectd_256(__U,
55 (__v8si)_mm256_dpbusd_epi32(__S, __A, __B),
56 (__v8si)_mm256_setzero_si256());
59 static __inline__ __m256i __DEFAULT_FN_ATTRS256
60 _mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
62 return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A,
66 static __inline__ __m256i __DEFAULT_FN_ATTRS256
67 _mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
69 return (__m256i)__builtin_ia32_selectd_256(__U,
70 (__v8si)_mm256_dpbusds_epi32(__S, __A, __B),
74 static __inline__ __m256i __DEFAULT_FN_ATTRS256
75 _mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
77 return (__m256i)__builtin_ia32_selectd_256(__U,
78 (__v8si)_mm256_dpbusds_epi32(__S, __A, __B),
79 (__v8si)_mm256_setzero_si256());
82 static __inline__ __m256i __DEFAULT_FN_ATTRS256
83 _mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
85 return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A,
89 static __inline__ __m256i __DEFAULT_FN_ATTRS256
90 _mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
92 return (__m256i)__builtin_ia32_selectd_256(__U,
93 (__v8si)_mm256_dpwssd_epi32(__S, __A, __B),
97 static __inline__ __m256i __DEFAULT_FN_ATTRS256
98 _mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
100 return (__m256i)__builtin_ia32_selectd_256(__U,
101 (__v8si)_mm256_dpwssd_epi32(__S, __A, __B),
102 (__v8si)_mm256_setzero_si256());
105 static __inline__ __m256i __DEFAULT_FN_ATTRS256
106 _mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
108 return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A,
112 static __inline__ __m256i __DEFAULT_FN_ATTRS256
113 _mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
115 return (__m256i)__builtin_ia32_selectd_256(__U,
116 (__v8si)_mm256_dpwssds_epi32(__S, __A, __B),
120 static __inline__ __m256i __DEFAULT_FN_ATTRS256
121 _mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
123 return (__m256i)__builtin_ia32_selectd_256(__U,
124 (__v8si)_mm256_dpwssds_epi32(__S, __A, __B),
125 (__v8si)_mm256_setzero_si256());
128 static __inline__ __m128i __DEFAULT_FN_ATTRS128
129 _mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
131 return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A,
135 static __inline__ __m128i __DEFAULT_FN_ATTRS128
136 _mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
138 return (__m128i)__builtin_ia32_selectd_128(__U,
139 (__v4si)_mm_dpbusd_epi32(__S, __A, __B),
143 static __inline__ __m128i __DEFAULT_FN_ATTRS128
144 _mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
146 return (__m128i)__builtin_ia32_selectd_128(__U,
147 (__v4si)_mm_dpbusd_epi32(__S, __A, __B),
148 (__v4si)_mm_setzero_si128());
151 static __inline__ __m128i __DEFAULT_FN_ATTRS128
152 _mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
154 return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A,
158 static __inline__ __m128i __DEFAULT_FN_ATTRS128
159 _mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
161 return (__m128i)__builtin_ia32_selectd_128(__U,
162 (__v4si)_mm_dpbusds_epi32(__S, __A, __B),
166 static __inline__ __m128i __DEFAULT_FN_ATTRS128
167 _mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
169 return (__m128i)__builtin_ia32_selectd_128(__U,
170 (__v4si)_mm_dpbusds_epi32(__S, __A, __B),
171 (__v4si)_mm_setzero_si128());
174 static __inline__ __m128i __DEFAULT_FN_ATTRS128
175 _mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
177 return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A,
181 static __inline__ __m128i __DEFAULT_FN_ATTRS128
182 _mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
184 return (__m128i)__builtin_ia32_selectd_128(__U,
185 (__v4si)_mm_dpwssd_epi32(__S, __A, __B),
189 static __inline__ __m128i __DEFAULT_FN_ATTRS128
190 _mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
192 return (__m128i)__builtin_ia32_selectd_128(__U,
193 (__v4si)_mm_dpwssd_epi32(__S, __A, __B),
194 (__v4si)_mm_setzero_si128());
197 static __inline__ __m128i __DEFAULT_FN_ATTRS128
198 _mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
200 return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A,
204 static __inline__ __m128i __DEFAULT_FN_ATTRS128
205 _mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
207 return (__m128i)__builtin_ia32_selectd_128(__U,
208 (__v4si)_mm_dpwssds_epi32(__S, __A, __B),
212 static __inline__ __m128i __DEFAULT_FN_ATTRS128
213 _mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
215 return (__m128i)__builtin_ia32_selectd_128(__U,
216 (__v4si)_mm_dpwssds_epi32(__S, __A, __B),
217 (__v4si)_mm_setzero_si128());
220 #undef __DEFAULT_FN_ATTRS128
221 #undef __DEFAULT_FN_ATTRS256