]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - module/zfs/vdev_raidz_math_avx2.c
Vendor import of openzfs master @ 184df27eef0abdc7ab2105b21257f753834b936b
[FreeBSD/FreeBSD.git] / module / zfs / vdev_raidz_math_avx2.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
 */
24 #include <sys/isa_defs.h>
25
26 #if defined(__x86_64) && defined(HAVE_AVX2)
27
28 #include <sys/types.h>
29 #include <sys/simd.h>
30
/*
 * On Linux every __asm(...) statement in this file expands to
 * __asm__ __volatile__(...). Other platforms keep the compiler's own
 * meaning of __asm.
 */
#ifdef __linux__
#define __asm __asm__ __volatile__
#endif
34
/*
 * REG_CNT(r...) expands to the number of items in the list r (up to 8).
 *
 * The caller's list is followed by the descending sequence 8..1 and
 * _REG_CNT() picks the ninth item overall, so each additional caller
 * argument shifts a larger number into the N slot.
 */
#define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
#define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
37
/*
 * VRn(r...) expands to the string literal "ymm<k>", where <k> is the n-th
 * (zero-based) item of the register-number list r. The result is meant to
 * be pasted into an inline-asm template by string concatenation.
 *
 * VRn_() is the positional selector: it skips n items and stringifies the
 * next one.
 */
#define VR0_(REG, ...) "ymm"#REG
#define VR1_(_1, REG, ...) "ymm"#REG
#define VR2_(_1, _2, REG, ...) "ymm"#REG
#define VR3_(_1, _2, _3, REG, ...) "ymm"#REG
#define VR4_(_1, _2, _3, _4, REG, ...) "ymm"#REG
#define VR5_(_1, _2, _3, _4, _5, REG, ...) "ymm"#REG
#define VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "ymm"#REG
#define VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "ymm"#REG

/*
 * The trailing numbers passed by VR2..VR7 are placeholders. They keep the
 * selector's named parameters filled when r is shorter than n + 1 items;
 * in that case the resulting register name is a dummy taken from the
 * placeholders. Such expansions occur in the switch arms of the macros
 * below that REG_CNT(r) does not select.
 */
#define VR0(r...) VR0_(r)
#define VR1(r...) VR1_(r)
#define VR2(r...) VR2_(r, 1)
#define VR3(r...) VR3_(r, 1, 2)
#define VR4(r...) VR4_(r, 1, 2)
#define VR5(r...) VR5_(r, 1, 2, 3)
#define VR6(r...) VR6_(r, 1, 2, 3, 4)
#define VR7(r...) VR7_(r, 1, 2, 3, 4, 5)
55
/*
 * List slicing helpers for register-number lists.
 *
 * R_01(r...) expands to the first two items of r.
 * R_23(r...) expands to the third and fourth items of r. The placeholders
 * appended by R_23 keep _R_23() fully supplied when r has fewer than four
 * items; the slice then consists of placeholder values.
 */
#define R_01(REG1, REG2, ...) REG1, REG2
#define _R_23(_0, _1, REG2, REG3, ...) REG2, REG3
#define R_23(REG...) _R_23(REG, 1, 2, 3)
59
/*
 * Used as the default arm of the register-count switches below: a macro
 * invoked with an unsupported number of registers ends up in ASSERT(0).
 */
#define ZFS_ASM_BUG()   ASSERT(0)
61
/*
 * Lookup tables defined outside this file: 4 * 256 rows of 16 bytes.
 * _MULx2() reads the four consecutive rows starting at index 4 * c.
 */
extern const uint8_t gf_clmul_mod_lt[4*256][16];
63
/* Size in bytes of one vector element; matches the width of a ymm register. */
#define ELEM_SIZE 32

/* One vector element: ELEM_SIZE bytes, aligned to ELEM_SIZE. */
typedef struct v {
        uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
} v_t;
69
70
/*
 * XOR_ACC(src, r...): XOR memory into registers.
 *
 * For a list of 4 (or 2) register numbers, XORs the 32-byte chunks at
 * src + 0x00, 0x20, 0x40, 0x60 (or src + 0x00, 0x20) into the listed ymm
 * registers, in list order. Any other register count hits ZFS_ASM_BUG().
 *
 * The asm reads memory through the pointer operand only, so the "memory"
 * clobber is what tells the compiler that pending stores to that buffer
 * must be completed before the statement runs.
 */
#define XOR_ACC(src, r...)                                              \
{                                                                       \
        switch (REG_CNT(r)) {                                           \
        case 4:                                                         \
                __asm(                                                  \
                    "vpxor 0x00(%[SRC]), %%" VR0(r)", %%" VR0(r) "\n"   \
                    "vpxor 0x20(%[SRC]), %%" VR1(r)", %%" VR1(r) "\n"   \
                    "vpxor 0x40(%[SRC]), %%" VR2(r)", %%" VR2(r) "\n"   \
                    "vpxor 0x60(%[SRC]), %%" VR3(r)", %%" VR3(r) "\n"   \
                    : : [SRC] "r" (src) : "memory");                    \
                break;                                                  \
        case 2:                                                         \
                __asm(                                                  \
                    "vpxor 0x00(%[SRC]), %%" VR0(r)", %%" VR0(r) "\n"   \
                    "vpxor 0x20(%[SRC]), %%" VR1(r)", %%" VR1(r) "\n"   \
                    : : [SRC] "r" (src) : "memory");                    \
                break;                                                  \
        default:                                                        \
                ZFS_ASM_BUG();                                          \
        }                                                               \
}
92
/*
 * XOR(r...): register-to-register XOR.
 *
 * The list is treated as two halves. With 8 register numbers, registers
 * 0..3 of the list are XORed into registers 4..7; with 4 register numbers,
 * registers 0..1 are XORed into registers 2..3. The first half is left
 * unchanged. Any other register count hits ZFS_ASM_BUG().
 *
 * These asm statements have no operands, hence the single '%' in front of
 * the register names.
 */
#define XOR(r...)                                                       \
{                                                                       \
        switch (REG_CNT(r)) {                                           \
        case 8:                                                         \
                __asm(                                                  \
                    "vpxor %" VR0(r) ", %" VR4(r)", %" VR4(r) "\n"      \
                    "vpxor %" VR1(r) ", %" VR5(r)", %" VR5(r) "\n"      \
                    "vpxor %" VR2(r) ", %" VR6(r)", %" VR6(r) "\n"      \
                    "vpxor %" VR3(r) ", %" VR7(r)", %" VR7(r));         \
                break;                                                  \
        case 4:                                                         \
                __asm(                                                  \
                    "vpxor %" VR0(r) ", %" VR2(r)", %" VR2(r) "\n"      \
                    "vpxor %" VR1(r) ", %" VR3(r)", %" VR3(r));         \
                break;                                                  \
        default:                                                        \
                ZFS_ASM_BUG();                                          \
        }                                                               \
}
112
/*
 * ZERO(r...): clear the listed registers by XORing each one with itself.
 * The list is passed to XOR() twice, so r must hold 2 or 4 register
 * numbers for XOR() to accept the doubled count.
 */
#define ZERO(r...)      XOR(r, r)
114
/*
 * COPY(r...): register-to-register copy.
 *
 * The list is treated as two halves: with 8 register numbers, registers
 * 0..3 of the list are copied into registers 4..7; with 4 register
 * numbers, registers 0..1 are copied into registers 2..3. Any other
 * register count hits ZFS_ASM_BUG().
 */
#define COPY(r...)                                                      \
{                                                                       \
        switch (REG_CNT(r)) {                                           \
        case 8:                                                         \
                __asm(                                                  \
                    "vmovdqa %" VR0(r) ", %" VR4(r) "\n"                \
                    "vmovdqa %" VR1(r) ", %" VR5(r) "\n"                \
                    "vmovdqa %" VR2(r) ", %" VR6(r) "\n"                \
                    "vmovdqa %" VR3(r) ", %" VR7(r));                   \
                break;                                                  \
        case 4:                                                         \
                __asm(                                                  \
                    "vmovdqa %" VR0(r) ", %" VR2(r) "\n"                \
                    "vmovdqa %" VR1(r) ", %" VR3(r));                   \
                break;                                                  \
        default:                                                        \
                ZFS_ASM_BUG();                                          \
        }                                                               \
}
134
/*
 * LOAD(src, r...): load memory into registers.
 *
 * For a list of 4 (or 2) register numbers, loads the 32-byte chunks at
 * src + 0x00, 0x20, 0x40, 0x60 (or src + 0x00, 0x20) into the listed ymm
 * registers, in list order. Any other register count hits ZFS_ASM_BUG().
 *
 * vmovdqa is the aligned move, so src has to be 32-byte aligned.
 *
 * The asm reads memory through the pointer operand only, so the "memory"
 * clobber is what tells the compiler that pending stores to that buffer
 * must be completed before the statement runs.
 */
#define LOAD(src, r...)                                                 \
{                                                                       \
        switch (REG_CNT(r)) {                                           \
        case 4:                                                         \
                __asm(                                                  \
                    "vmovdqa 0x00(%[SRC]), %%" VR0(r) "\n"              \
                    "vmovdqa 0x20(%[SRC]), %%" VR1(r) "\n"              \
                    "vmovdqa 0x40(%[SRC]), %%" VR2(r) "\n"              \
                    "vmovdqa 0x60(%[SRC]), %%" VR3(r) "\n"              \
                    : : [SRC] "r" (src) : "memory");                    \
                break;                                                  \
        case 2:                                                         \
                __asm(                                                  \
                    "vmovdqa 0x00(%[SRC]), %%" VR0(r) "\n"              \
                    "vmovdqa 0x20(%[SRC]), %%" VR1(r) "\n"              \
                    : : [SRC] "r" (src) : "memory");                    \
                break;                                                  \
        default:                                                        \
                ZFS_ASM_BUG();                                          \
        }                                                               \
}
156
/*
 * STORE(dst, r...): store registers to memory.
 *
 * For a list of 4 (or 2) register numbers, writes the listed ymm
 * registers, in list order, to dst + 0x00, 0x20, 0x40, 0x60 (or
 * dst + 0x00, 0x20). Any other register count hits ZFS_ASM_BUG().
 *
 * vmovdqa is the aligned move, so dst has to be 32-byte aligned.
 *
 * The asm writes memory that appears in no output operand; the "memory"
 * clobber tells the compiler that memory contents changed, so it does not
 * keep using values of the destination buffer it cached earlier.
 */
#define STORE(dst, r...)                                                \
{                                                                       \
        switch (REG_CNT(r)) {                                           \
        case 4:                                                         \
                __asm(                                                  \
                    "vmovdqa %%" VR0(r) ", 0x00(%[DST])\n"              \
                    "vmovdqa %%" VR1(r) ", 0x20(%[DST])\n"              \
                    "vmovdqa %%" VR2(r) ", 0x40(%[DST])\n"              \
                    "vmovdqa %%" VR3(r) ", 0x60(%[DST])\n"              \
                    : : [DST] "r" (dst) : "memory");                    \
                break;                                                  \
        case 2:                                                         \
                __asm(                                                  \
                    "vmovdqa %%" VR0(r) ", 0x00(%[DST])\n"              \
                    "vmovdqa %%" VR1(r) ", 0x20(%[DST])\n"              \
                    : : [DST] "r" (dst) : "memory");                    \
                break;                                                  \
        default:                                                        \
                ZFS_ASM_BUG();                                          \
        }                                                               \
}
178
/*
 * FLUSH(): issue vzeroupper, which zeroes the upper 128 bits of every ymm
 * register. Invoked by raidz_math_end() before kfpu_end().
 */
#define FLUSH()                                                         \
{                                                                       \
        __asm("vzeroupper");                                            \
}
183
/*
 * MUL2_SETUP(): prepare the constants that _MUL2() expects.
 *
 *   ymm14 - 0x1d in every byte (64-bit pattern broadcast to all lanes)
 *   ymm15 - all zero
 *
 * Both registers are also written by _MULx2() (as _as and _0f/_tb), so
 * the setup has to be repeated before using MUL2() after a MUL().
 */
#define MUL2_SETUP()                                                    \
{                                                                       \
        __asm("vmovq %0,   %%xmm14" :: "r"(0x1d1d1d1d1d1d1d1d));        \
        __asm("vpbroadcastq %xmm14, %ymm14");                           \
        __asm("vpxor        %ymm15, %ymm15 ,%ymm15");                   \
}
190
/*
 * _MUL2(r...): double every byte of two registers, with reduction.
 *
 * Requires the constants from MUL2_SETUP() (ymm14 = 0x1d bytes,
 * ymm15 = 0). For each byte x of the two listed registers:
 *
 *   mask = (0 > (signed)x) ? 0xff : 0      vpcmpgtb against ymm15
 *   x    = (x + x) ^ (mask & 0x1d)         vpaddb, vpand, vpxor
 *
 * i.e. a left shift by one bit, followed by an XOR with 0x1d for the bytes
 * whose top bit was set: multiplication by 2 in a GF(2^8) representation
 * with reduction constant 0x1d.
 *
 * ymm12 and ymm13 are used as scratch. Only a two-register list is
 * accepted; anything else hits ZFS_ASM_BUG().
 */
#define _MUL2(r...)                                                     \
{                                                                       \
        switch  (REG_CNT(r)) {                                          \
        case 2:                                                         \
                __asm(                                                  \
                    "vpcmpgtb %" VR0(r)", %ymm15,     %ymm12\n"         \
                    "vpcmpgtb %" VR1(r)", %ymm15,     %ymm13\n"         \
                    "vpaddb   %" VR0(r)", %" VR0(r)", %" VR0(r) "\n"    \
                    "vpaddb   %" VR1(r)", %" VR1(r)", %" VR1(r) "\n"    \
                    "vpand    %ymm14,     %ymm12,     %ymm12\n"         \
                    "vpand    %ymm14,     %ymm13,     %ymm13\n"         \
                    "vpxor    %ymm12,     %" VR0(r)", %" VR0(r) "\n"    \
                    "vpxor    %ymm13,     %" VR1(r)", %" VR1(r));       \
                break;                                                  \
        default:                                                        \
                ZFS_ASM_BUG();                                          \
        }                                                               \
}
209
/*
 * MUL2(r...): apply _MUL2() to 2 or 4 registers.
 *
 * A four-register list is processed as two pairs (items 0..1, then items
 * 2..3). Any other register count hits ZFS_ASM_BUG(). MUL2_SETUP() must
 * have been run beforehand, see _MUL2().
 */
#define MUL2(r...)                                                      \
{                                                                       \
        switch (REG_CNT(r)) {                                           \
        case 4:                                                         \
                _MUL2(R_01(r));                                         \
                _MUL2(R_23(r));                                         \
                break;                                                  \
        case 2:                                                         \
                _MUL2(r);                                               \
                break;                                                  \
        default:                                                        \
                ZFS_ASM_BUG();                                          \
        }                                                               \
}
224
/*
 * MUL4(r...): run MUL2() twice on the same registers, i.e. multiply every
 * byte by 4 using the doubling step of _MUL2().
 */
#define MUL4(r...)                                                      \
{                                                                       \
        MUL2(r);                                                        \
        MUL2(r);                                                        \
}
230
/*
 * Fixed register roles used by _MULx2():
 *
 *   _0f     nibble mask, loaded with 0x0f in every byte
 *   _as/_bs high-nibble indices, later the partial results, of the first
 *           and second data register
 *   _ltmod  } the two lookup tables currently loaded from
 *   _ltmul  } gf_clmul_mod_lt
 *   _ta/_tb temporaries for the first and second data register
 *
 * _tb deliberately shares ymm15 with _0f: the mask is last read before
 * _tb is first written.
 */
#define _0f             "ymm15"
#define _as             "ymm14"
#define _bs             "ymm13"
#define _ltmod          "ymm12"
#define _ltmul          "ymm11"
#define _ta             "ymm10"
#define _tb             "ymm15"

/* Byte broadcast into _0f by _MULx2() to isolate 4-bit nibbles. */
static const uint8_t __attribute__((aligned(32))) _mul_mask = 0x0F;
240
/*
 * _MULx2(c, r...): table-driven transform of two registers, selected by c.
 *
 * Four 16-byte tables are read from consecutive rows of gf_clmul_mod_lt
 * starting at row 4 * c (offsets 0x00, 0x10, 0x20, 0x30) and broadcast to
 * both 128-bit lanes. Every byte x of the two listed registers is split
 * into its high nibble hi = (x >> 4) & 0x0f and low nibble lo = x & 0x0f
 * and replaced by
 *
 *   T0[hi] ^ T1[hi] ^ T2[lo] ^ T3[lo]
 *
 * where T0..T3 are the rows at offsets 0x00..0x30 and each lookup is a
 * vpshufb. Presumably this yields the product of x and the constant c in
 * GF(2^8); that depends on the table contents, which are defined
 * elsewhere - confirm there.
 *
 * Clobbers ymm10..ymm15 (see the _0f.._tb aliases), which includes the
 * ymm14/ymm15 constants set by MUL2_SETUP(). Only a two-register list is
 * accepted; anything else hits ZFS_ASM_BUG().
 */
#define _MULx2(c, r...)                                                 \
{                                                                       \
        switch (REG_CNT(r)) {                                           \
        case 2:                                                         \
                __asm(                                                  \
                    "vpbroadcastb (%[mask]), %%" _0f "\n"               \
                    /* upper bits */                                    \
                    "vbroadcasti128 0x00(%[lt]), %%" _ltmod "\n"        \
                    "vbroadcasti128 0x10(%[lt]), %%" _ltmul "\n"        \
                                                                        \
                    "vpsraw $0x4, %%" VR0(r) ", %%"_as "\n"             \
                    "vpsraw $0x4, %%" VR1(r) ", %%"_bs "\n"             \
                    "vpand %%" _0f ", %%" VR0(r) ", %%" VR0(r) "\n"     \
                    "vpand %%" _0f ", %%" VR1(r) ", %%" VR1(r) "\n"     \
                    "vpand %%" _0f ", %%" _as ", %%" _as "\n"           \
                    "vpand %%" _0f ", %%" _bs ", %%" _bs "\n"           \
                                                                        \
                    "vpshufb %%" _as ", %%" _ltmod ", %%" _ta "\n"      \
                    "vpshufb %%" _bs ", %%" _ltmod ", %%" _tb "\n"      \
                    "vpshufb %%" _as ", %%" _ltmul ", %%" _as "\n"      \
                    "vpshufb %%" _bs ", %%" _ltmul ", %%" _bs "\n"      \
                    /* lower bits */                                    \
                    "vbroadcasti128 0x20(%[lt]), %%" _ltmod "\n"        \
                    "vbroadcasti128 0x30(%[lt]), %%" _ltmul "\n"        \
                                                                        \
                    "vpxor %%" _ta ", %%" _as ", %%" _as "\n"           \
                    "vpxor %%" _tb ", %%" _bs ", %%" _bs "\n"           \
                                                                        \
                    "vpshufb %%" VR0(r) ", %%" _ltmod ", %%" _ta "\n"   \
                    "vpshufb %%" VR1(r) ", %%" _ltmod ", %%" _tb "\n"   \
                    "vpshufb %%" VR0(r) ", %%" _ltmul ", %%" VR0(r) "\n"\
                    "vpshufb %%" VR1(r) ", %%" _ltmul ", %%" VR1(r) "\n"\
                                                                        \
                    "vpxor %%" _ta ", %%" VR0(r) ", %%" VR0(r) "\n"     \
                    "vpxor %%" _as ", %%" VR0(r) ", %%" VR0(r) "\n"     \
                    "vpxor %%" _tb ", %%" VR1(r) ", %%" VR1(r) "\n"     \
                    "vpxor %%" _bs ", %%" VR1(r) ", %%" VR1(r) "\n"     \
                    : : [mask] "r" (&_mul_mask),                        \
                    [lt] "r" (gf_clmul_mod_lt[4*(c)]));                 \
                break;                                                  \
        default:                                                        \
                ZFS_ASM_BUG();                                          \
        }                                                               \
}
285
/*
 * MUL(c, r...): apply _MULx2(c, ...) to 2 or 4 registers.
 *
 * A four-register list is processed as two pairs (items 0..1, then items
 * 2..3). Any other register count hits ZFS_ASM_BUG().
 */
#define MUL(c, r...)                                                    \
{                                                                       \
        switch (REG_CNT(r)) {                                           \
        case 4:                                                         \
                _MULx2(c, R_01(r));                                     \
                _MULx2(c, R_23(r));                                     \
                break;                                                  \
        case 2:                                                         \
                _MULx2(c, R_01(r));                                     \
                break;                                                  \
        default:                                                        \
                ZFS_ASM_BUG();                                          \
        }                                                               \
}
300
/*
 * Bracket for a section of vector code: raidz_math_begin() calls
 * kfpu_begin(); raidz_math_end() issues FLUSH() (vzeroupper) and then
 * calls kfpu_end().
 */
#define raidz_math_begin()      kfpu_begin()
#define raidz_math_end()                                                \
{                                                                       \
        FLUSH();                                                        \
        kfpu_end();                                                     \
}
307
308
/*
 * Register assignment for the zero/copy/add/mul and parity generation
 * routines. Presumably consumed by vdev_raidz_math_impl.h, which is
 * included further down - confirm there.
 *
 * Within each group:
 *   *_STRIDE   equals the length of the register lists defined with it
 *   *_DEFINE() expands to an empty block
 *   *_D, *_P, *_C are lists of ymm register numbers in the form accepted
 *              by the macros above (see VRn())
 */
#define SYN_STRIDE              4

#define ZERO_STRIDE             4
#define ZERO_DEFINE()           {}
#define ZERO_D                  0, 1, 2, 3

#define COPY_STRIDE             4
#define COPY_DEFINE()           {}
#define COPY_D                  0, 1, 2, 3

#define ADD_STRIDE              4
#define ADD_DEFINE()            {}
#define ADD_D                   0, 1, 2, 3

#define MUL_STRIDE              4
#define MUL_DEFINE()            {}
#define MUL_D                   0, 1, 2, 3

#define GEN_P_STRIDE            4
#define GEN_P_DEFINE()          {}
#define GEN_P_P                 0, 1, 2, 3

#define GEN_PQ_STRIDE           4
#define GEN_PQ_DEFINE()         {}
#define GEN_PQ_D                0, 1, 2, 3
#define GEN_PQ_C                4, 5, 6, 7

#define GEN_PQR_STRIDE          4
#define GEN_PQR_DEFINE()        {}
#define GEN_PQR_D               0, 1, 2, 3
#define GEN_PQR_C               4, 5, 6, 7
340
/*
 * Register assignment for the syndrome (SYN_*) and reconstruction (REC_*)
 * routines. Presumably consumed by vdev_raidz_math_impl.h, which is
 * included further down - confirm there.
 *
 * The SYN_* groups use four-register lists (matching SYN_STRIDE), the
 * REC_* groups use two-register lists (matching their *_STRIDE of 2).
 * *_DEFINE() expands to an empty block.
 *
 * The highest register number used here is 9 (REC_PQR_YS), so these lists
 * do not overlap ymm10..ymm15, which _MULx2() uses as scratch.
 */
#define SYN_Q_DEFINE()          {}
#define SYN_Q_D                 0, 1, 2, 3
#define SYN_Q_X                 4, 5, 6, 7

#define SYN_R_DEFINE()          {}
#define SYN_R_D                 0, 1, 2, 3
#define SYN_R_X                 4, 5, 6, 7

#define SYN_PQ_DEFINE()         {}
#define SYN_PQ_D                0, 1, 2, 3
#define SYN_PQ_X                4, 5, 6, 7

#define REC_PQ_STRIDE           2
#define REC_PQ_DEFINE()         {}
#define REC_PQ_X                0, 1
#define REC_PQ_Y                2, 3
#define REC_PQ_T                4, 5

#define SYN_PR_DEFINE()         {}
#define SYN_PR_D                0, 1, 2, 3
#define SYN_PR_X                4, 5, 6, 7

#define REC_PR_STRIDE           2
#define REC_PR_DEFINE()         {}
#define REC_PR_X                0, 1
#define REC_PR_Y                2, 3
#define REC_PR_T                4, 5

#define SYN_QR_DEFINE()         {}
#define SYN_QR_D                0, 1, 2, 3
#define SYN_QR_X                4, 5, 6, 7

#define REC_QR_STRIDE           2
#define REC_QR_DEFINE()         {}
#define REC_QR_X                0, 1
#define REC_QR_Y                2, 3
#define REC_QR_T                4, 5

#define SYN_PQR_DEFINE()        {}
#define SYN_PQR_D               0, 1, 2, 3
#define SYN_PQR_X               4, 5, 6, 7

#define REC_PQR_STRIDE          2
#define REC_PQR_DEFINE()        {}
#define REC_PQR_X               0, 1
#define REC_PQR_Y               2, 3
#define REC_PQR_Z               4, 5
#define REC_PQR_XS              6, 7
#define REC_PQR_YS              8, 9
390
391
392 #include <sys/vdev_raidz_impl.h>
393 #include "vdev_raidz_math_impl.h"
394
395 DEFINE_GEN_METHODS(avx2);
396 DEFINE_REC_METHODS(avx2);
397
398 static boolean_t
399 raidz_will_avx2_work(void)
400 {
401         return (kfpu_allowed() && zfs_avx_available() && zfs_avx2_available());
402 }
403
404 const raidz_impl_ops_t vdev_raidz_avx2_impl = {
405         .init = NULL,
406         .fini = NULL,
407         .gen = RAIDZ_GEN_METHODS(avx2),
408         .rec = RAIDZ_REC_METHODS(avx2),
409         .is_supported = &raidz_will_avx2_work,
410         .name = "avx2"
411 };
412
413 #endif /* defined(__x86_64) && defined(HAVE_AVX2) */