4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
24 * Copyright (c) 2019-2022 Samuel Neves
25 * Copyright (c) 2022-2023 Tino Reichardt <milky-zfs@mcmilk.de>
27 * This is converted assembly: SSE4.1 -> ARMv8-A
28 * Used tools: SIMDe https://github.com/simd-everywhere/simde
30 * Should work on FreeBSD, Linux and macOS
31 * see: https://github.com/mcmilk/BLAKE3-tests/blob/master/contrib/simde.sh
/*
 * Everything below is guarded so that it is only assembled for AArch64.
 * NOTE(review): the closing #endif lines (and the #else of the inner
 * conditional) are not visible in this excerpt.
 */
34 #if defined(__aarch64__)
36 /* make gcc <= 9 happy */
/*
 * CFI_NEGATE_RA_STATE expands to the .cfi_negate_ra_state directive when
 * LD_VERSION is undefined or is at least 233010000. The second definition
 * leaves the macro empty; presumably it sits in the #else branch that is
 * used for older toolchains which do not accept the directive.
 */
37 #if !defined(LD_VERSION) || LD_VERSION >= 233010000
38 #define CFI_NEGATE_RA_STATE .cfi_negate_ra_state
40 #define CFI_NEGATE_RA_STATE
/* Switch to the GNU property note section (flag "a", section type @note). */
44 .section .note.gnu.property,"a",@note
/*
 * zfs_blake3_compress_in_place_sse41
 *
 * Exported (.globl) function symbol. Only fragments of the body are
 * visible in this excerpt, so the notes below cover just those lines.
 * NOTE(review): the stack adjustment, the call into the compression
 * helper and the final stores/ret are not visible here; confirm against
 * the full file.
 */
56 .globl zfs_blake3_compress_in_place_sse41
58 .type zfs_blake3_compress_in_place_sse41,@function
59 zfs_blake3_compress_in_place_sse41:
/* Save the frame pointer (x29) and link register (x30) at sp+64. */
64 stp x29, x30, [sp, #64]
/* v0 ^= v2 and v1 ^= v3, byte-wise over the full 128-bit registers. */
81 eor v0.16b, v2.16b, v0.16b
82 eor v1.16b, v3.16b, v1.16b
/* Restore x29/x30 from the slot written in the prologue. */
83 ldp x29, x30, [sp, #64]
/* Symbol size = distance from the entry label to .Lfunc_end0. */
90 .size zfs_blake3_compress_in_place_sse41, .Lfunc_end0-zfs_blake3_compress_in_place_sse41
/*
 * Read-only constant data: section flags "aM" with an entry size of 16.
 * Reinterpreted as unsigned 64-bit values the two words below are
 * 0xBB67AE856A09E667 and 0xA54FF53A3C6EF372, i.e. the little-endian
 * 32-bit sequence 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A.
 * These match the first four BLAKE3 IV words.
 * NOTE(review): the label naming this constant is not visible in this excerpt.
 */
93 .section .rodata.cst16,"aM",@progbits,16
96 .xword -4942790177982912921
97 .xword -6534734903820487822
/*
 * compress_pre
 *
 * Helper routine; no .globl directive for it is visible in this excerpt.
 * The visible body works on vectors of four 32-bit lanes and repeats one
 * pattern many times:
 *   add  -> eor -> tbl (byte shuffle through v0)   -> add -> eor -> rotate by 12
 *   add  -> eor -> tbl (byte shuffle through v1)   -> add -> eor -> rotate by 7
 * interleaved with ext/uzp/zip/trn/mov lane permutations that prepare the
 * next operands. Rotations are written as ushr + shl + orr pairs whose
 * shift counts sum to 32.
 *
 * Memory written in the visible lines: 32 bytes at x0+16 near the start
 * and 32 bytes at x0+32 near the end.
 *
 * NOTE(review): this looks like the BLAKE3 round function (G mixing plus
 * message permutation) - confirm against the upstream BLAKE3 sources. The
 * entry label, argument loads, the definition of several registers and
 * the return are on lines that are not visible in this excerpt.
 */
134 .type compress_pre,@function
/* d0 = 64-bit mask with bytes 0 and 4 set to 0xff. */
139 movi d0, #0x0000ff000000ff
/* q4 = 16-byte constant .LCPI1_0 (x8 is assumed to hold its page address). */
144 ldr q4, [x8, :lo12:.LCPI1_0]
/* Keep only the masked bytes of the low half of v1. */
147 and v0.8b, v1.8b, v0.8b
/* Store q5 at x0+16 and q4 at x0+32. */
149 stp q5, q4, [x0, #16]
/* De-interleave v6/v7: v3 = even 32-bit lanes, v2 (below) = odd lanes. */
153 uzp1 v3.4s, v6.4s, v7.4s
154 add v0.4s, v2.4s, v3.4s
155 uzp2 v2.4s, v6.4s, v7.4s
156 add v16.4s, v0.4s, v5.4s
/*
 * v0 = tbl index vector .LCPI1_1, reused for every first-half shuffle.
 * NOTE(review): presumably a rotate-by-16 byte permutation; the constant
 * table is not visible here.
 */
157 ldr q0, [x8, :lo12:.LCPI1_1]
159 eor v1.16b, v16.16b, v1.16b
160 add v7.4s, v16.4s, v2.4s
161 tbl v1.16b, { v1.16b }, v0.16b
162 add v4.4s, v1.4s, v4.4s
163 eor v5.16b, v4.16b, v5.16b
/* Rotate each 32-bit lane of v5 right by 12 (ushr 12 | shl 20). */
164 ushr v6.4s, v5.4s, #12
165 shl v5.4s, v5.4s, #20
166 orr v5.16b, v5.16b, v6.16b
167 add v6.4s, v7.4s, v5.4s
168 eor v7.16b, v1.16b, v6.16b
/*
 * v1 = tbl index vector .LCPI1_2, reused for every second-half shuffle.
 * NOTE(review): presumably a rotate-by-8 byte permutation; the constant
 * table is not visible here.
 */
169 ldr q1, [x8, :lo12:.LCPI1_2]
171 tbl v7.16b, { v7.16b }, v1.16b
/* De-interleaving load of 32 bytes at [x8]: even words -> v16, odd -> v17. */
172 ld2 { v16.4s, v17.4s }, [x8]
173 add v4.4s, v4.4s, v7.4s
/* ext of a register with itself rotates the whole vector by N bytes. */
174 ext v7.16b, v7.16b, v7.16b, #8
175 add v6.4s, v6.4s, v16.4s
176 eor v5.16b, v4.16b, v5.16b
177 ext v4.16b, v4.16b, v4.16b, #4
178 ext v16.16b, v16.16b, v16.16b, #12
179 ext v6.16b, v6.16b, v6.16b, #12
/* Rotate each 32-bit lane of v5 right by 7 (ushr 7 | shl 25). */
180 ushr v18.4s, v5.4s, #7
181 shl v5.4s, v5.4s, #25
182 orr v5.16b, v5.16b, v18.16b
183 ext v18.16b, v17.16b, v17.16b, #12
184 add v6.4s, v6.4s, v5.4s
186 eor v7.16b, v7.16b, v6.16b
187 add v6.4s, v6.4s, v18.4s
188 mov v17.s[1], v16.s[2]
189 tbl v7.16b, { v7.16b }, v0.16b
190 add v4.4s, v4.4s, v7.4s
191 eor v5.16b, v4.16b, v5.16b
192 ushr v19.4s, v5.4s, #12
193 shl v5.4s, v5.4s, #20
194 orr v5.16b, v5.16b, v19.16b
195 uzp1 v19.4s, v3.4s, v3.4s
196 add v6.4s, v6.4s, v5.4s
197 ext v19.16b, v19.16b, v3.16b, #8
198 eor v7.16b, v7.16b, v6.16b
199 uzp2 v19.4s, v19.4s, v2.4s
200 tbl v7.16b, { v7.16b }, v1.16b
201 add v6.4s, v6.4s, v19.4s
202 add v4.4s, v4.4s, v7.4s
203 ext v6.16b, v6.16b, v6.16b, #4
204 ext v7.16b, v7.16b, v7.16b, #8
205 eor v5.16b, v4.16b, v5.16b
206 ext v4.16b, v4.16b, v4.16b, #12
207 ushr v20.4s, v5.4s, #7
208 shl v5.4s, v5.4s, #25
209 orr v5.16b, v5.16b, v20.16b
210 ext v20.16b, v3.16b, v3.16b, #12
211 add v6.4s, v6.4s, v5.4s
212 ext v3.16b, v3.16b, v20.16b, #12
213 eor v7.16b, v7.16b, v6.16b
215 tbl v7.16b, { v7.16b }, v0.16b
216 trn2 v3.4s, v3.4s, v17.4s
217 add v4.4s, v4.4s, v7.4s
218 add v6.4s, v6.4s, v3.4s
219 eor v5.16b, v4.16b, v5.16b
220 ushr v17.4s, v5.4s, #12
221 shl v5.4s, v5.4s, #20
222 orr v5.16b, v5.16b, v17.16b
223 zip1 v17.2d, v18.2d, v2.2d
224 zip2 v2.4s, v2.4s, v18.4s
225 add v6.4s, v6.4s, v5.4s
226 mov v17.s[3], v16.s[3]
227 zip1 v18.4s, v2.4s, v16.4s
228 zip1 v2.4s, v16.4s, v2.4s
229 eor v7.16b, v7.16b, v6.16b
230 ext v6.16b, v6.16b, v6.16b, #12
231 ext v16.16b, v2.16b, v18.16b, #8
232 tbl v7.16b, { v7.16b }, v1.16b
233 add v20.4s, v4.4s, v7.4s
234 ext v4.16b, v17.16b, v17.16b, #12
235 ext v7.16b, v7.16b, v7.16b, #8
236 eor v5.16b, v20.16b, v5.16b
237 uzp1 v4.4s, v17.4s, v4.4s
238 ushr v17.4s, v5.4s, #7
239 shl v5.4s, v5.4s, #25
240 add v6.4s, v6.4s, v4.4s
241 orr v5.16b, v5.16b, v17.16b
242 ext v17.16b, v20.16b, v20.16b, #4
243 add v6.4s, v6.4s, v5.4s
244 eor v7.16b, v7.16b, v6.16b
245 add v6.4s, v6.4s, v16.4s
246 tbl v7.16b, { v7.16b }, v0.16b
247 add v17.4s, v17.4s, v7.4s
248 eor v5.16b, v17.16b, v5.16b
/* Rotate right by 12; the result lands in v2 instead of v5 this time. */
249 ushr v2.4s, v5.4s, #12
250 shl v5.4s, v5.4s, #20
251 orr v2.16b, v5.16b, v2.16b
252 add v5.4s, v6.4s, v2.4s
253 ext v6.16b, v19.16b, v19.16b, #4
254 eor v7.16b, v7.16b, v5.16b
255 uzp1 v18.4s, v6.4s, v6.4s
256 tbl v7.16b, { v7.16b }, v1.16b
257 ext v18.16b, v18.16b, v6.16b, #8
258 add v17.4s, v17.4s, v7.4s
259 uzp2 v18.4s, v18.4s, v3.4s
260 ext v7.16b, v7.16b, v7.16b, #8
261 eor v2.16b, v17.16b, v2.16b
262 add v5.4s, v5.4s, v18.4s
263 ext v17.16b, v17.16b, v17.16b, #12
264 ushr v19.4s, v2.4s, #7
265 shl v2.4s, v2.4s, #25
266 ext v5.16b, v5.16b, v5.16b, #4
267 orr v2.16b, v2.16b, v19.16b
268 ext v19.16b, v6.16b, v6.16b, #12
269 add v5.4s, v5.4s, v2.4s
270 ext v6.16b, v6.16b, v19.16b, #12
272 eor v7.16b, v7.16b, v5.16b
274 mov v19.s[1], v4.s[2]
275 tbl v7.16b, { v7.16b }, v0.16b
276 add v17.4s, v17.4s, v7.4s
277 eor v20.16b, v17.16b, v2.16b
278 trn2 v2.4s, v6.4s, v19.4s
279 ushr v6.4s, v20.4s, #12
280 shl v19.4s, v20.4s, #20
281 add v5.4s, v5.4s, v2.4s
282 orr v6.16b, v19.16b, v6.16b
283 add v19.4s, v5.4s, v6.4s
284 eor v5.16b, v7.16b, v19.16b
285 zip1 v7.2d, v16.2d, v3.2d
286 zip2 v3.4s, v3.4s, v16.4s
287 tbl v20.16b, { v5.16b }, v1.16b
289 add v17.4s, v17.4s, v20.4s
290 ext v5.16b, v7.16b, v7.16b, #12
291 eor v6.16b, v17.16b, v6.16b
292 uzp1 v5.4s, v7.4s, v5.4s
293 ext v7.16b, v19.16b, v19.16b, #12
294 ext v17.16b, v17.16b, v17.16b, #4
295 ushr v19.4s, v6.4s, #7
296 shl v6.4s, v6.4s, #25
297 add v7.4s, v7.4s, v5.4s
298 orr v6.16b, v6.16b, v19.16b
299 ext v19.16b, v20.16b, v20.16b, #8
300 add v7.4s, v7.4s, v6.4s
301 eor v19.16b, v19.16b, v7.16b
302 tbl v19.16b, { v19.16b }, v0.16b
303 add v16.4s, v17.4s, v19.4s
304 zip1 v17.4s, v3.4s, v4.4s
305 zip1 v3.4s, v4.4s, v3.4s
306 eor v4.16b, v16.16b, v6.16b
307 ext v17.16b, v3.16b, v17.16b, #8
308 ushr v3.4s, v4.4s, #12
309 shl v4.4s, v4.4s, #20
310 add v6.4s, v7.4s, v17.4s
311 orr v3.16b, v4.16b, v3.16b
312 add v4.4s, v6.4s, v3.4s
313 ext v6.16b, v18.16b, v18.16b, #4
314 eor v7.16b, v19.16b, v4.16b
315 uzp1 v18.4s, v6.4s, v6.4s
316 tbl v7.16b, { v7.16b }, v1.16b
317 ext v18.16b, v18.16b, v6.16b, #8
318 add v16.4s, v16.4s, v7.4s
319 uzp2 v18.4s, v18.4s, v2.4s
320 ext v7.16b, v7.16b, v7.16b, #8
321 eor v3.16b, v16.16b, v3.16b
322 add v4.4s, v4.4s, v18.4s
323 ext v16.16b, v16.16b, v16.16b, #12
324 ushr v19.4s, v3.4s, #7
325 shl v3.4s, v3.4s, #25
326 ext v4.16b, v4.16b, v4.16b, #4
327 orr v3.16b, v3.16b, v19.16b
328 ext v19.16b, v6.16b, v6.16b, #12
329 add v4.4s, v4.4s, v3.4s
330 ext v6.16b, v6.16b, v19.16b, #12
332 eor v7.16b, v7.16b, v4.16b
334 mov v19.s[1], v5.s[2]
335 tbl v7.16b, { v7.16b }, v0.16b
336 add v16.4s, v16.4s, v7.4s
337 eor v20.16b, v16.16b, v3.16b
338 trn2 v3.4s, v6.4s, v19.4s
339 ushr v6.4s, v20.4s, #12
340 shl v19.4s, v20.4s, #20
341 add v4.4s, v4.4s, v3.4s
342 orr v6.16b, v19.16b, v6.16b
343 zip1 v19.2d, v17.2d, v2.2d
344 zip2 v2.4s, v2.4s, v17.4s
345 add v4.4s, v4.4s, v6.4s
346 mov v19.s[3], v5.s[3]
347 zip1 v17.4s, v2.4s, v5.4s
348 zip1 v2.4s, v5.4s, v2.4s
349 eor v7.16b, v7.16b, v4.16b
350 ext v20.16b, v19.16b, v19.16b, #12
351 ext v4.16b, v4.16b, v4.16b, #12
352 ext v2.16b, v2.16b, v17.16b, #8
353 tbl v7.16b, { v7.16b }, v1.16b
354 add v16.4s, v16.4s, v7.4s
355 ext v7.16b, v7.16b, v7.16b, #8
356 eor v21.16b, v16.16b, v6.16b
357 uzp1 v6.4s, v19.4s, v20.4s
358 ext v16.16b, v16.16b, v16.16b, #4
359 ushr v19.4s, v21.4s, #7
360 shl v20.4s, v21.4s, #25
361 add v4.4s, v4.4s, v6.4s
362 orr v19.16b, v20.16b, v19.16b
363 add v4.4s, v4.4s, v19.4s
364 eor v7.16b, v7.16b, v4.16b
365 add v4.4s, v4.4s, v2.4s
366 tbl v7.16b, { v7.16b }, v0.16b
367 add v16.4s, v16.4s, v7.4s
368 eor v5.16b, v16.16b, v19.16b
369 ushr v17.4s, v5.4s, #12
370 shl v5.4s, v5.4s, #20
371 orr v5.16b, v5.16b, v17.16b
372 ext v17.16b, v18.16b, v18.16b, #4
373 add v4.4s, v4.4s, v5.4s
374 uzp1 v18.4s, v17.4s, v17.4s
375 eor v7.16b, v7.16b, v4.16b
376 ext v18.16b, v18.16b, v17.16b, #8
377 tbl v7.16b, { v7.16b }, v1.16b
378 uzp2 v18.4s, v18.4s, v3.4s
379 add v16.4s, v16.4s, v7.4s
380 add v4.4s, v4.4s, v18.4s
381 ext v7.16b, v7.16b, v7.16b, #8
382 eor v5.16b, v16.16b, v5.16b
383 ext v4.16b, v4.16b, v4.16b, #4
384 ext v16.16b, v16.16b, v16.16b, #12
385 ushr v19.4s, v5.4s, #7
386 shl v5.4s, v5.4s, #25
387 orr v5.16b, v5.16b, v19.16b
388 add v19.4s, v4.4s, v5.4s
389 eor v4.16b, v7.16b, v19.16b
390 ext v7.16b, v17.16b, v17.16b, #12
391 tbl v20.16b, { v4.16b }, v0.16b
392 ext v4.16b, v17.16b, v7.16b, #12
394 add v16.4s, v16.4s, v20.4s
397 eor v5.16b, v16.16b, v5.16b
398 trn2 v4.4s, v4.4s, v7.4s
399 ushr v7.4s, v5.4s, #12
400 shl v5.4s, v5.4s, #20
401 add v17.4s, v19.4s, v4.4s
402 zip1 v19.2d, v2.2d, v3.2d
403 zip2 v2.4s, v3.4s, v2.4s
404 orr v5.16b, v5.16b, v7.16b
405 mov v19.s[3], v6.s[3]
406 add v7.4s, v17.4s, v5.4s
407 eor v17.16b, v20.16b, v7.16b
408 ext v20.16b, v19.16b, v19.16b, #12
409 ext v7.16b, v7.16b, v7.16b, #12
410 tbl v17.16b, { v17.16b }, v1.16b
411 add v16.4s, v16.4s, v17.4s
412 ext v17.16b, v17.16b, v17.16b, #8
413 eor v21.16b, v16.16b, v5.16b
414 uzp1 v5.4s, v19.4s, v20.4s
415 ext v16.16b, v16.16b, v16.16b, #4
416 ushr v19.4s, v21.4s, #7
417 shl v20.4s, v21.4s, #25
418 add v7.4s, v7.4s, v5.4s
419 orr v19.16b, v20.16b, v19.16b
420 add v7.4s, v7.4s, v19.4s
421 eor v17.16b, v17.16b, v7.16b
422 tbl v17.16b, { v17.16b }, v0.16b
423 add v3.4s, v16.4s, v17.4s
424 zip1 v16.4s, v2.4s, v6.4s
425 zip1 v2.4s, v6.4s, v2.4s
426 eor v6.16b, v3.16b, v19.16b
427 ext v16.16b, v2.16b, v16.16b, #8
428 ushr v2.4s, v6.4s, #12
429 shl v6.4s, v6.4s, #20
430 add v7.4s, v7.4s, v16.4s
431 orr v2.16b, v6.16b, v2.16b
432 add v6.4s, v7.4s, v2.4s
433 ext v7.16b, v18.16b, v18.16b, #4
434 eor v17.16b, v17.16b, v6.16b
435 uzp1 v18.4s, v7.4s, v7.4s
436 tbl v17.16b, { v17.16b }, v1.16b
437 ext v18.16b, v18.16b, v7.16b, #8
438 add v3.4s, v3.4s, v17.4s
439 uzp2 v18.4s, v18.4s, v4.4s
440 eor v2.16b, v3.16b, v2.16b
441 add v6.4s, v6.4s, v18.4s
442 ext v3.16b, v3.16b, v3.16b, #12
443 ext v18.16b, v18.16b, v18.16b, #4
444 ushr v19.4s, v2.4s, #7
445 shl v2.4s, v2.4s, #25
446 ext v6.16b, v6.16b, v6.16b, #4
447 orr v19.16b, v2.16b, v19.16b
448 ext v2.16b, v17.16b, v17.16b, #8
449 ext v17.16b, v7.16b, v7.16b, #12
450 add v6.4s, v6.4s, v19.4s
451 eor v2.16b, v2.16b, v6.16b
452 tbl v20.16b, { v2.16b }, v0.16b
453 ext v2.16b, v7.16b, v17.16b, #12
455 add v17.4s, v3.4s, v20.4s
458 eor v19.16b, v17.16b, v19.16b
459 trn2 v3.4s, v3.4s, v7.4s
460 ushr v21.4s, v19.4s, #12
461 shl v19.4s, v19.4s, #20
462 add v6.4s, v6.4s, v3.4s
463 orr v19.16b, v19.16b, v21.16b
464 add v21.4s, v6.4s, v19.4s
465 eor v6.16b, v20.16b, v21.16b
466 zip1 v20.2d, v16.2d, v4.2d
467 zip2 v4.4s, v4.4s, v16.4s
468 tbl v22.16b, { v6.16b }, v1.16b
469 mov v20.s[3], v5.s[3]
470 add v17.4s, v17.4s, v22.4s
471 ext v6.16b, v20.16b, v20.16b, #12
472 eor v19.16b, v17.16b, v19.16b
473 uzp1 v6.4s, v20.4s, v6.4s
474 ext v20.16b, v21.16b, v21.16b, #12
475 ext v17.16b, v17.16b, v17.16b, #4
476 ushr v21.4s, v19.4s, #7
477 shl v19.4s, v19.4s, #25
478 add v20.4s, v20.4s, v6.4s
479 orr v19.16b, v19.16b, v21.16b
480 ext v21.16b, v22.16b, v22.16b, #8
481 add v20.4s, v20.4s, v19.4s
482 eor v21.16b, v21.16b, v20.16b
483 tbl v21.16b, { v21.16b }, v0.16b
484 add v16.4s, v17.4s, v21.4s
485 zip1 v17.4s, v4.4s, v5.4s
486 zip1 v4.4s, v5.4s, v4.4s
487 eor v5.16b, v16.16b, v19.16b
488 ext v4.16b, v4.16b, v17.16b, #8
489 ushr v17.4s, v5.4s, #12
490 shl v5.4s, v5.4s, #20
491 add v19.4s, v20.4s, v4.4s
492 ext v20.16b, v18.16b, v18.16b, #8
493 zip1 v3.2d, v4.2d, v3.2d
494 orr v5.16b, v5.16b, v17.16b
495 zip2 v2.4s, v2.4s, v4.4s
496 uzp2 v7.4s, v20.4s, v7.4s
498 add v17.4s, v19.4s, v5.4s
499 ext v7.16b, v7.16b, v20.16b, #4
500 eor v19.16b, v21.16b, v17.16b
501 ext v17.16b, v17.16b, v17.16b, #4
502 tbl v19.16b, { v19.16b }, v1.16b
503 add v7.4s, v17.4s, v7.4s
504 add v16.4s, v16.4s, v19.4s
505 ext v17.16b, v19.16b, v19.16b, #8
506 ext v19.16b, v18.16b, v18.16b, #12
507 eor v5.16b, v16.16b, v5.16b
508 ext v16.16b, v16.16b, v16.16b, #12
509 ext v18.16b, v18.16b, v19.16b, #12
511 ushr v20.4s, v5.4s, #7
512 shl v5.4s, v5.4s, #25
514 mov v19.s[1], v6.s[2]
515 orr v5.16b, v5.16b, v20.16b
516 trn2 v18.4s, v18.4s, v19.4s
517 add v7.4s, v5.4s, v7.4s
518 eor v17.16b, v17.16b, v7.16b
519 add v7.4s, v7.4s, v18.4s
520 ext v18.16b, v3.16b, v3.16b, #12
521 tbl v17.16b, { v17.16b }, v0.16b
522 uzp1 v3.4s, v3.4s, v18.4s
523 add v16.4s, v16.4s, v17.4s
524 eor v5.16b, v16.16b, v5.16b
525 ushr v19.4s, v5.4s, #12
526 shl v5.4s, v5.4s, #20
527 orr v5.16b, v5.16b, v19.16b
528 add v7.4s, v7.4s, v5.4s
529 eor v17.16b, v17.16b, v7.16b
530 ext v7.16b, v7.16b, v7.16b, #12
531 tbl v17.16b, { v17.16b }, v1.16b
532 add v3.4s, v7.4s, v3.4s
533 add v16.4s, v16.4s, v17.4s
534 ext v7.16b, v17.16b, v17.16b, #8
535 eor v5.16b, v16.16b, v5.16b
536 ext v16.16b, v16.16b, v16.16b, #4
537 ushr v18.4s, v5.4s, #7
538 shl v5.4s, v5.4s, #25
539 orr v5.16b, v5.16b, v18.16b
540 add v3.4s, v3.4s, v5.4s
541 eor v7.16b, v7.16b, v3.16b
/* From here v0 is overwritten with data; the .LCPI1_1 indices are consumed. */
542 tbl v0.16b, { v7.16b }, v0.16b
543 zip1 v7.4s, v2.4s, v6.4s
544 zip1 v2.4s, v6.4s, v2.4s
545 add v4.4s, v16.4s, v0.4s
546 ext v2.16b, v2.16b, v7.16b, #8
547 eor v5.16b, v4.16b, v5.16b
548 add v2.4s, v3.4s, v2.4s
549 ushr v6.4s, v5.4s, #12
550 shl v5.4s, v5.4s, #20
551 orr v3.16b, v5.16b, v6.16b
552 add v2.4s, v2.4s, v3.4s
553 eor v0.16b, v0.16b, v2.16b
554 ext v2.16b, v2.16b, v2.16b, #4
/* Last use of the .LCPI1_2 indices in v1; v1 is reused as data just below. */
555 tbl v0.16b, { v0.16b }, v1.16b
556 add v1.4s, v4.4s, v0.4s
557 ext v0.16b, v0.16b, v0.16b, #8
558 eor v3.16b, v1.16b, v3.16b
559 ext v1.16b, v1.16b, v1.16b, #12
560 ushr v4.4s, v3.4s, #7
561 shl v3.4s, v3.4s, #25
/* Store q1 at x0+32 and q0 at x0+48. */
562 stp q1, q0, [x0, #32]
/* Finish the last rotate-right-by-7 into v3. */
563 orr v3.16b, v3.16b, v4.16b
/* Symbol size = distance from the entry label to .Lfunc_end1. */
567 .size compress_pre, .Lfunc_end1-compress_pre
/*
 * zfs_blake3_compress_xof_sse41
 *
 * Exported (.globl) function symbol. Only fragments of the body are
 * visible in this excerpt, so the notes below cover just those lines.
 * NOTE(review): the stack adjustment, the call into the compression
 * helper, the output stores and the ret are not visible here; confirm
 * against the full file.
 */
570 .globl zfs_blake3_compress_xof_sse41
572 .type zfs_blake3_compress_xof_sse41,@function
573 zfs_blake3_compress_xof_sse41:
/* Save the frame pointer (x29) and link register (x30) at sp+64. */
578 stp x29, x30, [sp, #64]
/* Save callee-saved x20/x19 at sp+80. */
580 stp x20, x19, [sp, #80]
/* Reload two 16-byte values from the stack frame (sp+32 and sp+48). */
596 ldp q2, q3, [sp, #32]
/* v0 ^= v2 and v1 ^= v3, byte-wise over the full 128-bit registers. */
597 eor v0.16b, v2.16b, v0.16b
598 eor v1.16b, v3.16b, v1.16b
/* Restore x29/x30 from the slot written in the prologue. */
599 ldp x29, x30, [sp, #64]
/*
 * Two further XORs into v0. The lines between them, which presumably
 * reload v0/v2/v3, are not visible in this excerpt.
 */
602 eor v0.16b, v0.16b, v2.16b
605 eor v0.16b, v0.16b, v3.16b
/* Restore callee-saved x20/x19. */
607 ldp x20, x19, [sp, #80]
/* Symbol size = distance from the entry label to .Lfunc_end2. */
612 .size zfs_blake3_compress_xof_sse41, .Lfunc_end2-zfs_blake3_compress_xof_sse41
615 .section .rodata.cst16,"aM",@progbits,16
662 .globl zfs_blake3_hash_many_sse41
664 .type zfs_blake3_hash_many_sse41,@function
665 zfs_blake3_hash_many_sse41:
668 stp d15, d14, [sp, #-144]!
669 stp d13, d12, [sp, #16]
670 stp d11, d10, [sp, #32]
671 stp d9, d8, [sp, #48]
672 stp x29, x27, [sp, #64]
673 stp x26, x25, [sp, #80]
674 stp x24, x23, [sp, #96]
675 stp x22, x21, [sp, #112]
676 stp x20, x19, [sp, #128]
678 .cfi_def_cfa_offset 512
691 .cfi_offset b10, -104
692 .cfi_offset b11, -112
693 .cfi_offset b12, -120
694 .cfi_offset b13, -128
695 .cfi_offset b14, -136
696 .cfi_offset b15, -144
707 movk w15, #27145, lsl #16
708 movk w16, #47975, lsl #16
709 ldr q0, [x12, :lo12:.LCPI3_0]
715 and v0.16b, v1.16b, v0.16b
716 ldr q1, [x11, :lo12:.LCPI3_1]
717 movk w13, #15470, lsl #16
718 movk w14, #42319, lsl #16
720 stp q0, q1, [sp, #16]
721 orr v0.4s, #128, lsl #24
724 stp q0, q14, [sp, #48]
727 zip1 v0.4s, v29.4s, v8.4s
729 zip1 v1.4s, v30.4s, v31.4s
731 zip1 v2.4s, v24.4s, v18.4s
733 zip1 v3.4s, v25.4s, v26.4s
735 zip2 v6.4s, v29.4s, v8.4s
737 zip1 v4.2d, v0.2d, v1.2d
739 zip2 v7.4s, v30.4s, v31.4s
740 zip1 v5.2d, v2.2d, v3.2d
741 zip2 v0.2d, v0.2d, v1.2d
742 zip2 v1.2d, v2.2d, v3.2d
743 zip2 v2.4s, v24.4s, v18.4s
744 zip2 v3.4s, v25.4s, v26.4s
746 zip2 v4.2d, v6.2d, v7.2d
747 stp q0, q1, [x8, #32]
748 zip1 v0.2d, v6.2d, v7.2d
749 zip1 v1.2d, v2.2d, v3.2d
750 zip2 v2.2d, v2.2d, v3.2d
751 stp q0, q1, [x8, #64]
752 stp q4, q2, [x8, #96]
761 ld1r { v29.4s }, [x15], #4
762 ld1r { v30.4s }, [x16]
764 ld1r { v31.4s }, [x17]
766 ld1r { v24.4s }, [x19]
767 ld1r { v18.4s }, [x20]
768 ld1r { v25.4s }, [x16]
769 ld1r { v8.4s }, [x15]
770 ld1r { v26.4s }, [x17]
776 ldp x19, x20, [x0, #16]
777 add v1.4s, v0.4s, v1.4s
779 movi v0.4s, #128, lsl #24
782 eor v0.16b, v1.16b, v0.16b
784 cmgt v0.4s, v1.4s, v0.4s
787 sub v0.4s, v1.4s, v0.4s
797 csel w27, w9, wzr, eq
803 zip1 v22.4s, v1.4s, v4.4s
804 zip2 v20.4s, v1.4s, v4.4s
806 zip1 v17.4s, v2.4s, v5.4s
807 zip2 v23.4s, v2.4s, v5.4s
809 zip1 v19.4s, v6.4s, v16.4s
810 zip2 v1.4s, v6.4s, v16.4s
811 ldp q27, q28, [x23, #32]
812 zip1 v4.4s, v7.4s, v21.4s
813 zip2 v5.4s, v7.4s, v21.4s
814 zip2 v15.2d, v17.2d, v4.2d
815 ldp q9, q10, [x24, #32]
816 mov v17.d[1], v4.d[0]
817 add v4.4s, v30.4s, v25.4s
818 zip2 v11.2d, v23.2d, v5.2d
819 zip2 v3.4s, v27.4s, v9.4s
820 zip1 v7.4s, v27.4s, v9.4s
821 ldp q12, q6, [x22, #32]
822 mov v23.d[1], v5.d[0]
823 stp q11, q3, [sp, #256]
824 add v5.4s, v31.4s, v26.4s
825 add v4.4s, v4.4s, v17.4s
827 ldp q16, q2, [x25, #32]
828 add v5.4s, v5.4s, v23.4s
829 zip1 v3.4s, v12.4s, v16.4s
830 eor v0.16b, v5.16b, v0.16b
831 zip1 v9.4s, v6.4s, v2.4s
832 zip2 v2.4s, v6.4s, v2.4s
833 stp q7, q3, [sp, #208]
834 zip2 v3.4s, v12.4s, v16.4s
835 zip1 v12.4s, v28.4s, v10.4s
836 zip2 v10.4s, v28.4s, v10.4s
837 stp q17, q2, [sp, #160]
838 zip2 v28.2d, v22.2d, v19.2d
839 mov v22.d[1], v19.d[0]
841 add v2.4s, v8.4s, v18.4s
842 eor v16.16b, v4.16b, v13.16b
845 stp q22, q28, [sp, #320]
846 zip2 v22.2d, v20.2d, v1.2d
847 mov v20.d[1], v1.d[0]
848 add v1.4s, v29.4s, v24.4s
849 add v4.4s, v4.4s, v15.4s
850 add v5.4s, v5.4s, v11.4s
851 add v2.4s, v2.4s, v20.4s
852 stp q15, q20, [sp, #288]
853 add v1.4s, v1.4s, v3.4s
858 eor v6.16b, v1.16b, v3.16b
860 add v1.4s, v1.4s, v28.4s
863 eor v7.16b, v2.16b, v3.16b
864 ldp q27, q3, [sp, #32]
865 add v2.4s, v2.4s, v22.4s
866 tbl v6.16b, { v6.16b }, v27.16b
867 tbl v7.16b, { v7.16b }, v27.16b
868 tbl v16.16b, { v16.16b }, v27.16b
869 tbl v0.16b, { v0.16b }, v27.16b
870 add v19.4s, v6.4s, v14.4s
871 add v21.4s, v7.4s, v3.4s
872 add v30.4s, v16.4s, v17.4s
873 add v31.4s, v0.4s, v20.4s
874 eor v24.16b, v19.16b, v24.16b
875 eor v17.16b, v21.16b, v18.16b
876 ushr v18.4s, v24.4s, #12
877 shl v20.4s, v24.4s, #20
878 eor v24.16b, v30.16b, v25.16b
879 eor v25.16b, v31.16b, v26.16b
880 ushr v26.4s, v17.4s, #12
881 shl v17.4s, v17.4s, #20
882 ushr v29.4s, v24.4s, #12
883 shl v24.4s, v24.4s, #20
884 ushr v8.4s, v25.4s, #12
885 shl v25.4s, v25.4s, #20
886 orr v3.16b, v20.16b, v18.16b
887 ldr q18, [x10, :lo12:.LCPI3_2]
888 orr v13.16b, v17.16b, v26.16b
889 orr v24.16b, v24.16b, v29.16b
890 orr v14.16b, v25.16b, v8.16b
891 add v8.4s, v1.4s, v3.4s
892 add v29.4s, v2.4s, v13.4s
893 add v17.4s, v4.4s, v24.4s
894 add v20.4s, v5.4s, v14.4s
895 eor v1.16b, v6.16b, v8.16b
896 eor v2.16b, v7.16b, v29.16b
897 eor v4.16b, v16.16b, v17.16b
898 eor v0.16b, v0.16b, v20.16b
899 tbl v25.16b, { v1.16b }, v18.16b
900 tbl v16.16b, { v2.16b }, v18.16b
901 tbl v6.16b, { v4.16b }, v18.16b
902 tbl v4.16b, { v0.16b }, v18.16b
903 add v19.4s, v19.4s, v25.4s
904 add v21.4s, v21.4s, v16.4s
905 add v26.4s, v30.4s, v6.4s
906 add v7.4s, v31.4s, v4.4s
907 eor v0.16b, v19.16b, v3.16b
908 eor v1.16b, v21.16b, v13.16b
909 eor v2.16b, v26.16b, v24.16b
910 eor v3.16b, v7.16b, v14.16b
911 ushr v5.4s, v0.4s, #7
912 shl v0.4s, v0.4s, #25
913 ushr v24.4s, v1.4s, #7
914 shl v1.4s, v1.4s, #25
915 ushr v30.4s, v2.4s, #7
916 shl v2.4s, v2.4s, #25
917 orr v5.16b, v0.16b, v5.16b
918 orr v0.16b, v1.16b, v24.16b
919 ushr v31.4s, v3.4s, #7
920 orr v2.16b, v2.16b, v30.16b
921 ldp q24, q30, [sp, #208]
922 shl v3.4s, v3.4s, #25
923 zip2 v14.2d, v12.2d, v9.2d
925 orr v1.16b, v3.16b, v31.16b
926 zip2 v3.2d, v24.2d, v30.2d
928 mov v22.d[1], v30.d[0]
931 stp q22, q14, [sp, #224]
932 mov v24.d[1], v30.d[0]
933 add v12.4s, v8.4s, v22.4s
934 mov v31.d[1], v9.d[0]
935 add v22.4s, v29.4s, v24.4s
937 zip2 v28.2d, v28.2d, v30.2d
939 mov v15.d[1], v29.d[0]
940 zip2 v8.2d, v10.2d, v29.2d
941 add v10.4s, v12.4s, v0.4s
942 add v22.4s, v22.4s, v2.4s
944 add v20.4s, v20.4s, v15.4s
945 add v17.4s, v17.4s, v31.4s
946 stp q3, q8, [sp, #192]
947 eor v4.16b, v4.16b, v10.16b
948 eor v25.16b, v25.16b, v22.16b
949 add v20.4s, v20.4s, v5.4s
950 add v17.4s, v17.4s, v1.4s
951 tbl v4.16b, { v4.16b }, v27.16b
952 tbl v25.16b, { v25.16b }, v27.16b
953 eor v6.16b, v6.16b, v20.16b
954 eor v16.16b, v16.16b, v17.16b
955 add v26.4s, v26.4s, v4.4s
956 add v7.4s, v7.4s, v25.4s
957 tbl v6.16b, { v6.16b }, v27.16b
958 tbl v16.16b, { v16.16b }, v27.16b
959 eor v0.16b, v26.16b, v0.16b
960 eor v2.16b, v7.16b, v2.16b
961 add v21.4s, v21.4s, v6.4s
962 add v19.4s, v19.4s, v16.4s
963 ushr v12.4s, v0.4s, #12
964 shl v0.4s, v0.4s, #20
965 ushr v13.4s, v2.4s, #12
966 shl v2.4s, v2.4s, #20
967 eor v5.16b, v21.16b, v5.16b
968 eor v1.16b, v19.16b, v1.16b
969 orr v0.16b, v0.16b, v12.16b
970 add v10.4s, v10.4s, v3.4s
971 orr v2.16b, v2.16b, v13.16b
972 ushr v13.4s, v5.4s, #12
973 shl v5.4s, v5.4s, #20
974 add v22.4s, v22.4s, v28.4s
975 ushr v12.4s, v1.4s, #12
976 shl v1.4s, v1.4s, #20
977 add v10.4s, v10.4s, v0.4s
978 orr v5.16b, v5.16b, v13.16b
979 add v22.4s, v22.4s, v2.4s
980 add v20.4s, v20.4s, v8.4s
981 orr v1.16b, v1.16b, v12.16b
982 add v17.4s, v17.4s, v14.4s
983 eor v4.16b, v4.16b, v10.16b
984 eor v25.16b, v25.16b, v22.16b
985 add v20.4s, v20.4s, v5.4s
986 add v17.4s, v17.4s, v1.4s
987 tbl v4.16b, { v4.16b }, v18.16b
988 tbl v25.16b, { v25.16b }, v18.16b
989 eor v6.16b, v6.16b, v20.16b
990 eor v16.16b, v16.16b, v17.16b
991 add v26.4s, v26.4s, v4.4s
992 add v7.4s, v7.4s, v25.4s
993 tbl v6.16b, { v6.16b }, v18.16b
994 tbl v16.16b, { v16.16b }, v18.16b
995 eor v0.16b, v26.16b, v0.16b
996 eor v2.16b, v7.16b, v2.16b
997 add v21.4s, v21.4s, v6.4s
998 add v19.4s, v19.4s, v16.4s
999 ushr v12.4s, v0.4s, #7
1000 shl v0.4s, v0.4s, #25
1001 ushr v13.4s, v2.4s, #7
1002 shl v2.4s, v2.4s, #25
1003 eor v5.16b, v21.16b, v5.16b
1004 eor v1.16b, v19.16b, v1.16b
1005 orr v0.16b, v0.16b, v12.16b
1006 add v22.4s, v22.4s, v23.4s
1007 orr v2.16b, v2.16b, v13.16b
1008 ushr v13.4s, v5.4s, #7
1009 shl v5.4s, v5.4s, #25
1010 add v17.4s, v17.4s, v11.4s
1011 mov v30.16b, v28.16b
1012 mov v28.16b, v23.16b
1014 ushr v12.4s, v1.4s, #7
1015 shl v1.4s, v1.4s, #25
1016 add v22.4s, v22.4s, v0.4s
1017 mov v29.16b, v31.16b
1019 orr v5.16b, v5.16b, v13.16b
1020 add v17.4s, v17.4s, v2.4s
1021 add v10.4s, v10.4s, v23.4s
1022 orr v1.16b, v1.16b, v12.16b
1024 eor v16.16b, v16.16b, v22.16b
1025 add v20.4s, v20.4s, v31.4s
1026 eor v6.16b, v6.16b, v17.16b
1027 add v10.4s, v10.4s, v5.4s
1028 tbl v16.16b, { v16.16b }, v27.16b
1029 add v20.4s, v20.4s, v1.4s
1030 tbl v6.16b, { v6.16b }, v27.16b
1031 eor v25.16b, v25.16b, v10.16b
1032 add v21.4s, v21.4s, v16.4s
1033 eor v4.16b, v4.16b, v20.16b
1034 add v26.4s, v26.4s, v6.4s
1035 tbl v25.16b, { v25.16b }, v27.16b
1036 eor v0.16b, v21.16b, v0.16b
1037 tbl v4.16b, { v4.16b }, v27.16b
1038 eor v2.16b, v26.16b, v2.16b
1039 add v19.4s, v19.4s, v25.4s
1040 ushr v12.4s, v0.4s, #12
1041 shl v0.4s, v0.4s, #20
1042 add v7.4s, v7.4s, v4.4s
1043 ushr v13.4s, v2.4s, #12
1044 shl v2.4s, v2.4s, #20
1045 eor v5.16b, v5.16b, v19.16b
1046 add v22.4s, v22.4s, v24.4s
1048 orr v0.16b, v0.16b, v12.16b
1049 eor v1.16b, v7.16b, v1.16b
1050 orr v2.16b, v2.16b, v13.16b
1051 ushr v12.4s, v5.4s, #12
1052 shl v5.4s, v5.4s, #20
1053 add v17.4s, v17.4s, v24.4s
1055 ushr v13.4s, v1.4s, #12
1056 shl v1.4s, v1.4s, #20
1057 add v22.4s, v22.4s, v0.4s
1058 orr v5.16b, v5.16b, v12.16b
1059 add v17.4s, v17.4s, v2.4s
1060 add v10.4s, v10.4s, v24.4s
1062 orr v1.16b, v1.16b, v13.16b
1063 eor v16.16b, v16.16b, v22.16b
1064 add v20.4s, v20.4s, v14.4s
1065 eor v6.16b, v6.16b, v17.16b
1066 add v10.4s, v10.4s, v5.4s
1067 tbl v16.16b, { v16.16b }, v18.16b
1068 add v20.4s, v20.4s, v1.4s
1069 tbl v6.16b, { v6.16b }, v18.16b
1070 eor v25.16b, v25.16b, v10.16b
1071 add v21.4s, v21.4s, v16.4s
1072 eor v4.16b, v4.16b, v20.16b
1073 add v26.4s, v26.4s, v6.4s
1074 tbl v25.16b, { v25.16b }, v18.16b
1075 eor v0.16b, v21.16b, v0.16b
1076 tbl v4.16b, { v4.16b }, v18.16b
1077 eor v2.16b, v26.16b, v2.16b
1078 add v19.4s, v19.4s, v25.4s
1079 ushr v12.4s, v0.4s, #7
1080 shl v0.4s, v0.4s, #25
1081 add v7.4s, v7.4s, v4.4s
1082 ushr v13.4s, v2.4s, #7
1083 shl v2.4s, v2.4s, #25
1084 eor v5.16b, v19.16b, v5.16b
1085 orr v0.16b, v0.16b, v12.16b
1086 eor v1.16b, v7.16b, v1.16b
1087 add v10.4s, v10.4s, v24.4s
1088 orr v2.16b, v2.16b, v13.16b
1089 ushr v12.4s, v5.4s, #7
1090 shl v5.4s, v5.4s, #25
1091 add v22.4s, v22.4s, v29.4s
1092 ushr v13.4s, v1.4s, #7
1093 shl v1.4s, v1.4s, #25
1094 add v10.4s, v10.4s, v0.4s
1095 orr v5.16b, v5.16b, v12.16b
1096 add v22.4s, v22.4s, v2.4s
1097 add v20.4s, v20.4s, v8.4s
1099 orr v1.16b, v1.16b, v13.16b
1100 add v17.4s, v17.4s, v3.4s
1102 eor v4.16b, v4.16b, v10.16b
1103 eor v25.16b, v25.16b, v22.16b
1104 add v20.4s, v20.4s, v5.4s
1105 add v17.4s, v17.4s, v1.4s
1106 tbl v4.16b, { v4.16b }, v27.16b
1107 tbl v25.16b, { v25.16b }, v27.16b
1108 eor v6.16b, v6.16b, v20.16b
1109 eor v16.16b, v16.16b, v17.16b
1110 add v26.4s, v26.4s, v4.4s
1111 add v7.4s, v7.4s, v25.4s
1112 tbl v6.16b, { v6.16b }, v27.16b
1113 tbl v16.16b, { v16.16b }, v27.16b
1114 eor v0.16b, v26.16b, v0.16b
1115 eor v2.16b, v7.16b, v2.16b
1116 add v21.4s, v21.4s, v6.4s
1117 add v19.4s, v19.4s, v16.4s
1118 ushr v12.4s, v0.4s, #12
1119 shl v0.4s, v0.4s, #20
1120 ushr v13.4s, v2.4s, #12
1121 shl v2.4s, v2.4s, #20
1122 eor v5.16b, v21.16b, v5.16b
1123 eor v1.16b, v19.16b, v1.16b
1124 orr v0.16b, v0.16b, v12.16b
1125 add v10.4s, v10.4s, v30.4s
1126 orr v2.16b, v2.16b, v13.16b
1127 ushr v13.4s, v5.4s, #12
1128 shl v5.4s, v5.4s, #20
1129 add v22.4s, v22.4s, v8.4s
1130 mov v24.16b, v30.16b
1131 mov v30.16b, v15.16b
1132 add v17.4s, v17.4s, v15.4s
1134 ushr v12.4s, v1.4s, #12
1135 shl v1.4s, v1.4s, #20
1136 add v10.4s, v10.4s, v0.4s
1138 orr v5.16b, v5.16b, v13.16b
1139 add v22.4s, v22.4s, v2.4s
1140 add v20.4s, v20.4s, v15.4s
1141 orr v1.16b, v1.16b, v12.16b
1142 eor v4.16b, v4.16b, v10.16b
1143 eor v25.16b, v25.16b, v22.16b
1144 add v20.4s, v20.4s, v5.4s
1145 add v17.4s, v17.4s, v1.4s
1146 tbl v4.16b, { v4.16b }, v18.16b
1147 tbl v25.16b, { v25.16b }, v18.16b
1148 eor v6.16b, v6.16b, v20.16b
1149 eor v16.16b, v16.16b, v17.16b
1150 add v26.4s, v26.4s, v4.4s
1151 add v7.4s, v7.4s, v25.4s
1152 tbl v6.16b, { v6.16b }, v18.16b
1153 tbl v16.16b, { v16.16b }, v18.16b
1154 eor v0.16b, v26.16b, v0.16b
1155 eor v2.16b, v7.16b, v2.16b
1156 add v21.4s, v21.4s, v6.4s
1157 add v19.4s, v19.4s, v16.4s
1158 ushr v12.4s, v0.4s, #7
1159 shl v0.4s, v0.4s, #25
1160 ushr v13.4s, v2.4s, #7
1161 shl v2.4s, v2.4s, #25
1162 eor v5.16b, v21.16b, v5.16b
1163 eor v1.16b, v19.16b, v1.16b
1164 orr v0.16b, v0.16b, v12.16b
1165 add v22.4s, v22.4s, v9.4s
1166 orr v2.16b, v2.16b, v13.16b
1167 ushr v13.4s, v5.4s, #7
1168 shl v5.4s, v5.4s, #25
1169 add v17.4s, v17.4s, v14.4s
1170 ushr v12.4s, v1.4s, #7
1171 shl v1.4s, v1.4s, #25
1172 add v22.4s, v22.4s, v0.4s
1173 orr v5.16b, v5.16b, v13.16b
1174 add v17.4s, v17.4s, v2.4s
1175 add v10.4s, v10.4s, v28.4s
1176 orr v1.16b, v1.16b, v12.16b
1177 eor v16.16b, v16.16b, v22.16b
1178 add v20.4s, v20.4s, v11.4s
1179 eor v6.16b, v6.16b, v17.16b
1180 add v10.4s, v10.4s, v5.4s
1181 tbl v16.16b, { v16.16b }, v27.16b
1182 add v20.4s, v20.4s, v1.4s
1183 tbl v6.16b, { v6.16b }, v27.16b
1184 eor v25.16b, v25.16b, v10.16b
1185 add v21.4s, v21.4s, v16.4s
1186 eor v4.16b, v4.16b, v20.16b
1187 add v26.4s, v26.4s, v6.4s
1188 tbl v25.16b, { v25.16b }, v27.16b
1189 eor v0.16b, v21.16b, v0.16b
1190 tbl v4.16b, { v4.16b }, v27.16b
1191 eor v2.16b, v26.16b, v2.16b
1192 add v19.4s, v19.4s, v25.4s
1193 ushr v12.4s, v0.4s, #12
1194 shl v0.4s, v0.4s, #20
1195 add v7.4s, v7.4s, v4.4s
1196 ushr v13.4s, v2.4s, #12
1197 shl v2.4s, v2.4s, #20
1198 eor v5.16b, v5.16b, v19.16b
1199 orr v0.16b, v0.16b, v12.16b
1200 eor v1.16b, v7.16b, v1.16b
1201 add v22.4s, v22.4s, v29.4s
1202 orr v2.16b, v2.16b, v13.16b
1203 ushr v12.4s, v5.4s, #12
1204 shl v5.4s, v5.4s, #20
1205 add v17.4s, v17.4s, v23.4s
1206 ushr v13.4s, v1.4s, #12
1207 shl v1.4s, v1.4s, #20
1208 add v22.4s, v22.4s, v0.4s
1209 orr v5.16b, v5.16b, v12.16b
1210 add v17.4s, v17.4s, v2.4s
1211 add v10.4s, v10.4s, v31.4s
1212 orr v1.16b, v1.16b, v13.16b
1213 eor v16.16b, v16.16b, v22.16b
1214 add v20.4s, v20.4s, v30.4s
1215 eor v6.16b, v6.16b, v17.16b
1216 add v10.4s, v10.4s, v5.4s
1217 tbl v16.16b, { v16.16b }, v18.16b
1218 add v20.4s, v20.4s, v1.4s
1219 tbl v6.16b, { v6.16b }, v18.16b
1220 eor v25.16b, v25.16b, v10.16b
1221 add v21.4s, v21.4s, v16.4s
1222 eor v4.16b, v4.16b, v20.16b
1223 add v26.4s, v26.4s, v6.4s
1224 tbl v25.16b, { v25.16b }, v18.16b
1225 eor v0.16b, v21.16b, v0.16b
1226 tbl v4.16b, { v4.16b }, v18.16b
1227 eor v2.16b, v26.16b, v2.16b
1228 add v19.4s, v19.4s, v25.4s
1229 ushr v12.4s, v0.4s, #7
1230 shl v0.4s, v0.4s, #25
1231 add v7.4s, v7.4s, v4.4s
1232 ushr v13.4s, v2.4s, #7
1233 shl v2.4s, v2.4s, #25
1234 eor v5.16b, v19.16b, v5.16b
1235 add v10.4s, v10.4s, v3.4s
1237 orr v0.16b, v0.16b, v12.16b
1238 eor v1.16b, v7.16b, v1.16b
1239 orr v2.16b, v2.16b, v13.16b
1240 ushr v12.4s, v5.4s, #7
1241 shl v5.4s, v5.4s, #25
1242 add v22.4s, v22.4s, v3.4s
1243 ushr v13.4s, v1.4s, #7
1244 shl v1.4s, v1.4s, #25
1245 add v10.4s, v10.4s, v0.4s
1246 orr v5.16b, v5.16b, v12.16b
1247 add v22.4s, v22.4s, v2.4s
1248 add v20.4s, v20.4s, v15.4s
1250 orr v1.16b, v1.16b, v13.16b
1251 add v17.4s, v17.4s, v24.4s
1252 eor v4.16b, v4.16b, v10.16b
1253 eor v25.16b, v25.16b, v22.16b
1254 add v20.4s, v20.4s, v5.4s
1255 add v17.4s, v17.4s, v1.4s
1256 tbl v4.16b, { v4.16b }, v27.16b
1257 tbl v25.16b, { v25.16b }, v27.16b
1258 eor v6.16b, v6.16b, v20.16b
1259 eor v16.16b, v16.16b, v17.16b
1260 add v26.4s, v26.4s, v4.4s
1261 add v7.4s, v7.4s, v25.4s
1262 tbl v6.16b, { v6.16b }, v27.16b
1263 tbl v16.16b, { v16.16b }, v27.16b
1264 eor v0.16b, v26.16b, v0.16b
1265 eor v2.16b, v7.16b, v2.16b
1266 add v21.4s, v21.4s, v6.4s
1267 add v19.4s, v19.4s, v16.4s
1268 ushr v12.4s, v0.4s, #12
1269 shl v0.4s, v0.4s, #20
1270 ushr v13.4s, v2.4s, #12
1271 shl v2.4s, v2.4s, #20
1272 eor v5.16b, v21.16b, v5.16b
1273 ldp q23, q11, [sp, #320]
1274 eor v1.16b, v19.16b, v1.16b
1275 orr v0.16b, v0.16b, v12.16b
1276 add v10.4s, v10.4s, v8.4s
1277 orr v2.16b, v2.16b, v13.16b
1278 ushr v13.4s, v5.4s, #12
1279 shl v5.4s, v5.4s, #20
1280 add v22.4s, v22.4s, v23.4s
1281 ushr v12.4s, v1.4s, #12
1282 shl v1.4s, v1.4s, #20
1283 add v10.4s, v10.4s, v0.4s
1284 mov v28.16b, v31.16b
1287 orr v5.16b, v5.16b, v13.16b
1288 add v22.4s, v22.4s, v2.4s
1289 add v20.4s, v20.4s, v11.4s
1290 orr v1.16b, v1.16b, v12.16b
1291 add v17.4s, v17.4s, v8.4s
1292 eor v4.16b, v4.16b, v10.16b
1293 eor v25.16b, v25.16b, v22.16b
1294 add v20.4s, v20.4s, v5.4s
1295 add v17.4s, v17.4s, v1.4s
1296 tbl v4.16b, { v4.16b }, v18.16b
1297 tbl v25.16b, { v25.16b }, v18.16b
1298 eor v6.16b, v6.16b, v20.16b
1299 eor v16.16b, v16.16b, v17.16b
1300 add v26.4s, v26.4s, v4.4s
1301 add v7.4s, v7.4s, v25.4s
1302 tbl v6.16b, { v6.16b }, v18.16b
1303 tbl v16.16b, { v16.16b }, v18.16b
1304 eor v0.16b, v26.16b, v0.16b
1305 eor v2.16b, v7.16b, v2.16b
1306 add v21.4s, v21.4s, v6.4s
1307 add v19.4s, v19.4s, v16.4s
1308 ushr v12.4s, v0.4s, #7
1309 shl v0.4s, v0.4s, #25
1310 ushr v13.4s, v2.4s, #7
1311 shl v2.4s, v2.4s, #25
1312 eor v5.16b, v21.16b, v5.16b
1313 eor v1.16b, v19.16b, v1.16b
1314 orr v0.16b, v0.16b, v12.16b
1315 add v22.4s, v22.4s, v29.4s
1316 orr v2.16b, v2.16b, v13.16b
1317 ushr v13.4s, v5.4s, #7
1318 shl v5.4s, v5.4s, #25
1319 add v17.4s, v17.4s, v30.4s
1320 ushr v12.4s, v1.4s, #7
1321 shl v1.4s, v1.4s, #25
1322 add v22.4s, v22.4s, v0.4s
1323 orr v5.16b, v5.16b, v13.16b
1324 add v17.4s, v17.4s, v2.4s
1325 add v10.4s, v10.4s, v9.4s
1326 orr v1.16b, v1.16b, v12.16b
1327 eor v16.16b, v16.16b, v22.16b
1328 add v20.4s, v20.4s, v14.4s
1330 eor v6.16b, v6.16b, v17.16b
1331 add v10.4s, v10.4s, v5.4s
1332 tbl v16.16b, { v16.16b }, v27.16b
1333 add v20.4s, v20.4s, v1.4s
1334 tbl v6.16b, { v6.16b }, v27.16b
1335 eor v25.16b, v25.16b, v10.16b
1336 add v21.4s, v21.4s, v16.4s
1337 eor v4.16b, v4.16b, v20.16b
1338 add v26.4s, v26.4s, v6.4s
1339 tbl v25.16b, { v25.16b }, v27.16b
1340 eor v0.16b, v21.16b, v0.16b
1341 tbl v4.16b, { v4.16b }, v27.16b
1342 eor v2.16b, v26.16b, v2.16b
1343 add v19.4s, v19.4s, v25.4s
1344 ushr v12.4s, v0.4s, #12
1345 shl v0.4s, v0.4s, #20
1346 add v7.4s, v7.4s, v4.4s
1347 ushr v13.4s, v2.4s, #12
1348 shl v2.4s, v2.4s, #20
1349 eor v5.16b, v5.16b, v19.16b
1350 orr v0.16b, v0.16b, v12.16b
1351 eor v1.16b, v7.16b, v1.16b
1352 add v22.4s, v22.4s, v3.4s
1353 orr v2.16b, v2.16b, v13.16b
1354 ushr v12.4s, v5.4s, #12
1355 shl v5.4s, v5.4s, #20
1356 add v17.4s, v17.4s, v15.4s
1357 ushr v13.4s, v1.4s, #12
1358 shl v1.4s, v1.4s, #20
1359 add v22.4s, v22.4s, v0.4s
1360 orr v5.16b, v5.16b, v12.16b
1361 add v17.4s, v17.4s, v2.4s
1362 add v10.4s, v10.4s, v14.4s
1363 orr v1.16b, v1.16b, v13.16b
1364 eor v16.16b, v16.16b, v22.16b
1365 add v20.4s, v20.4s, v8.4s
1366 eor v6.16b, v6.16b, v17.16b
1367 add v10.4s, v10.4s, v5.4s
1368 tbl v16.16b, { v16.16b }, v18.16b
1369 add v20.4s, v20.4s, v1.4s
1370 tbl v6.16b, { v6.16b }, v18.16b
1371 eor v25.16b, v25.16b, v10.16b
1372 add v21.4s, v21.4s, v16.4s
1373 eor v4.16b, v4.16b, v20.16b
1374 add v26.4s, v26.4s, v6.4s
1375 tbl v25.16b, { v25.16b }, v18.16b
1376 eor v0.16b, v21.16b, v0.16b
1377 tbl v4.16b, { v4.16b }, v18.16b
1378 eor v2.16b, v26.16b, v2.16b
1379 add v19.4s, v19.4s, v25.4s
1380 ushr v12.4s, v0.4s, #7
1381 shl v0.4s, v0.4s, #25
1382 add v7.4s, v7.4s, v4.4s
1383 ushr v13.4s, v2.4s, #7
1384 shl v2.4s, v2.4s, #25
1385 eor v5.16b, v19.16b, v5.16b
1386 orr v0.16b, v0.16b, v12.16b
1387 eor v1.16b, v7.16b, v1.16b
1388 add v10.4s, v10.4s, v28.4s
1389 orr v2.16b, v2.16b, v13.16b
1390 ushr v12.4s, v5.4s, #7
1391 shl v5.4s, v5.4s, #25
1392 add v22.4s, v22.4s, v24.4s
1393 ushr v13.4s, v1.4s, #7
1394 shl v1.4s, v1.4s, #25
1395 add v10.4s, v10.4s, v0.4s
1396 orr v5.16b, v5.16b, v12.16b
1397 add v22.4s, v22.4s, v2.4s
1398 add v20.4s, v20.4s, v11.4s
1400 orr v1.16b, v1.16b, v13.16b
1401 add v17.4s, v17.4s, v31.4s
1403 eor v4.16b, v4.16b, v10.16b
1404 eor v25.16b, v25.16b, v22.16b
1405 add v20.4s, v20.4s, v5.4s
1406 add v17.4s, v17.4s, v1.4s
1407 tbl v4.16b, { v4.16b }, v27.16b
1408 tbl v25.16b, { v25.16b }, v27.16b
1409 eor v6.16b, v6.16b, v20.16b
1410 eor v16.16b, v16.16b, v17.16b
1411 add v26.4s, v26.4s, v4.4s
1412 add v7.4s, v7.4s, v25.4s
1413 tbl v6.16b, { v6.16b }, v27.16b
1414 tbl v16.16b, { v16.16b }, v27.16b
1415 eor v0.16b, v26.16b, v0.16b
1416 eor v2.16b, v7.16b, v2.16b
1417 add v21.4s, v21.4s, v6.4s
1418 add v19.4s, v19.4s, v16.4s
1419 ushr v12.4s, v0.4s, #12
1420 shl v0.4s, v0.4s, #20
1421 ushr v13.4s, v2.4s, #12
1422 shl v2.4s, v2.4s, #20
1423 eor v5.16b, v21.16b, v5.16b
1424 eor v1.16b, v19.16b, v1.16b
1425 orr v0.16b, v0.16b, v12.16b
1426 add v10.4s, v10.4s, v23.4s
1428 orr v2.16b, v2.16b, v13.16b
1429 ushr v13.4s, v5.4s, #12
1430 shl v5.4s, v5.4s, #20
1431 add v22.4s, v22.4s, v11.4s
1435 ushr v12.4s, v1.4s, #12
1436 shl v1.4s, v1.4s, #20
1437 add v10.4s, v10.4s, v0.4s
1438 orr v5.16b, v5.16b, v13.16b
1440 add v22.4s, v22.4s, v2.4s
1441 add v20.4s, v20.4s, v24.4s
1442 orr v1.16b, v1.16b, v12.16b
1443 add v17.4s, v17.4s, v31.4s
1444 eor v4.16b, v4.16b, v10.16b
1445 eor v25.16b, v25.16b, v22.16b
1446 add v20.4s, v20.4s, v5.4s
1447 add v17.4s, v17.4s, v1.4s
1448 tbl v4.16b, { v4.16b }, v18.16b
1449 tbl v25.16b, { v25.16b }, v18.16b
1450 eor v6.16b, v6.16b, v20.16b
1451 eor v16.16b, v16.16b, v17.16b
1452 add v26.4s, v26.4s, v4.4s
1453 add v7.4s, v7.4s, v25.4s
1454 tbl v6.16b, { v6.16b }, v18.16b
1455 tbl v16.16b, { v16.16b }, v18.16b
1456 eor v0.16b, v26.16b, v0.16b
1457 eor v2.16b, v7.16b, v2.16b
1458 add v21.4s, v21.4s, v6.4s
1460 add v19.4s, v19.4s, v16.4s
1461 ushr v12.4s, v0.4s, #7
1462 shl v0.4s, v0.4s, #25
1463 ushr v13.4s, v2.4s, #7
1464 shl v2.4s, v2.4s, #25
1465 eor v5.16b, v21.16b, v5.16b
1466 eor v1.16b, v19.16b, v1.16b
1467 orr v0.16b, v0.16b, v12.16b
1468 add v22.4s, v22.4s, v29.4s
1469 orr v2.16b, v2.16b, v13.16b
1470 ushr v13.4s, v5.4s, #7
1471 shl v5.4s, v5.4s, #25
1472 add v17.4s, v17.4s, v30.4s
1474 ushr v12.4s, v1.4s, #7
1475 shl v1.4s, v1.4s, #25
1476 add v22.4s, v22.4s, v0.4s
1479 orr v5.16b, v5.16b, v13.16b
1480 add v17.4s, v17.4s, v2.4s
1481 add v10.4s, v10.4s, v30.4s
1482 orr v1.16b, v1.16b, v12.16b
1483 eor v16.16b, v16.16b, v22.16b
1484 add v20.4s, v20.4s, v28.4s
1485 eor v6.16b, v6.16b, v17.16b
1486 add v10.4s, v10.4s, v5.4s
1487 tbl v16.16b, { v16.16b }, v27.16b
1488 add v20.4s, v20.4s, v1.4s
1489 tbl v6.16b, { v6.16b }, v27.16b
1490 eor v25.16b, v25.16b, v10.16b
1491 add v21.4s, v21.4s, v16.4s
1492 eor v4.16b, v4.16b, v20.16b
1493 add v26.4s, v26.4s, v6.4s
1494 tbl v25.16b, { v25.16b }, v27.16b
1495 eor v0.16b, v21.16b, v0.16b
1496 tbl v4.16b, { v4.16b }, v27.16b
1497 eor v2.16b, v26.16b, v2.16b
1498 add v19.4s, v19.4s, v25.4s
1499 ushr v12.4s, v0.4s, #12
1500 shl v0.4s, v0.4s, #20
1501 add v7.4s, v7.4s, v4.4s
1502 ushr v13.4s, v2.4s, #12
1503 shl v2.4s, v2.4s, #20
1504 eor v5.16b, v5.16b, v19.16b
1505 orr v0.16b, v0.16b, v12.16b
1506 eor v1.16b, v7.16b, v1.16b
1507 add v22.4s, v22.4s, v8.4s
1508 orr v2.16b, v2.16b, v13.16b
1509 ushr v12.4s, v5.4s, #12
1510 shl v5.4s, v5.4s, #20
1511 add v17.4s, v17.4s, v9.4s
1513 ushr v13.4s, v1.4s, #12
1514 shl v1.4s, v1.4s, #20
1515 add v22.4s, v22.4s, v0.4s
1516 orr v5.16b, v5.16b, v12.16b
1517 add v17.4s, v17.4s, v2.4s
1518 add v10.4s, v10.4s, v23.4s
1519 orr v1.16b, v1.16b, v13.16b
1520 eor v16.16b, v16.16b, v22.16b
1521 add v20.4s, v20.4s, v31.4s
1522 eor v6.16b, v6.16b, v17.16b
1523 add v10.4s, v10.4s, v5.4s
1524 tbl v16.16b, { v16.16b }, v18.16b
1525 add v20.4s, v20.4s, v1.4s
1526 tbl v6.16b, { v6.16b }, v18.16b
1527 eor v25.16b, v25.16b, v10.16b
1528 add v21.4s, v21.4s, v16.4s
1529 eor v4.16b, v4.16b, v20.16b
1530 add v26.4s, v26.4s, v6.4s
1531 tbl v25.16b, { v25.16b }, v18.16b
1532 eor v0.16b, v21.16b, v0.16b
1533 tbl v4.16b, { v4.16b }, v18.16b
1534 eor v2.16b, v26.16b, v2.16b
1535 add v19.4s, v19.4s, v25.4s
1536 ushr v12.4s, v0.4s, #7
1537 shl v0.4s, v0.4s, #25
1538 add v7.4s, v7.4s, v4.4s
1539 ushr v13.4s, v2.4s, #7
1540 shl v2.4s, v2.4s, #25
1541 eor v5.16b, v19.16b, v5.16b
1542 add v10.4s, v10.4s, v14.4s
1544 orr v0.16b, v0.16b, v12.16b
1545 eor v1.16b, v7.16b, v1.16b
1546 orr v2.16b, v2.16b, v13.16b
1547 ushr v12.4s, v5.4s, #7
1548 shl v5.4s, v5.4s, #25
1549 add v22.4s, v22.4s, v14.4s
1550 ushr v13.4s, v1.4s, #7
1551 shl v1.4s, v1.4s, #25
1552 add v10.4s, v10.4s, v0.4s
1553 orr v5.16b, v5.16b, v12.16b
1554 add v22.4s, v22.4s, v2.4s
1555 add v20.4s, v20.4s, v24.4s
1556 orr v1.16b, v1.16b, v13.16b
1557 eor v4.16b, v4.16b, v10.16b
1558 add v17.4s, v17.4s, v9.4s
1559 eor v25.16b, v25.16b, v22.16b
1560 add v20.4s, v20.4s, v5.4s
1561 tbl v4.16b, { v4.16b }, v27.16b
1562 add v17.4s, v17.4s, v1.4s
1563 tbl v25.16b, { v25.16b }, v27.16b
1564 eor v6.16b, v6.16b, v20.16b
1565 add v26.4s, v26.4s, v4.4s
1566 eor v16.16b, v16.16b, v17.16b
1567 add v7.4s, v7.4s, v25.4s
1568 tbl v6.16b, { v6.16b }, v27.16b
1569 eor v0.16b, v26.16b, v0.16b
1570 tbl v16.16b, { v16.16b }, v27.16b
1571 eor v2.16b, v7.16b, v2.16b
1572 add v21.4s, v21.4s, v6.4s
1573 ushr v12.4s, v0.4s, #12
1574 shl v0.4s, v0.4s, #20
1575 add v19.4s, v19.4s, v16.4s
1576 ushr v13.4s, v2.4s, #12
1577 shl v2.4s, v2.4s, #20
1578 eor v5.16b, v21.16b, v5.16b
1579 orr v0.16b, v0.16b, v12.16b
1580 eor v1.16b, v19.16b, v1.16b
1581 add v10.4s, v10.4s, v11.4s
1582 orr v2.16b, v2.16b, v13.16b
1583 ushr v13.4s, v5.4s, #12
1584 shl v5.4s, v5.4s, #20
1585 ushr v12.4s, v1.4s, #12
1586 shl v1.4s, v1.4s, #20
1587 add v10.4s, v10.4s, v0.4s
1588 add v22.4s, v22.4s, v15.4s
1589 orr v5.16b, v5.16b, v13.16b
1590 add v20.4s, v20.4s, v3.4s
1593 orr v1.16b, v1.16b, v12.16b
1594 eor v4.16b, v4.16b, v10.16b
1595 add v22.4s, v22.4s, v2.4s
1596 add v17.4s, v17.4s, v3.4s
1597 add v20.4s, v20.4s, v5.4s
1598 tbl v4.16b, { v4.16b }, v18.16b
1599 eor v25.16b, v25.16b, v22.16b
1600 add v17.4s, v17.4s, v1.4s
1601 eor v6.16b, v6.16b, v20.16b
1602 add v26.4s, v26.4s, v4.4s
1603 tbl v25.16b, { v25.16b }, v18.16b
1604 eor v16.16b, v16.16b, v17.16b
1605 tbl v6.16b, { v6.16b }, v18.16b
1606 eor v0.16b, v26.16b, v0.16b
1607 add v7.4s, v7.4s, v25.4s
1608 tbl v16.16b, { v16.16b }, v18.16b
1609 add v21.4s, v21.4s, v6.4s
1610 ushr v12.4s, v0.4s, #7
1611 shl v0.4s, v0.4s, #25
1612 eor v2.16b, v7.16b, v2.16b
1613 add v19.4s, v19.4s, v16.4s
1614 eor v5.16b, v21.16b, v5.16b
1615 orr v0.16b, v0.16b, v12.16b
1616 ushr v12.4s, v2.4s, #7
1617 shl v2.4s, v2.4s, #25
1618 eor v1.16b, v19.16b, v1.16b
1619 ushr v13.4s, v5.4s, #7
1620 shl v5.4s, v5.4s, #25
1621 add v22.4s, v22.4s, v8.4s
1622 orr v2.16b, v2.16b, v12.16b
1623 ushr v12.4s, v1.4s, #7
1624 shl v1.4s, v1.4s, #25
1625 orr v5.16b, v5.16b, v13.16b
1626 add v22.4s, v22.4s, v0.4s
1627 add v10.4s, v10.4s, v29.4s
1629 add v17.4s, v17.4s, v31.4s
1630 orr v1.16b, v1.16b, v12.16b
1631 add v20.4s, v20.4s, v29.4s
1632 eor v16.16b, v16.16b, v22.16b
1633 add v10.4s, v10.4s, v5.4s
1634 add v17.4s, v17.4s, v2.4s
1635 add v20.4s, v20.4s, v1.4s
1636 tbl v16.16b, { v16.16b }, v27.16b
1637 eor v25.16b, v25.16b, v10.16b
1638 eor v6.16b, v6.16b, v17.16b
1639 eor v4.16b, v4.16b, v20.16b
1640 add v21.4s, v21.4s, v16.4s
1641 tbl v25.16b, { v25.16b }, v27.16b
1642 tbl v6.16b, { v6.16b }, v27.16b
1643 tbl v4.16b, { v4.16b }, v27.16b
1644 eor v0.16b, v21.16b, v0.16b
1645 add v19.4s, v19.4s, v25.4s
1646 add v26.4s, v26.4s, v6.4s
1647 add v7.4s, v7.4s, v4.4s
1648 ushr v12.4s, v0.4s, #12
1649 shl v0.4s, v0.4s, #20
1650 eor v5.16b, v5.16b, v19.16b
1651 eor v2.16b, v26.16b, v2.16b
1652 eor v1.16b, v7.16b, v1.16b
1653 orr v0.16b, v0.16b, v12.16b
1654 ushr v12.4s, v5.4s, #12
1655 shl v5.4s, v5.4s, #20
1656 add v22.4s, v22.4s, v14.4s
1658 ushr v13.4s, v2.4s, #12
1659 shl v2.4s, v2.4s, #20
1660 mov v31.16b, v14.16b
1661 ushr v14.4s, v1.4s, #12
1662 shl v1.4s, v1.4s, #20
1663 orr v5.16b, v5.16b, v12.16b
1664 add v22.4s, v22.4s, v0.4s
1665 add v10.4s, v10.4s, v28.4s
1667 orr v2.16b, v2.16b, v13.16b
1668 orr v1.16b, v1.16b, v14.16b
1669 add v17.4s, v17.4s, v30.4s
1670 add v20.4s, v20.4s, v3.4s
1671 eor v16.16b, v16.16b, v22.16b
1672 add v10.4s, v10.4s, v5.4s
1673 add v17.4s, v17.4s, v2.4s
1674 add v20.4s, v20.4s, v1.4s
1675 tbl v16.16b, { v16.16b }, v18.16b
1676 eor v25.16b, v25.16b, v10.16b
1677 eor v6.16b, v6.16b, v17.16b
1678 eor v4.16b, v4.16b, v20.16b
1679 add v21.4s, v21.4s, v16.4s
1680 tbl v25.16b, { v25.16b }, v18.16b
1681 tbl v6.16b, { v6.16b }, v18.16b
1682 tbl v4.16b, { v4.16b }, v18.16b
1683 eor v0.16b, v21.16b, v0.16b
1684 add v19.4s, v19.4s, v25.4s
1685 add v26.4s, v26.4s, v6.4s
1686 add v7.4s, v7.4s, v4.4s
1687 ushr v12.4s, v0.4s, #7
1688 shl v0.4s, v0.4s, #25
1689 eor v5.16b, v19.16b, v5.16b
1690 eor v2.16b, v26.16b, v2.16b
1691 eor v1.16b, v7.16b, v1.16b
1692 orr v0.16b, v0.16b, v12.16b
1693 ushr v12.4s, v5.4s, #7
1694 shl v5.4s, v5.4s, #25
1695 add v10.4s, v10.4s, v23.4s
1696 ushr v13.4s, v2.4s, #7
1697 shl v2.4s, v2.4s, #25
1698 ushr v14.4s, v1.4s, #7
1699 shl v1.4s, v1.4s, #25
1700 orr v5.16b, v5.16b, v12.16b
1701 add v10.4s, v10.4s, v0.4s
1702 add v20.4s, v20.4s, v24.4s
1704 orr v2.16b, v2.16b, v13.16b
1705 orr v1.16b, v1.16b, v14.16b
1706 add v22.4s, v22.4s, v9.4s
1707 add v17.4s, v17.4s, v11.4s
1708 eor v4.16b, v4.16b, v10.16b
1709 add v20.4s, v20.4s, v5.4s
1710 add v22.4s, v22.4s, v2.4s
1711 add v17.4s, v17.4s, v1.4s
1712 tbl v4.16b, { v4.16b }, v27.16b
1713 eor v6.16b, v6.16b, v20.16b
1714 eor v25.16b, v25.16b, v22.16b
1715 eor v16.16b, v16.16b, v17.16b
1716 add v26.4s, v26.4s, v4.4s
1717 tbl v6.16b, { v6.16b }, v27.16b
1718 tbl v25.16b, { v25.16b }, v27.16b
1719 tbl v16.16b, { v16.16b }, v27.16b
1720 eor v0.16b, v26.16b, v0.16b
1721 add v21.4s, v21.4s, v6.4s
1722 add v7.4s, v7.4s, v25.4s
1723 add v19.4s, v19.4s, v16.4s
1724 ushr v12.4s, v0.4s, #12
1725 shl v0.4s, v0.4s, #20
1726 eor v5.16b, v21.16b, v5.16b
1727 eor v2.16b, v7.16b, v2.16b
1728 eor v1.16b, v19.16b, v1.16b
1729 orr v0.16b, v0.16b, v12.16b
1730 add v10.4s, v10.4s, v15.4s
1731 ushr v14.4s, v5.4s, #12
1732 shl v5.4s, v5.4s, #20
1735 ushr v12.4s, v2.4s, #12
1736 shl v2.4s, v2.4s, #20
1737 ushr v13.4s, v1.4s, #12
1738 shl v1.4s, v1.4s, #20
1739 add v10.4s, v10.4s, v0.4s
1740 orr v5.16b, v5.16b, v14.16b
1741 add v20.4s, v20.4s, v3.4s
1742 orr v2.16b, v2.16b, v12.16b
1743 orr v1.16b, v1.16b, v13.16b
1744 add v22.4s, v22.4s, v24.4s
1745 add v17.4s, v17.4s, v28.4s
1746 eor v4.16b, v4.16b, v10.16b
1747 add v20.4s, v20.4s, v5.4s
1748 add v22.4s, v22.4s, v2.4s
1749 add v17.4s, v17.4s, v1.4s
1750 tbl v4.16b, { v4.16b }, v18.16b
1751 eor v6.16b, v6.16b, v20.16b
1752 eor v25.16b, v25.16b, v22.16b
1753 eor v16.16b, v16.16b, v17.16b
1754 add v26.4s, v26.4s, v4.4s
1755 tbl v6.16b, { v6.16b }, v18.16b
1756 tbl v25.16b, { v25.16b }, v18.16b
1757 tbl v16.16b, { v16.16b }, v18.16b
1758 eor v0.16b, v26.16b, v0.16b
1759 add v21.4s, v21.4s, v6.4s
1760 add v7.4s, v7.4s, v25.4s
1761 add v19.4s, v19.4s, v16.4s
1762 ushr v12.4s, v0.4s, #7
1763 shl v0.4s, v0.4s, #25
1764 eor v5.16b, v21.16b, v5.16b
1765 eor v2.16b, v7.16b, v2.16b
1766 eor v1.16b, v19.16b, v1.16b
1767 orr v0.16b, v0.16b, v12.16b
1768 ushr v12.4s, v5.4s, #7
1769 shl v5.4s, v5.4s, #25
1772 ushr v13.4s, v2.4s, #7
1773 shl v2.4s, v2.4s, #25
1774 ushr v14.4s, v1.4s, #7
1775 shl v1.4s, v1.4s, #25
1776 orr v5.16b, v5.16b, v12.16b
1777 add v9.4s, v10.4s, v9.4s
1778 orr v2.16b, v2.16b, v13.16b
1779 orr v1.16b, v1.16b, v14.16b
1781 add v22.4s, v22.4s, v31.4s
1782 add v17.4s, v17.4s, v30.4s
1783 add v20.4s, v20.4s, v8.4s
1784 add v9.4s, v9.4s, v5.4s
1785 add v22.4s, v22.4s, v0.4s
1786 add v17.4s, v17.4s, v2.4s
1787 add v20.4s, v20.4s, v1.4s
1788 eor v25.16b, v25.16b, v9.16b
1789 eor v16.16b, v16.16b, v22.16b
1790 eor v6.16b, v6.16b, v17.16b
1791 eor v4.16b, v4.16b, v20.16b
1792 tbl v25.16b, { v25.16b }, v27.16b
1793 tbl v16.16b, { v16.16b }, v27.16b
1794 tbl v6.16b, { v6.16b }, v27.16b
1795 tbl v4.16b, { v4.16b }, v27.16b
1796 add v19.4s, v19.4s, v25.4s
1797 add v21.4s, v21.4s, v16.4s
1798 add v26.4s, v26.4s, v6.4s
1799 add v7.4s, v7.4s, v4.4s
1800 eor v5.16b, v5.16b, v19.16b
1801 eor v0.16b, v21.16b, v0.16b
1802 eor v2.16b, v26.16b, v2.16b
1803 eor v1.16b, v7.16b, v1.16b
1804 ushr v30.4s, v5.4s, #12
1805 shl v5.4s, v5.4s, #20
1806 ushr v10.4s, v0.4s, #12
1807 shl v0.4s, v0.4s, #20
1808 ushr v12.4s, v2.4s, #12
1809 shl v2.4s, v2.4s, #20
1810 ushr v13.4s, v1.4s, #12
1811 shl v1.4s, v1.4s, #20
1812 orr v5.16b, v5.16b, v30.16b
1813 add v30.4s, v9.4s, v29.4s
1814 add v22.4s, v22.4s, v23.4s
1816 orr v0.16b, v0.16b, v10.16b
1817 orr v2.16b, v2.16b, v12.16b
1818 orr v1.16b, v1.16b, v13.16b
1819 add v17.4s, v17.4s, v23.4s
1820 add v20.4s, v20.4s, v28.4s
1821 add v23.4s, v30.4s, v5.4s
1822 add v22.4s, v22.4s, v0.4s
1823 add v17.4s, v17.4s, v2.4s
1824 add v20.4s, v20.4s, v1.4s
1825 eor v25.16b, v25.16b, v23.16b
1826 eor v16.16b, v16.16b, v22.16b
1827 eor v6.16b, v6.16b, v17.16b
1828 eor v4.16b, v4.16b, v20.16b
1829 tbl v25.16b, { v25.16b }, v18.16b
1830 tbl v16.16b, { v16.16b }, v18.16b
1831 tbl v6.16b, { v6.16b }, v18.16b
1832 tbl v4.16b, { v4.16b }, v18.16b
1833 add v19.4s, v19.4s, v25.4s
1834 add v21.4s, v21.4s, v16.4s
1835 add v26.4s, v26.4s, v6.4s
1836 add v7.4s, v7.4s, v4.4s
1837 eor v5.16b, v19.16b, v5.16b
1838 eor v0.16b, v21.16b, v0.16b
1839 eor v2.16b, v26.16b, v2.16b
1840 eor v1.16b, v7.16b, v1.16b
1841 ushr v28.4s, v5.4s, #7
1842 shl v5.4s, v5.4s, #25
1843 ushr v30.4s, v0.4s, #7
1844 shl v0.4s, v0.4s, #25
1845 ushr v31.4s, v2.4s, #7
1846 shl v2.4s, v2.4s, #25
1847 ushr v8.4s, v1.4s, #7
1848 shl v1.4s, v1.4s, #25
1849 orr v5.16b, v5.16b, v28.16b
1851 orr v0.16b, v0.16b, v30.16b
1852 orr v2.16b, v2.16b, v31.16b
1853 orr v1.16b, v1.16b, v8.16b
1854 add v23.4s, v23.4s, v28.4s
1855 add v22.4s, v22.4s, v11.4s
1856 add v17.4s, v17.4s, v15.4s
1857 add v20.4s, v20.4s, v3.4s
1859 add v23.4s, v23.4s, v0.4s
1860 add v22.4s, v22.4s, v2.4s
1861 add v17.4s, v17.4s, v1.4s
1862 add v20.4s, v20.4s, v5.4s
1863 eor v4.16b, v4.16b, v23.16b
1864 eor v25.16b, v25.16b, v22.16b
1865 eor v16.16b, v16.16b, v17.16b
1866 eor v6.16b, v6.16b, v20.16b
1867 tbl v4.16b, { v4.16b }, v27.16b
1868 tbl v25.16b, { v25.16b }, v27.16b
1869 tbl v16.16b, { v16.16b }, v27.16b
1870 tbl v6.16b, { v6.16b }, v27.16b
1871 add v26.4s, v26.4s, v4.4s
1872 add v7.4s, v7.4s, v25.4s
1873 add v19.4s, v19.4s, v16.4s
1874 add v21.4s, v21.4s, v6.4s
1875 eor v0.16b, v26.16b, v0.16b
1876 eor v2.16b, v7.16b, v2.16b
1877 eor v1.16b, v19.16b, v1.16b
1878 eor v5.16b, v21.16b, v5.16b
1879 add v3.4s, v22.4s, v3.4s
1881 ushr v28.4s, v0.4s, #12
1882 shl v0.4s, v0.4s, #20
1883 ushr v29.4s, v2.4s, #12
1884 shl v2.4s, v2.4s, #20
1885 ushr v30.4s, v1.4s, #12
1886 shl v1.4s, v1.4s, #20
1887 ushr v31.4s, v5.4s, #12
1888 shl v5.4s, v5.4s, #20
1889 add v17.4s, v17.4s, v22.4s
1891 orr v0.16b, v0.16b, v28.16b
1892 prfm pldl1keep, [x23, #256]
1893 orr v2.16b, v2.16b, v29.16b
1894 prfm pldl1keep, [x24, #256]
1895 orr v1.16b, v1.16b, v30.16b
1896 prfm pldl1keep, [x22, #256]
1897 orr v5.16b, v5.16b, v31.16b
1898 prfm pldl1keep, [x25, #256]
1899 add v23.4s, v23.4s, v24.4s
1900 add v20.4s, v20.4s, v22.4s
1901 add v3.4s, v3.4s, v2.4s
1902 add v17.4s, v17.4s, v1.4s
1903 add v22.4s, v23.4s, v0.4s
1904 add v20.4s, v20.4s, v5.4s
1905 eor v23.16b, v25.16b, v3.16b
1906 eor v16.16b, v16.16b, v17.16b
1907 eor v4.16b, v4.16b, v22.16b
1908 eor v6.16b, v6.16b, v20.16b
1909 tbl v23.16b, { v23.16b }, v18.16b
1910 tbl v16.16b, { v16.16b }, v18.16b
1911 tbl v4.16b, { v4.16b }, v18.16b
1912 tbl v6.16b, { v6.16b }, v18.16b
1913 add v7.4s, v7.4s, v23.4s
1914 add v19.4s, v19.4s, v16.4s
1915 add v18.4s, v26.4s, v4.4s
1916 add v21.4s, v21.4s, v6.4s
1917 eor v2.16b, v7.16b, v2.16b
1918 eor v1.16b, v19.16b, v1.16b
1919 eor v0.16b, v18.16b, v0.16b
1920 eor v5.16b, v21.16b, v5.16b
1921 ushr v25.4s, v2.4s, #7
1922 shl v2.4s, v2.4s, #25
1923 ushr v24.4s, v0.4s, #7
1924 shl v0.4s, v0.4s, #25
1925 ushr v26.4s, v1.4s, #7
1926 shl v1.4s, v1.4s, #25
1927 ushr v27.4s, v5.4s, #7
1928 shl v5.4s, v5.4s, #25
1929 orr v0.16b, v0.16b, v24.16b
1930 orr v2.16b, v2.16b, v25.16b
1931 orr v1.16b, v1.16b, v26.16b
1932 orr v5.16b, v5.16b, v27.16b
1934 eor v29.16b, v19.16b, v22.16b
1935 eor v8.16b, v21.16b, v3.16b
1936 eor v30.16b, v17.16b, v18.16b
1937 eor v31.16b, v20.16b, v7.16b
1938 eor v24.16b, v5.16b, v23.16b
1939 eor v18.16b, v0.16b, v16.16b
1940 eor v25.16b, v2.16b, v6.16b
1941 eor v26.16b, v1.16b, v4.16b
1947 ldr q0, [x11, :lo12:.LCPI3_1]
1949 ldr q2, [x10, :lo12:.LCPI3_2]
1950 ldr q1, [x12, :lo12:.LCPI3_3]
1968 add v5.4s, v5.4s, v4.4s
1972 uzp1 v17.4s, v6.4s, v7.4s
1974 add v5.4s, v5.4s, v17.4s
1975 eor v16.16b, v5.16b, v16.16b
1976 tbl v16.16b, { v16.16b }, v0.16b
1977 add v18.4s, v16.4s, v1.4s
1978 eor v19.16b, v18.16b, v4.16b
1979 uzp2 v4.4s, v6.4s, v7.4s
1980 ushr v6.4s, v19.4s, #12
1981 shl v7.4s, v19.4s, #20
1982 ld2 { v19.4s, v20.4s }, [x14]
1983 add v5.4s, v5.4s, v4.4s
1985 orr v6.16b, v7.16b, v6.16b
1986 add v5.4s, v5.4s, v6.4s
1987 eor v7.16b, v16.16b, v5.16b
1988 add v5.4s, v5.4s, v19.4s
1989 tbl v7.16b, { v7.16b }, v2.16b
1990 ext v5.16b, v5.16b, v5.16b, #12
1991 add v16.4s, v18.4s, v7.4s
1992 ext v7.16b, v7.16b, v7.16b, #8
1993 eor v6.16b, v6.16b, v16.16b
1994 ext v16.16b, v16.16b, v16.16b, #4
1995 ushr v18.4s, v6.4s, #7
1996 shl v6.4s, v6.4s, #25
1997 orr v6.16b, v6.16b, v18.16b
1998 ext v18.16b, v20.16b, v20.16b, #12
1999 add v5.4s, v5.4s, v6.4s
2000 eor v7.16b, v5.16b, v7.16b
2001 add v5.4s, v5.4s, v18.4s
2002 tbl v7.16b, { v7.16b }, v0.16b
2003 add v16.4s, v16.4s, v7.4s
2004 eor v6.16b, v6.16b, v16.16b
2005 ushr v21.4s, v6.4s, #12
2006 shl v6.4s, v6.4s, #20
2007 orr v6.16b, v6.16b, v21.16b
2008 uzp1 v21.4s, v17.4s, v17.4s
2009 add v5.4s, v5.4s, v6.4s
2010 ext v21.16b, v21.16b, v17.16b, #8
2011 eor v7.16b, v7.16b, v5.16b
2012 uzp2 v21.4s, v21.4s, v4.4s
2013 tbl v7.16b, { v7.16b }, v2.16b
2014 add v5.4s, v5.4s, v21.4s
2015 add v16.4s, v16.4s, v7.4s
2016 ext v5.16b, v5.16b, v5.16b, #4
2017 ext v7.16b, v7.16b, v7.16b, #8
2018 eor v6.16b, v6.16b, v16.16b
2019 ushr v22.4s, v6.4s, #7
2020 shl v6.4s, v6.4s, #25
2021 orr v6.16b, v6.16b, v22.16b
2022 add v22.4s, v5.4s, v6.4s
2023 eor v5.16b, v22.16b, v7.16b
2024 ext v7.16b, v16.16b, v16.16b, #12
2025 tbl v16.16b, { v5.16b }, v0.16b
2026 ext v5.16b, v17.16b, v17.16b, #12
2027 add v7.4s, v7.4s, v16.4s
2028 ext v5.16b, v17.16b, v5.16b, #12
2029 ext v17.16b, v19.16b, v19.16b, #12
2030 mov v19.16b, v18.16b
2031 eor v6.16b, v6.16b, v7.16b
2033 mov v19.s[1], v17.s[2]
2034 ushr v20.4s, v6.4s, #12
2035 shl v6.4s, v6.4s, #20
2036 trn2 v5.4s, v5.4s, v19.4s
2037 orr v6.16b, v6.16b, v20.16b
2038 zip1 v20.2d, v18.2d, v4.2d
2039 zip2 v4.4s, v4.4s, v18.4s
2040 add v19.4s, v6.4s, v5.4s
2041 mov v20.s[3], v17.s[3]
2042 add v19.4s, v19.4s, v22.4s
2043 ext v22.16b, v20.16b, v20.16b, #12
2044 eor v16.16b, v16.16b, v19.16b
2045 ext v19.16b, v19.16b, v19.16b, #12
2046 tbl v16.16b, { v16.16b }, v2.16b
2047 add v7.4s, v7.4s, v16.4s
2048 ext v16.16b, v16.16b, v16.16b, #8
2049 eor v6.16b, v6.16b, v7.16b
2050 ext v7.16b, v7.16b, v7.16b, #4
2051 ushr v23.4s, v6.4s, #7
2052 shl v24.4s, v6.4s, #25
2053 uzp1 v6.4s, v20.4s, v22.4s
2054 orr v20.16b, v24.16b, v23.16b
2055 add v22.4s, v20.4s, v6.4s
2056 add v19.4s, v22.4s, v19.4s
2057 eor v16.16b, v19.16b, v16.16b
2058 tbl v16.16b, { v16.16b }, v0.16b
2059 add v7.4s, v7.4s, v16.4s
2060 eor v18.16b, v20.16b, v7.16b
2061 zip1 v20.4s, v4.4s, v17.4s
2062 zip1 v4.4s, v17.4s, v4.4s
2063 ushr v17.4s, v18.4s, #12
2064 shl v18.4s, v18.4s, #20
2065 ext v20.16b, v4.16b, v20.16b, #8
2066 orr v4.16b, v18.16b, v17.16b
2067 ext v18.16b, v21.16b, v21.16b, #4
2068 add v17.4s, v4.4s, v20.4s
2069 add v17.4s, v17.4s, v19.4s
2070 uzp1 v19.4s, v18.4s, v18.4s
2071 eor v16.16b, v16.16b, v17.16b
2072 ext v19.16b, v19.16b, v18.16b, #8
2073 tbl v16.16b, { v16.16b }, v2.16b
2074 uzp2 v19.4s, v19.4s, v5.4s
2075 add v7.4s, v7.4s, v16.4s
2076 add v17.4s, v17.4s, v19.4s
2077 ext v16.16b, v16.16b, v16.16b, #8
2078 eor v4.16b, v4.16b, v7.16b
2079 ext v17.16b, v17.16b, v17.16b, #4
2080 ext v7.16b, v7.16b, v7.16b, #12
2081 ushr v21.4s, v4.4s, #7
2082 shl v4.4s, v4.4s, #25
2083 orr v4.16b, v4.16b, v21.16b
2084 ext v21.16b, v18.16b, v18.16b, #12
2085 add v17.4s, v17.4s, v4.4s
2086 ext v18.16b, v18.16b, v21.16b, #12
2087 mov v21.16b, v20.16b
2088 eor v16.16b, v17.16b, v16.16b
2089 rev64 v18.4s, v18.4s
2090 mov v21.s[1], v6.s[2]
2091 tbl v16.16b, { v16.16b }, v0.16b
2092 add v7.4s, v7.4s, v16.4s
2093 eor v4.16b, v4.16b, v7.16b
2094 ushr v22.4s, v4.4s, #12
2095 shl v23.4s, v4.4s, #20
2096 trn2 v4.4s, v18.4s, v21.4s
2097 orr v18.16b, v23.16b, v22.16b
2098 add v21.4s, v18.4s, v4.4s
2099 add v17.4s, v21.4s, v17.4s
2100 zip1 v21.2d, v20.2d, v5.2d
2101 zip2 v5.4s, v5.4s, v20.4s
2102 eor v16.16b, v16.16b, v17.16b
2103 mov v21.s[3], v6.s[3]
2104 ext v17.16b, v17.16b, v17.16b, #12
2105 zip1 v20.4s, v5.4s, v6.4s
2106 tbl v16.16b, { v16.16b }, v2.16b
2107 zip1 v5.4s, v6.4s, v5.4s
2108 add v22.4s, v7.4s, v16.4s
2109 ext v16.16b, v16.16b, v16.16b, #8
2110 ext v20.16b, v5.16b, v20.16b, #8
2111 eor v7.16b, v18.16b, v22.16b
2112 ext v18.16b, v21.16b, v21.16b, #12
2113 ushr v23.4s, v7.4s, #7
2114 shl v24.4s, v7.4s, #25
2115 uzp1 v7.4s, v21.4s, v18.4s
2116 orr v18.16b, v24.16b, v23.16b
2117 add v21.4s, v18.4s, v7.4s
2118 add v17.4s, v21.4s, v17.4s
2119 ext v21.16b, v22.16b, v22.16b, #4
2120 eor v16.16b, v17.16b, v16.16b
2121 tbl v16.16b, { v16.16b }, v0.16b
2122 add v21.4s, v21.4s, v16.4s
2123 eor v18.16b, v18.16b, v21.16b
2124 ushr v6.4s, v18.4s, #12
2125 shl v18.4s, v18.4s, #20
2126 orr v5.16b, v18.16b, v6.16b
2127 add v6.4s, v5.4s, v20.4s
2128 add v6.4s, v6.4s, v17.4s
2129 ext v17.16b, v19.16b, v19.16b, #4
2130 eor v16.16b, v16.16b, v6.16b
2131 uzp1 v18.4s, v17.4s, v17.4s
2132 tbl v16.16b, { v16.16b }, v2.16b
2133 ext v18.16b, v18.16b, v17.16b, #8
2134 add v19.4s, v21.4s, v16.4s
2135 uzp2 v18.4s, v18.4s, v4.4s
2136 ext v16.16b, v16.16b, v16.16b, #8
2137 eor v5.16b, v5.16b, v19.16b
2138 add v6.4s, v6.4s, v18.4s
2139 ext v19.16b, v19.16b, v19.16b, #12
2140 ushr v21.4s, v5.4s, #7
2141 shl v5.4s, v5.4s, #25
2142 ext v6.16b, v6.16b, v6.16b, #4
2143 orr v5.16b, v5.16b, v21.16b
2144 ext v21.16b, v17.16b, v17.16b, #12
2145 add v6.4s, v6.4s, v5.4s
2146 ext v17.16b, v17.16b, v21.16b, #12
2147 mov v21.16b, v20.16b
2148 eor v16.16b, v6.16b, v16.16b
2149 rev64 v17.4s, v17.4s
2150 mov v21.s[1], v7.s[2]
2151 tbl v16.16b, { v16.16b }, v0.16b
2152 add v19.4s, v19.4s, v16.4s
2153 eor v5.16b, v5.16b, v19.16b
2154 ushr v22.4s, v5.4s, #12
2155 shl v23.4s, v5.4s, #20
2156 trn2 v5.4s, v17.4s, v21.4s
2157 orr v17.16b, v23.16b, v22.16b
2158 add v21.4s, v17.4s, v5.4s
2159 add v6.4s, v21.4s, v6.4s
2160 eor v16.16b, v16.16b, v6.16b
2161 ext v6.16b, v6.16b, v6.16b, #12
2162 tbl v21.16b, { v16.16b }, v2.16b
2163 zip1 v16.2d, v20.2d, v4.2d
2164 zip2 v4.4s, v4.4s, v20.4s
2165 add v19.4s, v19.4s, v21.4s
2166 mov v16.s[3], v7.s[3]
2167 ext v21.16b, v21.16b, v21.16b, #8
2168 zip1 v20.4s, v4.4s, v7.4s
2169 eor v17.16b, v17.16b, v19.16b
2170 ext v22.16b, v16.16b, v16.16b, #12
2171 ext v19.16b, v19.16b, v19.16b, #4
2172 zip1 v4.4s, v7.4s, v4.4s
2173 ushr v23.4s, v17.4s, #7
2174 shl v17.4s, v17.4s, #25
2175 uzp1 v16.4s, v16.4s, v22.4s
2176 ext v4.16b, v4.16b, v20.16b, #8
2177 orr v17.16b, v17.16b, v23.16b
2178 add v22.4s, v17.4s, v16.4s
2179 add v6.4s, v22.4s, v6.4s
2180 eor v21.16b, v6.16b, v21.16b
2181 tbl v21.16b, { v21.16b }, v0.16b
2182 add v19.4s, v19.4s, v21.4s
2183 eor v17.16b, v17.16b, v19.16b
2184 ushr v7.4s, v17.4s, #12
2185 shl v17.4s, v17.4s, #20
2186 orr v7.16b, v17.16b, v7.16b
2187 add v17.4s, v7.4s, v4.4s
2188 add v6.4s, v17.4s, v6.4s
2189 ext v17.16b, v18.16b, v18.16b, #4
2190 eor v18.16b, v21.16b, v6.16b
2191 uzp1 v20.4s, v17.4s, v17.4s
2192 tbl v18.16b, { v18.16b }, v2.16b
2193 ext v20.16b, v20.16b, v17.16b, #8
2194 add v19.4s, v19.4s, v18.4s
2195 uzp2 v20.4s, v20.4s, v5.4s
2196 ext v18.16b, v18.16b, v18.16b, #8
2197 eor v7.16b, v7.16b, v19.16b
2198 add v6.4s, v6.4s, v20.4s
2199 ushr v21.4s, v7.4s, #7
2200 shl v7.4s, v7.4s, #25
2201 ext v6.16b, v6.16b, v6.16b, #4
2202 orr v7.16b, v7.16b, v21.16b
2203 add v21.4s, v6.4s, v7.4s
2204 eor v6.16b, v21.16b, v18.16b
2205 ext v18.16b, v19.16b, v19.16b, #12
2206 tbl v19.16b, { v6.16b }, v0.16b
2207 ext v6.16b, v17.16b, v17.16b, #12
2208 add v18.4s, v18.4s, v19.4s
2209 ext v6.16b, v17.16b, v6.16b, #12
2211 eor v7.16b, v7.16b, v18.16b
2213 mov v17.s[1], v16.s[2]
2214 ushr v22.4s, v7.4s, #12
2215 shl v7.4s, v7.4s, #20
2216 trn2 v6.4s, v6.4s, v17.4s
2217 orr v7.16b, v7.16b, v22.16b
2218 add v17.4s, v7.4s, v6.4s
2219 add v17.4s, v17.4s, v21.4s
2220 zip1 v21.2d, v4.2d, v5.2d
2221 zip2 v4.4s, v5.4s, v4.4s
2222 eor v19.16b, v19.16b, v17.16b
2223 mov v21.s[3], v16.s[3]
2224 ext v17.16b, v17.16b, v17.16b, #12
2225 tbl v19.16b, { v19.16b }, v2.16b
2226 ext v22.16b, v21.16b, v21.16b, #12
2227 add v18.4s, v18.4s, v19.4s
2228 ext v19.16b, v19.16b, v19.16b, #8
2229 eor v7.16b, v7.16b, v18.16b
2230 ext v18.16b, v18.16b, v18.16b, #4
2231 ushr v23.4s, v7.4s, #7
2232 shl v24.4s, v7.4s, #25
2233 uzp1 v7.4s, v21.4s, v22.4s
2234 orr v21.16b, v24.16b, v23.16b
2235 add v22.4s, v21.4s, v7.4s
2236 add v17.4s, v22.4s, v17.4s
2237 eor v19.16b, v17.16b, v19.16b
2238 tbl v19.16b, { v19.16b }, v0.16b
2239 add v18.4s, v18.4s, v19.4s
2240 eor v5.16b, v21.16b, v18.16b
2241 zip1 v21.4s, v4.4s, v16.4s
2242 zip1 v4.4s, v16.4s, v4.4s
2243 ushr v16.4s, v5.4s, #12
2244 shl v5.4s, v5.4s, #20
2245 ext v21.16b, v4.16b, v21.16b, #8
2246 orr v4.16b, v5.16b, v16.16b
2247 ext v16.16b, v20.16b, v20.16b, #4
2248 mov v23.16b, v21.16b
2249 add v5.4s, v4.4s, v21.4s
2250 mov v23.s[1], v7.s[2]
2251 add v5.4s, v5.4s, v17.4s
2252 eor v17.16b, v19.16b, v5.16b
2253 uzp1 v19.4s, v16.4s, v16.4s
2254 tbl v17.16b, { v17.16b }, v2.16b
2255 ext v19.16b, v19.16b, v16.16b, #8
2256 add v18.4s, v18.4s, v17.4s
2257 uzp2 v19.4s, v19.4s, v6.4s
2258 eor v4.16b, v4.16b, v18.16b
2259 add v5.4s, v5.4s, v19.4s
2260 ext v19.16b, v19.16b, v19.16b, #4
2261 ushr v20.4s, v4.4s, #7
2262 shl v4.4s, v4.4s, #25
2263 ext v5.16b, v5.16b, v5.16b, #4
2264 orr v20.16b, v4.16b, v20.16b
2265 ext v4.16b, v17.16b, v17.16b, #8
2266 add v17.4s, v5.4s, v20.4s
2267 ext v5.16b, v18.16b, v18.16b, #12
2268 eor v4.16b, v17.16b, v4.16b
2269 tbl v18.16b, { v4.16b }, v0.16b
2270 ext v4.16b, v16.16b, v16.16b, #12
2271 add v22.4s, v5.4s, v18.4s
2272 ext v4.16b, v16.16b, v4.16b, #12
2273 eor v5.16b, v20.16b, v22.16b
2275 ushr v20.4s, v5.4s, #12
2276 shl v24.4s, v5.4s, #20
2277 trn2 v5.4s, v16.4s, v23.4s
2278 orr v16.16b, v24.16b, v20.16b
2279 add v20.4s, v16.4s, v5.4s
2280 add v17.4s, v20.4s, v17.4s
2281 zip1 v20.2d, v21.2d, v6.2d
2282 zip2 v6.4s, v6.4s, v21.4s
2283 eor v18.16b, v18.16b, v17.16b
2284 mov v20.s[3], v7.s[3]
2285 ext v17.16b, v17.16b, v17.16b, #12
2286 zip1 v21.4s, v6.4s, v7.4s
2287 tbl v18.16b, { v18.16b }, v2.16b
2288 ext v24.16b, v20.16b, v20.16b, #12
2289 zip1 v6.4s, v7.4s, v6.4s
2290 add v22.4s, v22.4s, v18.4s
2291 ext v18.16b, v18.16b, v18.16b, #8
2292 ext v6.16b, v6.16b, v21.16b, #8
2293 eor v16.16b, v16.16b, v22.16b
2294 ext v22.16b, v22.16b, v22.16b, #4
2295 zip1 v5.2d, v6.2d, v5.2d
2296 zip2 v4.4s, v4.4s, v6.4s
2297 ushr v25.4s, v16.4s, #7
2298 shl v26.4s, v16.4s, #25
2299 uzp1 v16.4s, v20.4s, v24.4s
2300 orr v20.16b, v26.16b, v25.16b
2301 mov v5.s[3], v16.s[3]
2302 add v24.4s, v20.4s, v16.4s
2303 add v17.4s, v24.4s, v17.4s
2304 eor v18.16b, v17.16b, v18.16b
2305 tbl v18.16b, { v18.16b }, v0.16b
2306 add v22.4s, v22.4s, v18.4s
2307 eor v20.16b, v20.16b, v22.16b
2308 ushr v7.4s, v20.4s, #12
2309 shl v20.4s, v20.4s, #20
2310 orr v7.16b, v20.16b, v7.16b
2311 add v20.4s, v7.4s, v6.4s
2312 add v17.4s, v20.4s, v17.4s
2313 ext v20.16b, v19.16b, v19.16b, #8
2314 eor v18.16b, v18.16b, v17.16b
2315 ext v17.16b, v17.16b, v17.16b, #4
2316 tbl v18.16b, { v18.16b }, v2.16b
2317 add v21.4s, v22.4s, v18.4s
2318 uzp2 v22.4s, v20.4s, v23.4s
2319 ext v18.16b, v18.16b, v18.16b, #8
2320 eor v7.16b, v7.16b, v21.16b
2321 ext v20.16b, v22.16b, v20.16b, #4
2322 ushr v22.4s, v7.4s, #7
2323 shl v7.4s, v7.4s, #25
2324 add v17.4s, v17.4s, v20.4s
2325 ext v20.16b, v21.16b, v21.16b, #12
2326 ext v21.16b, v19.16b, v19.16b, #12
2327 orr v7.16b, v7.16b, v22.16b
2328 ext v19.16b, v19.16b, v21.16b, #12
2329 add v17.4s, v17.4s, v7.4s
2331 rev64 v19.4s, v19.4s
2332 eor v18.16b, v17.16b, v18.16b
2333 mov v21.s[1], v16.s[2]
2334 tbl v18.16b, { v18.16b }, v0.16b
2335 trn2 v19.4s, v19.4s, v21.4s
2336 add v20.4s, v20.4s, v18.4s
2337 eor v7.16b, v7.16b, v20.16b
2338 ushr v22.4s, v7.4s, #12
2339 shl v7.4s, v7.4s, #20
2340 orr v7.16b, v7.16b, v22.16b
2341 add v19.4s, v7.4s, v19.4s
2342 add v17.4s, v19.4s, v17.4s
2343 eor v18.16b, v18.16b, v17.16b
2344 ext v17.16b, v17.16b, v17.16b, #12
2345 tbl v18.16b, { v18.16b }, v2.16b
2346 add v19.4s, v20.4s, v18.4s
2347 ext v20.16b, v5.16b, v5.16b, #12
2348 ext v18.16b, v18.16b, v18.16b, #8
2349 eor v7.16b, v7.16b, v19.16b
2350 uzp1 v5.4s, v5.4s, v20.4s
2351 ushr v21.4s, v7.4s, #7
2352 shl v7.4s, v7.4s, #25
2353 orr v7.16b, v7.16b, v21.16b
2354 add v5.4s, v7.4s, v5.4s
2355 add v5.4s, v5.4s, v17.4s
2356 eor v17.16b, v5.16b, v18.16b
2357 ext v18.16b, v19.16b, v19.16b, #4
2358 tbl v17.16b, { v17.16b }, v0.16b
2359 add v18.4s, v18.4s, v17.4s
2360 eor v6.16b, v7.16b, v18.16b
2361 zip1 v7.4s, v4.4s, v16.4s
2362 zip1 v4.4s, v16.4s, v4.4s
2363 ushr v16.4s, v6.4s, #12
2364 shl v6.4s, v6.4s, #20
2365 ext v4.16b, v4.16b, v7.16b, #8
2366 orr v6.16b, v6.16b, v16.16b
2367 add v4.4s, v6.4s, v4.4s
2368 add v4.4s, v4.4s, v5.4s
2369 eor v5.16b, v17.16b, v4.16b
2370 ext v4.16b, v4.16b, v4.16b, #4
2371 tbl v5.16b, { v5.16b }, v2.16b
2372 add v7.4s, v18.4s, v5.4s
2373 eor v6.16b, v6.16b, v7.16b
2374 ext v7.16b, v7.16b, v7.16b, #12
2375 ushr v16.4s, v6.4s, #7
2376 shl v6.4s, v6.4s, #25
2377 orr v6.16b, v6.16b, v16.16b
2378 ext v16.16b, v5.16b, v5.16b, #8
2379 eor v5.16b, v4.16b, v7.16b
2380 eor v4.16b, v6.16b, v16.16b
2388 stp q5, q4, [x8], #32
2392 ldp x20, x19, [sp, #128]
2393 ldp x22, x21, [sp, #112]
2394 ldp x24, x23, [sp, #96]
2395 ldp x26, x25, [sp, #80]
2396 ldp x29, x27, [sp, #64]
2397 ldp d9, d8, [sp, #48]
2398 ldp d11, d10, [sp, #32]
2399 ldp d13, d12, [sp, #16]
2400 ldp d15, d14, [sp], #144
2403 .size zfs_blake3_hash_many_sse41, .Lfunc_end3-zfs_blake3_hash_many_sse41
2405 .section ".note.GNU-stack","",@progbits