]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - secure/lib/libcrypto/aarch64/aesv8-armx.S
Update tcpdump to 4.9.2
[FreeBSD/FreeBSD.git] / secure / lib / libcrypto / aarch64 / aesv8-armx.S
1 /* $FreeBSD$ */
2 /* Do not modify. This file is auto-generated from aesv8-armx.pl. */
3 #include "arm_arch.h"
4
5 #if __ARM_MAX_ARCH__>=7
6 .text
7 .align  5
// Constants for the key-expansion code in aes_v8_set_encrypt_key, which
// reaches this table with `adr x3,rcon`:
//   row 0 - starting round constant 0x01 in every 32-bit lane; the user
//           doubles it with a per-byte shl after each round
//   row 1 - byte-index mask for tbl; stored little-endian each word is the
//           bytes 0x0d,0x0e,0x0f,0x0c, i.e. "take the last 32-bit word of
//           the source, rotate it by one byte, copy it to all four lanes"
//   row 2 - round constant 0x1b, loaded by the 128-bit path for its last
//           two rounds
8 rcon:
9 .long   0x01,0x01,0x01,0x01
10 .long   0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d     // rotate-n-splat
11 .long   0x1b,0x1b,0x1b,0x1b
12
// aes_v8_set_encrypt_key -- expand a raw AES key into a round-key schedule.
//
// In:   x0 = address of the raw key bytes
//       w1 = key length in bits; only 128, 192 and 256 get past the checks
//       x2 = address of the schedule to fill
// Out:  x0 =  0  success
//            -1  x0 or x2 was NULL
//            -2  w1 was not one of the accepted lengths
// On success the round keys start at offset 0 of the schedule and the round
// count (10, 12 or 14) is stored as a 32-bit word at offset 240.
//
// Working registers: v0 = all-zero, v1 = current round constant,
// v2 = tbl mask, v3 (plus v4 for 192/256-bit keys) = most recent key words,
// v5/v6 = temporaries, w1 = loop counter, w12 = round count.
//
// aes_v8_set_decrypt_key enters at .Lenc_key with `bl` and relies on x2
// (left at schedule+240) and w12 still being valid after the return.
//
// Presumably this backs OpenSSL's
//   int aes_v8_set_encrypt_key(const unsigned char *, int, AES_KEY *);
// TODO confirm against the C-side declaration.
13 .globl  aes_v8_set_encrypt_key
14 .type   aes_v8_set_encrypt_key,%function
15 .align  5
16 aes_v8_set_encrypt_key:
17 .Lenc_key:
18         stp     x29,x30,[sp,#-16]!  // frame record; only x29 is reloaded on exit (x30 is never changed here)
19         add     x29,sp,#0
20         mov     x3,#-1  // provisional return value: NULL argument
21         cmp     x0,#0
22         b.eq    .Lenc_key_abort
23         cmp     x2,#0
24         b.eq    .Lenc_key_abort
25         mov     x3,#-2  // provisional return value: bad key length
26         cmp     w1,#128
27         b.lt    .Lenc_key_abort
28         cmp     w1,#256
29         b.gt    .Lenc_key_abort
30         tst     w1,#0x3f  // must be a multiple of 64
31         b.ne    .Lenc_key_abort
32
33         adr     x3,rcon  // x3 becomes the constant-table cursor
34         cmp     w1,#192  // flags are consumed by the three branches below; nothing in between writes them
35
36         eor     v0.16b,v0.16b,v0.16b  // v0 = 0
37         ld1     {v3.16b},[x0],#16  // v3 = first 16 key bytes
38         mov     w1,#8           // reuse w1
39         ld1     {v1.4s,v2.4s},[x3],#32  // v1 = rcon row 0, v2 = tbl mask; x3 -> row 2
40
41         b.lt    .Loop128
42         b.eq    .L192
43         b       .L256
44
            // 128-bit key: 8 looped rounds, then 2 unrolled rounds that use
            // the 0x1b constant row. Each round stores the current round key
            // (v3) and derives the next one from it.
45 .align  4
46 .Loop128:
47         tbl     v6.16b,{v3.16b},v2.16b  // v6 = last word of v3, rotated, in all four lanes
48         ext     v5.16b,v0.16b,v3.16b,#12  // v5 = v3 moved up one word, zero-filled from v0
49         st1     {v3.4s},[x2],#16  // emit the current round key
50         aese    v6.16b,v0.16b  // zero key: applies the S-box step to the splatted word
51         subs    w1,w1,#1  // flags consumed by b.ne at the end of the loop
52
            // The three eor/ext pairs turn v3 into a running xor of its words
            // (lane i = w0 ^ ... ^ wi).
53         eor     v3.16b,v3.16b,v5.16b
54         ext     v5.16b,v0.16b,v5.16b,#12
55         eor     v3.16b,v3.16b,v5.16b
56         ext     v5.16b,v0.16b,v5.16b,#12
57          eor    v6.16b,v6.16b,v1.16b  // add the round constant
58         eor     v3.16b,v3.16b,v5.16b
59         shl     v1.16b,v1.16b,#1  // next round constant (per-byte doubling)
60         eor     v3.16b,v3.16b,v6.16b  // v3 = next round key
61         b.ne    .Loop128
62
63         ld1     {v1.4s},[x3]  // v1 = 0x1b row; eight byte-wise shifts have emptied the old constant
64
            // Round 9 (same steps as the loop body).
65         tbl     v6.16b,{v3.16b},v2.16b
66         ext     v5.16b,v0.16b,v3.16b,#12
67         st1     {v3.4s},[x2],#16
68         aese    v6.16b,v0.16b
69
70         eor     v3.16b,v3.16b,v5.16b
71         ext     v5.16b,v0.16b,v5.16b,#12
72         eor     v3.16b,v3.16b,v5.16b
73         ext     v5.16b,v0.16b,v5.16b,#12
74          eor    v6.16b,v6.16b,v1.16b
75         eor     v3.16b,v3.16b,v5.16b
76         shl     v1.16b,v1.16b,#1
77         eor     v3.16b,v3.16b,v6.16b
78
            // Round 10; the constant is not advanced any further.
79         tbl     v6.16b,{v3.16b},v2.16b
80         ext     v5.16b,v0.16b,v3.16b,#12
81         st1     {v3.4s},[x2],#16
82         aese    v6.16b,v0.16b
83
84         eor     v3.16b,v3.16b,v5.16b
85         ext     v5.16b,v0.16b,v5.16b,#12
86         eor     v3.16b,v3.16b,v5.16b
87         ext     v5.16b,v0.16b,v5.16b,#12
88          eor    v6.16b,v6.16b,v1.16b
89         eor     v3.16b,v3.16b,v5.16b
90         eor     v3.16b,v3.16b,v6.16b
91         st1     {v3.4s},[x2]  // 11th round key at schedule+160, no post-increment
92         add     x2,x2,#0x50  // x2 = schedule+240
93
94         mov     w12,#10  // round count
95         b       .Ldone
96
            // 192-bit key: 8 passes, each emitting 8 bytes from v4 and then
            // 16 bytes from v3.
97 .align  4
98 .L192:
99         ld1     {v4.8b},[x0],#8  // v4 = key bytes 16..23
100         movi    v6.16b,#8                       // borrow v6.16b
101         st1     {v3.4s},[x2],#16
102         sub     v2.16b,v2.16b,v6.16b    // adjust the mask
            // The mask bytes are now 5,6,7,4: word 1 of v4, rotated, splatted.
103
104 .Loop192:
105         tbl     v6.16b,{v4.16b},v2.16b
106         ext     v5.16b,v0.16b,v3.16b,#12
107         st1     {v4.8b},[x2],#8
108         aese    v6.16b,v0.16b
109         subs    w1,w1,#1  // flags consumed by b.ne at the end of the loop
110
111         eor     v3.16b,v3.16b,v5.16b
112         ext     v5.16b,v0.16b,v5.16b,#12
113         eor     v3.16b,v3.16b,v5.16b
114         ext     v5.16b,v0.16b,v5.16b,#12
115         eor     v3.16b,v3.16b,v5.16b
116
            // Extend the running xor into v4: its low two lanes become the
            // next two key words (only those 8 bytes are ever stored).
117         dup     v5.4s,v3.s[3]
118         eor     v5.16b,v5.16b,v4.16b
119          eor    v6.16b,v6.16b,v1.16b
120         ext     v4.16b,v0.16b,v4.16b,#12
121         shl     v1.16b,v1.16b,#1
122         eor     v4.16b,v4.16b,v5.16b
123         eor     v3.16b,v3.16b,v6.16b
124         eor     v4.16b,v4.16b,v6.16b
125         st1     {v3.4s},[x2],#16
126         b.ne    .Loop192
127
128         mov     w12,#12  // round count
129         add     x2,x2,#0x20  // x2 was schedule+208; now schedule+240
130         b       .Ldone
131
            // 256-bit key: each full pass derives v3 (rotate + S-box + round
            // constant) and then v4 (S-box only); the 7th pass leaves after
            // the v3 half, by which point x2 has reached schedule+240.
132 .align  4
133 .L256:
134         ld1     {v4.16b},[x0]  // v4 = key bytes 16..31
135         mov     w1,#7
136         mov     w12,#14  // round count
137         st1     {v3.4s},[x2],#16
138
139 .Loop256:
140         tbl     v6.16b,{v4.16b},v2.16b
141         ext     v5.16b,v0.16b,v3.16b,#12
142         st1     {v4.4s},[x2],#16
143         aese    v6.16b,v0.16b
144         subs    w1,w1,#1  // flags consumed by b.eq .Ldone below
145
146         eor     v3.16b,v3.16b,v5.16b
147         ext     v5.16b,v0.16b,v5.16b,#12
148         eor     v3.16b,v3.16b,v5.16b
149         ext     v5.16b,v0.16b,v5.16b,#12
150          eor    v6.16b,v6.16b,v1.16b
151         eor     v3.16b,v3.16b,v5.16b
152         shl     v1.16b,v1.16b,#1
153         eor     v3.16b,v3.16b,v6.16b
154         st1     {v3.4s},[x2],#16
155         b.eq    .Ldone
156
157         dup     v6.4s,v3.s[3]           // just splat
158         ext     v5.16b,v0.16b,v4.16b,#12
159         aese    v6.16b,v0.16b
160
161         eor     v4.16b,v4.16b,v5.16b
162         ext     v5.16b,v0.16b,v5.16b,#12
163         eor     v4.16b,v4.16b,v5.16b
164         ext     v5.16b,v0.16b,v5.16b,#12
165         eor     v4.16b,v4.16b,v5.16b
166
167         eor     v4.16b,v4.16b,v6.16b
168         b       .Loop256
169
170 .Ldone:
171         str     w12,[x2]  // round count at schedule+240
172         mov     x3,#0  // success
173
174 .Lenc_key_abort:
175         mov     x0,x3                   // return value
176         ldr     x29,[sp],#16  // pop the frame record; x30 still holds its entry value
177         ret
178 .size   aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
179
// aes_v8_set_decrypt_key -- build a key schedule for decryption.
//
// x0, w1 and x2 are handed unchanged to .Lenc_key (the body of
// aes_v8_set_encrypt_key), so they carry the same meaning as there. If the
// expansion fails its non-zero status is returned in x0 as is. Otherwise
// the schedule is converted in place: the order of the round keys is
// reversed, aesimc is applied to every round key except the first and the
// last, and x0 = 0 is returned.
180 .globl  aes_v8_set_decrypt_key
181 .type   aes_v8_set_decrypt_key,%function
182 .align  5
183 aes_v8_set_decrypt_key:
184         stp     x29,x30,[sp,#-16]!
185         add     x29,sp,#0
186         bl      .Lenc_key  // on success leaves x2 = schedule+240 and w12 = round count
187
188         cmp     x0,#0
189         b.ne    .Ldec_key_abort  // pass the error code through
190
191         sub     x2,x2,#240              // restore original x2
192         mov     x4,#-16  // post-index step that walks x0 downwards
193         add     x0,x2,x12,lsl#4 // end of key schedule
194
            // Swap the first and last round keys; these two are not transformed.
195         ld1     {v0.4s},[x2]
196         ld1     {v1.4s},[x0]
197         st1     {v0.4s},[x0],x4
198         st1     {v1.4s},[x2],#16
199
            // Work inwards from both ends, swapping and transforming one pair
            // per pass, until the pointers meet.
200 .Loop_imc:
201         ld1     {v0.4s},[x2]
202         ld1     {v1.4s},[x0]
203         aesimc  v0.16b,v0.16b
204         aesimc  v1.16b,v1.16b
205         st1     {v0.4s},[x0],x4
206         st1     {v1.4s},[x2],#16
207         cmp     x0,x2
208         b.hi    .Loop_imc
209
            // With the even round counts written by .Lenc_key the two pointers
            // now address the same middle key, which is transformed in place.
210         ld1     {v0.4s},[x2]
211         aesimc  v0.16b,v0.16b
212         st1     {v0.4s},[x0]
213
214         eor     x0,x0,x0                // return value
215 .Ldec_key_abort:
216         ldp     x29,x30,[sp],#16  // both registers: the bl above overwrote x30
217         ret
218 .size   aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
// aes_v8_encrypt -- encrypt a single 16-byte block.
//
// In:   x0 = address of the input block
//       x1 = address of the output block
//       x2 = address of the key schedule; the round count is read from
//            offset 240
// Uses w3, x2 (advanced through the schedule), v0-v2 and the flags.
// No stack frame is set up.
//
// The loop issues two rounds per pass, loading the next two round keys
// while the current ones are in use; the code after the loop finishes the
// last two rounds, the final one without aesmc.
219 .globl  aes_v8_encrypt
220 .type   aes_v8_encrypt,%function
221 .align  5
222 aes_v8_encrypt:
223         ldr     w3,[x2,#240]  // w3 = round count
224         ld1     {v0.4s},[x2],#16  // v0 = round key 0
225         ld1     {v2.16b},[x0]  // v2 = block being processed
226         sub     w3,w3,#2  // the last two rounds are handled after the loop
227         ld1     {v1.4s},[x2],#16  // v1 = round key 1
228
229 .Loop_enc:
230         aese    v2.16b,v0.16b
231         aesmc   v2.16b,v2.16b
232         ld1     {v0.4s},[x2],#16
233         subs    w3,w3,#2  // flags consumed by b.gt below
234         aese    v2.16b,v1.16b
235         aesmc   v2.16b,v2.16b
236         ld1     {v1.4s},[x2],#16
237         b.gt    .Loop_enc
238
239         aese    v2.16b,v0.16b
240         aesmc   v2.16b,v2.16b
241         ld1     {v0.4s},[x2]  // v0 = last round key
242         aese    v2.16b,v1.16b  // final round: no aesmc
243         eor     v2.16b,v2.16b,v0.16b  // xor in the last round key
244
245         st1     {v2.16b},[x1]
246         ret
247 .size   aes_v8_encrypt,.-aes_v8_encrypt
// aes_v8_decrypt -- decrypt a single 16-byte block.
//
// In:   x0 = address of the input block
//       x1 = address of the output block
//       x2 = address of the key schedule; the round count is read from
//            offset 240
// Uses w3, x2 (advanced through the schedule), v0-v2 and the flags.
// No stack frame is set up.
//
// Same structure as aes_v8_encrypt with aesd/aesimc in place of
// aese/aesmc. The round keys are consumed in ascending address order, so
// the schedule presumably has to be one prepared by
// aes_v8_set_decrypt_key -- confirm against the callers.
248 .globl  aes_v8_decrypt
249 .type   aes_v8_decrypt,%function
250 .align  5
251 aes_v8_decrypt:
252         ldr     w3,[x2,#240]  // w3 = round count
253         ld1     {v0.4s},[x2],#16  // v0 = round key 0
254         ld1     {v2.16b},[x0]  // v2 = block being processed
255         sub     w3,w3,#2  // the last two rounds are handled after the loop
256         ld1     {v1.4s},[x2],#16  // v1 = round key 1
257
258 .Loop_dec:
259         aesd    v2.16b,v0.16b
260         aesimc  v2.16b,v2.16b
261         ld1     {v0.4s},[x2],#16
262         subs    w3,w3,#2  // flags consumed by b.gt below
263         aesd    v2.16b,v1.16b
264         aesimc  v2.16b,v2.16b
265         ld1     {v1.4s},[x2],#16
266         b.gt    .Loop_dec
267
268         aesd    v2.16b,v0.16b
269         aesimc  v2.16b,v2.16b
270         ld1     {v0.4s},[x2]  // v0 = last round key
271         aesd    v2.16b,v1.16b  // final round: no aesimc
272         eor     v2.16b,v2.16b,v0.16b  // xor in the last round key
273
274         st1     {v2.16b},[x1]
275         ret
276 .size   aes_v8_decrypt,.-aes_v8_decrypt
// aes_v8_cbc_encrypt -- AES-CBC encryption or decryption of whole blocks.
//
// In:   x0 = input address
//       x1 = output address
//       x2 = length in bytes
//       x3 = key schedule address; the round count is read from offset 240
//       x4 = address of the 16-byte IV; read on entry and overwritten on
//            exit with the last ciphertext block
//       w5 = 0 to decrypt, non-zero to encrypt
//
// A length below 16 returns at once without touching the output or the IV.
// A trailing partial block is not processed (the count is masked with -16).
// NOTE(review): the input look-ahead step x8 is only zeroed when the
// remaining count reaches exactly 0, so for a length that is not a multiple
// of 16 the look-ahead load appears to read past the last whole block.
// Callers presumably always pass whole blocks -- confirm.
//
// Registers set up for both directions:
//   v16,v17  round keys 0 and 1
//   v20-v23  the four round keys before the last one, v7 the last one
//   v18,v19  two further round keys (encryption) or data (decryption)
//   v6       chaining value: the IV, then the previous ciphertext block
//   w5       round count minus 8 once setup is done
277 .globl  aes_v8_cbc_encrypt
278 .type   aes_v8_cbc_encrypt,%function
279 .align  5
280 aes_v8_cbc_encrypt:
281         stp     x29,x30,[sp,#-16]!
282         add     x29,sp,#0
283         subs    x2,x2,#16  // x2 = length - 16; flags drive b.lo and csel below
284         mov     x8,#16  // x8 = post-index step for input look-ahead loads
285         b.lo    .Lcbc_abort  // fewer than 16 bytes: nothing to do
286         csel    x8,xzr,x8,eq  // exactly one block: do not step past it
287
            // The flags from this cmp are consumed by `b.eq .Lcbc_dec` further
            // down; none of the instructions in between writes them.
288         cmp     w5,#0                   // en- or decrypting?
289         ldr     w5,[x3,#240]  // w5 = round count
290         and     x2,x2,#-16  // whole blocks only
291         ld1     {v6.16b},[x4]  // v6 = IV
292         ld1     {v0.16b},[x0],x8  // v0 = first input block
293
294         ld1     {v16.4s-v17.4s},[x3]            // load key schedule...
295         sub     w5,w5,#6
296         add     x7,x3,x5,lsl#4  // pointer to last 7 round keys
297         sub     w5,w5,#2  // w5 = rounds - 8 (2 for 10 rounds, 4 for 12, 6 for 14)
298         ld1     {v18.4s-v19.4s},[x7],#32
299         ld1     {v20.4s-v21.4s},[x7],#32
300         ld1     {v22.4s-v23.4s},[x7],#32
301         ld1     {v7.4s},[x7]  // v7 = last round key
302
303         add     x7,x3,#32  // x7 -> round key 2
304         mov     w6,w5
305         b.eq    .Lcbc_dec  // the w5 argument was 0
306
            // ---- encryption ----
307         cmp     w5,#2  // 10-round schedule?
308         eor     v0.16b,v0.16b,v6.16b  // first plaintext block ^ IV
309         eor     v5.16b,v16.16b,v7.16b  // v5 = rndkey[0] ^ rndkey[last]
310         b.eq    .Lcbc_enc128
311
            // 12- and 14-round schedules. Pointers to round keys 4..7 are kept
            // in x6, x12, x14 and x3 (x3 itself is moved), x7 -> round key 1.
312         ld1     {v2.4s-v3.4s},[x7]  // v2,v3 = round keys 2 and 3
313         add     x7,x3,#16
314         add     x6,x3,#16*4
315         add     x12,x3,#16*5
316         aese    v0.16b,v16.16b  // round 0 of the first block
317         aesmc   v0.16b,v0.16b
318         add     x14,x3,#16*6
319         add     x3,x3,#16*7
320         b       .Lenter_cbc_enc
321
322 .align  4
323 .Loop_cbc_enc:
            // Here v16 = next plaintext ^ rndkey[0] ^ rndkey[last] and v0 is the
            // previous block's state before its last round key was applied, so
            // this aese performs the CBC chaining xor and round 0 in one step.
324         aese    v0.16b,v16.16b
325         aesmc   v0.16b,v0.16b
326          st1    {v6.16b},[x1],#16  // emit the previous ciphertext block
327 .Lenter_cbc_enc:
328         aese    v0.16b,v17.16b
329         aesmc   v0.16b,v0.16b
330         aese    v0.16b,v2.16b
331         aesmc   v0.16b,v0.16b
332         ld1     {v16.4s},[x6]  // round key 4
333         cmp     w5,#4  // 12-round schedule? flags consumed by b.eq below
334         aese    v0.16b,v3.16b
335         aesmc   v0.16b,v0.16b
336         ld1     {v17.4s},[x12]  // round key 5
337         b.eq    .Lcbc_enc192
338
            // 14 rounds only: two extra rounds with keys 4 and 5, then keys
            // 6 and 7 are loaded for the shared code below.
339         aese    v0.16b,v16.16b
340         aesmc   v0.16b,v0.16b
341         ld1     {v16.4s},[x14]
342         aese    v0.16b,v17.16b
343         aesmc   v0.16b,v0.16b
344         ld1     {v17.4s},[x3]
345         nop
346
347 .Lcbc_enc192:
348         aese    v0.16b,v16.16b
349         aesmc   v0.16b,v0.16b
350          subs   x2,x2,#16  // one block accounted for; flags kept for csel and b.hs below
351         aese    v0.16b,v17.16b
352         aesmc   v0.16b,v0.16b
353          csel   x8,xzr,x8,eq  // count hit 0: the block loaded next is the last, stop advancing x0
354         aese    v0.16b,v18.16b
355         aesmc   v0.16b,v0.16b
356         aese    v0.16b,v19.16b
357         aesmc   v0.16b,v0.16b
358          ld1    {v16.16b},[x0],x8  // look-ahead: next plaintext block
359         aese    v0.16b,v20.16b
360         aesmc   v0.16b,v0.16b
361          eor    v16.16b,v16.16b,v5.16b  // pre-mix with rndkey[0] ^ rndkey[last]
362         aese    v0.16b,v21.16b
363         aesmc   v0.16b,v0.16b
364          ld1 {v17.4s},[x7]              // re-pre-load rndkey[1]
365         aese    v0.16b,v22.16b
366         aesmc   v0.16b,v0.16b
367         aese    v0.16b,v23.16b
368         eor     v6.16b,v0.16b,v7.16b  // v6 = ciphertext block (also the next chaining value)
369         b.hs    .Loop_cbc_enc
370
371         st1     {v6.16b},[x1],#16  // final ciphertext block
372         b       .Lcbc_done
373
            // 10-round schedule: same scheme, all round keys stay in registers
            // (v17, v2, v3, v18-v23, v7); only v16 is recycled for look-ahead.
374 .align  5
375 .Lcbc_enc128:
376         ld1     {v2.4s-v3.4s},[x7]  // v2,v3 = round keys 2 and 3
377         aese    v0.16b,v16.16b
378         aesmc   v0.16b,v0.16b
379         b       .Lenter_cbc_enc128
380 .Loop_cbc_enc128:
381         aese    v0.16b,v16.16b  // chaining xor and round 0 combined, as in .Loop_cbc_enc
382         aesmc   v0.16b,v0.16b
383          st1    {v6.16b},[x1],#16  // emit the previous ciphertext block
384 .Lenter_cbc_enc128:
385         aese    v0.16b,v17.16b
386         aesmc   v0.16b,v0.16b
387          subs   x2,x2,#16  // flags kept for csel and b.hs below
388         aese    v0.16b,v2.16b
389         aesmc   v0.16b,v0.16b
390          csel   x8,xzr,x8,eq
391         aese    v0.16b,v3.16b
392         aesmc   v0.16b,v0.16b
393         aese    v0.16b,v18.16b
394         aesmc   v0.16b,v0.16b
395         aese    v0.16b,v19.16b
396         aesmc   v0.16b,v0.16b
397          ld1    {v16.16b},[x0],x8  // look-ahead: next plaintext block
398         aese    v0.16b,v20.16b
399         aesmc   v0.16b,v0.16b
400         aese    v0.16b,v21.16b
401         aesmc   v0.16b,v0.16b
402         aese    v0.16b,v22.16b
403         aesmc   v0.16b,v0.16b
404          eor    v16.16b,v16.16b,v5.16b
405         aese    v0.16b,v23.16b
406         eor     v6.16b,v0.16b,v7.16b  // v6 = ciphertext block
407         b.hs    .Loop_cbc_enc128
408
409         st1     {v6.16b},[x1],#16  // final ciphertext block
410         b       .Lcbc_done
            // ---- decryption ----
            // Up to three blocks are in flight in v0, v1 and v18; v2, v3 and v19
            // hold copies of their ciphertext for the chaining xor. v18/v19 are
            // data here, so rounds run over v16/v17 (reloaded through x7)
            // followed by v20-v23 and v7.
411 .align  5
412 .Lcbc_dec:
413         ld1     {v18.16b},[x0],#16  // next 16 input bytes (the first block again when x8 was 0)
414         subs    x2,x2,#32               // bias
415         add     w6,w5,#2  // w6 = rounds - 6, counter for the v16/v17 round loop
416         orr     v3.16b,v0.16b,v0.16b
417         orr     v1.16b,v0.16b,v0.16b
418         orr     v19.16b,v18.16b,v18.16b
419         b.lo    .Lcbc_dec_tail  // fewer than three blocks in total
420
421         orr     v1.16b,v18.16b,v18.16b  // v1 = second block
422         ld1     {v18.16b},[x0],#16  // v18 = third block
423         orr     v2.16b,v0.16b,v0.16b  // v2, v3, v19 = ciphertext copies of v0, v1, v18
424         orr     v3.16b,v1.16b,v1.16b
425         orr     v19.16b,v18.16b,v18.16b
426
427 .Loop3x_cbc_dec:
428         aesd    v0.16b,v16.16b
429         aesimc  v0.16b,v0.16b
430         aesd    v1.16b,v16.16b
431         aesimc  v1.16b,v1.16b
432         aesd    v18.16b,v16.16b
433         aesimc  v18.16b,v18.16b
434         ld1     {v16.4s},[x7],#16
435         subs    w6,w6,#2  // flags consumed by b.gt below
436         aesd    v0.16b,v17.16b
437         aesimc  v0.16b,v0.16b
438         aesd    v1.16b,v17.16b
439         aesimc  v1.16b,v1.16b
440         aesd    v18.16b,v17.16b
441         aesimc  v18.16b,v18.16b
442         ld1     {v17.4s},[x7],#16
443         b.gt    .Loop3x_cbc_dec
444
445         aesd    v0.16b,v16.16b
446         aesimc  v0.16b,v0.16b
447         aesd    v1.16b,v16.16b
448         aesimc  v1.16b,v1.16b
449         aesd    v18.16b,v16.16b
450         aesimc  v18.16b,v18.16b
451          eor    v4.16b,v6.16b,v7.16b  // chaining value ^ last round key, for block 0
452          subs   x2,x2,#0x30  // three blocks accounted for; flags kept for csel and b.hs at the loop end
453          eor    v5.16b,v2.16b,v7.16b  // ciphertext 0 ^ last round key, for block 1
454          csel   x6,x2,x6,lo                     // x6, w6, is zero at this point
455         aesd    v0.16b,v17.16b
456         aesimc  v0.16b,v0.16b
457         aesd    v1.16b,v17.16b
458         aesimc  v1.16b,v1.16b
459         aesd    v18.16b,v17.16b
460         aesimc  v18.16b,v18.16b
461          eor    v17.16b,v3.16b,v7.16b  // ciphertext 1 ^ last round key, for block 2 (v17 is reloaded below)
462          add    x0,x0,x6                // x0 is adjusted in such way that
463                                         // at exit from the loop v1.16b-v18.16b
464                                         // are loaded with last "words"
465          orr    v6.16b,v19.16b,v19.16b  // chaining value for the next group = ciphertext 2
466          mov    x7,x3  // back to the start of the key schedule
467         aesd    v0.16b,v20.16b
468         aesimc  v0.16b,v0.16b
469         aesd    v1.16b,v20.16b
470         aesimc  v1.16b,v1.16b
471         aesd    v18.16b,v20.16b
472         aesimc  v18.16b,v18.16b
473          ld1    {v2.16b},[x0],#16  // next group's ciphertext, loaded ahead
474         aesd    v0.16b,v21.16b
475         aesimc  v0.16b,v0.16b
476         aesd    v1.16b,v21.16b
477         aesimc  v1.16b,v1.16b
478         aesd    v18.16b,v21.16b
479         aesimc  v18.16b,v18.16b
480          ld1    {v3.16b},[x0],#16
481         aesd    v0.16b,v22.16b
482         aesimc  v0.16b,v0.16b
483         aesd    v1.16b,v22.16b
484         aesimc  v1.16b,v1.16b
485         aesd    v18.16b,v22.16b
486         aesimc  v18.16b,v18.16b
487          ld1    {v19.16b},[x0],#16
488         aesd    v0.16b,v23.16b
489         aesd    v1.16b,v23.16b
490         aesd    v18.16b,v23.16b
491          ld1 {v16.4s},[x7],#16  // re-pre-load rndkey[0]
492          add    w6,w5,#2  // reset the round counter
493         eor     v4.16b,v4.16b,v0.16b  // plaintext 0
494         eor     v5.16b,v5.16b,v1.16b  // plaintext 1
495         eor     v18.16b,v18.16b,v17.16b  // plaintext 2
496          ld1 {v17.4s},[x7],#16  // re-pre-load rndkey[1]
497         st1     {v4.16b},[x1],#16
498          orr    v0.16b,v2.16b,v2.16b  // move the look-ahead blocks into the working registers
499         st1     {v5.16b},[x1],#16
500          orr    v1.16b,v3.16b,v3.16b
501         st1     {v18.16b},[x1],#16
502          orr    v18.16b,v19.16b,v19.16b
503         b.hs    .Loop3x_cbc_dec
504
505         cmn     x2,#0x30  // x2 == -48: no blocks left over
506         b.eq    .Lcbc_done
507         nop
508
            // One or two blocks remain. v18 holds the last block and v3 the
            // ciphertext preceding it; v1 holds the block before the last and is
            // only used when two remain. v6 is the incoming chaining value.
509 .Lcbc_dec_tail:
510         aesd    v1.16b,v16.16b
511         aesimc  v1.16b,v1.16b
512         aesd    v18.16b,v16.16b
513         aesimc  v18.16b,v18.16b
514         ld1     {v16.4s},[x7],#16
515         subs    w6,w6,#2  // flags consumed by b.gt below
516         aesd    v1.16b,v17.16b
517         aesimc  v1.16b,v1.16b
518         aesd    v18.16b,v17.16b
519         aesimc  v18.16b,v18.16b
520         ld1     {v17.4s},[x7],#16
521         b.gt    .Lcbc_dec_tail
522
523         aesd    v1.16b,v16.16b
524         aesimc  v1.16b,v1.16b
525         aesd    v18.16b,v16.16b
526         aesimc  v18.16b,v18.16b
527         aesd    v1.16b,v17.16b
528         aesimc  v1.16b,v1.16b
529         aesd    v18.16b,v17.16b
530         aesimc  v18.16b,v18.16b
531         aesd    v1.16b,v20.16b
532         aesimc  v1.16b,v1.16b
533         aesd    v18.16b,v20.16b
534         aesimc  v18.16b,v18.16b
535          cmn    x2,#0x20  // x2 == -32: exactly one block left; flags consumed by b.eq below
536         aesd    v1.16b,v21.16b
537         aesimc  v1.16b,v1.16b
538         aesd    v18.16b,v21.16b
539         aesimc  v18.16b,v18.16b
540          eor    v5.16b,v6.16b,v7.16b  // chaining value ^ last round key
541         aesd    v1.16b,v22.16b
542         aesimc  v1.16b,v1.16b
543         aesd    v18.16b,v22.16b
544         aesimc  v18.16b,v18.16b
545          eor    v17.16b,v3.16b,v7.16b  // preceding ciphertext ^ last round key
546         aesd    v1.16b,v23.16b
547         aesd    v18.16b,v23.16b
548         b.eq    .Lcbc_dec_one
            // Two blocks left.
549         eor     v5.16b,v5.16b,v1.16b
550         eor     v17.16b,v17.16b,v18.16b
551          orr    v6.16b,v19.16b,v19.16b  // chaining value to hand back = last ciphertext block
552         st1     {v5.16b},[x1],#16
553         st1     {v17.16b},[x1],#16
554         b       .Lcbc_done
555
556 .Lcbc_dec_one:
557         eor     v5.16b,v5.16b,v18.16b
558          orr    v6.16b,v19.16b,v19.16b  // chaining value to hand back = last ciphertext block
559         st1     {v5.16b},[x1],#16
560
561 .Lcbc_done:
562         st1     {v6.16b},[x4]  // write the chaining value back over the IV
563 .Lcbc_abort:
564         ldr     x29,[sp],#16  // pop the frame record; x30 is never changed in this function
565         ret
566 .size   aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
// aes_v8_ctr32_encrypt_blocks -- AES in counter mode over whole blocks.
//
// In:   x0 = input address
//       x1 = output address
//       x2 = number of 16-byte blocks
//       x3 = key schedule address; the round count is read from offset 240
//       x4 = address of the 16-byte initial counter block
//
// Only the last 32-bit word of the counter block is stepped, using 32-bit
// adds on w8/w9/w10 (so it wraps at 2^32). The word is byte-reversed before
// being incremented and again before being inserted into a block; the
// initial reversal is skipped when __ARMEB__ is defined. The counter block
// at x4 is never written back.
//
// Blocks are handled three at a time while at least three remain; a tail
// handles a final one or two.
// NOTE(review): x2 == 0 is not special-cased -- it takes the tail path,
// which then stores two blocks. Callers presumably never pass 0 -- confirm.
//
// Register roles: v0, v1, v18 = counter blocks in flight; v6 = copy of the
// original counter block used as a template; w8 = running counter value;
// v16, v17 = round keys reloaded through x7; v20-v23 = the four round keys
// before the last; v7 = last round key; w5 = rounds - 6; w6 = round loop
// counter; x12 = input step for the tail (also reused as scratch).
567 .globl  aes_v8_ctr32_encrypt_blocks
568 .type   aes_v8_ctr32_encrypt_blocks,%function
569 .align  5
570 aes_v8_ctr32_encrypt_blocks:
571         stp             x29,x30,[sp,#-16]!
572         add             x29,sp,#0
573         ldr             w5,[x3,#240]  // w5 = round count
574
575         ldr             w8, [x4, #12]  // w8 = last word of the counter block, as stored
576         ld1             {v0.4s},[x4]  // v0 = counter block
577
578         ld1             {v16.4s-v17.4s},[x3]            // load key schedule...
579         sub             w5,w5,#4
580         mov             x12,#16
581         cmp             x2,#2  // flags consumed by csel and b.ls below; nothing in between writes them
582         add             x7,x3,x5,lsl#4  // pointer to last 5 round keys
583         sub             w5,w5,#2  // w5 = rounds - 6
584         ld1             {v20.4s-v21.4s},[x7],#32
585         ld1             {v22.4s-v23.4s},[x7],#32
586         ld1             {v7.4s},[x7]  // v7 = last round key
587         add             x7,x3,#32  // x7 -> round key 2
588         mov             w6,w5
589         csel    x12,xzr,x12,lo  // fewer than two blocks: the tail must not step past the first
590 #ifndef __ARMEB__
591         rev             w8, w8
592 #endif
593         orr             v1.16b,v0.16b,v0.16b
594         add             w10, w8, #1
595         orr             v18.16b,v0.16b,v0.16b
596         add             w8, w8, #2
597         orr             v6.16b,v0.16b,v0.16b  // v6 = template for later counter blocks
598         rev             w10, w10
599         mov             v1.s[3],w10  // v1 = counter block + 1
600         b.ls            .Lctr32_tail  // two blocks or fewer: tail only
601         rev             w12, w8  // x12 is scratch here; the tail step is recomputed after the loop
602         sub             x2,x2,#3                // bias
603         mov             v18.s[3],w12  // v18 = counter block + 2
604         b               .Loop3x_ctr32
605
606 .align  4
607 .Loop3x_ctr32:
608         aese            v0.16b,v16.16b
609         aesmc           v0.16b,v0.16b
610         aese            v1.16b,v16.16b
611         aesmc           v1.16b,v1.16b
612         aese            v18.16b,v16.16b
613         aesmc           v18.16b,v18.16b
614         ld1             {v16.4s},[x7],#16
615         subs            w6,w6,#2  // flags consumed by b.gt below
616         aese            v0.16b,v17.16b
617         aesmc           v0.16b,v0.16b
618         aese            v1.16b,v17.16b
619         aesmc           v1.16b,v1.16b
620         aese            v18.16b,v17.16b
621         aesmc           v18.16b,v18.16b
622         ld1             {v17.4s},[x7],#16
623         b.gt            .Loop3x_ctr32
624
            // Remaining rounds. The three states move to v4, v5 and v17 so that
            // v0, v1 and v18 can be rebuilt from the template (v6) with the
            // next three counter values while the rounds finish.
625         aese            v0.16b,v16.16b
626         aesmc           v4.16b,v0.16b
627         aese            v1.16b,v16.16b
628         aesmc           v5.16b,v1.16b
629          ld1            {v2.16b},[x0],#16  // input block 0
630          orr            v0.16b,v6.16b,v6.16b
631         aese            v18.16b,v16.16b
632         aesmc           v18.16b,v18.16b
633          ld1            {v3.16b},[x0],#16  // input block 1
634          orr            v1.16b,v6.16b,v6.16b
635         aese            v4.16b,v17.16b
636         aesmc           v4.16b,v4.16b
637         aese            v5.16b,v17.16b
638         aesmc           v5.16b,v5.16b
639          ld1            {v19.16b},[x0],#16  // input block 2
640          mov            x7,x3  // back to the start of the key schedule
641         aese            v18.16b,v17.16b
642         aesmc           v17.16b,v18.16b
643          orr            v18.16b,v6.16b,v6.16b
644          add            w9,w8,#1
645         aese            v4.16b,v20.16b
646         aesmc           v4.16b,v4.16b
647         aese            v5.16b,v20.16b
648         aesmc           v5.16b,v5.16b
649          eor            v2.16b,v2.16b,v7.16b  // fold the last round key into the input
650          add            w10,w8,#2
651         aese            v17.16b,v20.16b
652         aesmc           v17.16b,v17.16b
653          eor            v3.16b,v3.16b,v7.16b
654          add            w8,w8,#3
655         aese            v4.16b,v21.16b
656         aesmc           v4.16b,v4.16b
657         aese            v5.16b,v21.16b
658         aesmc           v5.16b,v5.16b
659          eor            v19.16b,v19.16b,v7.16b
660          rev            w9,w9
661         aese            v17.16b,v21.16b
662         aesmc           v17.16b,v17.16b
663          mov    v0.s[3], w9
664          rev            w10,w10
665         aese            v4.16b,v22.16b
666         aesmc           v4.16b,v4.16b
667         aese            v5.16b,v22.16b
668         aesmc           v5.16b,v5.16b
669          mov    v1.s[3], w10
670          rev            w12,w8
671         aese            v17.16b,v22.16b
672         aesmc           v17.16b,v17.16b
673          mov    v18.s[3], w12
674          subs           x2,x2,#3  // three blocks accounted for; flags kept for b.hs at the loop end
675         aese            v4.16b,v23.16b
676         aese            v5.16b,v23.16b
677         aese            v17.16b,v23.16b
678
679         eor             v2.16b,v2.16b,v4.16b  // output block = input ^ last round key ^ final state
680          ld1     {v16.4s},[x7],#16      // re-pre-load rndkey[0]
681         st1             {v2.16b},[x1],#16
682         eor             v3.16b,v3.16b,v5.16b
683          mov            w6,w5  // reset the round counter
684         st1             {v3.16b},[x1],#16
685         eor             v19.16b,v19.16b,v17.16b
686          ld1     {v17.4s},[x7],#16      // re-pre-load rndkey[1]
687         st1             {v19.16b},[x1],#16
688         b.hs            .Loop3x_ctr32
689
690         adds            x2,x2,#3  // remove the bias: x2 = blocks still to do (0, 1 or 2)
691         b.eq            .Lctr32_done
692         cmp             x2,#1
693         mov             x12,#16
694         csel    x12,xzr,x12,eq  // one block left: the tail must not step past it
695
            // Tail: v0 and v1 are both run through the cipher, but the second
            // result is only stored when two blocks remain.
696 .Lctr32_tail:
697         aese            v0.16b,v16.16b
698         aesmc           v0.16b,v0.16b
699         aese            v1.16b,v16.16b
700         aesmc           v1.16b,v1.16b
701         ld1             {v16.4s},[x7],#16
702         subs            w6,w6,#2  // flags consumed by b.gt below
703         aese            v0.16b,v17.16b
704         aesmc           v0.16b,v0.16b
705         aese            v1.16b,v17.16b
706         aesmc           v1.16b,v1.16b
707         ld1             {v17.4s},[x7],#16
708         b.gt            .Lctr32_tail
709
710         aese            v0.16b,v16.16b
711         aesmc           v0.16b,v0.16b
712         aese            v1.16b,v16.16b
713         aesmc           v1.16b,v1.16b
714         aese            v0.16b,v17.16b
715         aesmc           v0.16b,v0.16b
716         aese            v1.16b,v17.16b
717         aesmc           v1.16b,v1.16b
718          ld1            {v2.16b},[x0],x12  // first input block; x0 stays put when x12 is 0
719         aese            v0.16b,v20.16b
720         aesmc           v0.16b,v0.16b
721         aese            v1.16b,v20.16b
722         aesmc           v1.16b,v1.16b
723          ld1            {v3.16b},[x0]  // second input block, or the first again when x12 is 0
724         aese            v0.16b,v21.16b
725         aesmc           v0.16b,v0.16b
726         aese            v1.16b,v21.16b
727         aesmc           v1.16b,v1.16b
728          eor            v2.16b,v2.16b,v7.16b  // fold the last round key into the input
729         aese            v0.16b,v22.16b
730         aesmc           v0.16b,v0.16b
731         aese            v1.16b,v22.16b
732         aesmc           v1.16b,v1.16b
733          eor            v3.16b,v3.16b,v7.16b
734         aese            v0.16b,v23.16b
735         aese            v1.16b,v23.16b
736
737         cmp             x2,#1  // flags consumed by b.eq below
738         eor             v2.16b,v2.16b,v0.16b
739         eor             v3.16b,v3.16b,v1.16b
740         st1             {v2.16b},[x1],#16
741         b.eq            .Lctr32_done  // exactly one block: skip the second store
742         st1             {v3.16b},[x1]
743
744 .Lctr32_done:
745         ldr             x29,[sp],#16  // pop the frame record; x30 is never changed in this function
746         ret
747 .size   aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
748 #endif