/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from aesni-gcm-x86_64.pl. */
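/*
 * AES-NI + PCLMULQDQ "stitched" AES-GCM for x86_64, generated from the
 * CRYPTOGAMS aesni-gcm-x86_64.pl module: each pass of the inner loop runs
 * the AES rounds for six counter blocks while folding six previously
 * produced ciphertext blocks into the GHASH state.
 *
 * The entry points follow the SysV AMD64 ABI with (per the OpenSSL
 * callers, roughly):
 *	size_t aesni_gcm_encrypt(const void *in, void *out, size_t len,
 *				 const void *key, unsigned char ivec[16],
 *				 u64 *Xi);
 *	size_t aesni_gcm_decrypt(const void *in, void *out, size_t len,
 *				 const void *key, unsigned char ivec[16],
 *				 u64 *Xi);
 * i.e. %rdi = in, %rsi = out, %rdx = len, %rcx = AES key schedule,
 * %r8 = counter block, %r9 = Xi (hash state), with the precomputed GHASH
 * key powers (Htable) assumed to sit right after Xi, as in OpenSSL's
 * GCM128_CONTEXT layout.
 */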
.text

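/*
 * _aesni_ctr32_ghash_6x: the stitched inner loop shared by both entry
 * points.  On entry %xmm1 holds the current counter block, %xmm8 the
 * GHASH accumulator, %rdx the number of 16-byte blocks and %ebx the low
 * 32 bits of the counter as stored in memory (big-endian).  Each
 * iteration produces six blocks of key stream while the six ciphertext
 * blocks queued in the stack frame are multiplied into the hash.
 */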
.type   _aesni_ctr32_ghash_6x,@function
.align  32
_aesni_ctr32_ghash_6x:
        vmovdqu 32(%r11),%xmm2
        subq    $6,%rdx
        vpxor   %xmm4,%xmm4,%xmm4
        vmovdqu 0-128(%rcx),%xmm15
        vpaddb  %xmm2,%xmm1,%xmm10
        vpaddb  %xmm2,%xmm10,%xmm11
        vpaddb  %xmm2,%xmm11,%xmm12
        vpaddb  %xmm2,%xmm12,%xmm13
        vpaddb  %xmm2,%xmm13,%xmm14
        vpxor   %xmm15,%xmm1,%xmm9
        vmovdqu %xmm4,16+8(%rsp)
        jmp     .Loop6x
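/*
 * Main loop.  Adding 0x06000000 to %ebx bumps the least-significant
 * (big-endian) counter byte by 6; a carry means one of the next six
 * counter values would wrap that byte, so .Lhandle_ctr32 recomputes them
 * with full 32-bit big-endian arithmetic instead of the byte-wise vpaddb
 * used here.
 */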
.align  32
.Loop6x:
        addl    $100663296,%ebx
        jc      .Lhandle_ctr32
        vmovdqu 0-32(%r9),%xmm3
        vpaddb  %xmm2,%xmm14,%xmm1
        vpxor   %xmm15,%xmm10,%xmm10
        vpxor   %xmm15,%xmm11,%xmm11

.Lresume_ctr32:
        vmovdqu %xmm1,(%r8)
        vpclmulqdq      $0x10,%xmm3,%xmm7,%xmm5
        vpxor   %xmm15,%xmm12,%xmm12
        vmovups 16-128(%rcx),%xmm2
        vpclmulqdq      $0x01,%xmm3,%xmm7,%xmm6
        xorq    %r12,%r12
        cmpq    %r14,%r15

        vaesenc %xmm2,%xmm9,%xmm9
        vmovdqu 48+8(%rsp),%xmm0
        vpxor   %xmm15,%xmm13,%xmm13
        vpclmulqdq      $0x00,%xmm3,%xmm7,%xmm1
        vaesenc %xmm2,%xmm10,%xmm10
        vpxor   %xmm15,%xmm14,%xmm14
        setnc   %r12b
        vpclmulqdq      $0x11,%xmm3,%xmm7,%xmm7
        vaesenc %xmm2,%xmm11,%xmm11
        vmovdqu 16-32(%r9),%xmm3
        negq    %r12
        vaesenc %xmm2,%xmm12,%xmm12
        vpxor   %xmm5,%xmm6,%xmm6
        vpclmulqdq      $0x00,%xmm3,%xmm0,%xmm5
        vpxor   %xmm4,%xmm8,%xmm8
        vaesenc %xmm2,%xmm13,%xmm13
        vpxor   %xmm5,%xmm1,%xmm4
        andq    $0x60,%r12
        vmovups 32-128(%rcx),%xmm15
        vpclmulqdq      $0x10,%xmm3,%xmm0,%xmm1
        vaesenc %xmm2,%xmm14,%xmm14

        vpclmulqdq      $0x01,%xmm3,%xmm0,%xmm2
        leaq    (%r14,%r12,1),%r14
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   16+8(%rsp),%xmm8,%xmm8
        vpclmulqdq      $0x11,%xmm3,%xmm0,%xmm3
        vmovdqu 64+8(%rsp),%xmm0
        vaesenc %xmm15,%xmm10,%xmm10
        movbeq  88(%r14),%r13
        vaesenc %xmm15,%xmm11,%xmm11
        movbeq  80(%r14),%r12
        vaesenc %xmm15,%xmm12,%xmm12
        movq    %r13,32+8(%rsp)
        vaesenc %xmm15,%xmm13,%xmm13
        movq    %r12,40+8(%rsp)
        vmovdqu 48-32(%r9),%xmm5
        vaesenc %xmm15,%xmm14,%xmm14

        vmovups 48-128(%rcx),%xmm15
        vpxor   %xmm1,%xmm6,%xmm6
        vpclmulqdq      $0x00,%xmm5,%xmm0,%xmm1
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   %xmm2,%xmm6,%xmm6
        vpclmulqdq      $0x10,%xmm5,%xmm0,%xmm2
        vaesenc %xmm15,%xmm10,%xmm10
        vpxor   %xmm3,%xmm7,%xmm7
        vpclmulqdq      $0x01,%xmm5,%xmm0,%xmm3
        vaesenc %xmm15,%xmm11,%xmm11
        vpclmulqdq      $0x11,%xmm5,%xmm0,%xmm5
        vmovdqu 80+8(%rsp),%xmm0
        vaesenc %xmm15,%xmm12,%xmm12
        vaesenc %xmm15,%xmm13,%xmm13
        vpxor   %xmm1,%xmm4,%xmm4
        vmovdqu 64-32(%r9),%xmm1
        vaesenc %xmm15,%xmm14,%xmm14

        vmovups 64-128(%rcx),%xmm15
        vpxor   %xmm2,%xmm6,%xmm6
        vpclmulqdq      $0x00,%xmm1,%xmm0,%xmm2
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   %xmm3,%xmm6,%xmm6
        vpclmulqdq      $0x10,%xmm1,%xmm0,%xmm3
        vaesenc %xmm15,%xmm10,%xmm10
        movbeq  72(%r14),%r13
        vpxor   %xmm5,%xmm7,%xmm7
        vpclmulqdq      $0x01,%xmm1,%xmm0,%xmm5
        vaesenc %xmm15,%xmm11,%xmm11
        movbeq  64(%r14),%r12
        vpclmulqdq      $0x11,%xmm1,%xmm0,%xmm1
        vmovdqu 96+8(%rsp),%xmm0
        vaesenc %xmm15,%xmm12,%xmm12
        movq    %r13,48+8(%rsp)
        vaesenc %xmm15,%xmm13,%xmm13
        movq    %r12,56+8(%rsp)
        vpxor   %xmm2,%xmm4,%xmm4
        vmovdqu 96-32(%r9),%xmm2
        vaesenc %xmm15,%xmm14,%xmm14

        vmovups 80-128(%rcx),%xmm15
        vpxor   %xmm3,%xmm6,%xmm6
        vpclmulqdq      $0x00,%xmm2,%xmm0,%xmm3
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   %xmm5,%xmm6,%xmm6
        vpclmulqdq      $0x10,%xmm2,%xmm0,%xmm5
        vaesenc %xmm15,%xmm10,%xmm10
        movbeq  56(%r14),%r13
        vpxor   %xmm1,%xmm7,%xmm7
        vpclmulqdq      $0x01,%xmm2,%xmm0,%xmm1
        vpxor   112+8(%rsp),%xmm8,%xmm8
        vaesenc %xmm15,%xmm11,%xmm11
        movbeq  48(%r14),%r12
        vpclmulqdq      $0x11,%xmm2,%xmm0,%xmm2
        vaesenc %xmm15,%xmm12,%xmm12
        movq    %r13,64+8(%rsp)
        vaesenc %xmm15,%xmm13,%xmm13
        movq    %r12,72+8(%rsp)
        vpxor   %xmm3,%xmm4,%xmm4
        vmovdqu 112-32(%r9),%xmm3
        vaesenc %xmm15,%xmm14,%xmm14

        vmovups 96-128(%rcx),%xmm15
        vpxor   %xmm5,%xmm6,%xmm6
        vpclmulqdq      $0x10,%xmm3,%xmm8,%xmm5
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   %xmm1,%xmm6,%xmm6
        vpclmulqdq      $0x01,%xmm3,%xmm8,%xmm1
        vaesenc %xmm15,%xmm10,%xmm10
        movbeq  40(%r14),%r13
        vpxor   %xmm2,%xmm7,%xmm7
        vpclmulqdq      $0x00,%xmm3,%xmm8,%xmm2
        vaesenc %xmm15,%xmm11,%xmm11
        movbeq  32(%r14),%r12
        vpclmulqdq      $0x11,%xmm3,%xmm8,%xmm8
        vaesenc %xmm15,%xmm12,%xmm12
        movq    %r13,80+8(%rsp)
        vaesenc %xmm15,%xmm13,%xmm13
        movq    %r12,88+8(%rsp)
        vpxor   %xmm5,%xmm6,%xmm6
        vaesenc %xmm15,%xmm14,%xmm14
        vpxor   %xmm1,%xmm6,%xmm6

        vmovups 112-128(%rcx),%xmm15
        vpslldq $8,%xmm6,%xmm5
        vpxor   %xmm2,%xmm4,%xmm4
        vmovdqu 16(%r11),%xmm3

        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   %xmm8,%xmm7,%xmm7
        vaesenc %xmm15,%xmm10,%xmm10
        vpxor   %xmm5,%xmm4,%xmm4
        movbeq  24(%r14),%r13
        vaesenc %xmm15,%xmm11,%xmm11
        movbeq  16(%r14),%r12
        vpalignr        $8,%xmm4,%xmm4,%xmm0
        vpclmulqdq      $0x10,%xmm3,%xmm4,%xmm4
        movq    %r13,96+8(%rsp)
        vaesenc %xmm15,%xmm12,%xmm12
        movq    %r12,104+8(%rsp)
        vaesenc %xmm15,%xmm13,%xmm13
        vmovups 128-128(%rcx),%xmm1
        vaesenc %xmm15,%xmm14,%xmm14

        vaesenc %xmm1,%xmm9,%xmm9
        vmovups 144-128(%rcx),%xmm15
        vaesenc %xmm1,%xmm10,%xmm10
        vpsrldq $8,%xmm6,%xmm6
        vaesenc %xmm1,%xmm11,%xmm11
        vpxor   %xmm6,%xmm7,%xmm7
        vaesenc %xmm1,%xmm12,%xmm12
        vpxor   %xmm0,%xmm4,%xmm4
        movbeq  8(%r14),%r13
        vaesenc %xmm1,%xmm13,%xmm13
        movbeq  0(%r14),%r12
        vaesenc %xmm1,%xmm14,%xmm14
        vmovups 160-128(%rcx),%xmm1
        cmpl    $11,%ebp
        jb      .Lenc_tail

        vaesenc %xmm15,%xmm9,%xmm9
        vaesenc %xmm15,%xmm10,%xmm10
        vaesenc %xmm15,%xmm11,%xmm11
        vaesenc %xmm15,%xmm12,%xmm12
        vaesenc %xmm15,%xmm13,%xmm13
        vaesenc %xmm15,%xmm14,%xmm14

        vaesenc %xmm1,%xmm9,%xmm9
        vaesenc %xmm1,%xmm10,%xmm10
        vaesenc %xmm1,%xmm11,%xmm11
        vaesenc %xmm1,%xmm12,%xmm12
        vaesenc %xmm1,%xmm13,%xmm13
        vmovups 176-128(%rcx),%xmm15
        vaesenc %xmm1,%xmm14,%xmm14
        vmovups 192-128(%rcx),%xmm1
        je      .Lenc_tail

        vaesenc %xmm15,%xmm9,%xmm9
        vaesenc %xmm15,%xmm10,%xmm10
        vaesenc %xmm15,%xmm11,%xmm11
        vaesenc %xmm15,%xmm12,%xmm12
        vaesenc %xmm15,%xmm13,%xmm13
        vaesenc %xmm15,%xmm14,%xmm14

        vaesenc %xmm1,%xmm9,%xmm9
        vaesenc %xmm1,%xmm10,%xmm10
        vaesenc %xmm1,%xmm11,%xmm11
        vaesenc %xmm1,%xmm12,%xmm12
        vaesenc %xmm1,%xmm13,%xmm13
        vmovups 208-128(%rcx),%xmm15
        vaesenc %xmm1,%xmm14,%xmm14
        vmovups 224-128(%rcx),%xmm1
        jmp     .Lenc_tail

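/*
 * Counter byte wrap-around: byte-swap the counter, generate the next five
 * counter values (and the one after them) with 32-bit vpaddd, swap them
 * back and rejoin the main loop.
 */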
.align  32
.Lhandle_ctr32:
        vmovdqu (%r11),%xmm0
        vpshufb %xmm0,%xmm1,%xmm6
        vmovdqu 48(%r11),%xmm5
        vpaddd  64(%r11),%xmm6,%xmm10
        vpaddd  %xmm5,%xmm6,%xmm11
        vmovdqu 0-32(%r9),%xmm3
        vpaddd  %xmm5,%xmm10,%xmm12
        vpshufb %xmm0,%xmm10,%xmm10
        vpaddd  %xmm5,%xmm11,%xmm13
        vpshufb %xmm0,%xmm11,%xmm11
        vpxor   %xmm15,%xmm10,%xmm10
        vpaddd  %xmm5,%xmm12,%xmm14
        vpshufb %xmm0,%xmm12,%xmm12
        vpxor   %xmm15,%xmm11,%xmm11
        vpaddd  %xmm5,%xmm13,%xmm1
        vpshufb %xmm0,%xmm13,%xmm13
        vpshufb %xmm0,%xmm14,%xmm14
        vpshufb %xmm0,%xmm1,%xmm1
        jmp     .Lresume_ctr32

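/*
 * .Lenc_tail: one more round with %xmm15, then vaesenclast against the
 * last round key already XORed with the six input blocks, so the results
 * are the six output blocks.  The code after the last-round sequence
 * stores them, advances the pointers and rotates the freshly incremented
 * counters into place for the next iteration.
 */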
.align  32
.Lenc_tail:
        vaesenc %xmm15,%xmm9,%xmm9
        vmovdqu %xmm7,16+8(%rsp)
        vpalignr        $8,%xmm4,%xmm4,%xmm8
        vaesenc %xmm15,%xmm10,%xmm10
        vpclmulqdq      $0x10,%xmm3,%xmm4,%xmm4
        vpxor   0(%rdi),%xmm1,%xmm2
        vaesenc %xmm15,%xmm11,%xmm11
        vpxor   16(%rdi),%xmm1,%xmm0
        vaesenc %xmm15,%xmm12,%xmm12
        vpxor   32(%rdi),%xmm1,%xmm5
        vaesenc %xmm15,%xmm13,%xmm13
        vpxor   48(%rdi),%xmm1,%xmm6
        vaesenc %xmm15,%xmm14,%xmm14
        vpxor   64(%rdi),%xmm1,%xmm7
        vpxor   80(%rdi),%xmm1,%xmm3
        vmovdqu (%r8),%xmm1

        vaesenclast     %xmm2,%xmm9,%xmm9
        vmovdqu 32(%r11),%xmm2
        vaesenclast     %xmm0,%xmm10,%xmm10
        vpaddb  %xmm2,%xmm1,%xmm0
        movq    %r13,112+8(%rsp)
        leaq    96(%rdi),%rdi
        vaesenclast     %xmm5,%xmm11,%xmm11
        vpaddb  %xmm2,%xmm0,%xmm5
        movq    %r12,120+8(%rsp)
        leaq    96(%rsi),%rsi
        vmovdqu 0-128(%rcx),%xmm15
        vaesenclast     %xmm6,%xmm12,%xmm12
        vpaddb  %xmm2,%xmm5,%xmm6
        vaesenclast     %xmm7,%xmm13,%xmm13
        vpaddb  %xmm2,%xmm6,%xmm7
        vaesenclast     %xmm3,%xmm14,%xmm14
        vpaddb  %xmm2,%xmm7,%xmm3

        addq    $0x60,%r10
        subq    $0x6,%rdx
        jc      .L6x_done

        vmovups %xmm9,-96(%rsi)
        vpxor   %xmm15,%xmm1,%xmm9
        vmovups %xmm10,-80(%rsi)
        vmovdqa %xmm0,%xmm10
        vmovups %xmm11,-64(%rsi)
        vmovdqa %xmm5,%xmm11
        vmovups %xmm12,-48(%rsi)
        vmovdqa %xmm6,%xmm12
        vmovups %xmm13,-32(%rsi)
        vmovdqa %xmm7,%xmm13
        vmovups %xmm14,-16(%rsi)
        vmovdqa %xmm3,%xmm14
        vmovdqu 32+8(%rsp),%xmm7
        jmp     .Loop6x

.L6x_done:
        vpxor   16+8(%rsp),%xmm8,%xmm8
        vpxor   %xmm4,%xmm8,%xmm8

        .byte   0xf3,0xc3
.size   _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
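/*
 * size_t aesni_gcm_decrypt(in, out, len, key, ivec, Xi)
 *
 * Decrypts in multiples of 96 bytes (six blocks); anything shorter than
 * 0x60 bytes is rejected and 0 is returned.  The number of bytes actually
 * processed comes back in %rax and the caller handles any remainder.
 */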
.globl  aesni_gcm_decrypt
.type   aesni_gcm_decrypt,@function
.align  32
aesni_gcm_decrypt:
.cfi_startproc
        xorq    %r10,%r10
        cmpq    $0x60,%rdx
        jb      .Lgcm_dec_abort

        leaq    (%rsp),%rax
.cfi_def_cfa_register   %rax
        pushq   %rbx
.cfi_offset     %rbx,-16
        pushq   %rbp
.cfi_offset     %rbp,-24
        pushq   %r12
.cfi_offset     %r12,-32
        pushq   %r13
.cfi_offset     %r13,-40
        pushq   %r14
.cfi_offset     %r14,-48
        pushq   %r15
.cfi_offset     %r15,-56
        vzeroupper

        vmovdqu (%r8),%xmm1
        addq    $-128,%rsp
        movl    12(%r8),%ebx
        leaq    .Lbswap_mask(%rip),%r11
        leaq    -128(%rcx),%r14
        movq    $0xf80,%r15
        vmovdqu (%r9),%xmm8
        andq    $-128,%rsp
        vmovdqu (%r11),%xmm0
        leaq    128(%rcx),%rcx
        leaq    32+32(%r9),%r9
        movl    240-128(%rcx),%ebp
        vpshufb %xmm0,%xmm8,%xmm8

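/*
 * Compare the low address bits (mask 0xf80) of the key schedule and the
 * stack frame, and lower %rsp when they land too close together (within
 * 768 bytes), presumably so round-key loads and stack traffic do not
 * contend for the same cache sets.
 */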
        andq    %r15,%r14
        andq    %rsp,%r15
        subq    %r14,%r15
        jc      .Ldec_no_key_aliasing
        cmpq    $768,%r15
        jnc     .Ldec_no_key_aliasing
        subq    %r15,%rsp
.Ldec_no_key_aliasing:

        vmovdqu 80(%rdi),%xmm7
        leaq    (%rdi),%r14
        vmovdqu 64(%rdi),%xmm4
        leaq    -192(%rdi,%rdx,1),%r15
        vmovdqu 48(%rdi),%xmm5
        shrq    $4,%rdx
        xorq    %r10,%r10
        vmovdqu 32(%rdi),%xmm6
        vpshufb %xmm0,%xmm7,%xmm7
        vmovdqu 16(%rdi),%xmm2
        vpshufb %xmm0,%xmm4,%xmm4
        vmovdqu (%rdi),%xmm3
        vpshufb %xmm0,%xmm5,%xmm5
        vmovdqu %xmm4,48(%rsp)
        vpshufb %xmm0,%xmm6,%xmm6
        vmovdqu %xmm5,64(%rsp)
        vpshufb %xmm0,%xmm2,%xmm2
        vmovdqu %xmm6,80(%rsp)
        vpshufb %xmm0,%xmm3,%xmm3
        vmovdqu %xmm2,96(%rsp)
        vmovdqu %xmm3,112(%rsp)

        call    _aesni_ctr32_ghash_6x

        vmovups %xmm9,-96(%rsi)
        vmovups %xmm10,-80(%rsi)
        vmovups %xmm11,-64(%rsi)
        vmovups %xmm12,-48(%rsi)
        vmovups %xmm13,-32(%rsi)
        vmovups %xmm14,-16(%rsi)

        vpshufb (%r11),%xmm8,%xmm8
        vmovdqu %xmm8,-64(%r9)

        vzeroupper
        movq    -48(%rax),%r15
.cfi_restore    %r15
        movq    -40(%rax),%r14
.cfi_restore    %r14
        movq    -32(%rax),%r13
.cfi_restore    %r13
        movq    -24(%rax),%r12
.cfi_restore    %r12
        movq    -16(%rax),%rbp
.cfi_restore    %rbp
        movq    -8(%rax),%rbx
.cfi_restore    %rbx
        leaq    (%rax),%rsp
.cfi_def_cfa_register   %rsp
.Lgcm_dec_abort:
        movq    %r10,%rax
        .byte   0xf3,0xc3
.cfi_endproc
.size   aesni_gcm_decrypt,.-aesni_gcm_decrypt
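/*
 * _aesni_ctr32_6x: plain CTR encryption of six blocks, no GHASH.  The
 * encrypt path calls it twice to produce the first twelve ciphertext
 * blocks, which the stitched loop then hashes while encrypting the rest.
 */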
.type   _aesni_ctr32_6x,@function
.align  32
_aesni_ctr32_6x:
        vmovdqu 0-128(%rcx),%xmm4
        vmovdqu 32(%r11),%xmm2
        leaq    -1(%rbp),%r13
        vmovups 16-128(%rcx),%xmm15
        leaq    32-128(%rcx),%r12
        vpxor   %xmm4,%xmm1,%xmm9
        addl    $100663296,%ebx
        jc      .Lhandle_ctr32_2
        vpaddb  %xmm2,%xmm1,%xmm10
        vpaddb  %xmm2,%xmm10,%xmm11
        vpxor   %xmm4,%xmm10,%xmm10
        vpaddb  %xmm2,%xmm11,%xmm12
        vpxor   %xmm4,%xmm11,%xmm11
        vpaddb  %xmm2,%xmm12,%xmm13
        vpxor   %xmm4,%xmm12,%xmm12
        vpaddb  %xmm2,%xmm13,%xmm14
        vpxor   %xmm4,%xmm13,%xmm13
        vpaddb  %xmm2,%xmm14,%xmm1
        vpxor   %xmm4,%xmm14,%xmm14
        jmp     .Loop_ctr32

.align  16
.Loop_ctr32:
        vaesenc %xmm15,%xmm9,%xmm9
        vaesenc %xmm15,%xmm10,%xmm10
        vaesenc %xmm15,%xmm11,%xmm11
        vaesenc %xmm15,%xmm12,%xmm12
        vaesenc %xmm15,%xmm13,%xmm13
        vaesenc %xmm15,%xmm14,%xmm14
        vmovups (%r12),%xmm15
        leaq    16(%r12),%r12
        decl    %r13d
        jnz     .Loop_ctr32

        vmovdqu (%r12),%xmm3
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   0(%rdi),%xmm3,%xmm4
        vaesenc %xmm15,%xmm10,%xmm10
        vpxor   16(%rdi),%xmm3,%xmm5
        vaesenc %xmm15,%xmm11,%xmm11
        vpxor   32(%rdi),%xmm3,%xmm6
        vaesenc %xmm15,%xmm12,%xmm12
        vpxor   48(%rdi),%xmm3,%xmm8
        vaesenc %xmm15,%xmm13,%xmm13
        vpxor   64(%rdi),%xmm3,%xmm2
        vaesenc %xmm15,%xmm14,%xmm14
        vpxor   80(%rdi),%xmm3,%xmm3
        leaq    96(%rdi),%rdi

        vaesenclast     %xmm4,%xmm9,%xmm9
        vaesenclast     %xmm5,%xmm10,%xmm10
        vaesenclast     %xmm6,%xmm11,%xmm11
        vaesenclast     %xmm8,%xmm12,%xmm12
        vaesenclast     %xmm2,%xmm13,%xmm13
        vaesenclast     %xmm3,%xmm14,%xmm14
        vmovups %xmm9,0(%rsi)
        vmovups %xmm10,16(%rsi)
        vmovups %xmm11,32(%rsi)
        vmovups %xmm12,48(%rsi)
        vmovups %xmm13,64(%rsi)
        vmovups %xmm14,80(%rsi)
        leaq    96(%rsi),%rsi

        .byte   0xf3,0xc3
.align  32
.Lhandle_ctr32_2:
        vpshufb %xmm0,%xmm1,%xmm6
        vmovdqu 48(%r11),%xmm5
        vpaddd  64(%r11),%xmm6,%xmm10
        vpaddd  %xmm5,%xmm6,%xmm11
        vpaddd  %xmm5,%xmm10,%xmm12
        vpshufb %xmm0,%xmm10,%xmm10
        vpaddd  %xmm5,%xmm11,%xmm13
        vpshufb %xmm0,%xmm11,%xmm11
        vpxor   %xmm4,%xmm10,%xmm10
        vpaddd  %xmm5,%xmm12,%xmm14
        vpshufb %xmm0,%xmm12,%xmm12
        vpxor   %xmm4,%xmm11,%xmm11
        vpaddd  %xmm5,%xmm13,%xmm1
        vpshufb %xmm0,%xmm13,%xmm13
        vpxor   %xmm4,%xmm12,%xmm12
        vpshufb %xmm0,%xmm14,%xmm14
        vpxor   %xmm4,%xmm13,%xmm13
        vpshufb %xmm0,%xmm1,%xmm1
        vpxor   %xmm4,%xmm14,%xmm14
        jmp     .Loop_ctr32
.size   _aesni_ctr32_6x,.-_aesni_ctr32_6x

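/*
 * size_t aesni_gcm_encrypt(in, out, len, key, ivec, Xi)
 *
 * Requires at least 288 bytes (18 blocks): GHASH runs one six-block batch
 * behind encryption, so twelve blocks are encrypted up front and the last
 * twelve are hashed after the main loop.  Returns the number of bytes
 * processed in %rax, or 0 if the input is too short.
 */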
.globl  aesni_gcm_encrypt
.type   aesni_gcm_encrypt,@function
.align  32
aesni_gcm_encrypt:
.cfi_startproc
        xorq    %r10,%r10
        cmpq    $288,%rdx
        jb      .Lgcm_enc_abort

        leaq    (%rsp),%rax
.cfi_def_cfa_register   %rax
        pushq   %rbx
.cfi_offset     %rbx,-16
        pushq   %rbp
.cfi_offset     %rbp,-24
        pushq   %r12
.cfi_offset     %r12,-32
        pushq   %r13
.cfi_offset     %r13,-40
        pushq   %r14
.cfi_offset     %r14,-48
        pushq   %r15
.cfi_offset     %r15,-56
        vzeroupper

        vmovdqu (%r8),%xmm1
        addq    $-128,%rsp
        movl    12(%r8),%ebx
        leaq    .Lbswap_mask(%rip),%r11
        leaq    -128(%rcx),%r14
        movq    $0xf80,%r15
        leaq    128(%rcx),%rcx
        vmovdqu (%r11),%xmm0
        andq    $-128,%rsp
        movl    240-128(%rcx),%ebp

        andq    %r15,%r14
        andq    %rsp,%r15
        subq    %r14,%r15
        jc      .Lenc_no_key_aliasing
        cmpq    $768,%r15
        jnc     .Lenc_no_key_aliasing
        subq    %r15,%rsp
.Lenc_no_key_aliasing:

        leaq    (%rsi),%r14
        leaq    -192(%rsi,%rdx,1),%r15
        shrq    $4,%rdx

        call    _aesni_ctr32_6x
        vpshufb %xmm0,%xmm9,%xmm8
        vpshufb %xmm0,%xmm10,%xmm2
        vmovdqu %xmm8,112(%rsp)
        vpshufb %xmm0,%xmm11,%xmm4
        vmovdqu %xmm2,96(%rsp)
        vpshufb %xmm0,%xmm12,%xmm5
        vmovdqu %xmm4,80(%rsp)
        vpshufb %xmm0,%xmm13,%xmm6
        vmovdqu %xmm5,64(%rsp)
        vpshufb %xmm0,%xmm14,%xmm7
        vmovdqu %xmm6,48(%rsp)

        call    _aesni_ctr32_6x

        vmovdqu (%r9),%xmm8
        leaq    32+32(%r9),%r9
        subq    $12,%rdx
        movq    $192,%r10
        vpshufb %xmm0,%xmm8,%xmm8

        call    _aesni_ctr32_ghash_6x
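/*
 * The stitched loop leaves the last twelve ciphertext blocks unhashed
 * (six parked, byte-reflected, in the stack frame and six still in
 * %xmm9-%xmm14).  Fold them into the accumulator with the precomputed key
 * powers, perform the final reduction modulo the GHASH polynomial
 * (.Lpoly) and store Xi back.
 */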
        vmovdqu 32(%rsp),%xmm7
        vmovdqu (%r11),%xmm0
        vmovdqu 0-32(%r9),%xmm3
        vpunpckhqdq     %xmm7,%xmm7,%xmm1
        vmovdqu 32-32(%r9),%xmm15
        vmovups %xmm9,-96(%rsi)
        vpshufb %xmm0,%xmm9,%xmm9
        vpxor   %xmm7,%xmm1,%xmm1
        vmovups %xmm10,-80(%rsi)
        vpshufb %xmm0,%xmm10,%xmm10
        vmovups %xmm11,-64(%rsi)
        vpshufb %xmm0,%xmm11,%xmm11
        vmovups %xmm12,-48(%rsi)
        vpshufb %xmm0,%xmm12,%xmm12
        vmovups %xmm13,-32(%rsi)
        vpshufb %xmm0,%xmm13,%xmm13
        vmovups %xmm14,-16(%rsi)
        vpshufb %xmm0,%xmm14,%xmm14
        vmovdqu %xmm9,16(%rsp)
        vmovdqu 48(%rsp),%xmm6
        vmovdqu 16-32(%r9),%xmm0
        vpunpckhqdq     %xmm6,%xmm6,%xmm2
        vpclmulqdq      $0x00,%xmm3,%xmm7,%xmm5
        vpxor   %xmm6,%xmm2,%xmm2
        vpclmulqdq      $0x11,%xmm3,%xmm7,%xmm7
        vpclmulqdq      $0x00,%xmm15,%xmm1,%xmm1

        vmovdqu 64(%rsp),%xmm9
        vpclmulqdq      $0x00,%xmm0,%xmm6,%xmm4
        vmovdqu 48-32(%r9),%xmm3
        vpxor   %xmm5,%xmm4,%xmm4
        vpunpckhqdq     %xmm9,%xmm9,%xmm5
        vpclmulqdq      $0x11,%xmm0,%xmm6,%xmm6
        vpxor   %xmm9,%xmm5,%xmm5
        vpxor   %xmm7,%xmm6,%xmm6
        vpclmulqdq      $0x10,%xmm15,%xmm2,%xmm2
        vmovdqu 80-32(%r9),%xmm15
        vpxor   %xmm1,%xmm2,%xmm2

        vmovdqu 80(%rsp),%xmm1
        vpclmulqdq      $0x00,%xmm3,%xmm9,%xmm7
        vmovdqu 64-32(%r9),%xmm0
        vpxor   %xmm4,%xmm7,%xmm7
        vpunpckhqdq     %xmm1,%xmm1,%xmm4
        vpclmulqdq      $0x11,%xmm3,%xmm9,%xmm9
        vpxor   %xmm1,%xmm4,%xmm4
        vpxor   %xmm6,%xmm9,%xmm9
        vpclmulqdq      $0x00,%xmm15,%xmm5,%xmm5
        vpxor   %xmm2,%xmm5,%xmm5

        vmovdqu 96(%rsp),%xmm2
        vpclmulqdq      $0x00,%xmm0,%xmm1,%xmm6
        vmovdqu 96-32(%r9),%xmm3
        vpxor   %xmm7,%xmm6,%xmm6
        vpunpckhqdq     %xmm2,%xmm2,%xmm7
        vpclmulqdq      $0x11,%xmm0,%xmm1,%xmm1
        vpxor   %xmm2,%xmm7,%xmm7
        vpxor   %xmm9,%xmm1,%xmm1
        vpclmulqdq      $0x10,%xmm15,%xmm4,%xmm4
        vmovdqu 128-32(%r9),%xmm15
        vpxor   %xmm5,%xmm4,%xmm4

        vpxor   112(%rsp),%xmm8,%xmm8
        vpclmulqdq      $0x00,%xmm3,%xmm2,%xmm5
        vmovdqu 112-32(%r9),%xmm0
        vpunpckhqdq     %xmm8,%xmm8,%xmm9
        vpxor   %xmm6,%xmm5,%xmm5
        vpclmulqdq      $0x11,%xmm3,%xmm2,%xmm2
        vpxor   %xmm8,%xmm9,%xmm9
        vpxor   %xmm1,%xmm2,%xmm2
        vpclmulqdq      $0x00,%xmm15,%xmm7,%xmm7
        vpxor   %xmm4,%xmm7,%xmm4

        vpclmulqdq      $0x00,%xmm0,%xmm8,%xmm6
        vmovdqu 0-32(%r9),%xmm3
        vpunpckhqdq     %xmm14,%xmm14,%xmm1
        vpclmulqdq      $0x11,%xmm0,%xmm8,%xmm8
        vpxor   %xmm14,%xmm1,%xmm1
        vpxor   %xmm5,%xmm6,%xmm5
        vpclmulqdq      $0x10,%xmm15,%xmm9,%xmm9
        vmovdqu 32-32(%r9),%xmm15
        vpxor   %xmm2,%xmm8,%xmm7
        vpxor   %xmm4,%xmm9,%xmm6

        vmovdqu 16-32(%r9),%xmm0
        vpxor   %xmm5,%xmm7,%xmm9
        vpclmulqdq      $0x00,%xmm3,%xmm14,%xmm4
        vpxor   %xmm9,%xmm6,%xmm6
        vpunpckhqdq     %xmm13,%xmm13,%xmm2
        vpclmulqdq      $0x11,%xmm3,%xmm14,%xmm14
        vpxor   %xmm13,%xmm2,%xmm2
        vpslldq $8,%xmm6,%xmm9
        vpclmulqdq      $0x00,%xmm15,%xmm1,%xmm1
        vpxor   %xmm9,%xmm5,%xmm8
        vpsrldq $8,%xmm6,%xmm6
        vpxor   %xmm6,%xmm7,%xmm7

        vpclmulqdq      $0x00,%xmm0,%xmm13,%xmm5
        vmovdqu 48-32(%r9),%xmm3
        vpxor   %xmm4,%xmm5,%xmm5
        vpunpckhqdq     %xmm12,%xmm12,%xmm9
        vpclmulqdq      $0x11,%xmm0,%xmm13,%xmm13
        vpxor   %xmm12,%xmm9,%xmm9
        vpxor   %xmm14,%xmm13,%xmm13
        vpalignr        $8,%xmm8,%xmm8,%xmm14
        vpclmulqdq      $0x10,%xmm15,%xmm2,%xmm2
        vmovdqu 80-32(%r9),%xmm15
        vpxor   %xmm1,%xmm2,%xmm2

        vpclmulqdq      $0x00,%xmm3,%xmm12,%xmm4
        vmovdqu 64-32(%r9),%xmm0
        vpxor   %xmm5,%xmm4,%xmm4
        vpunpckhqdq     %xmm11,%xmm11,%xmm1
        vpclmulqdq      $0x11,%xmm3,%xmm12,%xmm12
        vpxor   %xmm11,%xmm1,%xmm1
        vpxor   %xmm13,%xmm12,%xmm12
        vxorps  16(%rsp),%xmm7,%xmm7
        vpclmulqdq      $0x00,%xmm15,%xmm9,%xmm9
        vpxor   %xmm2,%xmm9,%xmm9

        vpclmulqdq      $0x10,16(%r11),%xmm8,%xmm8
        vxorps  %xmm14,%xmm8,%xmm8

        vpclmulqdq      $0x00,%xmm0,%xmm11,%xmm5
        vmovdqu 96-32(%r9),%xmm3
        vpxor   %xmm4,%xmm5,%xmm5
        vpunpckhqdq     %xmm10,%xmm10,%xmm2
        vpclmulqdq      $0x11,%xmm0,%xmm11,%xmm11
        vpxor   %xmm10,%xmm2,%xmm2
        vpalignr        $8,%xmm8,%xmm8,%xmm14
        vpxor   %xmm12,%xmm11,%xmm11
        vpclmulqdq      $0x10,%xmm15,%xmm1,%xmm1
        vmovdqu 128-32(%r9),%xmm15
        vpxor   %xmm9,%xmm1,%xmm1

        vxorps  %xmm7,%xmm14,%xmm14
        vpclmulqdq      $0x10,16(%r11),%xmm8,%xmm8
        vxorps  %xmm14,%xmm8,%xmm8

        vpclmulqdq      $0x00,%xmm3,%xmm10,%xmm4
        vmovdqu 112-32(%r9),%xmm0
        vpxor   %xmm5,%xmm4,%xmm4
        vpunpckhqdq     %xmm8,%xmm8,%xmm9
        vpclmulqdq      $0x11,%xmm3,%xmm10,%xmm10
        vpxor   %xmm8,%xmm9,%xmm9
        vpxor   %xmm11,%xmm10,%xmm10
        vpclmulqdq      $0x00,%xmm15,%xmm2,%xmm2
        vpxor   %xmm1,%xmm2,%xmm2

        vpclmulqdq      $0x00,%xmm0,%xmm8,%xmm5
        vpclmulqdq      $0x11,%xmm0,%xmm8,%xmm7
        vpxor   %xmm4,%xmm5,%xmm5
        vpclmulqdq      $0x10,%xmm15,%xmm9,%xmm6
        vpxor   %xmm10,%xmm7,%xmm7
        vpxor   %xmm2,%xmm6,%xmm6

        vpxor   %xmm5,%xmm7,%xmm4
        vpxor   %xmm4,%xmm6,%xmm6
        vpslldq $8,%xmm6,%xmm1
        vmovdqu 16(%r11),%xmm3
        vpsrldq $8,%xmm6,%xmm6
        vpxor   %xmm1,%xmm5,%xmm8
        vpxor   %xmm6,%xmm7,%xmm7

        vpalignr        $8,%xmm8,%xmm8,%xmm2
        vpclmulqdq      $0x10,%xmm3,%xmm8,%xmm8
        vpxor   %xmm2,%xmm8,%xmm8

        vpalignr        $8,%xmm8,%xmm8,%xmm2
        vpclmulqdq      $0x10,%xmm3,%xmm8,%xmm8
        vpxor   %xmm7,%xmm2,%xmm2
        vpxor   %xmm2,%xmm8,%xmm8
        vpshufb (%r11),%xmm8,%xmm8
        vmovdqu %xmm8,-64(%r9)

        vzeroupper
        movq    -48(%rax),%r15
.cfi_restore    %r15
        movq    -40(%rax),%r14
.cfi_restore    %r14
        movq    -32(%rax),%r13
.cfi_restore    %r13
        movq    -24(%rax),%r12
.cfi_restore    %r12
        movq    -16(%rax),%rbp
.cfi_restore    %rbp
        movq    -8(%rax),%rbx
.cfi_restore    %rbx
        leaq    (%rax),%rsp
.cfi_def_cfa_register   %rsp
.Lgcm_enc_abort:
        movq    %r10,%rax
        .byte   0xf3,0xc3
.cfi_endproc
.size   aesni_gcm_encrypt,.-aesni_gcm_encrypt
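/*
 * Constant pool: .Lbswap_mask reverses the byte order of a 16-byte block,
 * .Lpoly holds the GHASH reduction constant, .Lone_msb/.Ltwo_lsb/.Lone_lsb
 * are counter increments, followed by the CRYPTOGAMS banner string.
 */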
.align  64
.Lbswap_mask:
.byte   15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:
.byte   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.Lone_msb:
.byte   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Ltwo_lsb:
.byte   2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.Lone_lsb:
.byte   1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte   65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align  64