/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from aesni-gcm-x86_64.pl. */
.text

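/*
 * _aesni_ctr32_ghash_6x is the stitched inner loop shared by aesni_gcm_encrypt
 * and aesni_gcm_decrypt below: each pass AES-CTR-encrypts six counter blocks
 * (%xmm9-%xmm14) while folding six earlier ciphertext blocks into the GHASH
 * accumulator with vpclmulqdq against the precomputed hash-key powers at (%r9).
 * The addl of 0x06000000 advances the byte-swapped low counter word in %ebx;
 * a carry means adding 6 would overflow the low byte of the big-endian
 * counter, so .Lhandle_ctr32 byte-swaps and uses full 32-bit vpaddd instead
 * of the fast vpaddb path.
 */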
.type   _aesni_ctr32_ghash_6x,@function
.align  32
_aesni_ctr32_ghash_6x:
.cfi_startproc
        vmovdqu 32(%r11),%xmm2
        subq    $6,%rdx
        vpxor   %xmm4,%xmm4,%xmm4
        vmovdqu 0-128(%rcx),%xmm15
        vpaddb  %xmm2,%xmm1,%xmm10
        vpaddb  %xmm2,%xmm10,%xmm11
        vpaddb  %xmm2,%xmm11,%xmm12
        vpaddb  %xmm2,%xmm12,%xmm13
        vpaddb  %xmm2,%xmm13,%xmm14
        vpxor   %xmm15,%xmm1,%xmm9
        vmovdqu %xmm4,16+8(%rsp)
        jmp     .Loop6x

.align  32
.Loop6x:
        addl    $100663296,%ebx
        jc      .Lhandle_ctr32
        vmovdqu 0-32(%r9),%xmm3
        vpaddb  %xmm2,%xmm14,%xmm1
        vpxor   %xmm15,%xmm10,%xmm10
        vpxor   %xmm15,%xmm11,%xmm11

.Lresume_ctr32:
        vmovdqu %xmm1,(%r8)
        vpclmulqdq      $0x10,%xmm3,%xmm7,%xmm5
        vpxor   %xmm15,%xmm12,%xmm12
        vmovups 16-128(%rcx),%xmm2
        vpclmulqdq      $0x01,%xmm3,%xmm7,%xmm6
        xorq    %r12,%r12
        cmpq    %r14,%r15

        vaesenc %xmm2,%xmm9,%xmm9
        vmovdqu 48+8(%rsp),%xmm0
        vpxor   %xmm15,%xmm13,%xmm13
        vpclmulqdq      $0x00,%xmm3,%xmm7,%xmm1
        vaesenc %xmm2,%xmm10,%xmm10
        vpxor   %xmm15,%xmm14,%xmm14
        setnc   %r12b
        vpclmulqdq      $0x11,%xmm3,%xmm7,%xmm7
        vaesenc %xmm2,%xmm11,%xmm11
        vmovdqu 16-32(%r9),%xmm3
        negq    %r12
        vaesenc %xmm2,%xmm12,%xmm12
        vpxor   %xmm5,%xmm6,%xmm6
        vpclmulqdq      $0x00,%xmm3,%xmm0,%xmm5
        vpxor   %xmm4,%xmm8,%xmm8
        vaesenc %xmm2,%xmm13,%xmm13
        vpxor   %xmm5,%xmm1,%xmm4
        andq    $0x60,%r12
        vmovups 32-128(%rcx),%xmm15
        vpclmulqdq      $0x10,%xmm3,%xmm0,%xmm1
        vaesenc %xmm2,%xmm14,%xmm14

        vpclmulqdq      $0x01,%xmm3,%xmm0,%xmm2
        leaq    (%r14,%r12,1),%r14
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   16+8(%rsp),%xmm8,%xmm8
        vpclmulqdq      $0x11,%xmm3,%xmm0,%xmm3
        vmovdqu 64+8(%rsp),%xmm0
        vaesenc %xmm15,%xmm10,%xmm10
        movbeq  88(%r14),%r13
        vaesenc %xmm15,%xmm11,%xmm11
        movbeq  80(%r14),%r12
        vaesenc %xmm15,%xmm12,%xmm12
        movq    %r13,32+8(%rsp)
        vaesenc %xmm15,%xmm13,%xmm13
        movq    %r12,40+8(%rsp)
        vmovdqu 48-32(%r9),%xmm5
        vaesenc %xmm15,%xmm14,%xmm14

        vmovups 48-128(%rcx),%xmm15
        vpxor   %xmm1,%xmm6,%xmm6
        vpclmulqdq      $0x00,%xmm5,%xmm0,%xmm1
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   %xmm2,%xmm6,%xmm6
        vpclmulqdq      $0x10,%xmm5,%xmm0,%xmm2
        vaesenc %xmm15,%xmm10,%xmm10
        vpxor   %xmm3,%xmm7,%xmm7
        vpclmulqdq      $0x01,%xmm5,%xmm0,%xmm3
        vaesenc %xmm15,%xmm11,%xmm11
        vpclmulqdq      $0x11,%xmm5,%xmm0,%xmm5
        vmovdqu 80+8(%rsp),%xmm0
        vaesenc %xmm15,%xmm12,%xmm12
        vaesenc %xmm15,%xmm13,%xmm13
        vpxor   %xmm1,%xmm4,%xmm4
        vmovdqu 64-32(%r9),%xmm1
        vaesenc %xmm15,%xmm14,%xmm14

        vmovups 64-128(%rcx),%xmm15
        vpxor   %xmm2,%xmm6,%xmm6
        vpclmulqdq      $0x00,%xmm1,%xmm0,%xmm2
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   %xmm3,%xmm6,%xmm6
        vpclmulqdq      $0x10,%xmm1,%xmm0,%xmm3
        vaesenc %xmm15,%xmm10,%xmm10
        movbeq  72(%r14),%r13
        vpxor   %xmm5,%xmm7,%xmm7
        vpclmulqdq      $0x01,%xmm1,%xmm0,%xmm5
        vaesenc %xmm15,%xmm11,%xmm11
        movbeq  64(%r14),%r12
        vpclmulqdq      $0x11,%xmm1,%xmm0,%xmm1
        vmovdqu 96+8(%rsp),%xmm0
        vaesenc %xmm15,%xmm12,%xmm12
        movq    %r13,48+8(%rsp)
        vaesenc %xmm15,%xmm13,%xmm13
        movq    %r12,56+8(%rsp)
        vpxor   %xmm2,%xmm4,%xmm4
        vmovdqu 96-32(%r9),%xmm2
        vaesenc %xmm15,%xmm14,%xmm14

        vmovups 80-128(%rcx),%xmm15
        vpxor   %xmm3,%xmm6,%xmm6
        vpclmulqdq      $0x00,%xmm2,%xmm0,%xmm3
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   %xmm5,%xmm6,%xmm6
        vpclmulqdq      $0x10,%xmm2,%xmm0,%xmm5
        vaesenc %xmm15,%xmm10,%xmm10
        movbeq  56(%r14),%r13
        vpxor   %xmm1,%xmm7,%xmm7
        vpclmulqdq      $0x01,%xmm2,%xmm0,%xmm1
        vpxor   112+8(%rsp),%xmm8,%xmm8
        vaesenc %xmm15,%xmm11,%xmm11
        movbeq  48(%r14),%r12
        vpclmulqdq      $0x11,%xmm2,%xmm0,%xmm2
        vaesenc %xmm15,%xmm12,%xmm12
        movq    %r13,64+8(%rsp)
        vaesenc %xmm15,%xmm13,%xmm13
        movq    %r12,72+8(%rsp)
        vpxor   %xmm3,%xmm4,%xmm4
        vmovdqu 112-32(%r9),%xmm3
        vaesenc %xmm15,%xmm14,%xmm14

        vmovups 96-128(%rcx),%xmm15
        vpxor   %xmm5,%xmm6,%xmm6
        vpclmulqdq      $0x10,%xmm3,%xmm8,%xmm5
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   %xmm1,%xmm6,%xmm6
        vpclmulqdq      $0x01,%xmm3,%xmm8,%xmm1
        vaesenc %xmm15,%xmm10,%xmm10
        movbeq  40(%r14),%r13
        vpxor   %xmm2,%xmm7,%xmm7
        vpclmulqdq      $0x00,%xmm3,%xmm8,%xmm2
        vaesenc %xmm15,%xmm11,%xmm11
        movbeq  32(%r14),%r12
        vpclmulqdq      $0x11,%xmm3,%xmm8,%xmm8
        vaesenc %xmm15,%xmm12,%xmm12
        movq    %r13,80+8(%rsp)
        vaesenc %xmm15,%xmm13,%xmm13
        movq    %r12,88+8(%rsp)
        vpxor   %xmm5,%xmm6,%xmm6
        vaesenc %xmm15,%xmm14,%xmm14
        vpxor   %xmm1,%xmm6,%xmm6

        vmovups 112-128(%rcx),%xmm15
        vpslldq $8,%xmm6,%xmm5
        vpxor   %xmm2,%xmm4,%xmm4
        vmovdqu 16(%r11),%xmm3

        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   %xmm8,%xmm7,%xmm7
        vaesenc %xmm15,%xmm10,%xmm10
        vpxor   %xmm5,%xmm4,%xmm4
        movbeq  24(%r14),%r13
        vaesenc %xmm15,%xmm11,%xmm11
        movbeq  16(%r14),%r12
        vpalignr        $8,%xmm4,%xmm4,%xmm0
        vpclmulqdq      $0x10,%xmm3,%xmm4,%xmm4
        movq    %r13,96+8(%rsp)
        vaesenc %xmm15,%xmm12,%xmm12
        movq    %r12,104+8(%rsp)
        vaesenc %xmm15,%xmm13,%xmm13
        vmovups 128-128(%rcx),%xmm1
        vaesenc %xmm15,%xmm14,%xmm14

        vaesenc %xmm1,%xmm9,%xmm9
        vmovups 144-128(%rcx),%xmm15
        vaesenc %xmm1,%xmm10,%xmm10
        vpsrldq $8,%xmm6,%xmm6
        vaesenc %xmm1,%xmm11,%xmm11
        vpxor   %xmm6,%xmm7,%xmm7
        vaesenc %xmm1,%xmm12,%xmm12
        vpxor   %xmm0,%xmm4,%xmm4
        movbeq  8(%r14),%r13
        vaesenc %xmm1,%xmm13,%xmm13
        movbeq  0(%r14),%r12
        vaesenc %xmm1,%xmm14,%xmm14
        vmovups 160-128(%rcx),%xmm1
        cmpl    $11,%ebp
        jb      .Lenc_tail

        vaesenc %xmm15,%xmm9,%xmm9
        vaesenc %xmm15,%xmm10,%xmm10
        vaesenc %xmm15,%xmm11,%xmm11
        vaesenc %xmm15,%xmm12,%xmm12
        vaesenc %xmm15,%xmm13,%xmm13
        vaesenc %xmm15,%xmm14,%xmm14

        vaesenc %xmm1,%xmm9,%xmm9
        vaesenc %xmm1,%xmm10,%xmm10
        vaesenc %xmm1,%xmm11,%xmm11
        vaesenc %xmm1,%xmm12,%xmm12
        vaesenc %xmm1,%xmm13,%xmm13
        vmovups 176-128(%rcx),%xmm15
        vaesenc %xmm1,%xmm14,%xmm14
        vmovups 192-128(%rcx),%xmm1
        je      .Lenc_tail

        vaesenc %xmm15,%xmm9,%xmm9
        vaesenc %xmm15,%xmm10,%xmm10
        vaesenc %xmm15,%xmm11,%xmm11
        vaesenc %xmm15,%xmm12,%xmm12
        vaesenc %xmm15,%xmm13,%xmm13
        vaesenc %xmm15,%xmm14,%xmm14

        vaesenc %xmm1,%xmm9,%xmm9
        vaesenc %xmm1,%xmm10,%xmm10
        vaesenc %xmm1,%xmm11,%xmm11
        vaesenc %xmm1,%xmm12,%xmm12
        vaesenc %xmm1,%xmm13,%xmm13
        vmovups 208-128(%rcx),%xmm15
        vaesenc %xmm1,%xmm14,%xmm14
        vmovups 224-128(%rcx),%xmm1
        jmp     .Lenc_tail

.align  32
.Lhandle_ctr32:
        vmovdqu (%r11),%xmm0
        vpshufb %xmm0,%xmm1,%xmm6
        vmovdqu 48(%r11),%xmm5
        vpaddd  64(%r11),%xmm6,%xmm10
        vpaddd  %xmm5,%xmm6,%xmm11
        vmovdqu 0-32(%r9),%xmm3
        vpaddd  %xmm5,%xmm10,%xmm12
        vpshufb %xmm0,%xmm10,%xmm10
        vpaddd  %xmm5,%xmm11,%xmm13
        vpshufb %xmm0,%xmm11,%xmm11
        vpxor   %xmm15,%xmm10,%xmm10
        vpaddd  %xmm5,%xmm12,%xmm14
        vpshufb %xmm0,%xmm12,%xmm12
        vpxor   %xmm15,%xmm11,%xmm11
        vpaddd  %xmm5,%xmm13,%xmm1
        vpshufb %xmm0,%xmm13,%xmm13
        vpshufb %xmm0,%xmm14,%xmm14
        vpshufb %xmm0,%xmm1,%xmm1
        jmp     .Lresume_ctr32

.align  32
.Lenc_tail:
        vaesenc %xmm15,%xmm9,%xmm9
        vmovdqu %xmm7,16+8(%rsp)
        vpalignr        $8,%xmm4,%xmm4,%xmm8
        vaesenc %xmm15,%xmm10,%xmm10
        vpclmulqdq      $0x10,%xmm3,%xmm4,%xmm4
        vpxor   0(%rdi),%xmm1,%xmm2
        vaesenc %xmm15,%xmm11,%xmm11
        vpxor   16(%rdi),%xmm1,%xmm0
        vaesenc %xmm15,%xmm12,%xmm12
        vpxor   32(%rdi),%xmm1,%xmm5
        vaesenc %xmm15,%xmm13,%xmm13
        vpxor   48(%rdi),%xmm1,%xmm6
        vaesenc %xmm15,%xmm14,%xmm14
        vpxor   64(%rdi),%xmm1,%xmm7
        vpxor   80(%rdi),%xmm1,%xmm3
        vmovdqu (%r8),%xmm1

        vaesenclast     %xmm2,%xmm9,%xmm9
        vmovdqu 32(%r11),%xmm2
        vaesenclast     %xmm0,%xmm10,%xmm10
        vpaddb  %xmm2,%xmm1,%xmm0
        movq    %r13,112+8(%rsp)
        leaq    96(%rdi),%rdi
        vaesenclast     %xmm5,%xmm11,%xmm11
        vpaddb  %xmm2,%xmm0,%xmm5
        movq    %r12,120+8(%rsp)
        leaq    96(%rsi),%rsi
        vmovdqu 0-128(%rcx),%xmm15
        vaesenclast     %xmm6,%xmm12,%xmm12
        vpaddb  %xmm2,%xmm5,%xmm6
        vaesenclast     %xmm7,%xmm13,%xmm13
        vpaddb  %xmm2,%xmm6,%xmm7
        vaesenclast     %xmm3,%xmm14,%xmm14
        vpaddb  %xmm2,%xmm7,%xmm3

        addq    $0x60,%r10
        subq    $0x6,%rdx
        jc      .L6x_done

        vmovups %xmm9,-96(%rsi)
        vpxor   %xmm15,%xmm1,%xmm9
        vmovups %xmm10,-80(%rsi)
        vmovdqa %xmm0,%xmm10
        vmovups %xmm11,-64(%rsi)
        vmovdqa %xmm5,%xmm11
        vmovups %xmm12,-48(%rsi)
        vmovdqa %xmm6,%xmm12
        vmovups %xmm13,-32(%rsi)
        vmovdqa %xmm7,%xmm13
        vmovups %xmm14,-16(%rsi)
        vmovdqa %xmm3,%xmm14
        vmovdqu 32+8(%rsp),%xmm7
        jmp     .Loop6x

.L6x_done:
        vpxor   16+8(%rsp),%xmm8,%xmm8
        vpxor   %xmm4,%xmm8,%xmm8

        .byte   0xf3,0xc3
.cfi_endproc
.size   _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
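/*
 * aesni_gcm_decrypt: under the SysV AMD64 ABI the arguments arrive as
 * in=%rdi, out=%rsi, len=%rdx, key schedule=%rcx, counter block=%r8 and
 * Xi/hash state followed by the Htable powers=%r9.  Inputs shorter than
 * 0x60 (96) bytes are rejected and 0 is returned; otherwise six blocks per
 * iteration are decrypted and hashed by _aesni_ctr32_ghash_6x and the
 * number of bytes processed is returned in %rax.
 */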
.globl  aesni_gcm_decrypt
.type   aesni_gcm_decrypt,@function
.align  32
aesni_gcm_decrypt:
.cfi_startproc
        xorq    %r10,%r10
        cmpq    $0x60,%rdx
        jb      .Lgcm_dec_abort

        leaq    (%rsp),%rax
.cfi_def_cfa_register   %rax
        pushq   %rbx
.cfi_offset     %rbx,-16
        pushq   %rbp
.cfi_offset     %rbp,-24
        pushq   %r12
.cfi_offset     %r12,-32
        pushq   %r13
.cfi_offset     %r13,-40
        pushq   %r14
.cfi_offset     %r14,-48
        pushq   %r15
.cfi_offset     %r15,-56
        vzeroupper

        vmovdqu (%r8),%xmm1
        addq    $-128,%rsp
        movl    12(%r8),%ebx
        leaq    .Lbswap_mask(%rip),%r11
        leaq    -128(%rcx),%r14
        movq    $0xf80,%r15
        vmovdqu (%r9),%xmm8
        andq    $-128,%rsp
        vmovdqu (%r11),%xmm0
        leaq    128(%rcx),%rcx
        leaq    32+32(%r9),%r9
        movl    240-128(%rcx),%ebp
        vpshufb %xmm0,%xmm8,%xmm8

        andq    %r15,%r14
        andq    %rsp,%r15
        subq    %r14,%r15
        jc      .Ldec_no_key_aliasing
        cmpq    $768,%r15
        jnc     .Ldec_no_key_aliasing
        subq    %r15,%rsp
.Ldec_no_key_aliasing:

        vmovdqu 80(%rdi),%xmm7
        leaq    (%rdi),%r14
        vmovdqu 64(%rdi),%xmm4
        leaq    -192(%rdi,%rdx,1),%r15
        vmovdqu 48(%rdi),%xmm5
        shrq    $4,%rdx
        xorq    %r10,%r10
        vmovdqu 32(%rdi),%xmm6
        vpshufb %xmm0,%xmm7,%xmm7
        vmovdqu 16(%rdi),%xmm2
        vpshufb %xmm0,%xmm4,%xmm4
        vmovdqu (%rdi),%xmm3
        vpshufb %xmm0,%xmm5,%xmm5
        vmovdqu %xmm4,48(%rsp)
        vpshufb %xmm0,%xmm6,%xmm6
        vmovdqu %xmm5,64(%rsp)
        vpshufb %xmm0,%xmm2,%xmm2
        vmovdqu %xmm6,80(%rsp)
        vpshufb %xmm0,%xmm3,%xmm3
        vmovdqu %xmm2,96(%rsp)
        vmovdqu %xmm3,112(%rsp)

        call    _aesni_ctr32_ghash_6x

        vmovups %xmm9,-96(%rsi)
        vmovups %xmm10,-80(%rsi)
        vmovups %xmm11,-64(%rsi)
        vmovups %xmm12,-48(%rsi)
        vmovups %xmm13,-32(%rsi)
        vmovups %xmm14,-16(%rsi)

        vpshufb (%r11),%xmm8,%xmm8
        vmovdqu %xmm8,-64(%r9)

        vzeroupper
        movq    -48(%rax),%r15
.cfi_restore    %r15
        movq    -40(%rax),%r14
.cfi_restore    %r14
        movq    -32(%rax),%r13
.cfi_restore    %r13
        movq    -24(%rax),%r12
.cfi_restore    %r12
        movq    -16(%rax),%rbp
.cfi_restore    %rbp
        movq    -8(%rax),%rbx
.cfi_restore    %rbx
        leaq    (%rax),%rsp
.cfi_def_cfa_register   %rsp
.Lgcm_dec_abort:
        movq    %r10,%rax
        .byte   0xf3,0xc3
.cfi_endproc
.size   aesni_gcm_decrypt,.-aesni_gcm_decrypt
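/*
 * _aesni_ctr32_6x encrypts six consecutive counter blocks and XORs the
 * resulting keystream with six input blocks from (%rdi), writing the result
 * to (%rsi); it does no GHASH work.  aesni_gcm_encrypt calls it twice to
 * produce the first twelve ciphertext blocks before entering the stitched
 * loop.  Counter-byte overflow is handled at .Lhandle_ctr32_2, mirroring
 * .Lhandle_ctr32 above.
 */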
.type   _aesni_ctr32_6x,@function
.align  32
_aesni_ctr32_6x:
.cfi_startproc
        vmovdqu 0-128(%rcx),%xmm4
        vmovdqu 32(%r11),%xmm2
        leaq    -1(%rbp),%r13
        vmovups 16-128(%rcx),%xmm15
        leaq    32-128(%rcx),%r12
        vpxor   %xmm4,%xmm1,%xmm9
        addl    $100663296,%ebx
        jc      .Lhandle_ctr32_2
        vpaddb  %xmm2,%xmm1,%xmm10
        vpaddb  %xmm2,%xmm10,%xmm11
        vpxor   %xmm4,%xmm10,%xmm10
        vpaddb  %xmm2,%xmm11,%xmm12
        vpxor   %xmm4,%xmm11,%xmm11
        vpaddb  %xmm2,%xmm12,%xmm13
        vpxor   %xmm4,%xmm12,%xmm12
        vpaddb  %xmm2,%xmm13,%xmm14
        vpxor   %xmm4,%xmm13,%xmm13
        vpaddb  %xmm2,%xmm14,%xmm1
        vpxor   %xmm4,%xmm14,%xmm14
        jmp     .Loop_ctr32

.align  16
.Loop_ctr32:
        vaesenc %xmm15,%xmm9,%xmm9
        vaesenc %xmm15,%xmm10,%xmm10
        vaesenc %xmm15,%xmm11,%xmm11
        vaesenc %xmm15,%xmm12,%xmm12
        vaesenc %xmm15,%xmm13,%xmm13
        vaesenc %xmm15,%xmm14,%xmm14
        vmovups (%r12),%xmm15
        leaq    16(%r12),%r12
        decl    %r13d
        jnz     .Loop_ctr32

        vmovdqu (%r12),%xmm3
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   0(%rdi),%xmm3,%xmm4
        vaesenc %xmm15,%xmm10,%xmm10
        vpxor   16(%rdi),%xmm3,%xmm5
        vaesenc %xmm15,%xmm11,%xmm11
        vpxor   32(%rdi),%xmm3,%xmm6
        vaesenc %xmm15,%xmm12,%xmm12
        vpxor   48(%rdi),%xmm3,%xmm8
        vaesenc %xmm15,%xmm13,%xmm13
        vpxor   64(%rdi),%xmm3,%xmm2
        vaesenc %xmm15,%xmm14,%xmm14
        vpxor   80(%rdi),%xmm3,%xmm3
        leaq    96(%rdi),%rdi

        vaesenclast     %xmm4,%xmm9,%xmm9
        vaesenclast     %xmm5,%xmm10,%xmm10
        vaesenclast     %xmm6,%xmm11,%xmm11
        vaesenclast     %xmm8,%xmm12,%xmm12
        vaesenclast     %xmm2,%xmm13,%xmm13
        vaesenclast     %xmm3,%xmm14,%xmm14
        vmovups %xmm9,0(%rsi)
        vmovups %xmm10,16(%rsi)
        vmovups %xmm11,32(%rsi)
        vmovups %xmm12,48(%rsi)
        vmovups %xmm13,64(%rsi)
        vmovups %xmm14,80(%rsi)
        leaq    96(%rsi),%rsi

        .byte   0xf3,0xc3
.align  32
.Lhandle_ctr32_2:
        vpshufb %xmm0,%xmm1,%xmm6
        vmovdqu 48(%r11),%xmm5
        vpaddd  64(%r11),%xmm6,%xmm10
        vpaddd  %xmm5,%xmm6,%xmm11
        vpaddd  %xmm5,%xmm10,%xmm12
        vpshufb %xmm0,%xmm10,%xmm10
        vpaddd  %xmm5,%xmm11,%xmm13
        vpshufb %xmm0,%xmm11,%xmm11
        vpxor   %xmm4,%xmm10,%xmm10
        vpaddd  %xmm5,%xmm12,%xmm14
        vpshufb %xmm0,%xmm12,%xmm12
        vpxor   %xmm4,%xmm11,%xmm11
        vpaddd  %xmm5,%xmm13,%xmm1
        vpshufb %xmm0,%xmm13,%xmm13
        vpxor   %xmm4,%xmm12,%xmm12
        vpshufb %xmm0,%xmm14,%xmm14
        vpxor   %xmm4,%xmm13,%xmm13
        vpshufb %xmm0,%xmm1,%xmm1
        vpxor   %xmm4,%xmm14,%xmm14
        jmp     .Loop_ctr32
.cfi_endproc
.size   _aesni_ctr32_6x,.-_aesni_ctr32_6x

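/*
 * aesni_gcm_encrypt takes the same register arguments as aesni_gcm_decrypt
 * above.  Inputs shorter than 288 bytes are rejected; the first twelve blocks
 * are encrypted with two _aesni_ctr32_6x calls and their byte-swapped
 * ciphertext is buffered on the stack, the stitched _aesni_ctr32_ghash_6x
 * loop then encrypts ahead while hashing previously produced ciphertext, and
 * the tail below folds the final twelve ciphertext blocks into Xi with a
 * last GHASH reduction before Xi is stored back.
 */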
.globl  aesni_gcm_encrypt
.type   aesni_gcm_encrypt,@function
.align  32
aesni_gcm_encrypt:
.cfi_startproc
        xorq    %r10,%r10
        cmpq    $288,%rdx
        jb      .Lgcm_enc_abort

        leaq    (%rsp),%rax
.cfi_def_cfa_register   %rax
        pushq   %rbx
.cfi_offset     %rbx,-16
        pushq   %rbp
.cfi_offset     %rbp,-24
        pushq   %r12
.cfi_offset     %r12,-32
        pushq   %r13
.cfi_offset     %r13,-40
        pushq   %r14
.cfi_offset     %r14,-48
        pushq   %r15
.cfi_offset     %r15,-56
        vzeroupper

        vmovdqu (%r8),%xmm1
        addq    $-128,%rsp
        movl    12(%r8),%ebx
        leaq    .Lbswap_mask(%rip),%r11
        leaq    -128(%rcx),%r14
        movq    $0xf80,%r15
        leaq    128(%rcx),%rcx
        vmovdqu (%r11),%xmm0
        andq    $-128,%rsp
        movl    240-128(%rcx),%ebp

        andq    %r15,%r14
        andq    %rsp,%r15
        subq    %r14,%r15
        jc      .Lenc_no_key_aliasing
        cmpq    $768,%r15
        jnc     .Lenc_no_key_aliasing
        subq    %r15,%rsp
.Lenc_no_key_aliasing:

        leaq    (%rsi),%r14
        leaq    -192(%rsi,%rdx,1),%r15
        shrq    $4,%rdx

        call    _aesni_ctr32_6x
        vpshufb %xmm0,%xmm9,%xmm8
        vpshufb %xmm0,%xmm10,%xmm2
        vmovdqu %xmm8,112(%rsp)
        vpshufb %xmm0,%xmm11,%xmm4
        vmovdqu %xmm2,96(%rsp)
        vpshufb %xmm0,%xmm12,%xmm5
        vmovdqu %xmm4,80(%rsp)
        vpshufb %xmm0,%xmm13,%xmm6
        vmovdqu %xmm5,64(%rsp)
        vpshufb %xmm0,%xmm14,%xmm7
        vmovdqu %xmm6,48(%rsp)

        call    _aesni_ctr32_6x

        vmovdqu (%r9),%xmm8
        leaq    32+32(%r9),%r9
        subq    $12,%rdx
        movq    $192,%r10
        vpshufb %xmm0,%xmm8,%xmm8

        call    _aesni_ctr32_ghash_6x
        vmovdqu 32(%rsp),%xmm7
        vmovdqu (%r11),%xmm0
        vmovdqu 0-32(%r9),%xmm3
        vpunpckhqdq     %xmm7,%xmm7,%xmm1
        vmovdqu 32-32(%r9),%xmm15
        vmovups %xmm9,-96(%rsi)
        vpshufb %xmm0,%xmm9,%xmm9
        vpxor   %xmm7,%xmm1,%xmm1
        vmovups %xmm10,-80(%rsi)
        vpshufb %xmm0,%xmm10,%xmm10
        vmovups %xmm11,-64(%rsi)
        vpshufb %xmm0,%xmm11,%xmm11
        vmovups %xmm12,-48(%rsi)
        vpshufb %xmm0,%xmm12,%xmm12
        vmovups %xmm13,-32(%rsi)
        vpshufb %xmm0,%xmm13,%xmm13
        vmovups %xmm14,-16(%rsi)
        vpshufb %xmm0,%xmm14,%xmm14
        vmovdqu %xmm9,16(%rsp)
        vmovdqu 48(%rsp),%xmm6
        vmovdqu 16-32(%r9),%xmm0
        vpunpckhqdq     %xmm6,%xmm6,%xmm2
        vpclmulqdq      $0x00,%xmm3,%xmm7,%xmm5
        vpxor   %xmm6,%xmm2,%xmm2
        vpclmulqdq      $0x11,%xmm3,%xmm7,%xmm7
        vpclmulqdq      $0x00,%xmm15,%xmm1,%xmm1

        vmovdqu 64(%rsp),%xmm9
        vpclmulqdq      $0x00,%xmm0,%xmm6,%xmm4
        vmovdqu 48-32(%r9),%xmm3
        vpxor   %xmm5,%xmm4,%xmm4
        vpunpckhqdq     %xmm9,%xmm9,%xmm5
        vpclmulqdq      $0x11,%xmm0,%xmm6,%xmm6
        vpxor   %xmm9,%xmm5,%xmm5
        vpxor   %xmm7,%xmm6,%xmm6
        vpclmulqdq      $0x10,%xmm15,%xmm2,%xmm2
        vmovdqu 80-32(%r9),%xmm15
        vpxor   %xmm1,%xmm2,%xmm2

        vmovdqu 80(%rsp),%xmm1
        vpclmulqdq      $0x00,%xmm3,%xmm9,%xmm7
        vmovdqu 64-32(%r9),%xmm0
        vpxor   %xmm4,%xmm7,%xmm7
        vpunpckhqdq     %xmm1,%xmm1,%xmm4
        vpclmulqdq      $0x11,%xmm3,%xmm9,%xmm9
        vpxor   %xmm1,%xmm4,%xmm4
        vpxor   %xmm6,%xmm9,%xmm9
        vpclmulqdq      $0x00,%xmm15,%xmm5,%xmm5
        vpxor   %xmm2,%xmm5,%xmm5

        vmovdqu 96(%rsp),%xmm2
        vpclmulqdq      $0x00,%xmm0,%xmm1,%xmm6
        vmovdqu 96-32(%r9),%xmm3
        vpxor   %xmm7,%xmm6,%xmm6
        vpunpckhqdq     %xmm2,%xmm2,%xmm7
        vpclmulqdq      $0x11,%xmm0,%xmm1,%xmm1
        vpxor   %xmm2,%xmm7,%xmm7
        vpxor   %xmm9,%xmm1,%xmm1
        vpclmulqdq      $0x10,%xmm15,%xmm4,%xmm4
        vmovdqu 128-32(%r9),%xmm15
        vpxor   %xmm5,%xmm4,%xmm4

        vpxor   112(%rsp),%xmm8,%xmm8
        vpclmulqdq      $0x00,%xmm3,%xmm2,%xmm5
        vmovdqu 112-32(%r9),%xmm0
        vpunpckhqdq     %xmm8,%xmm8,%xmm9
        vpxor   %xmm6,%xmm5,%xmm5
        vpclmulqdq      $0x11,%xmm3,%xmm2,%xmm2
        vpxor   %xmm8,%xmm9,%xmm9
        vpxor   %xmm1,%xmm2,%xmm2
        vpclmulqdq      $0x00,%xmm15,%xmm7,%xmm7
        vpxor   %xmm4,%xmm7,%xmm4

        vpclmulqdq      $0x00,%xmm0,%xmm8,%xmm6
        vmovdqu 0-32(%r9),%xmm3
        vpunpckhqdq     %xmm14,%xmm14,%xmm1
        vpclmulqdq      $0x11,%xmm0,%xmm8,%xmm8
        vpxor   %xmm14,%xmm1,%xmm1
        vpxor   %xmm5,%xmm6,%xmm5
        vpclmulqdq      $0x10,%xmm15,%xmm9,%xmm9
        vmovdqu 32-32(%r9),%xmm15
        vpxor   %xmm2,%xmm8,%xmm7
        vpxor   %xmm4,%xmm9,%xmm6

        vmovdqu 16-32(%r9),%xmm0
        vpxor   %xmm5,%xmm7,%xmm9
        vpclmulqdq      $0x00,%xmm3,%xmm14,%xmm4
        vpxor   %xmm9,%xmm6,%xmm6
        vpunpckhqdq     %xmm13,%xmm13,%xmm2
        vpclmulqdq      $0x11,%xmm3,%xmm14,%xmm14
        vpxor   %xmm13,%xmm2,%xmm2
        vpslldq $8,%xmm6,%xmm9
        vpclmulqdq      $0x00,%xmm15,%xmm1,%xmm1
        vpxor   %xmm9,%xmm5,%xmm8
        vpsrldq $8,%xmm6,%xmm6
        vpxor   %xmm6,%xmm7,%xmm7

        vpclmulqdq      $0x00,%xmm0,%xmm13,%xmm5
        vmovdqu 48-32(%r9),%xmm3
        vpxor   %xmm4,%xmm5,%xmm5
        vpunpckhqdq     %xmm12,%xmm12,%xmm9
        vpclmulqdq      $0x11,%xmm0,%xmm13,%xmm13
        vpxor   %xmm12,%xmm9,%xmm9
        vpxor   %xmm14,%xmm13,%xmm13
        vpalignr        $8,%xmm8,%xmm8,%xmm14
        vpclmulqdq      $0x10,%xmm15,%xmm2,%xmm2
        vmovdqu 80-32(%r9),%xmm15
        vpxor   %xmm1,%xmm2,%xmm2

        vpclmulqdq      $0x00,%xmm3,%xmm12,%xmm4
        vmovdqu 64-32(%r9),%xmm0
        vpxor   %xmm5,%xmm4,%xmm4
        vpunpckhqdq     %xmm11,%xmm11,%xmm1
        vpclmulqdq      $0x11,%xmm3,%xmm12,%xmm12
        vpxor   %xmm11,%xmm1,%xmm1
        vpxor   %xmm13,%xmm12,%xmm12
        vxorps  16(%rsp),%xmm7,%xmm7
        vpclmulqdq      $0x00,%xmm15,%xmm9,%xmm9
        vpxor   %xmm2,%xmm9,%xmm9

        vpclmulqdq      $0x10,16(%r11),%xmm8,%xmm8
        vxorps  %xmm14,%xmm8,%xmm8

        vpclmulqdq      $0x00,%xmm0,%xmm11,%xmm5
        vmovdqu 96-32(%r9),%xmm3
        vpxor   %xmm4,%xmm5,%xmm5
        vpunpckhqdq     %xmm10,%xmm10,%xmm2
        vpclmulqdq      $0x11,%xmm0,%xmm11,%xmm11
        vpxor   %xmm10,%xmm2,%xmm2
        vpalignr        $8,%xmm8,%xmm8,%xmm14
        vpxor   %xmm12,%xmm11,%xmm11
        vpclmulqdq      $0x10,%xmm15,%xmm1,%xmm1
        vmovdqu 128-32(%r9),%xmm15
        vpxor   %xmm9,%xmm1,%xmm1

        vxorps  %xmm7,%xmm14,%xmm14
        vpclmulqdq      $0x10,16(%r11),%xmm8,%xmm8
        vxorps  %xmm14,%xmm8,%xmm8

        vpclmulqdq      $0x00,%xmm3,%xmm10,%xmm4
        vmovdqu 112-32(%r9),%xmm0
        vpxor   %xmm5,%xmm4,%xmm4
        vpunpckhqdq     %xmm8,%xmm8,%xmm9
        vpclmulqdq      $0x11,%xmm3,%xmm10,%xmm10
        vpxor   %xmm8,%xmm9,%xmm9
        vpxor   %xmm11,%xmm10,%xmm10
        vpclmulqdq      $0x00,%xmm15,%xmm2,%xmm2
        vpxor   %xmm1,%xmm2,%xmm2

        vpclmulqdq      $0x00,%xmm0,%xmm8,%xmm5
        vpclmulqdq      $0x11,%xmm0,%xmm8,%xmm7
        vpxor   %xmm4,%xmm5,%xmm5
        vpclmulqdq      $0x10,%xmm15,%xmm9,%xmm6
        vpxor   %xmm10,%xmm7,%xmm7
        vpxor   %xmm2,%xmm6,%xmm6

        vpxor   %xmm5,%xmm7,%xmm4
        vpxor   %xmm4,%xmm6,%xmm6
        vpslldq $8,%xmm6,%xmm1
        vmovdqu 16(%r11),%xmm3
        vpsrldq $8,%xmm6,%xmm6
        vpxor   %xmm1,%xmm5,%xmm8
        vpxor   %xmm6,%xmm7,%xmm7

        vpalignr        $8,%xmm8,%xmm8,%xmm2
        vpclmulqdq      $0x10,%xmm3,%xmm8,%xmm8
        vpxor   %xmm2,%xmm8,%xmm8

        vpalignr        $8,%xmm8,%xmm8,%xmm2
        vpclmulqdq      $0x10,%xmm3,%xmm8,%xmm8
        vpxor   %xmm7,%xmm2,%xmm2
        vpxor   %xmm2,%xmm8,%xmm8
        vpshufb (%r11),%xmm8,%xmm8
        vmovdqu %xmm8,-64(%r9)

        vzeroupper
        movq    -48(%rax),%r15
.cfi_restore    %r15
        movq    -40(%rax),%r14
.cfi_restore    %r14
        movq    -32(%rax),%r13
.cfi_restore    %r13
        movq    -24(%rax),%r12
.cfi_restore    %r12
        movq    -16(%rax),%rbp
.cfi_restore    %rbp
        movq    -8(%rax),%rbx
.cfi_restore    %rbx
        leaq    (%rax),%rsp
.cfi_def_cfa_register   %rsp
.Lgcm_enc_abort:
        movq    %r10,%rax
        .byte   0xf3,0xc3
.cfi_endproc
.size   aesni_gcm_encrypt,.-aesni_gcm_encrypt
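/*
 * Constant pool: .Lbswap_mask is the byte-swap shuffle mask used to convert
 * between memory byte order and the big-endian lane order GHASH expects,
 * .Lpoly holds the GHASH reduction constant (0xc2 in the top byte), and
 * .Lone_msb/.Ltwo_lsb/.Lone_lsb are the counter increments referenced as
 * 32(%r11), 48(%r11) and 64(%r11) above.  The trailing .byte string is the
 * CRYPTOGAMS banner.
 */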
.align  64
.Lbswap_mask:
.byte   15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:
.byte   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.Lone_msb:
.byte   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Ltwo_lsb:
.byte   2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.Lone_lsb:
.byte   1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte   65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align  64