1 /* $FreeBSD$ */
2 /* Do not modify. This file is auto-generated from ghash-x86_64.pl. */
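/*
 * GHASH (the GF(2^128) universal hash used by GCM) for x86_64, in three
 * flavours, typically selected at run time by the calling GCM code:
 *   gcm_gmult_4bit / gcm_ghash_4bit  - portable 4-bit table-driven version
 *   gcm_*_clmul                      - PCLMULQDQ (carry-less multiply) version
 *   gcm_*_avx                        - AVX version of the CLMUL code
 * The gcm_gmult_* routines multiply the 16-byte hash value Xi by the key H;
 * the gcm_ghash_* routines fold a buffer of 16-byte blocks into Xi.
 */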
3 .text   
4
5
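/*
 * gcm_gmult_4bit(Xi, Htable): %rdi -> 16-byte Xi, %rsi -> 256-byte Htable.
 * Multiplies Xi by H four bits at a time, walking Xi from byte 15 down to
 * byte 0 and folding in the .Lrem_4bit reduction constants.
 */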
6 .globl  gcm_gmult_4bit
7 .type   gcm_gmult_4bit,@function
8 .align  16
9 gcm_gmult_4bit:
10 .cfi_startproc  
11         pushq   %rbx
12 .cfi_adjust_cfa_offset  8
13 .cfi_offset     %rbx,-16
14         pushq   %rbp
15 .cfi_adjust_cfa_offset  8
16 .cfi_offset     %rbp,-24
17         pushq   %r12
18 .cfi_adjust_cfa_offset  8
19 .cfi_offset     %r12,-32
20         pushq   %r13
21 .cfi_adjust_cfa_offset  8
22 .cfi_offset     %r13,-40
23         pushq   %r14
24 .cfi_adjust_cfa_offset  8
25 .cfi_offset     %r14,-48
26         pushq   %r15
27 .cfi_adjust_cfa_offset  8
28 .cfi_offset     %r15,-56
29         subq    $280,%rsp
30 .cfi_adjust_cfa_offset  280
31 .Lgmult_prologue:
32
33         movzbq  15(%rdi),%r8
34         leaq    .Lrem_4bit(%rip),%r11
35         xorq    %rax,%rax
36         xorq    %rbx,%rbx
37         movb    %r8b,%al
38         movb    %r8b,%bl
39         shlb    $4,%al
40         movq    $14,%rcx
41         movq    8(%rsi,%rax,1),%r8
42         movq    (%rsi,%rax,1),%r9
43         andb    $0xf0,%bl
44         movq    %r8,%rdx
45         jmp     .Loop1
46
47 .align  16
48 .Loop1:
49         shrq    $4,%r8
50         andq    $0xf,%rdx
51         movq    %r9,%r10
52         movb    (%rdi,%rcx,1),%al
53         shrq    $4,%r9
54         xorq    8(%rsi,%rbx,1),%r8
55         shlq    $60,%r10
56         xorq    (%rsi,%rbx,1),%r9
57         movb    %al,%bl
58         xorq    (%r11,%rdx,8),%r9
59         movq    %r8,%rdx
60         shlb    $4,%al
61         xorq    %r10,%r8
62         decq    %rcx
63         js      .Lbreak1
64
65         shrq    $4,%r8
66         andq    $0xf,%rdx
67         movq    %r9,%r10
68         shrq    $4,%r9
69         xorq    8(%rsi,%rax,1),%r8
70         shlq    $60,%r10
71         xorq    (%rsi,%rax,1),%r9
72         andb    $0xf0,%bl
73         xorq    (%r11,%rdx,8),%r9
74         movq    %r8,%rdx
75         xorq    %r10,%r8
76         jmp     .Loop1
77
78 .align  16
79 .Lbreak1:
80         shrq    $4,%r8
81         andq    $0xf,%rdx
82         movq    %r9,%r10
83         shrq    $4,%r9
84         xorq    8(%rsi,%rax,1),%r8
85         shlq    $60,%r10
86         xorq    (%rsi,%rax,1),%r9
87         andb    $0xf0,%bl
88         xorq    (%r11,%rdx,8),%r9
89         movq    %r8,%rdx
90         xorq    %r10,%r8
91
92         shrq    $4,%r8
93         andq    $0xf,%rdx
94         movq    %r9,%r10
95         shrq    $4,%r9
96         xorq    8(%rsi,%rbx,1),%r8
97         shlq    $60,%r10
98         xorq    (%rsi,%rbx,1),%r9
99         xorq    %r10,%r8
100         xorq    (%r11,%rdx,8),%r9
101
102         bswapq  %r8
103         bswapq  %r9
104         movq    %r8,8(%rdi)
105         movq    %r9,(%rdi)
106
107         leaq    280+48(%rsp),%rsi
108 .cfi_def_cfa    %rsi,8
109         movq    -8(%rsi),%rbx
110 .cfi_restore    %rbx
111         leaq    (%rsi),%rsp
112 .cfi_def_cfa_register   %rsp
113 .Lgmult_epilogue:
114         .byte   0xf3,0xc3
115 .cfi_endproc    
116 .size   gcm_gmult_4bit,.-gcm_gmult_4bit
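/*
 * gcm_ghash_4bit(Xi, Htable, inp, len): %rdi -> Xi, %rsi -> Htable,
 * %rdx -> input, %rcx = length in bytes (a multiple of 16).  The input
 * pointer is kept in %r14 and %r15 holds the end-of-input pointer.
 */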
117 .globl  gcm_ghash_4bit
118 .type   gcm_ghash_4bit,@function
119 .align  16
120 gcm_ghash_4bit:
121 .cfi_startproc  
122         pushq   %rbx
123 .cfi_adjust_cfa_offset  8
124 .cfi_offset     %rbx,-16
125         pushq   %rbp
126 .cfi_adjust_cfa_offset  8
127 .cfi_offset     %rbp,-24
128         pushq   %r12
129 .cfi_adjust_cfa_offset  8
130 .cfi_offset     %r12,-32
131         pushq   %r13
132 .cfi_adjust_cfa_offset  8
133 .cfi_offset     %r13,-40
134         pushq   %r14
135 .cfi_adjust_cfa_offset  8
136 .cfi_offset     %r14,-48
137         pushq   %r15
138 .cfi_adjust_cfa_offset  8
139 .cfi_offset     %r15,-56
140         subq    $280,%rsp
141 .cfi_adjust_cfa_offset  280
142 .Lghash_prologue:
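/*
 * Stash the inp/len arguments, then expand Htable into stack-local form:
 * a 16-byte table of pre-shifted low nibbles at (%rsp) and a 256-byte copy
 * of Htable shifted right by four bits around %rbp, which lets the main
 * loop consume a full byte of Xi (two nibbles) per step together with the
 * .Lrem_8bit remainder table.
 */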
143         movq    %rdx,%r14
144         movq    %rcx,%r15
145         subq    $-128,%rsi
146         leaq    16+128(%rsp),%rbp
147         xorl    %edx,%edx
148         movq    0+0-128(%rsi),%r8
149         movq    0+8-128(%rsi),%rax
150         movb    %al,%dl
151         shrq    $4,%rax
152         movq    %r8,%r10
153         shrq    $4,%r8
154         movq    16+0-128(%rsi),%r9
155         shlb    $4,%dl
156         movq    16+8-128(%rsi),%rbx
157         shlq    $60,%r10
158         movb    %dl,0(%rsp)
159         orq     %r10,%rax
160         movb    %bl,%dl
161         shrq    $4,%rbx
162         movq    %r9,%r10
163         shrq    $4,%r9
164         movq    %r8,0(%rbp)
165         movq    32+0-128(%rsi),%r8
166         shlb    $4,%dl
167         movq    %rax,0-128(%rbp)
168         movq    32+8-128(%rsi),%rax
169         shlq    $60,%r10
170         movb    %dl,1(%rsp)
171         orq     %r10,%rbx
172         movb    %al,%dl
173         shrq    $4,%rax
174         movq    %r8,%r10
175         shrq    $4,%r8
176         movq    %r9,8(%rbp)
177         movq    48+0-128(%rsi),%r9
178         shlb    $4,%dl
179         movq    %rbx,8-128(%rbp)
180         movq    48+8-128(%rsi),%rbx
181         shlq    $60,%r10
182         movb    %dl,2(%rsp)
183         orq     %r10,%rax
184         movb    %bl,%dl
185         shrq    $4,%rbx
186         movq    %r9,%r10
187         shrq    $4,%r9
188         movq    %r8,16(%rbp)
189         movq    64+0-128(%rsi),%r8
190         shlb    $4,%dl
191         movq    %rax,16-128(%rbp)
192         movq    64+8-128(%rsi),%rax
193         shlq    $60,%r10
194         movb    %dl,3(%rsp)
195         orq     %r10,%rbx
196         movb    %al,%dl
197         shrq    $4,%rax
198         movq    %r8,%r10
199         shrq    $4,%r8
200         movq    %r9,24(%rbp)
201         movq    80+0-128(%rsi),%r9
202         shlb    $4,%dl
203         movq    %rbx,24-128(%rbp)
204         movq    80+8-128(%rsi),%rbx
205         shlq    $60,%r10
206         movb    %dl,4(%rsp)
207         orq     %r10,%rax
208         movb    %bl,%dl
209         shrq    $4,%rbx
210         movq    %r9,%r10
211         shrq    $4,%r9
212         movq    %r8,32(%rbp)
213         movq    96+0-128(%rsi),%r8
214         shlb    $4,%dl
215         movq    %rax,32-128(%rbp)
216         movq    96+8-128(%rsi),%rax
217         shlq    $60,%r10
218         movb    %dl,5(%rsp)
219         orq     %r10,%rbx
220         movb    %al,%dl
221         shrq    $4,%rax
222         movq    %r8,%r10
223         shrq    $4,%r8
224         movq    %r9,40(%rbp)
225         movq    112+0-128(%rsi),%r9
226         shlb    $4,%dl
227         movq    %rbx,40-128(%rbp)
228         movq    112+8-128(%rsi),%rbx
229         shlq    $60,%r10
230         movb    %dl,6(%rsp)
231         orq     %r10,%rax
232         movb    %bl,%dl
233         shrq    $4,%rbx
234         movq    %r9,%r10
235         shrq    $4,%r9
236         movq    %r8,48(%rbp)
237         movq    128+0-128(%rsi),%r8
238         shlb    $4,%dl
239         movq    %rax,48-128(%rbp)
240         movq    128+8-128(%rsi),%rax
241         shlq    $60,%r10
242         movb    %dl,7(%rsp)
243         orq     %r10,%rbx
244         movb    %al,%dl
245         shrq    $4,%rax
246         movq    %r8,%r10
247         shrq    $4,%r8
248         movq    %r9,56(%rbp)
249         movq    144+0-128(%rsi),%r9
250         shlb    $4,%dl
251         movq    %rbx,56-128(%rbp)
252         movq    144+8-128(%rsi),%rbx
253         shlq    $60,%r10
254         movb    %dl,8(%rsp)
255         orq     %r10,%rax
256         movb    %bl,%dl
257         shrq    $4,%rbx
258         movq    %r9,%r10
259         shrq    $4,%r9
260         movq    %r8,64(%rbp)
261         movq    160+0-128(%rsi),%r8
262         shlb    $4,%dl
263         movq    %rax,64-128(%rbp)
264         movq    160+8-128(%rsi),%rax
265         shlq    $60,%r10
266         movb    %dl,9(%rsp)
267         orq     %r10,%rbx
268         movb    %al,%dl
269         shrq    $4,%rax
270         movq    %r8,%r10
271         shrq    $4,%r8
272         movq    %r9,72(%rbp)
273         movq    176+0-128(%rsi),%r9
274         shlb    $4,%dl
275         movq    %rbx,72-128(%rbp)
276         movq    176+8-128(%rsi),%rbx
277         shlq    $60,%r10
278         movb    %dl,10(%rsp)
279         orq     %r10,%rax
280         movb    %bl,%dl
281         shrq    $4,%rbx
282         movq    %r9,%r10
283         shrq    $4,%r9
284         movq    %r8,80(%rbp)
285         movq    192+0-128(%rsi),%r8
286         shlb    $4,%dl
287         movq    %rax,80-128(%rbp)
288         movq    192+8-128(%rsi),%rax
289         shlq    $60,%r10
290         movb    %dl,11(%rsp)
291         orq     %r10,%rbx
292         movb    %al,%dl
293         shrq    $4,%rax
294         movq    %r8,%r10
295         shrq    $4,%r8
296         movq    %r9,88(%rbp)
297         movq    208+0-128(%rsi),%r9
298         shlb    $4,%dl
299         movq    %rbx,88-128(%rbp)
300         movq    208+8-128(%rsi),%rbx
301         shlq    $60,%r10
302         movb    %dl,12(%rsp)
303         orq     %r10,%rax
304         movb    %bl,%dl
305         shrq    $4,%rbx
306         movq    %r9,%r10
307         shrq    $4,%r9
308         movq    %r8,96(%rbp)
309         movq    224+0-128(%rsi),%r8
310         shlb    $4,%dl
311         movq    %rax,96-128(%rbp)
312         movq    224+8-128(%rsi),%rax
313         shlq    $60,%r10
314         movb    %dl,13(%rsp)
315         orq     %r10,%rbx
316         movb    %al,%dl
317         shrq    $4,%rax
318         movq    %r8,%r10
319         shrq    $4,%r8
320         movq    %r9,104(%rbp)
321         movq    240+0-128(%rsi),%r9
322         shlb    $4,%dl
323         movq    %rbx,104-128(%rbp)
324         movq    240+8-128(%rsi),%rbx
325         shlq    $60,%r10
326         movb    %dl,14(%rsp)
327         orq     %r10,%rax
328         movb    %bl,%dl
329         shrq    $4,%rbx
330         movq    %r9,%r10
331         shrq    $4,%r9
332         movq    %r8,112(%rbp)
333         shlb    $4,%dl
334         movq    %rax,112-128(%rbp)
335         shlq    $60,%r10
336         movb    %dl,15(%rsp)
337         orq     %r10,%rbx
338         movq    %r9,120(%rbp)
339         movq    %rbx,120-128(%rbp)
340         addq    $-128,%rsi
341         movq    8(%rdi),%r8
342         movq    0(%rdi),%r9
343         addq    %r14,%r15
344         leaq    .Lrem_8bit(%rip),%r11
345         jmp     .Louter_loop
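/*
 * Main loop: each .Louter_loop iteration XORs one 16-byte input block into
 * Xi and multiplies the result by H, one byte of Xi at a time, combining
 * the stack tables built above with the .Lrem_8bit remainder table.
 */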
346 .align  16
347 .Louter_loop:
348         xorq    (%r14),%r9
349         movq    8(%r14),%rdx
350         leaq    16(%r14),%r14
351         xorq    %r8,%rdx
352         movq    %r9,(%rdi)
353         movq    %rdx,8(%rdi)
354         shrq    $32,%rdx
355         xorq    %rax,%rax
356         roll    $8,%edx
357         movb    %dl,%al
358         movzbl  %dl,%ebx
359         shlb    $4,%al
360         shrl    $4,%ebx
361         roll    $8,%edx
362         movq    8(%rsi,%rax,1),%r8
363         movq    (%rsi,%rax,1),%r9
364         movb    %dl,%al
365         movzbl  %dl,%ecx
366         shlb    $4,%al
367         movzbq  (%rsp,%rbx,1),%r12
368         shrl    $4,%ecx
369         xorq    %r8,%r12
370         movq    %r9,%r10
371         shrq    $8,%r8
372         movzbq  %r12b,%r12
373         shrq    $8,%r9
374         xorq    -128(%rbp,%rbx,8),%r8
375         shlq    $56,%r10
376         xorq    (%rbp,%rbx,8),%r9
377         roll    $8,%edx
378         xorq    8(%rsi,%rax,1),%r8
379         xorq    (%rsi,%rax,1),%r9
380         movb    %dl,%al
381         xorq    %r10,%r8
382         movzwq  (%r11,%r12,2),%r12
383         movzbl  %dl,%ebx
384         shlb    $4,%al
385         movzbq  (%rsp,%rcx,1),%r13
386         shrl    $4,%ebx
387         shlq    $48,%r12
388         xorq    %r8,%r13
389         movq    %r9,%r10
390         xorq    %r12,%r9
391         shrq    $8,%r8
392         movzbq  %r13b,%r13
393         shrq    $8,%r9
394         xorq    -128(%rbp,%rcx,8),%r8
395         shlq    $56,%r10
396         xorq    (%rbp,%rcx,8),%r9
397         roll    $8,%edx
398         xorq    8(%rsi,%rax,1),%r8
399         xorq    (%rsi,%rax,1),%r9
400         movb    %dl,%al
401         xorq    %r10,%r8
402         movzwq  (%r11,%r13,2),%r13
403         movzbl  %dl,%ecx
404         shlb    $4,%al
405         movzbq  (%rsp,%rbx,1),%r12
406         shrl    $4,%ecx
407         shlq    $48,%r13
408         xorq    %r8,%r12
409         movq    %r9,%r10
410         xorq    %r13,%r9
411         shrq    $8,%r8
412         movzbq  %r12b,%r12
413         movl    8(%rdi),%edx
414         shrq    $8,%r9
415         xorq    -128(%rbp,%rbx,8),%r8
416         shlq    $56,%r10
417         xorq    (%rbp,%rbx,8),%r9
418         roll    $8,%edx
419         xorq    8(%rsi,%rax,1),%r8
420         xorq    (%rsi,%rax,1),%r9
421         movb    %dl,%al
422         xorq    %r10,%r8
423         movzwq  (%r11,%r12,2),%r12
424         movzbl  %dl,%ebx
425         shlb    $4,%al
426         movzbq  (%rsp,%rcx,1),%r13
427         shrl    $4,%ebx
428         shlq    $48,%r12
429         xorq    %r8,%r13
430         movq    %r9,%r10
431         xorq    %r12,%r9
432         shrq    $8,%r8
433         movzbq  %r13b,%r13
434         shrq    $8,%r9
435         xorq    -128(%rbp,%rcx,8),%r8
436         shlq    $56,%r10
437         xorq    (%rbp,%rcx,8),%r9
438         roll    $8,%edx
439         xorq    8(%rsi,%rax,1),%r8
440         xorq    (%rsi,%rax,1),%r9
441         movb    %dl,%al
442         xorq    %r10,%r8
443         movzwq  (%r11,%r13,2),%r13
444         movzbl  %dl,%ecx
445         shlb    $4,%al
446         movzbq  (%rsp,%rbx,1),%r12
447         shrl    $4,%ecx
448         shlq    $48,%r13
449         xorq    %r8,%r12
450         movq    %r9,%r10
451         xorq    %r13,%r9
452         shrq    $8,%r8
453         movzbq  %r12b,%r12
454         shrq    $8,%r9
455         xorq    -128(%rbp,%rbx,8),%r8
456         shlq    $56,%r10
457         xorq    (%rbp,%rbx,8),%r9
458         roll    $8,%edx
459         xorq    8(%rsi,%rax,1),%r8
460         xorq    (%rsi,%rax,1),%r9
461         movb    %dl,%al
462         xorq    %r10,%r8
463         movzwq  (%r11,%r12,2),%r12
464         movzbl  %dl,%ebx
465         shlb    $4,%al
466         movzbq  (%rsp,%rcx,1),%r13
467         shrl    $4,%ebx
468         shlq    $48,%r12
469         xorq    %r8,%r13
470         movq    %r9,%r10
471         xorq    %r12,%r9
472         shrq    $8,%r8
473         movzbq  %r13b,%r13
474         shrq    $8,%r9
475         xorq    -128(%rbp,%rcx,8),%r8
476         shlq    $56,%r10
477         xorq    (%rbp,%rcx,8),%r9
478         roll    $8,%edx
479         xorq    8(%rsi,%rax,1),%r8
480         xorq    (%rsi,%rax,1),%r9
481         movb    %dl,%al
482         xorq    %r10,%r8
483         movzwq  (%r11,%r13,2),%r13
484         movzbl  %dl,%ecx
485         shlb    $4,%al
486         movzbq  (%rsp,%rbx,1),%r12
487         shrl    $4,%ecx
488         shlq    $48,%r13
489         xorq    %r8,%r12
490         movq    %r9,%r10
491         xorq    %r13,%r9
492         shrq    $8,%r8
493         movzbq  %r12b,%r12
494         movl    4(%rdi),%edx
495         shrq    $8,%r9
496         xorq    -128(%rbp,%rbx,8),%r8
497         shlq    $56,%r10
498         xorq    (%rbp,%rbx,8),%r9
499         roll    $8,%edx
500         xorq    8(%rsi,%rax,1),%r8
501         xorq    (%rsi,%rax,1),%r9
502         movb    %dl,%al
503         xorq    %r10,%r8
504         movzwq  (%r11,%r12,2),%r12
505         movzbl  %dl,%ebx
506         shlb    $4,%al
507         movzbq  (%rsp,%rcx,1),%r13
508         shrl    $4,%ebx
509         shlq    $48,%r12
510         xorq    %r8,%r13
511         movq    %r9,%r10
512         xorq    %r12,%r9
513         shrq    $8,%r8
514         movzbq  %r13b,%r13
515         shrq    $8,%r9
516         xorq    -128(%rbp,%rcx,8),%r8
517         shlq    $56,%r10
518         xorq    (%rbp,%rcx,8),%r9
519         roll    $8,%edx
520         xorq    8(%rsi,%rax,1),%r8
521         xorq    (%rsi,%rax,1),%r9
522         movb    %dl,%al
523         xorq    %r10,%r8
524         movzwq  (%r11,%r13,2),%r13
525         movzbl  %dl,%ecx
526         shlb    $4,%al
527         movzbq  (%rsp,%rbx,1),%r12
528         shrl    $4,%ecx
529         shlq    $48,%r13
530         xorq    %r8,%r12
531         movq    %r9,%r10
532         xorq    %r13,%r9
533         shrq    $8,%r8
534         movzbq  %r12b,%r12
535         shrq    $8,%r9
536         xorq    -128(%rbp,%rbx,8),%r8
537         shlq    $56,%r10
538         xorq    (%rbp,%rbx,8),%r9
539         roll    $8,%edx
540         xorq    8(%rsi,%rax,1),%r8
541         xorq    (%rsi,%rax,1),%r9
542         movb    %dl,%al
543         xorq    %r10,%r8
544         movzwq  (%r11,%r12,2),%r12
545         movzbl  %dl,%ebx
546         shlb    $4,%al
547         movzbq  (%rsp,%rcx,1),%r13
548         shrl    $4,%ebx
549         shlq    $48,%r12
550         xorq    %r8,%r13
551         movq    %r9,%r10
552         xorq    %r12,%r9
553         shrq    $8,%r8
554         movzbq  %r13b,%r13
555         shrq    $8,%r9
556         xorq    -128(%rbp,%rcx,8),%r8
557         shlq    $56,%r10
558         xorq    (%rbp,%rcx,8),%r9
559         roll    $8,%edx
560         xorq    8(%rsi,%rax,1),%r8
561         xorq    (%rsi,%rax,1),%r9
562         movb    %dl,%al
563         xorq    %r10,%r8
564         movzwq  (%r11,%r13,2),%r13
565         movzbl  %dl,%ecx
566         shlb    $4,%al
567         movzbq  (%rsp,%rbx,1),%r12
568         shrl    $4,%ecx
569         shlq    $48,%r13
570         xorq    %r8,%r12
571         movq    %r9,%r10
572         xorq    %r13,%r9
573         shrq    $8,%r8
574         movzbq  %r12b,%r12
575         movl    0(%rdi),%edx
576         shrq    $8,%r9
577         xorq    -128(%rbp,%rbx,8),%r8
578         shlq    $56,%r10
579         xorq    (%rbp,%rbx,8),%r9
580         roll    $8,%edx
581         xorq    8(%rsi,%rax,1),%r8
582         xorq    (%rsi,%rax,1),%r9
583         movb    %dl,%al
584         xorq    %r10,%r8
585         movzwq  (%r11,%r12,2),%r12
586         movzbl  %dl,%ebx
587         shlb    $4,%al
588         movzbq  (%rsp,%rcx,1),%r13
589         shrl    $4,%ebx
590         shlq    $48,%r12
591         xorq    %r8,%r13
592         movq    %r9,%r10
593         xorq    %r12,%r9
594         shrq    $8,%r8
595         movzbq  %r13b,%r13
596         shrq    $8,%r9
597         xorq    -128(%rbp,%rcx,8),%r8
598         shlq    $56,%r10
599         xorq    (%rbp,%rcx,8),%r9
600         roll    $8,%edx
601         xorq    8(%rsi,%rax,1),%r8
602         xorq    (%rsi,%rax,1),%r9
603         movb    %dl,%al
604         xorq    %r10,%r8
605         movzwq  (%r11,%r13,2),%r13
606         movzbl  %dl,%ecx
607         shlb    $4,%al
608         movzbq  (%rsp,%rbx,1),%r12
609         shrl    $4,%ecx
610         shlq    $48,%r13
611         xorq    %r8,%r12
612         movq    %r9,%r10
613         xorq    %r13,%r9
614         shrq    $8,%r8
615         movzbq  %r12b,%r12
616         shrq    $8,%r9
617         xorq    -128(%rbp,%rbx,8),%r8
618         shlq    $56,%r10
619         xorq    (%rbp,%rbx,8),%r9
620         roll    $8,%edx
621         xorq    8(%rsi,%rax,1),%r8
622         xorq    (%rsi,%rax,1),%r9
623         movb    %dl,%al
624         xorq    %r10,%r8
625         movzwq  (%r11,%r12,2),%r12
626         movzbl  %dl,%ebx
627         shlb    $4,%al
628         movzbq  (%rsp,%rcx,1),%r13
629         shrl    $4,%ebx
630         shlq    $48,%r12
631         xorq    %r8,%r13
632         movq    %r9,%r10
633         xorq    %r12,%r9
634         shrq    $8,%r8
635         movzbq  %r13b,%r13
636         shrq    $8,%r9
637         xorq    -128(%rbp,%rcx,8),%r8
638         shlq    $56,%r10
639         xorq    (%rbp,%rcx,8),%r9
640         roll    $8,%edx
641         xorq    8(%rsi,%rax,1),%r8
642         xorq    (%rsi,%rax,1),%r9
643         movb    %dl,%al
644         xorq    %r10,%r8
645         movzwq  (%r11,%r13,2),%r13
646         movzbl  %dl,%ecx
647         shlb    $4,%al
648         movzbq  (%rsp,%rbx,1),%r12
649         andl    $240,%ecx
650         shlq    $48,%r13
651         xorq    %r8,%r12
652         movq    %r9,%r10
653         xorq    %r13,%r9
654         shrq    $8,%r8
655         movzbq  %r12b,%r12
656         movl    -4(%rdi),%edx
657         shrq    $8,%r9
658         xorq    -128(%rbp,%rbx,8),%r8
659         shlq    $56,%r10
660         xorq    (%rbp,%rbx,8),%r9
661         movzwq  (%r11,%r12,2),%r12
662         xorq    8(%rsi,%rax,1),%r8
663         xorq    (%rsi,%rax,1),%r9
664         shlq    $48,%r12
665         xorq    %r10,%r8
666         xorq    %r12,%r9
667         movzbq  %r8b,%r13
668         shrq    $4,%r8
669         movq    %r9,%r10
670         shlb    $4,%r13b
671         shrq    $4,%r9
672         xorq    8(%rsi,%rcx,1),%r8
673         movzwq  (%r11,%r13,2),%r13
674         shlq    $60,%r10
675         xorq    (%rsi,%rcx,1),%r9
676         xorq    %r10,%r8
677         shlq    $48,%r13
678         bswapq  %r8
679         xorq    %r13,%r9
680         bswapq  %r9
681         cmpq    %r15,%r14
682         jb      .Louter_loop
683         movq    %r8,8(%rdi)
684         movq    %r9,(%rdi)
685
686         leaq    280+48(%rsp),%rsi
687 .cfi_def_cfa    %rsi,8
688         movq    -48(%rsi),%r15
689 .cfi_restore    %r15
690         movq    -40(%rsi),%r14
691 .cfi_restore    %r14
692         movq    -32(%rsi),%r13
693 .cfi_restore    %r13
694         movq    -24(%rsi),%r12
695 .cfi_restore    %r12
696         movq    -16(%rsi),%rbp
697 .cfi_restore    %rbp
698         movq    -8(%rsi),%rbx
699 .cfi_restore    %rbx
700         leaq    0(%rsi),%rsp
701 .cfi_def_cfa_register   %rsp
702 .Lghash_epilogue:
703         .byte   0xf3,0xc3
704 .cfi_endproc    
705 .size   gcm_ghash_4bit,.-gcm_ghash_4bit
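/*
 * gcm_init_clmul(Htable, H): %rdi -> output table, %rsi -> 16-byte hash key.
 * H is shuffled into the required word order and doubled modulo the field
 * polynomial (.L0x1c2_polynomial), then H^1..H^4 are computed with
 * PCLMULQDQ and stored together with their xor'd halves (the third
 * Karatsuba operand) for the CLMUL hashing routines.
 */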
706 .globl  gcm_init_clmul
707 .type   gcm_init_clmul,@function
708 .align  16
709 gcm_init_clmul:
710 .L_init_clmul:
711         movdqu  (%rsi),%xmm2
712         pshufd  $78,%xmm2,%xmm2
713
714
715         pshufd  $255,%xmm2,%xmm4
716         movdqa  %xmm2,%xmm3
717         psllq   $1,%xmm2
718         pxor    %xmm5,%xmm5
719         psrlq   $63,%xmm3
720         pcmpgtd %xmm4,%xmm5
721         pslldq  $8,%xmm3
722         por     %xmm3,%xmm2
723
724
725         pand    .L0x1c2_polynomial(%rip),%xmm5
726         pxor    %xmm5,%xmm2
727
728
729         pshufd  $78,%xmm2,%xmm6
730         movdqa  %xmm2,%xmm0
731         pxor    %xmm2,%xmm6
732         movdqa  %xmm0,%xmm1
733         pshufd  $78,%xmm0,%xmm3
734         pxor    %xmm0,%xmm3
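/*
 * The .byte 102,15,58,68,... sequences below are PCLMULQDQ instructions
 * emitted as raw opcode bytes (so the file assembles even where the
 * mnemonic is unknown); 102,15,56,0,... is pshufb, 102,15,58,15,... is
 * palignr, and .byte 0xf3,0xc3 is "rep ret".
 */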
735 .byte   102,15,58,68,194,0
736 .byte   102,15,58,68,202,17
737 .byte   102,15,58,68,222,0
738         pxor    %xmm0,%xmm3
739         pxor    %xmm1,%xmm3
740
741         movdqa  %xmm3,%xmm4
742         psrldq  $8,%xmm3
743         pslldq  $8,%xmm4
744         pxor    %xmm3,%xmm1
745         pxor    %xmm4,%xmm0
746
747         movdqa  %xmm0,%xmm4
748         movdqa  %xmm0,%xmm3
749         psllq   $5,%xmm0
750         pxor    %xmm0,%xmm3
751         psllq   $1,%xmm0
752         pxor    %xmm3,%xmm0
753         psllq   $57,%xmm0
754         movdqa  %xmm0,%xmm3
755         pslldq  $8,%xmm0
756         psrldq  $8,%xmm3
757         pxor    %xmm4,%xmm0
758         pxor    %xmm3,%xmm1
759
760
761         movdqa  %xmm0,%xmm4
762         psrlq   $1,%xmm0
763         pxor    %xmm4,%xmm1
764         pxor    %xmm0,%xmm4
765         psrlq   $5,%xmm0
766         pxor    %xmm4,%xmm0
767         psrlq   $1,%xmm0
768         pxor    %xmm1,%xmm0
769         pshufd  $78,%xmm2,%xmm3
770         pshufd  $78,%xmm0,%xmm4
771         pxor    %xmm2,%xmm3
772         movdqu  %xmm2,0(%rdi)
773         pxor    %xmm0,%xmm4
774         movdqu  %xmm0,16(%rdi)
775 .byte   102,15,58,15,227,8
776         movdqu  %xmm4,32(%rdi)
777         movdqa  %xmm0,%xmm1
778         pshufd  $78,%xmm0,%xmm3
779         pxor    %xmm0,%xmm3
780 .byte   102,15,58,68,194,0
781 .byte   102,15,58,68,202,17
782 .byte   102,15,58,68,222,0
783         pxor    %xmm0,%xmm3
784         pxor    %xmm1,%xmm3
785
786         movdqa  %xmm3,%xmm4
787         psrldq  $8,%xmm3
788         pslldq  $8,%xmm4
789         pxor    %xmm3,%xmm1
790         pxor    %xmm4,%xmm0
791
792         movdqa  %xmm0,%xmm4
793         movdqa  %xmm0,%xmm3
794         psllq   $5,%xmm0
795         pxor    %xmm0,%xmm3
796         psllq   $1,%xmm0
797         pxor    %xmm3,%xmm0
798         psllq   $57,%xmm0
799         movdqa  %xmm0,%xmm3
800         pslldq  $8,%xmm0
801         psrldq  $8,%xmm3
802         pxor    %xmm4,%xmm0
803         pxor    %xmm3,%xmm1
804
805
806         movdqa  %xmm0,%xmm4
807         psrlq   $1,%xmm0
808         pxor    %xmm4,%xmm1
809         pxor    %xmm0,%xmm4
810         psrlq   $5,%xmm0
811         pxor    %xmm4,%xmm0
812         psrlq   $1,%xmm0
813         pxor    %xmm1,%xmm0
814         movdqa  %xmm0,%xmm5
815         movdqa  %xmm0,%xmm1
816         pshufd  $78,%xmm0,%xmm3
817         pxor    %xmm0,%xmm3
818 .byte   102,15,58,68,194,0
819 .byte   102,15,58,68,202,17
820 .byte   102,15,58,68,222,0
821         pxor    %xmm0,%xmm3
822         pxor    %xmm1,%xmm3
823
824         movdqa  %xmm3,%xmm4
825         psrldq  $8,%xmm3
826         pslldq  $8,%xmm4
827         pxor    %xmm3,%xmm1
828         pxor    %xmm4,%xmm0
829
830         movdqa  %xmm0,%xmm4
831         movdqa  %xmm0,%xmm3
832         psllq   $5,%xmm0
833         pxor    %xmm0,%xmm3
834         psllq   $1,%xmm0
835         pxor    %xmm3,%xmm0
836         psllq   $57,%xmm0
837         movdqa  %xmm0,%xmm3
838         pslldq  $8,%xmm0
839         psrldq  $8,%xmm3
840         pxor    %xmm4,%xmm0
841         pxor    %xmm3,%xmm1
842
843
844         movdqa  %xmm0,%xmm4
845         psrlq   $1,%xmm0
846         pxor    %xmm4,%xmm1
847         pxor    %xmm0,%xmm4
848         psrlq   $5,%xmm0
849         pxor    %xmm4,%xmm0
850         psrlq   $1,%xmm0
851         pxor    %xmm1,%xmm0
852         pshufd  $78,%xmm5,%xmm3
853         pshufd  $78,%xmm0,%xmm4
854         pxor    %xmm5,%xmm3
855         movdqu  %xmm5,48(%rdi)
856         pxor    %xmm0,%xmm4
857         movdqu  %xmm0,64(%rdi)
858 .byte   102,15,58,15,227,8
859         movdqu  %xmm4,80(%rdi)
860         .byte   0xf3,0xc3
861 .size   gcm_init_clmul,.-gcm_init_clmul
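/*
 * gcm_gmult_clmul(Xi, Htable): single-block multiply.  Xi is byte-swapped
 * with .Lbswap_mask, multiplied by H via three PCLMULQDQ (Karatsuba),
 * reduced modulo the GHASH polynomial, and swapped back.
 */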
862 .globl  gcm_gmult_clmul
863 .type   gcm_gmult_clmul,@function
864 .align  16
865 gcm_gmult_clmul:
866 .L_gmult_clmul:
867         movdqu  (%rdi),%xmm0
868         movdqa  .Lbswap_mask(%rip),%xmm5
869         movdqu  (%rsi),%xmm2
870         movdqu  32(%rsi),%xmm4
871 .byte   102,15,56,0,197
872         movdqa  %xmm0,%xmm1
873         pshufd  $78,%xmm0,%xmm3
874         pxor    %xmm0,%xmm3
875 .byte   102,15,58,68,194,0
876 .byte   102,15,58,68,202,17
877 .byte   102,15,58,68,220,0
878         pxor    %xmm0,%xmm3
879         pxor    %xmm1,%xmm3
880
881         movdqa  %xmm3,%xmm4
882         psrldq  $8,%xmm3
883         pslldq  $8,%xmm4
884         pxor    %xmm3,%xmm1
885         pxor    %xmm4,%xmm0
886
887         movdqa  %xmm0,%xmm4
888         movdqa  %xmm0,%xmm3
889         psllq   $5,%xmm0
890         pxor    %xmm0,%xmm3
891         psllq   $1,%xmm0
892         pxor    %xmm3,%xmm0
893         psllq   $57,%xmm0
894         movdqa  %xmm0,%xmm3
895         pslldq  $8,%xmm0
896         psrldq  $8,%xmm3
897         pxor    %xmm4,%xmm0
898         pxor    %xmm3,%xmm1
899
900
901         movdqa  %xmm0,%xmm4
902         psrlq   $1,%xmm0
903         pxor    %xmm4,%xmm1
904         pxor    %xmm0,%xmm4
905         psrlq   $5,%xmm0
906         pxor    %xmm4,%xmm0
907         psrlq   $1,%xmm0
908         pxor    %xmm1,%xmm0
909 .byte   102,15,56,0,197
910         movdqu  %xmm0,(%rdi)
911         .byte   0xf3,0xc3
912 .size   gcm_gmult_clmul,.-gcm_gmult_clmul
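/*
 * gcm_ghash_clmul(Xi, Htable, inp, len): bulk hashing with PCLMULQDQ.
 * When the length and the capability word OPENSSL_ia32cap_P permit,
 * .Lmod4_loop aggregates four blocks per iteration using H^1..H^4 from
 * Htable; otherwise .Lmod_loop handles two blocks at a time, with
 * .Leven_tail/.Lodd_tail finishing any remainder.
 */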
913 .globl  gcm_ghash_clmul
914 .type   gcm_ghash_clmul,@function
915 .align  32
916 gcm_ghash_clmul:
917 .L_ghash_clmul:
918         movdqa  .Lbswap_mask(%rip),%xmm10
919
920         movdqu  (%rdi),%xmm0
921         movdqu  (%rsi),%xmm2
922         movdqu  32(%rsi),%xmm7
923 .byte   102,65,15,56,0,194
924
925         subq    $0x10,%rcx
926         jz      .Lodd_tail
927
928         movdqu  16(%rsi),%xmm6
929         movl    OPENSSL_ia32cap_P+4(%rip),%eax
930         cmpq    $0x30,%rcx
931         jb      .Lskip4x
932
933         andl    $71303168,%eax
934         cmpl    $4194304,%eax
935         je      .Lskip4x
936
937         subq    $0x30,%rcx
938         movq    $0xA040608020C0E000,%rax
939         movdqu  48(%rsi),%xmm14
940         movdqu  64(%rsi),%xmm15
941
942
943
944
945         movdqu  48(%rdx),%xmm3
946         movdqu  32(%rdx),%xmm11
947 .byte   102,65,15,56,0,218
948 .byte   102,69,15,56,0,218
949         movdqa  %xmm3,%xmm5
950         pshufd  $78,%xmm3,%xmm4
951         pxor    %xmm3,%xmm4
952 .byte   102,15,58,68,218,0
953 .byte   102,15,58,68,234,17
954 .byte   102,15,58,68,231,0
955
956         movdqa  %xmm11,%xmm13
957         pshufd  $78,%xmm11,%xmm12
958         pxor    %xmm11,%xmm12
959 .byte   102,68,15,58,68,222,0
960 .byte   102,68,15,58,68,238,17
961 .byte   102,68,15,58,68,231,16
962         xorps   %xmm11,%xmm3
963         xorps   %xmm13,%xmm5
964         movups  80(%rsi),%xmm7
965         xorps   %xmm12,%xmm4
966
967         movdqu  16(%rdx),%xmm11
968         movdqu  0(%rdx),%xmm8
969 .byte   102,69,15,56,0,218
970 .byte   102,69,15,56,0,194
971         movdqa  %xmm11,%xmm13
972         pshufd  $78,%xmm11,%xmm12
973         pxor    %xmm8,%xmm0
974         pxor    %xmm11,%xmm12
975 .byte   102,69,15,58,68,222,0
976         movdqa  %xmm0,%xmm1
977         pshufd  $78,%xmm0,%xmm8
978         pxor    %xmm0,%xmm8
979 .byte   102,69,15,58,68,238,17
980 .byte   102,68,15,58,68,231,0
981         xorps   %xmm11,%xmm3
982         xorps   %xmm13,%xmm5
983
984         leaq    64(%rdx),%rdx
985         subq    $0x40,%rcx
986         jc      .Ltail4x
987
988         jmp     .Lmod4_loop
989 .align  32
990 .Lmod4_loop:
991 .byte   102,65,15,58,68,199,0
992         xorps   %xmm12,%xmm4
993         movdqu  48(%rdx),%xmm11
994 .byte   102,69,15,56,0,218
995 .byte   102,65,15,58,68,207,17
996         xorps   %xmm3,%xmm0
997         movdqu  32(%rdx),%xmm3
998         movdqa  %xmm11,%xmm13
999 .byte   102,68,15,58,68,199,16
1000         pshufd  $78,%xmm11,%xmm12
1001         xorps   %xmm5,%xmm1
1002         pxor    %xmm11,%xmm12
1003 .byte   102,65,15,56,0,218
1004         movups  32(%rsi),%xmm7
1005         xorps   %xmm4,%xmm8
1006 .byte   102,68,15,58,68,218,0
1007         pshufd  $78,%xmm3,%xmm4
1008
1009         pxor    %xmm0,%xmm8
1010         movdqa  %xmm3,%xmm5
1011         pxor    %xmm1,%xmm8
1012         pxor    %xmm3,%xmm4
1013         movdqa  %xmm8,%xmm9
1014 .byte   102,68,15,58,68,234,17
1015         pslldq  $8,%xmm8
1016         psrldq  $8,%xmm9
1017         pxor    %xmm8,%xmm0
1018         movdqa  .L7_mask(%rip),%xmm8
1019         pxor    %xmm9,%xmm1
1020 .byte   102,76,15,110,200
1021
1022         pand    %xmm0,%xmm8
1023 .byte   102,69,15,56,0,200
1024         pxor    %xmm0,%xmm9
1025 .byte   102,68,15,58,68,231,0
1026         psllq   $57,%xmm9
1027         movdqa  %xmm9,%xmm8
1028         pslldq  $8,%xmm9
1029 .byte   102,15,58,68,222,0
1030         psrldq  $8,%xmm8
1031         pxor    %xmm9,%xmm0
1032         pxor    %xmm8,%xmm1
1033         movdqu  0(%rdx),%xmm8
1034
1035         movdqa  %xmm0,%xmm9
1036         psrlq   $1,%xmm0
1037 .byte   102,15,58,68,238,17
1038         xorps   %xmm11,%xmm3
1039         movdqu  16(%rdx),%xmm11
1040 .byte   102,69,15,56,0,218
1041 .byte   102,15,58,68,231,16
1042         xorps   %xmm13,%xmm5
1043         movups  80(%rsi),%xmm7
1044 .byte   102,69,15,56,0,194
1045         pxor    %xmm9,%xmm1
1046         pxor    %xmm0,%xmm9
1047         psrlq   $5,%xmm0
1048
1049         movdqa  %xmm11,%xmm13
1050         pxor    %xmm12,%xmm4
1051         pshufd  $78,%xmm11,%xmm12
1052         pxor    %xmm9,%xmm0
1053         pxor    %xmm8,%xmm1
1054         pxor    %xmm11,%xmm12
1055 .byte   102,69,15,58,68,222,0
1056         psrlq   $1,%xmm0
1057         pxor    %xmm1,%xmm0
1058         movdqa  %xmm0,%xmm1
1059 .byte   102,69,15,58,68,238,17
1060         xorps   %xmm11,%xmm3
1061         pshufd  $78,%xmm0,%xmm8
1062         pxor    %xmm0,%xmm8
1063
1064 .byte   102,68,15,58,68,231,0
1065         xorps   %xmm13,%xmm5
1066
1067         leaq    64(%rdx),%rdx
1068         subq    $0x40,%rcx
1069         jnc     .Lmod4_loop
1070
1071 .Ltail4x:
1072 .byte   102,65,15,58,68,199,0
1073 .byte   102,65,15,58,68,207,17
1074 .byte   102,68,15,58,68,199,16
1075         xorps   %xmm12,%xmm4
1076         xorps   %xmm3,%xmm0
1077         xorps   %xmm5,%xmm1
1078         pxor    %xmm0,%xmm1
1079         pxor    %xmm4,%xmm8
1080
1081         pxor    %xmm1,%xmm8
1082         pxor    %xmm0,%xmm1
1083
1084         movdqa  %xmm8,%xmm9
1085         psrldq  $8,%xmm8
1086         pslldq  $8,%xmm9
1087         pxor    %xmm8,%xmm1
1088         pxor    %xmm9,%xmm0
1089
1090         movdqa  %xmm0,%xmm4
1091         movdqa  %xmm0,%xmm3
1092         psllq   $5,%xmm0
1093         pxor    %xmm0,%xmm3
1094         psllq   $1,%xmm0
1095         pxor    %xmm3,%xmm0
1096         psllq   $57,%xmm0
1097         movdqa  %xmm0,%xmm3
1098         pslldq  $8,%xmm0
1099         psrldq  $8,%xmm3
1100         pxor    %xmm4,%xmm0
1101         pxor    %xmm3,%xmm1
1102
1103
1104         movdqa  %xmm0,%xmm4
1105         psrlq   $1,%xmm0
1106         pxor    %xmm4,%xmm1
1107         pxor    %xmm0,%xmm4
1108         psrlq   $5,%xmm0
1109         pxor    %xmm4,%xmm0
1110         psrlq   $1,%xmm0
1111         pxor    %xmm1,%xmm0
1112         addq    $0x40,%rcx
1113         jz      .Ldone
1114         movdqu  32(%rsi),%xmm7
1115         subq    $0x10,%rcx
1116         jz      .Lodd_tail
1117 .Lskip4x:
1118
1119
1120
1121
1122
1123         movdqu  (%rdx),%xmm8
1124         movdqu  16(%rdx),%xmm3
1125 .byte   102,69,15,56,0,194
1126 .byte   102,65,15,56,0,218
1127         pxor    %xmm8,%xmm0
1128
1129         movdqa  %xmm3,%xmm5
1130         pshufd  $78,%xmm3,%xmm4
1131         pxor    %xmm3,%xmm4
1132 .byte   102,15,58,68,218,0
1133 .byte   102,15,58,68,234,17
1134 .byte   102,15,58,68,231,0
1135
1136         leaq    32(%rdx),%rdx
1137         nop
1138         subq    $0x20,%rcx
1139         jbe     .Leven_tail
1140         nop
1141         jmp     .Lmod_loop
1142
1143 .align  32
1144 .Lmod_loop:
1145         movdqa  %xmm0,%xmm1
1146         movdqa  %xmm4,%xmm8
1147         pshufd  $78,%xmm0,%xmm4
1148         pxor    %xmm0,%xmm4
1149
1150 .byte   102,15,58,68,198,0
1151 .byte   102,15,58,68,206,17
1152 .byte   102,15,58,68,231,16
1153
1154         pxor    %xmm3,%xmm0
1155         pxor    %xmm5,%xmm1
1156         movdqu  (%rdx),%xmm9
1157         pxor    %xmm0,%xmm8
1158 .byte   102,69,15,56,0,202
1159         movdqu  16(%rdx),%xmm3
1160
1161         pxor    %xmm1,%xmm8
1162         pxor    %xmm9,%xmm1
1163         pxor    %xmm8,%xmm4
1164 .byte   102,65,15,56,0,218
1165         movdqa  %xmm4,%xmm8
1166         psrldq  $8,%xmm8
1167         pslldq  $8,%xmm4
1168         pxor    %xmm8,%xmm1
1169         pxor    %xmm4,%xmm0
1170
1171         movdqa  %xmm3,%xmm5
1172
1173         movdqa  %xmm0,%xmm9
1174         movdqa  %xmm0,%xmm8
1175         psllq   $5,%xmm0
1176         pxor    %xmm0,%xmm8
1177 .byte   102,15,58,68,218,0
1178         psllq   $1,%xmm0
1179         pxor    %xmm8,%xmm0
1180         psllq   $57,%xmm0
1181         movdqa  %xmm0,%xmm8
1182         pslldq  $8,%xmm0
1183         psrldq  $8,%xmm8
1184         pxor    %xmm9,%xmm0
1185         pshufd  $78,%xmm5,%xmm4
1186         pxor    %xmm8,%xmm1
1187         pxor    %xmm5,%xmm4
1188
1189         movdqa  %xmm0,%xmm9
1190         psrlq   $1,%xmm0
1191 .byte   102,15,58,68,234,17
1192         pxor    %xmm9,%xmm1
1193         pxor    %xmm0,%xmm9
1194         psrlq   $5,%xmm0
1195         pxor    %xmm9,%xmm0
1196         leaq    32(%rdx),%rdx
1197         psrlq   $1,%xmm0
1198 .byte   102,15,58,68,231,0
1199         pxor    %xmm1,%xmm0
1200
1201         subq    $0x20,%rcx
1202         ja      .Lmod_loop
1203
1204 .Leven_tail:
1205         movdqa  %xmm0,%xmm1
1206         movdqa  %xmm4,%xmm8
1207         pshufd  $78,%xmm0,%xmm4
1208         pxor    %xmm0,%xmm4
1209
1210 .byte   102,15,58,68,198,0
1211 .byte   102,15,58,68,206,17
1212 .byte   102,15,58,68,231,16
1213
1214         pxor    %xmm3,%xmm0
1215         pxor    %xmm5,%xmm1
1216         pxor    %xmm0,%xmm8
1217         pxor    %xmm1,%xmm8
1218         pxor    %xmm8,%xmm4
1219         movdqa  %xmm4,%xmm8
1220         psrldq  $8,%xmm8
1221         pslldq  $8,%xmm4
1222         pxor    %xmm8,%xmm1
1223         pxor    %xmm4,%xmm0
1224
1225         movdqa  %xmm0,%xmm4
1226         movdqa  %xmm0,%xmm3
1227         psllq   $5,%xmm0
1228         pxor    %xmm0,%xmm3
1229         psllq   $1,%xmm0
1230         pxor    %xmm3,%xmm0
1231         psllq   $57,%xmm0
1232         movdqa  %xmm0,%xmm3
1233         pslldq  $8,%xmm0
1234         psrldq  $8,%xmm3
1235         pxor    %xmm4,%xmm0
1236         pxor    %xmm3,%xmm1
1237
1238
1239         movdqa  %xmm0,%xmm4
1240         psrlq   $1,%xmm0
1241         pxor    %xmm4,%xmm1
1242         pxor    %xmm0,%xmm4
1243         psrlq   $5,%xmm0
1244         pxor    %xmm4,%xmm0
1245         psrlq   $1,%xmm0
1246         pxor    %xmm1,%xmm0
1247         testq   %rcx,%rcx
1248         jnz     .Ldone
1249
1250 .Lodd_tail:
1251         movdqu  (%rdx),%xmm8
1252 .byte   102,69,15,56,0,194
1253         pxor    %xmm8,%xmm0
1254         movdqa  %xmm0,%xmm1
1255         pshufd  $78,%xmm0,%xmm3
1256         pxor    %xmm0,%xmm3
1257 .byte   102,15,58,68,194,0
1258 .byte   102,15,58,68,202,17
1259 .byte   102,15,58,68,223,0
1260         pxor    %xmm0,%xmm3
1261         pxor    %xmm1,%xmm3
1262
1263         movdqa  %xmm3,%xmm4
1264         psrldq  $8,%xmm3
1265         pslldq  $8,%xmm4
1266         pxor    %xmm3,%xmm1
1267         pxor    %xmm4,%xmm0
1268
1269         movdqa  %xmm0,%xmm4
1270         movdqa  %xmm0,%xmm3
1271         psllq   $5,%xmm0
1272         pxor    %xmm0,%xmm3
1273         psllq   $1,%xmm0
1274         pxor    %xmm3,%xmm0
1275         psllq   $57,%xmm0
1276         movdqa  %xmm0,%xmm3
1277         pslldq  $8,%xmm0
1278         psrldq  $8,%xmm3
1279         pxor    %xmm4,%xmm0
1280         pxor    %xmm3,%xmm1
1281
1282
1283         movdqa  %xmm0,%xmm4
1284         psrlq   $1,%xmm0
1285         pxor    %xmm4,%xmm1
1286         pxor    %xmm0,%xmm4
1287         psrlq   $5,%xmm0
1288         pxor    %xmm4,%xmm0
1289         psrlq   $1,%xmm0
1290         pxor    %xmm1,%xmm0
1291 .Ldone:
1292 .byte   102,65,15,56,0,194
1293         movdqu  %xmm0,(%rdi)
1294         .byte   0xf3,0xc3
1295 .size   gcm_ghash_clmul,.-gcm_ghash_clmul
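/*
 * gcm_init_avx(Htable, H): AVX version of the key expansion.  After the
 * same doubling of H, a four-iteration loop stores successive powers of H
 * (up to H^8) together with their xor'd halves, as consumed by the
 * eight-blocks-per-iteration gcm_ghash_avx loop.
 */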
1296 .globl  gcm_init_avx
1297 .type   gcm_init_avx,@function
1298 .align  32
1299 gcm_init_avx:
1300         vzeroupper
1301
1302         vmovdqu (%rsi),%xmm2
1303         vpshufd $78,%xmm2,%xmm2
1304
1305
1306         vpshufd $255,%xmm2,%xmm4
1307         vpsrlq  $63,%xmm2,%xmm3
1308         vpsllq  $1,%xmm2,%xmm2
1309         vpxor   %xmm5,%xmm5,%xmm5
1310         vpcmpgtd        %xmm4,%xmm5,%xmm5
1311         vpslldq $8,%xmm3,%xmm3
1312         vpor    %xmm3,%xmm2,%xmm2
1313
1314
1315         vpand   .L0x1c2_polynomial(%rip),%xmm5,%xmm5
1316         vpxor   %xmm5,%xmm2,%xmm2
1317
1318         vpunpckhqdq     %xmm2,%xmm2,%xmm6
1319         vmovdqa %xmm2,%xmm0
1320         vpxor   %xmm2,%xmm6,%xmm6
1321         movq    $4,%r10
1322         jmp     .Linit_start_avx
1323 .align  32
1324 .Linit_loop_avx:
1325         vpalignr        $8,%xmm3,%xmm4,%xmm5
1326         vmovdqu %xmm5,-16(%rdi)
1327         vpunpckhqdq     %xmm0,%xmm0,%xmm3
1328         vpxor   %xmm0,%xmm3,%xmm3
1329         vpclmulqdq      $0x11,%xmm2,%xmm0,%xmm1
1330         vpclmulqdq      $0x00,%xmm2,%xmm0,%xmm0
1331         vpclmulqdq      $0x00,%xmm6,%xmm3,%xmm3
1332         vpxor   %xmm0,%xmm1,%xmm4
1333         vpxor   %xmm4,%xmm3,%xmm3
1334
1335         vpslldq $8,%xmm3,%xmm4
1336         vpsrldq $8,%xmm3,%xmm3
1337         vpxor   %xmm4,%xmm0,%xmm0
1338         vpxor   %xmm3,%xmm1,%xmm1
1339         vpsllq  $57,%xmm0,%xmm3
1340         vpsllq  $62,%xmm0,%xmm4
1341         vpxor   %xmm3,%xmm4,%xmm4
1342         vpsllq  $63,%xmm0,%xmm3
1343         vpxor   %xmm3,%xmm4,%xmm4
1344         vpslldq $8,%xmm4,%xmm3
1345         vpsrldq $8,%xmm4,%xmm4
1346         vpxor   %xmm3,%xmm0,%xmm0
1347         vpxor   %xmm4,%xmm1,%xmm1
1348
1349         vpsrlq  $1,%xmm0,%xmm4
1350         vpxor   %xmm0,%xmm1,%xmm1
1351         vpxor   %xmm4,%xmm0,%xmm0
1352         vpsrlq  $5,%xmm4,%xmm4
1353         vpxor   %xmm4,%xmm0,%xmm0
1354         vpsrlq  $1,%xmm0,%xmm0
1355         vpxor   %xmm1,%xmm0,%xmm0
1356 .Linit_start_avx:
1357         vmovdqa %xmm0,%xmm5
1358         vpunpckhqdq     %xmm0,%xmm0,%xmm3
1359         vpxor   %xmm0,%xmm3,%xmm3
1360         vpclmulqdq      $0x11,%xmm2,%xmm0,%xmm1
1361         vpclmulqdq      $0x00,%xmm2,%xmm0,%xmm0
1362         vpclmulqdq      $0x00,%xmm6,%xmm3,%xmm3
1363         vpxor   %xmm0,%xmm1,%xmm4
1364         vpxor   %xmm4,%xmm3,%xmm3
1365
1366         vpslldq $8,%xmm3,%xmm4
1367         vpsrldq $8,%xmm3,%xmm3
1368         vpxor   %xmm4,%xmm0,%xmm0
1369         vpxor   %xmm3,%xmm1,%xmm1
1370         vpsllq  $57,%xmm0,%xmm3
1371         vpsllq  $62,%xmm0,%xmm4
1372         vpxor   %xmm3,%xmm4,%xmm4
1373         vpsllq  $63,%xmm0,%xmm3
1374         vpxor   %xmm3,%xmm4,%xmm4
1375         vpslldq $8,%xmm4,%xmm3
1376         vpsrldq $8,%xmm4,%xmm4
1377         vpxor   %xmm3,%xmm0,%xmm0
1378         vpxor   %xmm4,%xmm1,%xmm1
1379
1380         vpsrlq  $1,%xmm0,%xmm4
1381         vpxor   %xmm0,%xmm1,%xmm1
1382         vpxor   %xmm4,%xmm0,%xmm0
1383         vpsrlq  $5,%xmm4,%xmm4
1384         vpxor   %xmm4,%xmm0,%xmm0
1385         vpsrlq  $1,%xmm0,%xmm0
1386         vpxor   %xmm1,%xmm0,%xmm0
1387         vpshufd $78,%xmm5,%xmm3
1388         vpshufd $78,%xmm0,%xmm4
1389         vpxor   %xmm5,%xmm3,%xmm3
1390         vmovdqu %xmm5,0(%rdi)
1391         vpxor   %xmm0,%xmm4,%xmm4
1392         vmovdqu %xmm0,16(%rdi)
1393         leaq    48(%rdi),%rdi
1394         subq    $1,%r10
1395         jnz     .Linit_loop_avx
1396
1397         vpalignr        $8,%xmm4,%xmm3,%xmm5
1398         vmovdqu %xmm5,-16(%rdi)
1399
1400         vzeroupper
1401         .byte   0xf3,0xc3
1402 .size   gcm_init_avx,.-gcm_init_avx
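/* gcm_gmult_avx: single-block multiply, shared with the CLMUL path above. */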
1403 .globl  gcm_gmult_avx
1404 .type   gcm_gmult_avx,@function
1405 .align  32
1406 gcm_gmult_avx:
1407         jmp     .L_gmult_clmul
1408 .size   gcm_gmult_avx,.-gcm_gmult_avx
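/*
 * gcm_ghash_avx(Xi, Htable, inp, len): hashes 128 bytes (eight blocks) per
 * .Loop8x_avx iteration against H^1..H^8 with a deferred reduction; inputs
 * shorter than 128 bytes, and any remainder, go through .Lshort_avx and
 * .Ltail_avx one block at a time.
 */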
1409 .globl  gcm_ghash_avx
1410 .type   gcm_ghash_avx,@function
1411 .align  32
1412 gcm_ghash_avx:
1413         vzeroupper
1414
1415         vmovdqu (%rdi),%xmm10
1416         leaq    .L0x1c2_polynomial(%rip),%r10
1417         leaq    64(%rsi),%rsi
1418         vmovdqu .Lbswap_mask(%rip),%xmm13
1419         vpshufb %xmm13,%xmm10,%xmm10
1420         cmpq    $0x80,%rcx
1421         jb      .Lshort_avx
1422         subq    $0x80,%rcx
1423
1424         vmovdqu 112(%rdx),%xmm14
1425         vmovdqu 0-64(%rsi),%xmm6
1426         vpshufb %xmm13,%xmm14,%xmm14
1427         vmovdqu 32-64(%rsi),%xmm7
1428
1429         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1430         vmovdqu 96(%rdx),%xmm15
1431         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1432         vpxor   %xmm14,%xmm9,%xmm9
1433         vpshufb %xmm13,%xmm15,%xmm15
1434         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1435         vmovdqu 16-64(%rsi),%xmm6
1436         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1437         vmovdqu 80(%rdx),%xmm14
1438         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1439         vpxor   %xmm15,%xmm8,%xmm8
1440
1441         vpshufb %xmm13,%xmm14,%xmm14
1442         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1443         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1444         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1445         vmovdqu 48-64(%rsi),%xmm6
1446         vpxor   %xmm14,%xmm9,%xmm9
1447         vmovdqu 64(%rdx),%xmm15
1448         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1449         vmovdqu 80-64(%rsi),%xmm7
1450
1451         vpshufb %xmm13,%xmm15,%xmm15
1452         vpxor   %xmm0,%xmm3,%xmm3
1453         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1454         vpxor   %xmm1,%xmm4,%xmm4
1455         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1456         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1457         vmovdqu 64-64(%rsi),%xmm6
1458         vpxor   %xmm2,%xmm5,%xmm5
1459         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1460         vpxor   %xmm15,%xmm8,%xmm8
1461
1462         vmovdqu 48(%rdx),%xmm14
1463         vpxor   %xmm3,%xmm0,%xmm0
1464         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1465         vpxor   %xmm4,%xmm1,%xmm1
1466         vpshufb %xmm13,%xmm14,%xmm14
1467         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1468         vmovdqu 96-64(%rsi),%xmm6
1469         vpxor   %xmm5,%xmm2,%xmm2
1470         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1471         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1472         vmovdqu 128-64(%rsi),%xmm7
1473         vpxor   %xmm14,%xmm9,%xmm9
1474
1475         vmovdqu 32(%rdx),%xmm15
1476         vpxor   %xmm0,%xmm3,%xmm3
1477         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1478         vpxor   %xmm1,%xmm4,%xmm4
1479         vpshufb %xmm13,%xmm15,%xmm15
1480         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1481         vmovdqu 112-64(%rsi),%xmm6
1482         vpxor   %xmm2,%xmm5,%xmm5
1483         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1484         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1485         vpxor   %xmm15,%xmm8,%xmm8
1486
1487         vmovdqu 16(%rdx),%xmm14
1488         vpxor   %xmm3,%xmm0,%xmm0
1489         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1490         vpxor   %xmm4,%xmm1,%xmm1
1491         vpshufb %xmm13,%xmm14,%xmm14
1492         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1493         vmovdqu 144-64(%rsi),%xmm6
1494         vpxor   %xmm5,%xmm2,%xmm2
1495         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1496         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1497         vmovdqu 176-64(%rsi),%xmm7
1498         vpxor   %xmm14,%xmm9,%xmm9
1499
1500         vmovdqu (%rdx),%xmm15
1501         vpxor   %xmm0,%xmm3,%xmm3
1502         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1503         vpxor   %xmm1,%xmm4,%xmm4
1504         vpshufb %xmm13,%xmm15,%xmm15
1505         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1506         vmovdqu 160-64(%rsi),%xmm6
1507         vpxor   %xmm2,%xmm5,%xmm5
1508         vpclmulqdq      $0x10,%xmm7,%xmm9,%xmm2
1509
1510         leaq    128(%rdx),%rdx
1511         cmpq    $0x80,%rcx
1512         jb      .Ltail_avx
1513
1514         vpxor   %xmm10,%xmm15,%xmm15
1515         subq    $0x80,%rcx
1516         jmp     .Loop8x_avx
1517
1518 .align  32
1519 .Loop8x_avx:
1520         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1521         vmovdqu 112(%rdx),%xmm14
1522         vpxor   %xmm0,%xmm3,%xmm3
1523         vpxor   %xmm15,%xmm8,%xmm8
1524         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm10
1525         vpshufb %xmm13,%xmm14,%xmm14
1526         vpxor   %xmm1,%xmm4,%xmm4
1527         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm11
1528         vmovdqu 0-64(%rsi),%xmm6
1529         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1530         vpxor   %xmm2,%xmm5,%xmm5
1531         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm12
1532         vmovdqu 32-64(%rsi),%xmm7
1533         vpxor   %xmm14,%xmm9,%xmm9
1534
1535         vmovdqu 96(%rdx),%xmm15
1536         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1537         vpxor   %xmm3,%xmm10,%xmm10
1538         vpshufb %xmm13,%xmm15,%xmm15
1539         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1540         vxorps  %xmm4,%xmm11,%xmm11
1541         vmovdqu 16-64(%rsi),%xmm6
1542         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1543         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1544         vpxor   %xmm5,%xmm12,%xmm12
1545         vxorps  %xmm15,%xmm8,%xmm8
1546
1547         vmovdqu 80(%rdx),%xmm14
1548         vpxor   %xmm10,%xmm12,%xmm12
1549         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1550         vpxor   %xmm11,%xmm12,%xmm12
1551         vpslldq $8,%xmm12,%xmm9
1552         vpxor   %xmm0,%xmm3,%xmm3
1553         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1554         vpsrldq $8,%xmm12,%xmm12
1555         vpxor   %xmm9,%xmm10,%xmm10
1556         vmovdqu 48-64(%rsi),%xmm6
1557         vpshufb %xmm13,%xmm14,%xmm14
1558         vxorps  %xmm12,%xmm11,%xmm11
1559         vpxor   %xmm1,%xmm4,%xmm4
1560         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1561         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1562         vmovdqu 80-64(%rsi),%xmm7
1563         vpxor   %xmm14,%xmm9,%xmm9
1564         vpxor   %xmm2,%xmm5,%xmm5
1565
1566         vmovdqu 64(%rdx),%xmm15
1567         vpalignr        $8,%xmm10,%xmm10,%xmm12
1568         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1569         vpshufb %xmm13,%xmm15,%xmm15
1570         vpxor   %xmm3,%xmm0,%xmm0
1571         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1572         vmovdqu 64-64(%rsi),%xmm6
1573         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1574         vpxor   %xmm4,%xmm1,%xmm1
1575         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1576         vxorps  %xmm15,%xmm8,%xmm8
1577         vpxor   %xmm5,%xmm2,%xmm2
1578
1579         vmovdqu 48(%rdx),%xmm14
1580         vpclmulqdq      $0x10,(%r10),%xmm10,%xmm10
1581         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1582         vpshufb %xmm13,%xmm14,%xmm14
1583         vpxor   %xmm0,%xmm3,%xmm3
1584         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1585         vmovdqu 96-64(%rsi),%xmm6
1586         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1587         vpxor   %xmm1,%xmm4,%xmm4
1588         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1589         vmovdqu 128-64(%rsi),%xmm7
1590         vpxor   %xmm14,%xmm9,%xmm9
1591         vpxor   %xmm2,%xmm5,%xmm5
1592
1593         vmovdqu 32(%rdx),%xmm15
1594         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1595         vpshufb %xmm13,%xmm15,%xmm15
1596         vpxor   %xmm3,%xmm0,%xmm0
1597         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1598         vmovdqu 112-64(%rsi),%xmm6
1599         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1600         vpxor   %xmm4,%xmm1,%xmm1
1601         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1602         vpxor   %xmm15,%xmm8,%xmm8
1603         vpxor   %xmm5,%xmm2,%xmm2
1604         vxorps  %xmm12,%xmm10,%xmm10
1605
1606         vmovdqu 16(%rdx),%xmm14
1607         vpalignr        $8,%xmm10,%xmm10,%xmm12
1608         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1609         vpshufb %xmm13,%xmm14,%xmm14
1610         vpxor   %xmm0,%xmm3,%xmm3
1611         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1612         vmovdqu 144-64(%rsi),%xmm6
1613         vpclmulqdq      $0x10,(%r10),%xmm10,%xmm10
1614         vxorps  %xmm11,%xmm12,%xmm12
1615         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1616         vpxor   %xmm1,%xmm4,%xmm4
1617         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1618         vmovdqu 176-64(%rsi),%xmm7
1619         vpxor   %xmm14,%xmm9,%xmm9
1620         vpxor   %xmm2,%xmm5,%xmm5
1621
1622         vmovdqu (%rdx),%xmm15
1623         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1624         vpshufb %xmm13,%xmm15,%xmm15
1625         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1626         vmovdqu 160-64(%rsi),%xmm6
1627         vpxor   %xmm12,%xmm15,%xmm15
1628         vpclmulqdq      $0x10,%xmm7,%xmm9,%xmm2
1629         vpxor   %xmm10,%xmm15,%xmm15
1630
1631         leaq    128(%rdx),%rdx
1632         subq    $0x80,%rcx
1633         jnc     .Loop8x_avx
1634
1635         addq    $0x80,%rcx
1636         jmp     .Ltail_no_xor_avx
1637
1638 .align  32
1639 .Lshort_avx:
1640         vmovdqu -16(%rdx,%rcx,1),%xmm14
1641         leaq    (%rdx,%rcx,1),%rdx
1642         vmovdqu 0-64(%rsi),%xmm6
1643         vmovdqu 32-64(%rsi),%xmm7
1644         vpshufb %xmm13,%xmm14,%xmm15
1645
1646         vmovdqa %xmm0,%xmm3
1647         vmovdqa %xmm1,%xmm4
1648         vmovdqa %xmm2,%xmm5
1649         subq    $0x10,%rcx
1650         jz      .Ltail_avx
1651
1652         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1653         vpxor   %xmm0,%xmm3,%xmm3
1654         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1655         vpxor   %xmm15,%xmm8,%xmm8
1656         vmovdqu -32(%rdx),%xmm14
1657         vpxor   %xmm1,%xmm4,%xmm4
1658         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1659         vmovdqu 16-64(%rsi),%xmm6
1660         vpshufb %xmm13,%xmm14,%xmm15
1661         vpxor   %xmm2,%xmm5,%xmm5
1662         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1663         vpsrldq $8,%xmm7,%xmm7
1664         subq    $0x10,%rcx
1665         jz      .Ltail_avx
1666
1667         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1668         vpxor   %xmm0,%xmm3,%xmm3
1669         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1670         vpxor   %xmm15,%xmm8,%xmm8
1671         vmovdqu -48(%rdx),%xmm14
1672         vpxor   %xmm1,%xmm4,%xmm4
1673         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1674         vmovdqu 48-64(%rsi),%xmm6
1675         vpshufb %xmm13,%xmm14,%xmm15
1676         vpxor   %xmm2,%xmm5,%xmm5
1677         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1678         vmovdqu 80-64(%rsi),%xmm7
1679         subq    $0x10,%rcx
1680         jz      .Ltail_avx
1681
1682         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1683         vpxor   %xmm0,%xmm3,%xmm3
1684         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1685         vpxor   %xmm15,%xmm8,%xmm8
1686         vmovdqu -64(%rdx),%xmm14
1687         vpxor   %xmm1,%xmm4,%xmm4
1688         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1689         vmovdqu 64-64(%rsi),%xmm6
1690         vpshufb %xmm13,%xmm14,%xmm15
1691         vpxor   %xmm2,%xmm5,%xmm5
1692         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1693         vpsrldq $8,%xmm7,%xmm7
1694         subq    $0x10,%rcx
1695         jz      .Ltail_avx
1696
1697         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1698         vpxor   %xmm0,%xmm3,%xmm3
1699         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1700         vpxor   %xmm15,%xmm8,%xmm8
1701         vmovdqu -80(%rdx),%xmm14
1702         vpxor   %xmm1,%xmm4,%xmm4
1703         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1704         vmovdqu 96-64(%rsi),%xmm6
1705         vpshufb %xmm13,%xmm14,%xmm15
1706         vpxor   %xmm2,%xmm5,%xmm5
1707         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1708         vmovdqu 128-64(%rsi),%xmm7
1709         subq    $0x10,%rcx
1710         jz      .Ltail_avx
1711
1712         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1713         vpxor   %xmm0,%xmm3,%xmm3
1714         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1715         vpxor   %xmm15,%xmm8,%xmm8
1716         vmovdqu -96(%rdx),%xmm14
1717         vpxor   %xmm1,%xmm4,%xmm4
1718         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1719         vmovdqu 112-64(%rsi),%xmm6
1720         vpshufb %xmm13,%xmm14,%xmm15
1721         vpxor   %xmm2,%xmm5,%xmm5
1722         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1723         vpsrldq $8,%xmm7,%xmm7
1724         subq    $0x10,%rcx
1725         jz      .Ltail_avx
1726
1727         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1728         vpxor   %xmm0,%xmm3,%xmm3
1729         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1730         vpxor   %xmm15,%xmm8,%xmm8
1731         vmovdqu -112(%rdx),%xmm14
1732         vpxor   %xmm1,%xmm4,%xmm4
1733         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1734         vmovdqu 144-64(%rsi),%xmm6
1735         vpshufb %xmm13,%xmm14,%xmm15
1736         vpxor   %xmm2,%xmm5,%xmm5
1737         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1738         vmovq   184-64(%rsi),%xmm7
1739         subq    $0x10,%rcx
1740         jmp     .Ltail_avx
1741
1742 .align  32
1743 .Ltail_avx:
1744         vpxor   %xmm10,%xmm15,%xmm15
1745 .Ltail_no_xor_avx:
1746         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1747         vpxor   %xmm0,%xmm3,%xmm3
1748         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1749         vpxor   %xmm15,%xmm8,%xmm8
1750         vpxor   %xmm1,%xmm4,%xmm4
1751         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1752         vpxor   %xmm2,%xmm5,%xmm5
1753         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1754
1755         vmovdqu (%r10),%xmm12
1756
1757         vpxor   %xmm0,%xmm3,%xmm10
1758         vpxor   %xmm1,%xmm4,%xmm11
1759         vpxor   %xmm2,%xmm5,%xmm5
1760
1761         vpxor   %xmm10,%xmm5,%xmm5
1762         vpxor   %xmm11,%xmm5,%xmm5
1763         vpslldq $8,%xmm5,%xmm9
1764         vpsrldq $8,%xmm5,%xmm5
1765         vpxor   %xmm9,%xmm10,%xmm10
1766         vpxor   %xmm5,%xmm11,%xmm11
1767
1768         vpclmulqdq      $0x10,%xmm12,%xmm10,%xmm9
1769         vpalignr        $8,%xmm10,%xmm10,%xmm10
1770         vpxor   %xmm9,%xmm10,%xmm10
1771
1772         vpclmulqdq      $0x10,%xmm12,%xmm10,%xmm9
1773         vpalignr        $8,%xmm10,%xmm10,%xmm10
1774         vpxor   %xmm11,%xmm10,%xmm10
1775         vpxor   %xmm9,%xmm10,%xmm10
1776
1777         cmpq    $0,%rcx
1778         jne     .Lshort_avx
1779
1780         vpshufb %xmm13,%xmm10,%xmm10
1781         vmovdqu %xmm10,(%rdi)
1782         vzeroupper
1783         .byte   0xf3,0xc3
1784 .size   gcm_ghash_avx,.-gcm_ghash_avx
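/*
 * Constants: .Lbswap_mask is the pshufb byte-reversal mask,
 * .L0x1c2_polynomial holds the GHASH reduction constant (0xc2 in the top
 * byte), .L7_mask is a small mask used by the .Lmod4_loop reduction, and
 * .Lrem_4bit/.Lrem_8bit are the remainder tables for the table-driven code.
 */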
1785 .align  64
1786 .Lbswap_mask:
1787 .byte   15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
1788 .L0x1c2_polynomial:
1789 .byte   1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
1790 .L7_mask:
1791 .long   7,0,7,0
1792 .L7_mask_poly:
1793 .long   7,0,450,0
1794 .align  64
1795 .type   .Lrem_4bit,@object
1796 .Lrem_4bit:
1797 .long   0,0,0,471859200,0,943718400,0,610271232
1798 .long   0,1887436800,0,1822425088,0,1220542464,0,1423966208
1799 .long   0,3774873600,0,4246732800,0,3644850176,0,3311403008
1800 .long   0,2441084928,0,2376073216,0,2847932416,0,3051356160
1801 .type   .Lrem_8bit,@object
1802 .Lrem_8bit:
1803 .value  0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
1804 .value  0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
1805 .value  0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
1806 .value  0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
1807 .value  0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
1808 .value  0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
1809 .value  0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
1810 .value  0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
1811 .value  0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
1812 .value  0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
1813 .value  0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
1814 .value  0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
1815 .value  0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
1816 .value  0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
1817 .value  0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
1818 .value  0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
1819 .value  0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
1820 .value  0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
1821 .value  0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
1822 .value  0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
1823 .value  0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
1824 .value  0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
1825 .value  0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
1826 .value  0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
1827 .value  0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
1828 .value  0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
1829 .value  0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
1830 .value  0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
1831 .value  0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
1832 .value  0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
1833 .value  0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
1834 .value  0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
1835
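/* ASCII banner: "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>" */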
1836 .byte   71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1837 .align  64