1 /* $FreeBSD$ */
2 /* Do not modify. This file is auto-generated from ghash-x86_64.pl. */
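/*
 * GHASH (the GCM universal hash) for x86_64.  Three implementations
 * follow: the 4-bit table-driven gcm_gmult_4bit/gcm_ghash_4bit, the
 * PCLMULQDQ-based gcm_*_clmul routines and the AVX+PCLMULQDQ gcm_*_avx
 * routines.  The raw .byte sequences in the SSE code are hand-encoded
 * pclmulqdq/pshufb instructions.  Arguments arrive per the SysV AMD64
 * ABI in %rdi, %rsi, %rdx, %rcx.
 */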
3 .text   
4
5
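/*
 * gcm_gmult_4bit(Xi, Htable): multiply the 128-bit hash value Xi (%rdi)
 * by H in GF(2^128), four bits at a time, using the 16-entry table of
 * multiples of H at Htable (%rsi) and the .Lrem_4bit reduction table,
 * then store the result back into Xi.
 */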
6 .globl  gcm_gmult_4bit
7 .type   gcm_gmult_4bit,@function
8 .align  16
9 gcm_gmult_4bit:
10 .cfi_startproc  
11         pushq   %rbx
12 .cfi_adjust_cfa_offset  8
13 .cfi_offset     %rbx,-16
14         pushq   %rbp
15 .cfi_adjust_cfa_offset  8
16 .cfi_offset     %rbp,-24
17         pushq   %r12
18 .cfi_adjust_cfa_offset  8
19 .cfi_offset     %r12,-32
20         pushq   %r13
21 .cfi_adjust_cfa_offset  8
22 .cfi_offset     %r13,-40
23         pushq   %r14
24 .cfi_adjust_cfa_offset  8
25 .cfi_offset     %r14,-48
26         pushq   %r15
27 .cfi_adjust_cfa_offset  8
28 .cfi_offset     %r15,-56
29         subq    $280,%rsp
30 .cfi_adjust_cfa_offset  280
31 .Lgmult_prologue:
32
33         movzbq  15(%rdi),%r8
34         leaq    .Lrem_4bit(%rip),%r11
35         xorq    %rax,%rax
36         xorq    %rbx,%rbx
37         movb    %r8b,%al
38         movb    %r8b,%bl
39         shlb    $4,%al
40         movq    $14,%rcx
41         movq    8(%rsi,%rax,1),%r8
42         movq    (%rsi,%rax,1),%r9
43         andb    $0xf0,%bl
44         movq    %r8,%rdx
45         jmp     .Loop1
46
47 .align  16
48 .Loop1:
49         shrq    $4,%r8
50         andq    $0xf,%rdx
51         movq    %r9,%r10
52         movb    (%rdi,%rcx,1),%al
53         shrq    $4,%r9
54         xorq    8(%rsi,%rbx,1),%r8
55         shlq    $60,%r10
56         xorq    (%rsi,%rbx,1),%r9
57         movb    %al,%bl
58         xorq    (%r11,%rdx,8),%r9
59         movq    %r8,%rdx
60         shlb    $4,%al
61         xorq    %r10,%r8
62         decq    %rcx
63         js      .Lbreak1
64
65         shrq    $4,%r8
66         andq    $0xf,%rdx
67         movq    %r9,%r10
68         shrq    $4,%r9
69         xorq    8(%rsi,%rax,1),%r8
70         shlq    $60,%r10
71         xorq    (%rsi,%rax,1),%r9
72         andb    $0xf0,%bl
73         xorq    (%r11,%rdx,8),%r9
74         movq    %r8,%rdx
75         xorq    %r10,%r8
76         jmp     .Loop1
77
78 .align  16
79 .Lbreak1:
80         shrq    $4,%r8
81         andq    $0xf,%rdx
82         movq    %r9,%r10
83         shrq    $4,%r9
84         xorq    8(%rsi,%rax,1),%r8
85         shlq    $60,%r10
86         xorq    (%rsi,%rax,1),%r9
87         andb    $0xf0,%bl
88         xorq    (%r11,%rdx,8),%r9
89         movq    %r8,%rdx
90         xorq    %r10,%r8
91
92         shrq    $4,%r8
93         andq    $0xf,%rdx
94         movq    %r9,%r10
95         shrq    $4,%r9
96         xorq    8(%rsi,%rbx,1),%r8
97         shlq    $60,%r10
98         xorq    (%rsi,%rbx,1),%r9
99         xorq    %r10,%r8
100         xorq    (%r11,%rdx,8),%r9
101
102         bswapq  %r8
103         bswapq  %r9
104         movq    %r8,8(%rdi)
105         movq    %r9,(%rdi)
106
107         leaq    280+48(%rsp),%rsi
108 .cfi_def_cfa    %rsi,8
109         movq    -8(%rsi),%rbx
110 .cfi_restore    %rbx
111         leaq    (%rsi),%rsp
112 .cfi_def_cfa_register   %rsp
113 .Lgmult_epilogue:
114         .byte   0xf3,0xc3
115 .cfi_endproc    
116 .size   gcm_gmult_4bit,.-gcm_gmult_4bit
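/*
 * gcm_ghash_4bit(Xi, Htable, inp, len): table-driven GHASH over a whole
 * buffer.  %rdi = Xi, %rsi = Htable, %rdx = inp, %rcx = len (a multiple
 * of 16).  A nibble-swizzled copy of Htable is built in the 280-byte
 * stack frame, and each 16-byte block is folded into Xi using the 8-bit
 * .Lrem_8bit reduction table.
 */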
117 .globl  gcm_ghash_4bit
118 .type   gcm_ghash_4bit,@function
119 .align  16
120 gcm_ghash_4bit:
121 .cfi_startproc  
122         pushq   %rbx
123 .cfi_adjust_cfa_offset  8
124 .cfi_offset     %rbx,-16
125         pushq   %rbp
126 .cfi_adjust_cfa_offset  8
127 .cfi_offset     %rbp,-24
128         pushq   %r12
129 .cfi_adjust_cfa_offset  8
130 .cfi_offset     %r12,-32
131         pushq   %r13
132 .cfi_adjust_cfa_offset  8
133 .cfi_offset     %r13,-40
134         pushq   %r14
135 .cfi_adjust_cfa_offset  8
136 .cfi_offset     %r14,-48
137         pushq   %r15
138 .cfi_adjust_cfa_offset  8
139 .cfi_offset     %r15,-56
140         subq    $280,%rsp
141 .cfi_adjust_cfa_offset  280
142 .Lghash_prologue:
143         movq    %rdx,%r14
144         movq    %rcx,%r15
145         subq    $-128,%rsi
146         leaq    16+128(%rsp),%rbp
147         xorl    %edx,%edx
148         movq    0+0-128(%rsi),%r8
149         movq    0+8-128(%rsi),%rax
150         movb    %al,%dl
151         shrq    $4,%rax
152         movq    %r8,%r10
153         shrq    $4,%r8
154         movq    16+0-128(%rsi),%r9
155         shlb    $4,%dl
156         movq    16+8-128(%rsi),%rbx
157         shlq    $60,%r10
158         movb    %dl,0(%rsp)
159         orq     %r10,%rax
160         movb    %bl,%dl
161         shrq    $4,%rbx
162         movq    %r9,%r10
163         shrq    $4,%r9
164         movq    %r8,0(%rbp)
165         movq    32+0-128(%rsi),%r8
166         shlb    $4,%dl
167         movq    %rax,0-128(%rbp)
168         movq    32+8-128(%rsi),%rax
169         shlq    $60,%r10
170         movb    %dl,1(%rsp)
171         orq     %r10,%rbx
172         movb    %al,%dl
173         shrq    $4,%rax
174         movq    %r8,%r10
175         shrq    $4,%r8
176         movq    %r9,8(%rbp)
177         movq    48+0-128(%rsi),%r9
178         shlb    $4,%dl
179         movq    %rbx,8-128(%rbp)
180         movq    48+8-128(%rsi),%rbx
181         shlq    $60,%r10
182         movb    %dl,2(%rsp)
183         orq     %r10,%rax
184         movb    %bl,%dl
185         shrq    $4,%rbx
186         movq    %r9,%r10
187         shrq    $4,%r9
188         movq    %r8,16(%rbp)
189         movq    64+0-128(%rsi),%r8
190         shlb    $4,%dl
191         movq    %rax,16-128(%rbp)
192         movq    64+8-128(%rsi),%rax
193         shlq    $60,%r10
194         movb    %dl,3(%rsp)
195         orq     %r10,%rbx
196         movb    %al,%dl
197         shrq    $4,%rax
198         movq    %r8,%r10
199         shrq    $4,%r8
200         movq    %r9,24(%rbp)
201         movq    80+0-128(%rsi),%r9
202         shlb    $4,%dl
203         movq    %rbx,24-128(%rbp)
204         movq    80+8-128(%rsi),%rbx
205         shlq    $60,%r10
206         movb    %dl,4(%rsp)
207         orq     %r10,%rax
208         movb    %bl,%dl
209         shrq    $4,%rbx
210         movq    %r9,%r10
211         shrq    $4,%r9
212         movq    %r8,32(%rbp)
213         movq    96+0-128(%rsi),%r8
214         shlb    $4,%dl
215         movq    %rax,32-128(%rbp)
216         movq    96+8-128(%rsi),%rax
217         shlq    $60,%r10
218         movb    %dl,5(%rsp)
219         orq     %r10,%rbx
220         movb    %al,%dl
221         shrq    $4,%rax
222         movq    %r8,%r10
223         shrq    $4,%r8
224         movq    %r9,40(%rbp)
225         movq    112+0-128(%rsi),%r9
226         shlb    $4,%dl
227         movq    %rbx,40-128(%rbp)
228         movq    112+8-128(%rsi),%rbx
229         shlq    $60,%r10
230         movb    %dl,6(%rsp)
231         orq     %r10,%rax
232         movb    %bl,%dl
233         shrq    $4,%rbx
234         movq    %r9,%r10
235         shrq    $4,%r9
236         movq    %r8,48(%rbp)
237         movq    128+0-128(%rsi),%r8
238         shlb    $4,%dl
239         movq    %rax,48-128(%rbp)
240         movq    128+8-128(%rsi),%rax
241         shlq    $60,%r10
242         movb    %dl,7(%rsp)
243         orq     %r10,%rbx
244         movb    %al,%dl
245         shrq    $4,%rax
246         movq    %r8,%r10
247         shrq    $4,%r8
248         movq    %r9,56(%rbp)
249         movq    144+0-128(%rsi),%r9
250         shlb    $4,%dl
251         movq    %rbx,56-128(%rbp)
252         movq    144+8-128(%rsi),%rbx
253         shlq    $60,%r10
254         movb    %dl,8(%rsp)
255         orq     %r10,%rax
256         movb    %bl,%dl
257         shrq    $4,%rbx
258         movq    %r9,%r10
259         shrq    $4,%r9
260         movq    %r8,64(%rbp)
261         movq    160+0-128(%rsi),%r8
262         shlb    $4,%dl
263         movq    %rax,64-128(%rbp)
264         movq    160+8-128(%rsi),%rax
265         shlq    $60,%r10
266         movb    %dl,9(%rsp)
267         orq     %r10,%rbx
268         movb    %al,%dl
269         shrq    $4,%rax
270         movq    %r8,%r10
271         shrq    $4,%r8
272         movq    %r9,72(%rbp)
273         movq    176+0-128(%rsi),%r9
274         shlb    $4,%dl
275         movq    %rbx,72-128(%rbp)
276         movq    176+8-128(%rsi),%rbx
277         shlq    $60,%r10
278         movb    %dl,10(%rsp)
279         orq     %r10,%rax
280         movb    %bl,%dl
281         shrq    $4,%rbx
282         movq    %r9,%r10
283         shrq    $4,%r9
284         movq    %r8,80(%rbp)
285         movq    192+0-128(%rsi),%r8
286         shlb    $4,%dl
287         movq    %rax,80-128(%rbp)
288         movq    192+8-128(%rsi),%rax
289         shlq    $60,%r10
290         movb    %dl,11(%rsp)
291         orq     %r10,%rbx
292         movb    %al,%dl
293         shrq    $4,%rax
294         movq    %r8,%r10
295         shrq    $4,%r8
296         movq    %r9,88(%rbp)
297         movq    208+0-128(%rsi),%r9
298         shlb    $4,%dl
299         movq    %rbx,88-128(%rbp)
300         movq    208+8-128(%rsi),%rbx
301         shlq    $60,%r10
302         movb    %dl,12(%rsp)
303         orq     %r10,%rax
304         movb    %bl,%dl
305         shrq    $4,%rbx
306         movq    %r9,%r10
307         shrq    $4,%r9
308         movq    %r8,96(%rbp)
309         movq    224+0-128(%rsi),%r8
310         shlb    $4,%dl
311         movq    %rax,96-128(%rbp)
312         movq    224+8-128(%rsi),%rax
313         shlq    $60,%r10
314         movb    %dl,13(%rsp)
315         orq     %r10,%rbx
316         movb    %al,%dl
317         shrq    $4,%rax
318         movq    %r8,%r10
319         shrq    $4,%r8
320         movq    %r9,104(%rbp)
321         movq    240+0-128(%rsi),%r9
322         shlb    $4,%dl
323         movq    %rbx,104-128(%rbp)
324         movq    240+8-128(%rsi),%rbx
325         shlq    $60,%r10
326         movb    %dl,14(%rsp)
327         orq     %r10,%rax
328         movb    %bl,%dl
329         shrq    $4,%rbx
330         movq    %r9,%r10
331         shrq    $4,%r9
332         movq    %r8,112(%rbp)
333         shlb    $4,%dl
334         movq    %rax,112-128(%rbp)
335         shlq    $60,%r10
336         movb    %dl,15(%rsp)
337         orq     %r10,%rbx
338         movq    %r9,120(%rbp)
339         movq    %rbx,120-128(%rbp)
340         addq    $-128,%rsi
341         movq    8(%rdi),%r8
342         movq    0(%rdi),%r9
343         addq    %r14,%r15
344         leaq    .Lrem_8bit(%rip),%r11
345         jmp     .Louter_loop
346 .align  16
347 .Louter_loop:
348         xorq    (%r14),%r9
349         movq    8(%r14),%rdx
350         leaq    16(%r14),%r14
351         xorq    %r8,%rdx
352         movq    %r9,(%rdi)
353         movq    %rdx,8(%rdi)
354         shrq    $32,%rdx
355         xorq    %rax,%rax
356         roll    $8,%edx
357         movb    %dl,%al
358         movzbl  %dl,%ebx
359         shlb    $4,%al
360         shrl    $4,%ebx
361         roll    $8,%edx
362         movq    8(%rsi,%rax,1),%r8
363         movq    (%rsi,%rax,1),%r9
364         movb    %dl,%al
365         movzbl  %dl,%ecx
366         shlb    $4,%al
367         movzbq  (%rsp,%rbx,1),%r12
368         shrl    $4,%ecx
369         xorq    %r8,%r12
370         movq    %r9,%r10
371         shrq    $8,%r8
372         movzbq  %r12b,%r12
373         shrq    $8,%r9
374         xorq    -128(%rbp,%rbx,8),%r8
375         shlq    $56,%r10
376         xorq    (%rbp,%rbx,8),%r9
377         roll    $8,%edx
378         xorq    8(%rsi,%rax,1),%r8
379         xorq    (%rsi,%rax,1),%r9
380         movb    %dl,%al
381         xorq    %r10,%r8
382         movzwq  (%r11,%r12,2),%r12
383         movzbl  %dl,%ebx
384         shlb    $4,%al
385         movzbq  (%rsp,%rcx,1),%r13
386         shrl    $4,%ebx
387         shlq    $48,%r12
388         xorq    %r8,%r13
389         movq    %r9,%r10
390         xorq    %r12,%r9
391         shrq    $8,%r8
392         movzbq  %r13b,%r13
393         shrq    $8,%r9
394         xorq    -128(%rbp,%rcx,8),%r8
395         shlq    $56,%r10
396         xorq    (%rbp,%rcx,8),%r9
397         roll    $8,%edx
398         xorq    8(%rsi,%rax,1),%r8
399         xorq    (%rsi,%rax,1),%r9
400         movb    %dl,%al
401         xorq    %r10,%r8
402         movzwq  (%r11,%r13,2),%r13
403         movzbl  %dl,%ecx
404         shlb    $4,%al
405         movzbq  (%rsp,%rbx,1),%r12
406         shrl    $4,%ecx
407         shlq    $48,%r13
408         xorq    %r8,%r12
409         movq    %r9,%r10
410         xorq    %r13,%r9
411         shrq    $8,%r8
412         movzbq  %r12b,%r12
413         movl    8(%rdi),%edx
414         shrq    $8,%r9
415         xorq    -128(%rbp,%rbx,8),%r8
416         shlq    $56,%r10
417         xorq    (%rbp,%rbx,8),%r9
418         roll    $8,%edx
419         xorq    8(%rsi,%rax,1),%r8
420         xorq    (%rsi,%rax,1),%r9
421         movb    %dl,%al
422         xorq    %r10,%r8
423         movzwq  (%r11,%r12,2),%r12
424         movzbl  %dl,%ebx
425         shlb    $4,%al
426         movzbq  (%rsp,%rcx,1),%r13
427         shrl    $4,%ebx
428         shlq    $48,%r12
429         xorq    %r8,%r13
430         movq    %r9,%r10
431         xorq    %r12,%r9
432         shrq    $8,%r8
433         movzbq  %r13b,%r13
434         shrq    $8,%r9
435         xorq    -128(%rbp,%rcx,8),%r8
436         shlq    $56,%r10
437         xorq    (%rbp,%rcx,8),%r9
438         roll    $8,%edx
439         xorq    8(%rsi,%rax,1),%r8
440         xorq    (%rsi,%rax,1),%r9
441         movb    %dl,%al
442         xorq    %r10,%r8
443         movzwq  (%r11,%r13,2),%r13
444         movzbl  %dl,%ecx
445         shlb    $4,%al
446         movzbq  (%rsp,%rbx,1),%r12
447         shrl    $4,%ecx
448         shlq    $48,%r13
449         xorq    %r8,%r12
450         movq    %r9,%r10
451         xorq    %r13,%r9
452         shrq    $8,%r8
453         movzbq  %r12b,%r12
454         shrq    $8,%r9
455         xorq    -128(%rbp,%rbx,8),%r8
456         shlq    $56,%r10
457         xorq    (%rbp,%rbx,8),%r9
458         roll    $8,%edx
459         xorq    8(%rsi,%rax,1),%r8
460         xorq    (%rsi,%rax,1),%r9
461         movb    %dl,%al
462         xorq    %r10,%r8
463         movzwq  (%r11,%r12,2),%r12
464         movzbl  %dl,%ebx
465         shlb    $4,%al
466         movzbq  (%rsp,%rcx,1),%r13
467         shrl    $4,%ebx
468         shlq    $48,%r12
469         xorq    %r8,%r13
470         movq    %r9,%r10
471         xorq    %r12,%r9
472         shrq    $8,%r8
473         movzbq  %r13b,%r13
474         shrq    $8,%r9
475         xorq    -128(%rbp,%rcx,8),%r8
476         shlq    $56,%r10
477         xorq    (%rbp,%rcx,8),%r9
478         roll    $8,%edx
479         xorq    8(%rsi,%rax,1),%r8
480         xorq    (%rsi,%rax,1),%r9
481         movb    %dl,%al
482         xorq    %r10,%r8
483         movzwq  (%r11,%r13,2),%r13
484         movzbl  %dl,%ecx
485         shlb    $4,%al
486         movzbq  (%rsp,%rbx,1),%r12
487         shrl    $4,%ecx
488         shlq    $48,%r13
489         xorq    %r8,%r12
490         movq    %r9,%r10
491         xorq    %r13,%r9
492         shrq    $8,%r8
493         movzbq  %r12b,%r12
494         movl    4(%rdi),%edx
495         shrq    $8,%r9
496         xorq    -128(%rbp,%rbx,8),%r8
497         shlq    $56,%r10
498         xorq    (%rbp,%rbx,8),%r9
499         roll    $8,%edx
500         xorq    8(%rsi,%rax,1),%r8
501         xorq    (%rsi,%rax,1),%r9
502         movb    %dl,%al
503         xorq    %r10,%r8
504         movzwq  (%r11,%r12,2),%r12
505         movzbl  %dl,%ebx
506         shlb    $4,%al
507         movzbq  (%rsp,%rcx,1),%r13
508         shrl    $4,%ebx
509         shlq    $48,%r12
510         xorq    %r8,%r13
511         movq    %r9,%r10
512         xorq    %r12,%r9
513         shrq    $8,%r8
514         movzbq  %r13b,%r13
515         shrq    $8,%r9
516         xorq    -128(%rbp,%rcx,8),%r8
517         shlq    $56,%r10
518         xorq    (%rbp,%rcx,8),%r9
519         roll    $8,%edx
520         xorq    8(%rsi,%rax,1),%r8
521         xorq    (%rsi,%rax,1),%r9
522         movb    %dl,%al
523         xorq    %r10,%r8
524         movzwq  (%r11,%r13,2),%r13
525         movzbl  %dl,%ecx
526         shlb    $4,%al
527         movzbq  (%rsp,%rbx,1),%r12
528         shrl    $4,%ecx
529         shlq    $48,%r13
530         xorq    %r8,%r12
531         movq    %r9,%r10
532         xorq    %r13,%r9
533         shrq    $8,%r8
534         movzbq  %r12b,%r12
535         shrq    $8,%r9
536         xorq    -128(%rbp,%rbx,8),%r8
537         shlq    $56,%r10
538         xorq    (%rbp,%rbx,8),%r9
539         roll    $8,%edx
540         xorq    8(%rsi,%rax,1),%r8
541         xorq    (%rsi,%rax,1),%r9
542         movb    %dl,%al
543         xorq    %r10,%r8
544         movzwq  (%r11,%r12,2),%r12
545         movzbl  %dl,%ebx
546         shlb    $4,%al
547         movzbq  (%rsp,%rcx,1),%r13
548         shrl    $4,%ebx
549         shlq    $48,%r12
550         xorq    %r8,%r13
551         movq    %r9,%r10
552         xorq    %r12,%r9
553         shrq    $8,%r8
554         movzbq  %r13b,%r13
555         shrq    $8,%r9
556         xorq    -128(%rbp,%rcx,8),%r8
557         shlq    $56,%r10
558         xorq    (%rbp,%rcx,8),%r9
559         roll    $8,%edx
560         xorq    8(%rsi,%rax,1),%r8
561         xorq    (%rsi,%rax,1),%r9
562         movb    %dl,%al
563         xorq    %r10,%r8
564         movzwq  (%r11,%r13,2),%r13
565         movzbl  %dl,%ecx
566         shlb    $4,%al
567         movzbq  (%rsp,%rbx,1),%r12
568         shrl    $4,%ecx
569         shlq    $48,%r13
570         xorq    %r8,%r12
571         movq    %r9,%r10
572         xorq    %r13,%r9
573         shrq    $8,%r8
574         movzbq  %r12b,%r12
575         movl    0(%rdi),%edx
576         shrq    $8,%r9
577         xorq    -128(%rbp,%rbx,8),%r8
578         shlq    $56,%r10
579         xorq    (%rbp,%rbx,8),%r9
580         roll    $8,%edx
581         xorq    8(%rsi,%rax,1),%r8
582         xorq    (%rsi,%rax,1),%r9
583         movb    %dl,%al
584         xorq    %r10,%r8
585         movzwq  (%r11,%r12,2),%r12
586         movzbl  %dl,%ebx
587         shlb    $4,%al
588         movzbq  (%rsp,%rcx,1),%r13
589         shrl    $4,%ebx
590         shlq    $48,%r12
591         xorq    %r8,%r13
592         movq    %r9,%r10
593         xorq    %r12,%r9
594         shrq    $8,%r8
595         movzbq  %r13b,%r13
596         shrq    $8,%r9
597         xorq    -128(%rbp,%rcx,8),%r8
598         shlq    $56,%r10
599         xorq    (%rbp,%rcx,8),%r9
600         roll    $8,%edx
601         xorq    8(%rsi,%rax,1),%r8
602         xorq    (%rsi,%rax,1),%r9
603         movb    %dl,%al
604         xorq    %r10,%r8
605         movzwq  (%r11,%r13,2),%r13
606         movzbl  %dl,%ecx
607         shlb    $4,%al
608         movzbq  (%rsp,%rbx,1),%r12
609         shrl    $4,%ecx
610         shlq    $48,%r13
611         xorq    %r8,%r12
612         movq    %r9,%r10
613         xorq    %r13,%r9
614         shrq    $8,%r8
615         movzbq  %r12b,%r12
616         shrq    $8,%r9
617         xorq    -128(%rbp,%rbx,8),%r8
618         shlq    $56,%r10
619         xorq    (%rbp,%rbx,8),%r9
620         roll    $8,%edx
621         xorq    8(%rsi,%rax,1),%r8
622         xorq    (%rsi,%rax,1),%r9
623         movb    %dl,%al
624         xorq    %r10,%r8
625         movzwq  (%r11,%r12,2),%r12
626         movzbl  %dl,%ebx
627         shlb    $4,%al
628         movzbq  (%rsp,%rcx,1),%r13
629         shrl    $4,%ebx
630         shlq    $48,%r12
631         xorq    %r8,%r13
632         movq    %r9,%r10
633         xorq    %r12,%r9
634         shrq    $8,%r8
635         movzbq  %r13b,%r13
636         shrq    $8,%r9
637         xorq    -128(%rbp,%rcx,8),%r8
638         shlq    $56,%r10
639         xorq    (%rbp,%rcx,8),%r9
640         roll    $8,%edx
641         xorq    8(%rsi,%rax,1),%r8
642         xorq    (%rsi,%rax,1),%r9
643         movb    %dl,%al
644         xorq    %r10,%r8
645         movzwq  (%r11,%r13,2),%r13
646         movzbl  %dl,%ecx
647         shlb    $4,%al
648         movzbq  (%rsp,%rbx,1),%r12
649         andl    $240,%ecx
650         shlq    $48,%r13
651         xorq    %r8,%r12
652         movq    %r9,%r10
653         xorq    %r13,%r9
654         shrq    $8,%r8
655         movzbq  %r12b,%r12
656         movl    -4(%rdi),%edx
657         shrq    $8,%r9
658         xorq    -128(%rbp,%rbx,8),%r8
659         shlq    $56,%r10
660         xorq    (%rbp,%rbx,8),%r9
661         movzwq  (%r11,%r12,2),%r12
662         xorq    8(%rsi,%rax,1),%r8
663         xorq    (%rsi,%rax,1),%r9
664         shlq    $48,%r12
665         xorq    %r10,%r8
666         xorq    %r12,%r9
667         movzbq  %r8b,%r13
668         shrq    $4,%r8
669         movq    %r9,%r10
670         shlb    $4,%r13b
671         shrq    $4,%r9
672         xorq    8(%rsi,%rcx,1),%r8
673         movzwq  (%r11,%r13,2),%r13
674         shlq    $60,%r10
675         xorq    (%rsi,%rcx,1),%r9
676         xorq    %r10,%r8
677         shlq    $48,%r13
678         bswapq  %r8
679         xorq    %r13,%r9
680         bswapq  %r9
681         cmpq    %r15,%r14
682         jb      .Louter_loop
683         movq    %r8,8(%rdi)
684         movq    %r9,(%rdi)
685
686         leaq    280+48(%rsp),%rsi
687 .cfi_def_cfa    %rsi,8
688         movq    -48(%rsi),%r15
689 .cfi_restore    %r15
690         movq    -40(%rsi),%r14
691 .cfi_restore    %r14
692         movq    -32(%rsi),%r13
693 .cfi_restore    %r13
694         movq    -24(%rsi),%r12
695 .cfi_restore    %r12
696         movq    -16(%rsi),%rbp
697 .cfi_restore    %rbp
698         movq    -8(%rsi),%rbx
699 .cfi_restore    %rbx
700         leaq    0(%rsi),%rsp
701 .cfi_def_cfa_register   %rsp
702 .Lghash_epilogue:
703         .byte   0xf3,0xc3
704 .cfi_endproc    
705 .size   gcm_ghash_4bit,.-gcm_ghash_4bit
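/*
 * gcm_init_clmul(Htable, H): PCLMULQDQ key setup.  %rdi = Htable,
 * %rsi = H.  H is first adjusted with the conditional xor against
 * .L0x1c2_polynomial, then H^2, H^3 and H^4 are computed by carry-less
 * multiplication and stored, interleaved with their Karatsuba helper
 * constants, in the first 96 bytes of Htable.
 */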
706 .globl  gcm_init_clmul
707 .type   gcm_init_clmul,@function
708 .align  16
709 gcm_init_clmul:
710 .cfi_startproc  
711 .L_init_clmul:
712         movdqu  (%rsi),%xmm2
713         pshufd  $78,%xmm2,%xmm2
714
715
716         pshufd  $255,%xmm2,%xmm4
717         movdqa  %xmm2,%xmm3
718         psllq   $1,%xmm2
719         pxor    %xmm5,%xmm5
720         psrlq   $63,%xmm3
721         pcmpgtd %xmm4,%xmm5
722         pslldq  $8,%xmm3
723         por     %xmm3,%xmm2
724
725
726         pand    .L0x1c2_polynomial(%rip),%xmm5
727         pxor    %xmm5,%xmm2
728
729
730         pshufd  $78,%xmm2,%xmm6
731         movdqa  %xmm2,%xmm0
732         pxor    %xmm2,%xmm6
733         movdqa  %xmm0,%xmm1
734         pshufd  $78,%xmm0,%xmm3
735         pxor    %xmm0,%xmm3
736 .byte   102,15,58,68,194,0
737 .byte   102,15,58,68,202,17
738 .byte   102,15,58,68,222,0
739         pxor    %xmm0,%xmm3
740         pxor    %xmm1,%xmm3
741
742         movdqa  %xmm3,%xmm4
743         psrldq  $8,%xmm3
744         pslldq  $8,%xmm4
745         pxor    %xmm3,%xmm1
746         pxor    %xmm4,%xmm0
747
748         movdqa  %xmm0,%xmm4
749         movdqa  %xmm0,%xmm3
750         psllq   $5,%xmm0
751         pxor    %xmm0,%xmm3
752         psllq   $1,%xmm0
753         pxor    %xmm3,%xmm0
754         psllq   $57,%xmm0
755         movdqa  %xmm0,%xmm3
756         pslldq  $8,%xmm0
757         psrldq  $8,%xmm3
758         pxor    %xmm4,%xmm0
759         pxor    %xmm3,%xmm1
760
761
762         movdqa  %xmm0,%xmm4
763         psrlq   $1,%xmm0
764         pxor    %xmm4,%xmm1
765         pxor    %xmm0,%xmm4
766         psrlq   $5,%xmm0
767         pxor    %xmm4,%xmm0
768         psrlq   $1,%xmm0
769         pxor    %xmm1,%xmm0
770         pshufd  $78,%xmm2,%xmm3
771         pshufd  $78,%xmm0,%xmm4
772         pxor    %xmm2,%xmm3
773         movdqu  %xmm2,0(%rdi)
774         pxor    %xmm0,%xmm4
775         movdqu  %xmm0,16(%rdi)
776 .byte   102,15,58,15,227,8
777         movdqu  %xmm4,32(%rdi)
778         movdqa  %xmm0,%xmm1
779         pshufd  $78,%xmm0,%xmm3
780         pxor    %xmm0,%xmm3
781 .byte   102,15,58,68,194,0
782 .byte   102,15,58,68,202,17
783 .byte   102,15,58,68,222,0
784         pxor    %xmm0,%xmm3
785         pxor    %xmm1,%xmm3
786
787         movdqa  %xmm3,%xmm4
788         psrldq  $8,%xmm3
789         pslldq  $8,%xmm4
790         pxor    %xmm3,%xmm1
791         pxor    %xmm4,%xmm0
792
793         movdqa  %xmm0,%xmm4
794         movdqa  %xmm0,%xmm3
795         psllq   $5,%xmm0
796         pxor    %xmm0,%xmm3
797         psllq   $1,%xmm0
798         pxor    %xmm3,%xmm0
799         psllq   $57,%xmm0
800         movdqa  %xmm0,%xmm3
801         pslldq  $8,%xmm0
802         psrldq  $8,%xmm3
803         pxor    %xmm4,%xmm0
804         pxor    %xmm3,%xmm1
805
806
807         movdqa  %xmm0,%xmm4
808         psrlq   $1,%xmm0
809         pxor    %xmm4,%xmm1
810         pxor    %xmm0,%xmm4
811         psrlq   $5,%xmm0
812         pxor    %xmm4,%xmm0
813         psrlq   $1,%xmm0
814         pxor    %xmm1,%xmm0
815         movdqa  %xmm0,%xmm5
816         movdqa  %xmm0,%xmm1
817         pshufd  $78,%xmm0,%xmm3
818         pxor    %xmm0,%xmm3
819 .byte   102,15,58,68,194,0
820 .byte   102,15,58,68,202,17
821 .byte   102,15,58,68,222,0
822         pxor    %xmm0,%xmm3
823         pxor    %xmm1,%xmm3
824
825         movdqa  %xmm3,%xmm4
826         psrldq  $8,%xmm3
827         pslldq  $8,%xmm4
828         pxor    %xmm3,%xmm1
829         pxor    %xmm4,%xmm0
830
831         movdqa  %xmm0,%xmm4
832         movdqa  %xmm0,%xmm3
833         psllq   $5,%xmm0
834         pxor    %xmm0,%xmm3
835         psllq   $1,%xmm0
836         pxor    %xmm3,%xmm0
837         psllq   $57,%xmm0
838         movdqa  %xmm0,%xmm3
839         pslldq  $8,%xmm0
840         psrldq  $8,%xmm3
841         pxor    %xmm4,%xmm0
842         pxor    %xmm3,%xmm1
843
844
845         movdqa  %xmm0,%xmm4
846         psrlq   $1,%xmm0
847         pxor    %xmm4,%xmm1
848         pxor    %xmm0,%xmm4
849         psrlq   $5,%xmm0
850         pxor    %xmm4,%xmm0
851         psrlq   $1,%xmm0
852         pxor    %xmm1,%xmm0
853         pshufd  $78,%xmm5,%xmm3
854         pshufd  $78,%xmm0,%xmm4
855         pxor    %xmm5,%xmm3
856         movdqu  %xmm5,48(%rdi)
857         pxor    %xmm0,%xmm4
858         movdqu  %xmm0,64(%rdi)
859 .byte   102,15,58,15,227,8
860         movdqu  %xmm4,80(%rdi)
861         .byte   0xf3,0xc3
862 .cfi_endproc    
863 .size   gcm_init_clmul,.-gcm_init_clmul
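/*
 * gcm_gmult_clmul(Xi, Htable): single-block GHASH multiply.  Xi (%rdi)
 * is byte-swapped with .Lbswap_mask, multiplied by H from Htable (%rsi)
 * via a Karatsuba carry-less multiplication, reduced modulo the GHASH
 * polynomial, swapped back and stored.
 */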
864 .globl  gcm_gmult_clmul
865 .type   gcm_gmult_clmul,@function
866 .align  16
867 gcm_gmult_clmul:
868 .cfi_startproc  
869 .L_gmult_clmul:
870         movdqu  (%rdi),%xmm0
871         movdqa  .Lbswap_mask(%rip),%xmm5
872         movdqu  (%rsi),%xmm2
873         movdqu  32(%rsi),%xmm4
874 .byte   102,15,56,0,197
875         movdqa  %xmm0,%xmm1
876         pshufd  $78,%xmm0,%xmm3
877         pxor    %xmm0,%xmm3
878 .byte   102,15,58,68,194,0
879 .byte   102,15,58,68,202,17
880 .byte   102,15,58,68,220,0
881         pxor    %xmm0,%xmm3
882         pxor    %xmm1,%xmm3
883
884         movdqa  %xmm3,%xmm4
885         psrldq  $8,%xmm3
886         pslldq  $8,%xmm4
887         pxor    %xmm3,%xmm1
888         pxor    %xmm4,%xmm0
889
890         movdqa  %xmm0,%xmm4
891         movdqa  %xmm0,%xmm3
892         psllq   $5,%xmm0
893         pxor    %xmm0,%xmm3
894         psllq   $1,%xmm0
895         pxor    %xmm3,%xmm0
896         psllq   $57,%xmm0
897         movdqa  %xmm0,%xmm3
898         pslldq  $8,%xmm0
899         psrldq  $8,%xmm3
900         pxor    %xmm4,%xmm0
901         pxor    %xmm3,%xmm1
902
903
904         movdqa  %xmm0,%xmm4
905         psrlq   $1,%xmm0
906         pxor    %xmm4,%xmm1
907         pxor    %xmm0,%xmm4
908         psrlq   $5,%xmm0
909         pxor    %xmm4,%xmm0
910         psrlq   $1,%xmm0
911         pxor    %xmm1,%xmm0
912 .byte   102,15,56,0,197
913         movdqu  %xmm0,(%rdi)
914         .byte   0xf3,0xc3
915 .cfi_endproc    
916 .size   gcm_gmult_clmul,.-gcm_gmult_clmul
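/*
 * gcm_ghash_clmul(Xi, Htable, inp, len): PCLMULQDQ GHASH over len bytes
 * at inp.  %rdi = Xi, %rsi = Htable, %rdx = inp, %rcx = len.  For long
 * inputs it aggregates four blocks per iteration (.Lmod4_loop) unless
 * the CPU capability word OPENSSL_ia32cap_P steers it to the two-block
 * .Lmod_loop; stragglers are handled in .Leven_tail/.Lodd_tail.
 */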
917 .globl  gcm_ghash_clmul
918 .type   gcm_ghash_clmul,@function
919 .align  32
920 gcm_ghash_clmul:
921 .cfi_startproc  
922 .L_ghash_clmul:
923         movdqa  .Lbswap_mask(%rip),%xmm10
924
925         movdqu  (%rdi),%xmm0
926         movdqu  (%rsi),%xmm2
927         movdqu  32(%rsi),%xmm7
928 .byte   102,65,15,56,0,194
929
930         subq    $0x10,%rcx
931         jz      .Lodd_tail
932
933         movdqu  16(%rsi),%xmm6
934         movl    OPENSSL_ia32cap_P+4(%rip),%eax
935         cmpq    $0x30,%rcx
936         jb      .Lskip4x
937
938         andl    $71303168,%eax
939         cmpl    $4194304,%eax
940         je      .Lskip4x
941
942         subq    $0x30,%rcx
943         movq    $0xA040608020C0E000,%rax
944         movdqu  48(%rsi),%xmm14
945         movdqu  64(%rsi),%xmm15
946
947
948
949
950         movdqu  48(%rdx),%xmm3
951         movdqu  32(%rdx),%xmm11
952 .byte   102,65,15,56,0,218
953 .byte   102,69,15,56,0,218
954         movdqa  %xmm3,%xmm5
955         pshufd  $78,%xmm3,%xmm4
956         pxor    %xmm3,%xmm4
957 .byte   102,15,58,68,218,0
958 .byte   102,15,58,68,234,17
959 .byte   102,15,58,68,231,0
960
961         movdqa  %xmm11,%xmm13
962         pshufd  $78,%xmm11,%xmm12
963         pxor    %xmm11,%xmm12
964 .byte   102,68,15,58,68,222,0
965 .byte   102,68,15,58,68,238,17
966 .byte   102,68,15,58,68,231,16
967         xorps   %xmm11,%xmm3
968         xorps   %xmm13,%xmm5
969         movups  80(%rsi),%xmm7
970         xorps   %xmm12,%xmm4
971
972         movdqu  16(%rdx),%xmm11
973         movdqu  0(%rdx),%xmm8
974 .byte   102,69,15,56,0,218
975 .byte   102,69,15,56,0,194
976         movdqa  %xmm11,%xmm13
977         pshufd  $78,%xmm11,%xmm12
978         pxor    %xmm8,%xmm0
979         pxor    %xmm11,%xmm12
980 .byte   102,69,15,58,68,222,0
981         movdqa  %xmm0,%xmm1
982         pshufd  $78,%xmm0,%xmm8
983         pxor    %xmm0,%xmm8
984 .byte   102,69,15,58,68,238,17
985 .byte   102,68,15,58,68,231,0
986         xorps   %xmm11,%xmm3
987         xorps   %xmm13,%xmm5
988
989         leaq    64(%rdx),%rdx
990         subq    $0x40,%rcx
991         jc      .Ltail4x
992
993         jmp     .Lmod4_loop
994 .align  32
995 .Lmod4_loop:
996 .byte   102,65,15,58,68,199,0
997         xorps   %xmm12,%xmm4
998         movdqu  48(%rdx),%xmm11
999 .byte   102,69,15,56,0,218
1000 .byte   102,65,15,58,68,207,17
1001         xorps   %xmm3,%xmm0
1002         movdqu  32(%rdx),%xmm3
1003         movdqa  %xmm11,%xmm13
1004 .byte   102,68,15,58,68,199,16
1005         pshufd  $78,%xmm11,%xmm12
1006         xorps   %xmm5,%xmm1
1007         pxor    %xmm11,%xmm12
1008 .byte   102,65,15,56,0,218
1009         movups  32(%rsi),%xmm7
1010         xorps   %xmm4,%xmm8
1011 .byte   102,68,15,58,68,218,0
1012         pshufd  $78,%xmm3,%xmm4
1013
1014         pxor    %xmm0,%xmm8
1015         movdqa  %xmm3,%xmm5
1016         pxor    %xmm1,%xmm8
1017         pxor    %xmm3,%xmm4
1018         movdqa  %xmm8,%xmm9
1019 .byte   102,68,15,58,68,234,17
1020         pslldq  $8,%xmm8
1021         psrldq  $8,%xmm9
1022         pxor    %xmm8,%xmm0
1023         movdqa  .L7_mask(%rip),%xmm8
1024         pxor    %xmm9,%xmm1
1025 .byte   102,76,15,110,200
1026
1027         pand    %xmm0,%xmm8
1028 .byte   102,69,15,56,0,200
1029         pxor    %xmm0,%xmm9
1030 .byte   102,68,15,58,68,231,0
1031         psllq   $57,%xmm9
1032         movdqa  %xmm9,%xmm8
1033         pslldq  $8,%xmm9
1034 .byte   102,15,58,68,222,0
1035         psrldq  $8,%xmm8
1036         pxor    %xmm9,%xmm0
1037         pxor    %xmm8,%xmm1
1038         movdqu  0(%rdx),%xmm8
1039
1040         movdqa  %xmm0,%xmm9
1041         psrlq   $1,%xmm0
1042 .byte   102,15,58,68,238,17
1043         xorps   %xmm11,%xmm3
1044         movdqu  16(%rdx),%xmm11
1045 .byte   102,69,15,56,0,218
1046 .byte   102,15,58,68,231,16
1047         xorps   %xmm13,%xmm5
1048         movups  80(%rsi),%xmm7
1049 .byte   102,69,15,56,0,194
1050         pxor    %xmm9,%xmm1
1051         pxor    %xmm0,%xmm9
1052         psrlq   $5,%xmm0
1053
1054         movdqa  %xmm11,%xmm13
1055         pxor    %xmm12,%xmm4
1056         pshufd  $78,%xmm11,%xmm12
1057         pxor    %xmm9,%xmm0
1058         pxor    %xmm8,%xmm1
1059         pxor    %xmm11,%xmm12
1060 .byte   102,69,15,58,68,222,0
1061         psrlq   $1,%xmm0
1062         pxor    %xmm1,%xmm0
1063         movdqa  %xmm0,%xmm1
1064 .byte   102,69,15,58,68,238,17
1065         xorps   %xmm11,%xmm3
1066         pshufd  $78,%xmm0,%xmm8
1067         pxor    %xmm0,%xmm8
1068
1069 .byte   102,68,15,58,68,231,0
1070         xorps   %xmm13,%xmm5
1071
1072         leaq    64(%rdx),%rdx
1073         subq    $0x40,%rcx
1074         jnc     .Lmod4_loop
1075
1076 .Ltail4x:
1077 .byte   102,65,15,58,68,199,0
1078 .byte   102,65,15,58,68,207,17
1079 .byte   102,68,15,58,68,199,16
1080         xorps   %xmm12,%xmm4
1081         xorps   %xmm3,%xmm0
1082         xorps   %xmm5,%xmm1
1083         pxor    %xmm0,%xmm1
1084         pxor    %xmm4,%xmm8
1085
1086         pxor    %xmm1,%xmm8
1087         pxor    %xmm0,%xmm1
1088
1089         movdqa  %xmm8,%xmm9
1090         psrldq  $8,%xmm8
1091         pslldq  $8,%xmm9
1092         pxor    %xmm8,%xmm1
1093         pxor    %xmm9,%xmm0
1094
1095         movdqa  %xmm0,%xmm4
1096         movdqa  %xmm0,%xmm3
1097         psllq   $5,%xmm0
1098         pxor    %xmm0,%xmm3
1099         psllq   $1,%xmm0
1100         pxor    %xmm3,%xmm0
1101         psllq   $57,%xmm0
1102         movdqa  %xmm0,%xmm3
1103         pslldq  $8,%xmm0
1104         psrldq  $8,%xmm3
1105         pxor    %xmm4,%xmm0
1106         pxor    %xmm3,%xmm1
1107
1108
1109         movdqa  %xmm0,%xmm4
1110         psrlq   $1,%xmm0
1111         pxor    %xmm4,%xmm1
1112         pxor    %xmm0,%xmm4
1113         psrlq   $5,%xmm0
1114         pxor    %xmm4,%xmm0
1115         psrlq   $1,%xmm0
1116         pxor    %xmm1,%xmm0
1117         addq    $0x40,%rcx
1118         jz      .Ldone
1119         movdqu  32(%rsi),%xmm7
1120         subq    $0x10,%rcx
1121         jz      .Lodd_tail
1122 .Lskip4x:
1123
1124
1125
1126
1127
1128         movdqu  (%rdx),%xmm8
1129         movdqu  16(%rdx),%xmm3
1130 .byte   102,69,15,56,0,194
1131 .byte   102,65,15,56,0,218
1132         pxor    %xmm8,%xmm0
1133
1134         movdqa  %xmm3,%xmm5
1135         pshufd  $78,%xmm3,%xmm4
1136         pxor    %xmm3,%xmm4
1137 .byte   102,15,58,68,218,0
1138 .byte   102,15,58,68,234,17
1139 .byte   102,15,58,68,231,0
1140
1141         leaq    32(%rdx),%rdx
1142         nop
1143         subq    $0x20,%rcx
1144         jbe     .Leven_tail
1145         nop
1146         jmp     .Lmod_loop
1147
1148 .align  32
1149 .Lmod_loop:
1150         movdqa  %xmm0,%xmm1
1151         movdqa  %xmm4,%xmm8
1152         pshufd  $78,%xmm0,%xmm4
1153         pxor    %xmm0,%xmm4
1154
1155 .byte   102,15,58,68,198,0
1156 .byte   102,15,58,68,206,17
1157 .byte   102,15,58,68,231,16
1158
1159         pxor    %xmm3,%xmm0
1160         pxor    %xmm5,%xmm1
1161         movdqu  (%rdx),%xmm9
1162         pxor    %xmm0,%xmm8
1163 .byte   102,69,15,56,0,202
1164         movdqu  16(%rdx),%xmm3
1165
1166         pxor    %xmm1,%xmm8
1167         pxor    %xmm9,%xmm1
1168         pxor    %xmm8,%xmm4
1169 .byte   102,65,15,56,0,218
1170         movdqa  %xmm4,%xmm8
1171         psrldq  $8,%xmm8
1172         pslldq  $8,%xmm4
1173         pxor    %xmm8,%xmm1
1174         pxor    %xmm4,%xmm0
1175
1176         movdqa  %xmm3,%xmm5
1177
1178         movdqa  %xmm0,%xmm9
1179         movdqa  %xmm0,%xmm8
1180         psllq   $5,%xmm0
1181         pxor    %xmm0,%xmm8
1182 .byte   102,15,58,68,218,0
1183         psllq   $1,%xmm0
1184         pxor    %xmm8,%xmm0
1185         psllq   $57,%xmm0
1186         movdqa  %xmm0,%xmm8
1187         pslldq  $8,%xmm0
1188         psrldq  $8,%xmm8
1189         pxor    %xmm9,%xmm0
1190         pshufd  $78,%xmm5,%xmm4
1191         pxor    %xmm8,%xmm1
1192         pxor    %xmm5,%xmm4
1193
1194         movdqa  %xmm0,%xmm9
1195         psrlq   $1,%xmm0
1196 .byte   102,15,58,68,234,17
1197         pxor    %xmm9,%xmm1
1198         pxor    %xmm0,%xmm9
1199         psrlq   $5,%xmm0
1200         pxor    %xmm9,%xmm0
1201         leaq    32(%rdx),%rdx
1202         psrlq   $1,%xmm0
1203 .byte   102,15,58,68,231,0
1204         pxor    %xmm1,%xmm0
1205
1206         subq    $0x20,%rcx
1207         ja      .Lmod_loop
1208
1209 .Leven_tail:
1210         movdqa  %xmm0,%xmm1
1211         movdqa  %xmm4,%xmm8
1212         pshufd  $78,%xmm0,%xmm4
1213         pxor    %xmm0,%xmm4
1214
1215 .byte   102,15,58,68,198,0
1216 .byte   102,15,58,68,206,17
1217 .byte   102,15,58,68,231,16
1218
1219         pxor    %xmm3,%xmm0
1220         pxor    %xmm5,%xmm1
1221         pxor    %xmm0,%xmm8
1222         pxor    %xmm1,%xmm8
1223         pxor    %xmm8,%xmm4
1224         movdqa  %xmm4,%xmm8
1225         psrldq  $8,%xmm8
1226         pslldq  $8,%xmm4
1227         pxor    %xmm8,%xmm1
1228         pxor    %xmm4,%xmm0
1229
1230         movdqa  %xmm0,%xmm4
1231         movdqa  %xmm0,%xmm3
1232         psllq   $5,%xmm0
1233         pxor    %xmm0,%xmm3
1234         psllq   $1,%xmm0
1235         pxor    %xmm3,%xmm0
1236         psllq   $57,%xmm0
1237         movdqa  %xmm0,%xmm3
1238         pslldq  $8,%xmm0
1239         psrldq  $8,%xmm3
1240         pxor    %xmm4,%xmm0
1241         pxor    %xmm3,%xmm1
1242
1243
1244         movdqa  %xmm0,%xmm4
1245         psrlq   $1,%xmm0
1246         pxor    %xmm4,%xmm1
1247         pxor    %xmm0,%xmm4
1248         psrlq   $5,%xmm0
1249         pxor    %xmm4,%xmm0
1250         psrlq   $1,%xmm0
1251         pxor    %xmm1,%xmm0
1252         testq   %rcx,%rcx
1253         jnz     .Ldone
1254
1255 .Lodd_tail:
1256         movdqu  (%rdx),%xmm8
1257 .byte   102,69,15,56,0,194
1258         pxor    %xmm8,%xmm0
1259         movdqa  %xmm0,%xmm1
1260         pshufd  $78,%xmm0,%xmm3
1261         pxor    %xmm0,%xmm3
1262 .byte   102,15,58,68,194,0
1263 .byte   102,15,58,68,202,17
1264 .byte   102,15,58,68,223,0
1265         pxor    %xmm0,%xmm3
1266         pxor    %xmm1,%xmm3
1267
1268         movdqa  %xmm3,%xmm4
1269         psrldq  $8,%xmm3
1270         pslldq  $8,%xmm4
1271         pxor    %xmm3,%xmm1
1272         pxor    %xmm4,%xmm0
1273
1274         movdqa  %xmm0,%xmm4
1275         movdqa  %xmm0,%xmm3
1276         psllq   $5,%xmm0
1277         pxor    %xmm0,%xmm3
1278         psllq   $1,%xmm0
1279         pxor    %xmm3,%xmm0
1280         psllq   $57,%xmm0
1281         movdqa  %xmm0,%xmm3
1282         pslldq  $8,%xmm0
1283         psrldq  $8,%xmm3
1284         pxor    %xmm4,%xmm0
1285         pxor    %xmm3,%xmm1
1286
1287
1288         movdqa  %xmm0,%xmm4
1289         psrlq   $1,%xmm0
1290         pxor    %xmm4,%xmm1
1291         pxor    %xmm0,%xmm4
1292         psrlq   $5,%xmm0
1293         pxor    %xmm4,%xmm0
1294         psrlq   $1,%xmm0
1295         pxor    %xmm1,%xmm0
1296 .Ldone:
1297 .byte   102,65,15,56,0,194
1298         movdqu  %xmm0,(%rdi)
1299         .byte   0xf3,0xc3
1300 .cfi_endproc    
1301 .size   gcm_ghash_clmul,.-gcm_ghash_clmul
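/*
 * gcm_init_avx(Htable, H): AVX key setup.  Like gcm_init_clmul, but the
 * loop precomputes the powers H^1 through H^8, each pair followed by its
 * Karatsuba constant, for use by the eight-block gcm_ghash_avx loop.
 */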
1302 .globl  gcm_init_avx
1303 .type   gcm_init_avx,@function
1304 .align  32
1305 gcm_init_avx:
1306 .cfi_startproc  
1307         vzeroupper
1308
1309         vmovdqu (%rsi),%xmm2
1310         vpshufd $78,%xmm2,%xmm2
1311
1312
1313         vpshufd $255,%xmm2,%xmm4
1314         vpsrlq  $63,%xmm2,%xmm3
1315         vpsllq  $1,%xmm2,%xmm2
1316         vpxor   %xmm5,%xmm5,%xmm5
1317         vpcmpgtd        %xmm4,%xmm5,%xmm5
1318         vpslldq $8,%xmm3,%xmm3
1319         vpor    %xmm3,%xmm2,%xmm2
1320
1321
1322         vpand   .L0x1c2_polynomial(%rip),%xmm5,%xmm5
1323         vpxor   %xmm5,%xmm2,%xmm2
1324
1325         vpunpckhqdq     %xmm2,%xmm2,%xmm6
1326         vmovdqa %xmm2,%xmm0
1327         vpxor   %xmm2,%xmm6,%xmm6
1328         movq    $4,%r10
1329         jmp     .Linit_start_avx
1330 .align  32
1331 .Linit_loop_avx:
1332         vpalignr        $8,%xmm3,%xmm4,%xmm5
1333         vmovdqu %xmm5,-16(%rdi)
1334         vpunpckhqdq     %xmm0,%xmm0,%xmm3
1335         vpxor   %xmm0,%xmm3,%xmm3
1336         vpclmulqdq      $0x11,%xmm2,%xmm0,%xmm1
1337         vpclmulqdq      $0x00,%xmm2,%xmm0,%xmm0
1338         vpclmulqdq      $0x00,%xmm6,%xmm3,%xmm3
1339         vpxor   %xmm0,%xmm1,%xmm4
1340         vpxor   %xmm4,%xmm3,%xmm3
1341
1342         vpslldq $8,%xmm3,%xmm4
1343         vpsrldq $8,%xmm3,%xmm3
1344         vpxor   %xmm4,%xmm0,%xmm0
1345         vpxor   %xmm3,%xmm1,%xmm1
1346         vpsllq  $57,%xmm0,%xmm3
1347         vpsllq  $62,%xmm0,%xmm4
1348         vpxor   %xmm3,%xmm4,%xmm4
1349         vpsllq  $63,%xmm0,%xmm3
1350         vpxor   %xmm3,%xmm4,%xmm4
1351         vpslldq $8,%xmm4,%xmm3
1352         vpsrldq $8,%xmm4,%xmm4
1353         vpxor   %xmm3,%xmm0,%xmm0
1354         vpxor   %xmm4,%xmm1,%xmm1
1355
1356         vpsrlq  $1,%xmm0,%xmm4
1357         vpxor   %xmm0,%xmm1,%xmm1
1358         vpxor   %xmm4,%xmm0,%xmm0
1359         vpsrlq  $5,%xmm4,%xmm4
1360         vpxor   %xmm4,%xmm0,%xmm0
1361         vpsrlq  $1,%xmm0,%xmm0
1362         vpxor   %xmm1,%xmm0,%xmm0
1363 .Linit_start_avx:
1364         vmovdqa %xmm0,%xmm5
1365         vpunpckhqdq     %xmm0,%xmm0,%xmm3
1366         vpxor   %xmm0,%xmm3,%xmm3
1367         vpclmulqdq      $0x11,%xmm2,%xmm0,%xmm1
1368         vpclmulqdq      $0x00,%xmm2,%xmm0,%xmm0
1369         vpclmulqdq      $0x00,%xmm6,%xmm3,%xmm3
1370         vpxor   %xmm0,%xmm1,%xmm4
1371         vpxor   %xmm4,%xmm3,%xmm3
1372
1373         vpslldq $8,%xmm3,%xmm4
1374         vpsrldq $8,%xmm3,%xmm3
1375         vpxor   %xmm4,%xmm0,%xmm0
1376         vpxor   %xmm3,%xmm1,%xmm1
1377         vpsllq  $57,%xmm0,%xmm3
1378         vpsllq  $62,%xmm0,%xmm4
1379         vpxor   %xmm3,%xmm4,%xmm4
1380         vpsllq  $63,%xmm0,%xmm3
1381         vpxor   %xmm3,%xmm4,%xmm4
1382         vpslldq $8,%xmm4,%xmm3
1383         vpsrldq $8,%xmm4,%xmm4
1384         vpxor   %xmm3,%xmm0,%xmm0
1385         vpxor   %xmm4,%xmm1,%xmm1
1386
1387         vpsrlq  $1,%xmm0,%xmm4
1388         vpxor   %xmm0,%xmm1,%xmm1
1389         vpxor   %xmm4,%xmm0,%xmm0
1390         vpsrlq  $5,%xmm4,%xmm4
1391         vpxor   %xmm4,%xmm0,%xmm0
1392         vpsrlq  $1,%xmm0,%xmm0
1393         vpxor   %xmm1,%xmm0,%xmm0
1394         vpshufd $78,%xmm5,%xmm3
1395         vpshufd $78,%xmm0,%xmm4
1396         vpxor   %xmm5,%xmm3,%xmm3
1397         vmovdqu %xmm5,0(%rdi)
1398         vpxor   %xmm0,%xmm4,%xmm4
1399         vmovdqu %xmm0,16(%rdi)
1400         leaq    48(%rdi),%rdi
1401         subq    $1,%r10
1402         jnz     .Linit_loop_avx
1403
1404         vpalignr        $8,%xmm4,%xmm3,%xmm5
1405         vmovdqu %xmm5,-16(%rdi)
1406
1407         vzeroupper
1408         .byte   0xf3,0xc3
1409 .cfi_endproc    
1410 .size   gcm_init_avx,.-gcm_init_avx
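/*
 * gcm_gmult_avx(Xi, Htable): a single-block multiply gains nothing from
 * AVX, so this simply tail-jumps to the PCLMULQDQ path above.
 */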
1411 .globl  gcm_gmult_avx
1412 .type   gcm_gmult_avx,@function
1413 .align  32
1414 gcm_gmult_avx:
1415 .cfi_startproc  
1416         jmp     .L_gmult_clmul
1417 .cfi_endproc    
1418 .size   gcm_gmult_avx,.-gcm_gmult_avx
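/*
 * gcm_ghash_avx(Xi, Htable, inp, len): AVX+PCLMULQDQ GHASH.  %rdi = Xi,
 * %rsi = Htable, %rdx = inp, %rcx = len.  .Loop8x_avx consumes eight
 * 16-byte blocks per iteration; shorter inputs and the remainder go
 * through .Lshort_avx/.Ltail_avx.
 */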
1419 .globl  gcm_ghash_avx
1420 .type   gcm_ghash_avx,@function
1421 .align  32
1422 gcm_ghash_avx:
1423 .cfi_startproc  
1424         vzeroupper
1425
1426         vmovdqu (%rdi),%xmm10
1427         leaq    .L0x1c2_polynomial(%rip),%r10
1428         leaq    64(%rsi),%rsi
1429         vmovdqu .Lbswap_mask(%rip),%xmm13
1430         vpshufb %xmm13,%xmm10,%xmm10
1431         cmpq    $0x80,%rcx
1432         jb      .Lshort_avx
1433         subq    $0x80,%rcx
1434
1435         vmovdqu 112(%rdx),%xmm14
1436         vmovdqu 0-64(%rsi),%xmm6
1437         vpshufb %xmm13,%xmm14,%xmm14
1438         vmovdqu 32-64(%rsi),%xmm7
1439
1440         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1441         vmovdqu 96(%rdx),%xmm15
1442         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1443         vpxor   %xmm14,%xmm9,%xmm9
1444         vpshufb %xmm13,%xmm15,%xmm15
1445         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1446         vmovdqu 16-64(%rsi),%xmm6
1447         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1448         vmovdqu 80(%rdx),%xmm14
1449         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1450         vpxor   %xmm15,%xmm8,%xmm8
1451
1452         vpshufb %xmm13,%xmm14,%xmm14
1453         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1454         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1455         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1456         vmovdqu 48-64(%rsi),%xmm6
1457         vpxor   %xmm14,%xmm9,%xmm9
1458         vmovdqu 64(%rdx),%xmm15
1459         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1460         vmovdqu 80-64(%rsi),%xmm7
1461
1462         vpshufb %xmm13,%xmm15,%xmm15
1463         vpxor   %xmm0,%xmm3,%xmm3
1464         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1465         vpxor   %xmm1,%xmm4,%xmm4
1466         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1467         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1468         vmovdqu 64-64(%rsi),%xmm6
1469         vpxor   %xmm2,%xmm5,%xmm5
1470         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1471         vpxor   %xmm15,%xmm8,%xmm8
1472
1473         vmovdqu 48(%rdx),%xmm14
1474         vpxor   %xmm3,%xmm0,%xmm0
1475         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1476         vpxor   %xmm4,%xmm1,%xmm1
1477         vpshufb %xmm13,%xmm14,%xmm14
1478         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1479         vmovdqu 96-64(%rsi),%xmm6
1480         vpxor   %xmm5,%xmm2,%xmm2
1481         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1482         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1483         vmovdqu 128-64(%rsi),%xmm7
1484         vpxor   %xmm14,%xmm9,%xmm9
1485
1486         vmovdqu 32(%rdx),%xmm15
1487         vpxor   %xmm0,%xmm3,%xmm3
1488         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1489         vpxor   %xmm1,%xmm4,%xmm4
1490         vpshufb %xmm13,%xmm15,%xmm15
1491         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1492         vmovdqu 112-64(%rsi),%xmm6
1493         vpxor   %xmm2,%xmm5,%xmm5
1494         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1495         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1496         vpxor   %xmm15,%xmm8,%xmm8
1497
1498         vmovdqu 16(%rdx),%xmm14
1499         vpxor   %xmm3,%xmm0,%xmm0
1500         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1501         vpxor   %xmm4,%xmm1,%xmm1
1502         vpshufb %xmm13,%xmm14,%xmm14
1503         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1504         vmovdqu 144-64(%rsi),%xmm6
1505         vpxor   %xmm5,%xmm2,%xmm2
1506         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1507         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1508         vmovdqu 176-64(%rsi),%xmm7
1509         vpxor   %xmm14,%xmm9,%xmm9
1510
1511         vmovdqu (%rdx),%xmm15
1512         vpxor   %xmm0,%xmm3,%xmm3
1513         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1514         vpxor   %xmm1,%xmm4,%xmm4
1515         vpshufb %xmm13,%xmm15,%xmm15
1516         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1517         vmovdqu 160-64(%rsi),%xmm6
1518         vpxor   %xmm2,%xmm5,%xmm5
1519         vpclmulqdq      $0x10,%xmm7,%xmm9,%xmm2
1520
1521         leaq    128(%rdx),%rdx
1522         cmpq    $0x80,%rcx
1523         jb      .Ltail_avx
1524
1525         vpxor   %xmm10,%xmm15,%xmm15
1526         subq    $0x80,%rcx
1527         jmp     .Loop8x_avx
1528
1529 .align  32
1530 .Loop8x_avx:
1531         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1532         vmovdqu 112(%rdx),%xmm14
1533         vpxor   %xmm0,%xmm3,%xmm3
1534         vpxor   %xmm15,%xmm8,%xmm8
1535         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm10
1536         vpshufb %xmm13,%xmm14,%xmm14
1537         vpxor   %xmm1,%xmm4,%xmm4
1538         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm11
1539         vmovdqu 0-64(%rsi),%xmm6
1540         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1541         vpxor   %xmm2,%xmm5,%xmm5
1542         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm12
1543         vmovdqu 32-64(%rsi),%xmm7
1544         vpxor   %xmm14,%xmm9,%xmm9
1545
1546         vmovdqu 96(%rdx),%xmm15
1547         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1548         vpxor   %xmm3,%xmm10,%xmm10
1549         vpshufb %xmm13,%xmm15,%xmm15
1550         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1551         vxorps  %xmm4,%xmm11,%xmm11
1552         vmovdqu 16-64(%rsi),%xmm6
1553         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1554         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1555         vpxor   %xmm5,%xmm12,%xmm12
1556         vxorps  %xmm15,%xmm8,%xmm8
1557
1558         vmovdqu 80(%rdx),%xmm14
1559         vpxor   %xmm10,%xmm12,%xmm12
1560         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1561         vpxor   %xmm11,%xmm12,%xmm12
1562         vpslldq $8,%xmm12,%xmm9
1563         vpxor   %xmm0,%xmm3,%xmm3
1564         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1565         vpsrldq $8,%xmm12,%xmm12
1566         vpxor   %xmm9,%xmm10,%xmm10
1567         vmovdqu 48-64(%rsi),%xmm6
1568         vpshufb %xmm13,%xmm14,%xmm14
1569         vxorps  %xmm12,%xmm11,%xmm11
1570         vpxor   %xmm1,%xmm4,%xmm4
1571         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1572         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1573         vmovdqu 80-64(%rsi),%xmm7
1574         vpxor   %xmm14,%xmm9,%xmm9
1575         vpxor   %xmm2,%xmm5,%xmm5
1576
1577         vmovdqu 64(%rdx),%xmm15
1578         vpalignr        $8,%xmm10,%xmm10,%xmm12
1579         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1580         vpshufb %xmm13,%xmm15,%xmm15
1581         vpxor   %xmm3,%xmm0,%xmm0
1582         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1583         vmovdqu 64-64(%rsi),%xmm6
1584         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1585         vpxor   %xmm4,%xmm1,%xmm1
1586         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1587         vxorps  %xmm15,%xmm8,%xmm8
1588         vpxor   %xmm5,%xmm2,%xmm2
1589
1590         vmovdqu 48(%rdx),%xmm14
1591         vpclmulqdq      $0x10,(%r10),%xmm10,%xmm10
1592         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1593         vpshufb %xmm13,%xmm14,%xmm14
1594         vpxor   %xmm0,%xmm3,%xmm3
1595         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1596         vmovdqu 96-64(%rsi),%xmm6
1597         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1598         vpxor   %xmm1,%xmm4,%xmm4
1599         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1600         vmovdqu 128-64(%rsi),%xmm7
1601         vpxor   %xmm14,%xmm9,%xmm9
1602         vpxor   %xmm2,%xmm5,%xmm5
1603
1604         vmovdqu 32(%rdx),%xmm15
1605         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1606         vpshufb %xmm13,%xmm15,%xmm15
1607         vpxor   %xmm3,%xmm0,%xmm0
1608         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1609         vmovdqu 112-64(%rsi),%xmm6
1610         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1611         vpxor   %xmm4,%xmm1,%xmm1
1612         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1613         vpxor   %xmm15,%xmm8,%xmm8
1614         vpxor   %xmm5,%xmm2,%xmm2
1615         vxorps  %xmm12,%xmm10,%xmm10
1616
1617         vmovdqu 16(%rdx),%xmm14
1618         vpalignr        $8,%xmm10,%xmm10,%xmm12
1619         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1620         vpshufb %xmm13,%xmm14,%xmm14
1621         vpxor   %xmm0,%xmm3,%xmm3
1622         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1623         vmovdqu 144-64(%rsi),%xmm6
1624         vpclmulqdq      $0x10,(%r10),%xmm10,%xmm10
1625         vxorps  %xmm11,%xmm12,%xmm12
1626         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1627         vpxor   %xmm1,%xmm4,%xmm4
1628         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1629         vmovdqu 176-64(%rsi),%xmm7
1630         vpxor   %xmm14,%xmm9,%xmm9
1631         vpxor   %xmm2,%xmm5,%xmm5
1632
1633         vmovdqu (%rdx),%xmm15
1634         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1635         vpshufb %xmm13,%xmm15,%xmm15
1636         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1637         vmovdqu 160-64(%rsi),%xmm6
1638         vpxor   %xmm12,%xmm15,%xmm15
1639         vpclmulqdq      $0x10,%xmm7,%xmm9,%xmm2
1640         vpxor   %xmm10,%xmm15,%xmm15
1641
1642         leaq    128(%rdx),%rdx
1643         subq    $0x80,%rcx
1644         jnc     .Loop8x_avx
1645
1646         addq    $0x80,%rcx
1647         jmp     .Ltail_no_xor_avx
1648
1649 .align  32
1650 .Lshort_avx:
1651         vmovdqu -16(%rdx,%rcx,1),%xmm14
1652         leaq    (%rdx,%rcx,1),%rdx
1653         vmovdqu 0-64(%rsi),%xmm6
1654         vmovdqu 32-64(%rsi),%xmm7
1655         vpshufb %xmm13,%xmm14,%xmm15
1656
1657         vmovdqa %xmm0,%xmm3
1658         vmovdqa %xmm1,%xmm4
1659         vmovdqa %xmm2,%xmm5
1660         subq    $0x10,%rcx
1661         jz      .Ltail_avx
1662
1663         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1664         vpxor   %xmm0,%xmm3,%xmm3
1665         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1666         vpxor   %xmm15,%xmm8,%xmm8
1667         vmovdqu -32(%rdx),%xmm14
1668         vpxor   %xmm1,%xmm4,%xmm4
1669         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1670         vmovdqu 16-64(%rsi),%xmm6
1671         vpshufb %xmm13,%xmm14,%xmm15
1672         vpxor   %xmm2,%xmm5,%xmm5
1673         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1674         vpsrldq $8,%xmm7,%xmm7
1675         subq    $0x10,%rcx
1676         jz      .Ltail_avx
1677
1678         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1679         vpxor   %xmm0,%xmm3,%xmm3
1680         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1681         vpxor   %xmm15,%xmm8,%xmm8
1682         vmovdqu -48(%rdx),%xmm14
1683         vpxor   %xmm1,%xmm4,%xmm4
1684         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1685         vmovdqu 48-64(%rsi),%xmm6
1686         vpshufb %xmm13,%xmm14,%xmm15
1687         vpxor   %xmm2,%xmm5,%xmm5
1688         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1689         vmovdqu 80-64(%rsi),%xmm7
1690         subq    $0x10,%rcx
1691         jz      .Ltail_avx
1692
1693         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1694         vpxor   %xmm0,%xmm3,%xmm3
1695         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1696         vpxor   %xmm15,%xmm8,%xmm8
1697         vmovdqu -64(%rdx),%xmm14
1698         vpxor   %xmm1,%xmm4,%xmm4
1699         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1700         vmovdqu 64-64(%rsi),%xmm6
1701         vpshufb %xmm13,%xmm14,%xmm15
1702         vpxor   %xmm2,%xmm5,%xmm5
1703         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1704         vpsrldq $8,%xmm7,%xmm7
1705         subq    $0x10,%rcx
1706         jz      .Ltail_avx
1707
1708         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1709         vpxor   %xmm0,%xmm3,%xmm3
1710         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1711         vpxor   %xmm15,%xmm8,%xmm8
1712         vmovdqu -80(%rdx),%xmm14
1713         vpxor   %xmm1,%xmm4,%xmm4
1714         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1715         vmovdqu 96-64(%rsi),%xmm6
1716         vpshufb %xmm13,%xmm14,%xmm15
1717         vpxor   %xmm2,%xmm5,%xmm5
1718         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1719         vmovdqu 128-64(%rsi),%xmm7
1720         subq    $0x10,%rcx
1721         jz      .Ltail_avx
1722
1723         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1724         vpxor   %xmm0,%xmm3,%xmm3
1725         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1726         vpxor   %xmm15,%xmm8,%xmm8
1727         vmovdqu -96(%rdx),%xmm14
1728         vpxor   %xmm1,%xmm4,%xmm4
1729         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1730         vmovdqu 112-64(%rsi),%xmm6
1731         vpshufb %xmm13,%xmm14,%xmm15
1732         vpxor   %xmm2,%xmm5,%xmm5
1733         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1734         vpsrldq $8,%xmm7,%xmm7
1735         subq    $0x10,%rcx
1736         jz      .Ltail_avx
1737
1738         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1739         vpxor   %xmm0,%xmm3,%xmm3
1740         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1741         vpxor   %xmm15,%xmm8,%xmm8
1742         vmovdqu -112(%rdx),%xmm14
1743         vpxor   %xmm1,%xmm4,%xmm4
1744         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1745         vmovdqu 144-64(%rsi),%xmm6
1746         vpshufb %xmm13,%xmm14,%xmm15
1747         vpxor   %xmm2,%xmm5,%xmm5
1748         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1749         vmovq   184-64(%rsi),%xmm7
1750         subq    $0x10,%rcx
1751         jmp     .Ltail_avx
1752
1753 .align  32
1754 .Ltail_avx:
1755         vpxor   %xmm10,%xmm15,%xmm15
1756 .Ltail_no_xor_avx:
1757         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1758         vpxor   %xmm0,%xmm3,%xmm3
1759         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1760         vpxor   %xmm15,%xmm8,%xmm8
1761         vpxor   %xmm1,%xmm4,%xmm4
1762         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1763         vpxor   %xmm2,%xmm5,%xmm5
1764         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1765
1766         vmovdqu (%r10),%xmm12
1767
1768         vpxor   %xmm0,%xmm3,%xmm10
1769         vpxor   %xmm1,%xmm4,%xmm11
1770         vpxor   %xmm2,%xmm5,%xmm5
1771
1772         vpxor   %xmm10,%xmm5,%xmm5
1773         vpxor   %xmm11,%xmm5,%xmm5
1774         vpslldq $8,%xmm5,%xmm9
1775         vpsrldq $8,%xmm5,%xmm5
1776         vpxor   %xmm9,%xmm10,%xmm10
1777         vpxor   %xmm5,%xmm11,%xmm11
1778
1779         vpclmulqdq      $0x10,%xmm12,%xmm10,%xmm9
1780         vpalignr        $8,%xmm10,%xmm10,%xmm10
1781         vpxor   %xmm9,%xmm10,%xmm10
1782
1783         vpclmulqdq      $0x10,%xmm12,%xmm10,%xmm9
1784         vpalignr        $8,%xmm10,%xmm10,%xmm10
1785         vpxor   %xmm11,%xmm10,%xmm10
1786         vpxor   %xmm9,%xmm10,%xmm10
1787
1788         cmpq    $0,%rcx
1789         jne     .Lshort_avx
1790
1791         vpshufb %xmm13,%xmm10,%xmm10
1792         vmovdqu %xmm10,(%rdi)
1793         vzeroupper
1794         .byte   0xf3,0xc3
1795 .cfi_endproc    
1796 .size   gcm_ghash_avx,.-gcm_ghash_avx
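/*
 * Constants: .Lbswap_mask is the byte-reversal shuffle mask,
 * .L0x1c2_polynomial the GHASH reduction constant, .L7_mask and
 * .L7_mask_poly small masks for the carry-less reduction, and
 * .Lrem_4bit/.Lrem_8bit the reduction tables for the table-driven code.
 * The final .byte string spells out the CRYPTOGAMS attribution.
 */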
1797 .align  64
1798 .Lbswap_mask:
1799 .byte   15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
1800 .L0x1c2_polynomial:
1801 .byte   1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
1802 .L7_mask:
1803 .long   7,0,7,0
1804 .L7_mask_poly:
1805 .long   7,0,450,0
1806 .align  64
1807 .type   .Lrem_4bit,@object
1808 .Lrem_4bit:
1809 .long   0,0,0,471859200,0,943718400,0,610271232
1810 .long   0,1887436800,0,1822425088,0,1220542464,0,1423966208
1811 .long   0,3774873600,0,4246732800,0,3644850176,0,3311403008
1812 .long   0,2441084928,0,2376073216,0,2847932416,0,3051356160
1813 .type   .Lrem_8bit,@object
1814 .Lrem_8bit:
1815 .value  0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
1816 .value  0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
1817 .value  0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
1818 .value  0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
1819 .value  0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
1820 .value  0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
1821 .value  0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
1822 .value  0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
1823 .value  0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
1824 .value  0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
1825 .value  0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
1826 .value  0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
1827 .value  0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
1828 .value  0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
1829 .value  0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
1830 .value  0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
1831 .value  0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
1832 .value  0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
1833 .value  0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
1834 .value  0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
1835 .value  0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
1836 .value  0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
1837 .value  0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
1838 .value  0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
1839 .value  0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
1840 .value  0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
1841 .value  0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
1842 .value  0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
1843 .value  0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
1844 .value  0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
1845 .value  0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
1846 .value  0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
1847
1848 .byte   71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1849 .align  64