1 /* $FreeBSD$ */
2 /* Do not modify. This file is auto-generated from ghash-x86_64.pl. */
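/*
 * GHASH (GCM universal hash) primitives, three flavors:
 *   - gcm_gmult_4bit / gcm_ghash_4bit                    -- 4-bit table-driven code
 *   - gcm_init_clmul / gcm_gmult_clmul / gcm_ghash_clmul -- PCLMULQDQ (carry-less multiply)
 *   - gcm_init_avx / gcm_gmult_avx / gcm_ghash_avx       -- AVX + PCLMULQDQ, 8 blocks/iteration
 * The choice between them is made at run time by the C caller (OpenSSL's
 * gcm128.c) based on CPU capabilities.
 */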
3 .text   
4
5
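/*
 * gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])  (prototype as in
 * OpenSSL's gcm128.c): Xi <- Xi * H in GF(2^128) using the 4-bit table
 * method.  SysV AMD64 ABI: %rdi -> Xi (16-byte hash value, big-endian
 * halves), %rsi -> the 256-byte Htable of nibble multiples of H built
 * on the C side (gcm_init_4bit).
 */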
6 .globl  gcm_gmult_4bit
7 .type   gcm_gmult_4bit,@function
8 .align  16
9 gcm_gmult_4bit:
10         pushq   %rbx
11         pushq   %rbp
12         pushq   %r12
13 .Lgmult_prologue:
14
15         movzbq  15(%rdi),%r8
16         leaq    .Lrem_4bit(%rip),%r11
17         xorq    %rax,%rax
18         xorq    %rbx,%rbx
19         movb    %r8b,%al
20         movb    %r8b,%bl
21         shlb    $4,%al
22         movq    $14,%rcx
23         movq    8(%rsi,%rax,1),%r8
24         movq    (%rsi,%rax,1),%r9
25         andb    $0xf0,%bl
26         movq    %r8,%rdx
27         jmp     .Loop1
28
29 .align  16
30 .Loop1:
31         shrq    $4,%r8
32         andq    $0xf,%rdx
33         movq    %r9,%r10
34         movb    (%rdi,%rcx,1),%al
35         shrq    $4,%r9
36         xorq    8(%rsi,%rbx,1),%r8
37         shlq    $60,%r10
38         xorq    (%rsi,%rbx,1),%r9
39         movb    %al,%bl
40         xorq    (%r11,%rdx,8),%r9
41         movq    %r8,%rdx
42         shlb    $4,%al
43         xorq    %r10,%r8
44         decq    %rcx
45         js      .Lbreak1
46
47         shrq    $4,%r8
48         andq    $0xf,%rdx
49         movq    %r9,%r10
50         shrq    $4,%r9
51         xorq    8(%rsi,%rax,1),%r8
52         shlq    $60,%r10
53         xorq    (%rsi,%rax,1),%r9
54         andb    $0xf0,%bl
55         xorq    (%r11,%rdx,8),%r9
56         movq    %r8,%rdx
57         xorq    %r10,%r8
58         jmp     .Loop1
59
60 .align  16
61 .Lbreak1:
62         shrq    $4,%r8
63         andq    $0xf,%rdx
64         movq    %r9,%r10
65         shrq    $4,%r9
66         xorq    8(%rsi,%rax,1),%r8
67         shlq    $60,%r10
68         xorq    (%rsi,%rax,1),%r9
69         andb    $0xf0,%bl
70         xorq    (%r11,%rdx,8),%r9
71         movq    %r8,%rdx
72         xorq    %r10,%r8
73
74         shrq    $4,%r8
75         andq    $0xf,%rdx
76         movq    %r9,%r10
77         shrq    $4,%r9
78         xorq    8(%rsi,%rbx,1),%r8
79         shlq    $60,%r10
80         xorq    (%rsi,%rbx,1),%r9
81         xorq    %r10,%r8
82         xorq    (%r11,%rdx,8),%r9
83
84         bswapq  %r8
85         bswapq  %r9
86         movq    %r8,8(%rdi)
87         movq    %r9,(%rdi)
88
89         movq    16(%rsp),%rbx
90         leaq    24(%rsp),%rsp
91 .Lgmult_epilogue:
92         .byte   0xf3,0xc3
93 .size   gcm_gmult_4bit,.-gcm_gmult_4bit
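/*
 * gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len):
 * fold `len' bytes (a multiple of 16) into Xi, i.e. Xi <- (Xi ^ block) * H
 * per 16-byte block.  %rdi -> Xi, %rsi -> Htable, %rdx -> inp, %rcx = len.
 * The 280-byte stack frame holds a pre-shifted copy of Htable, and
 * .Lrem_8bit provides the per-byte reduction lookahead.
 */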
94 .globl  gcm_ghash_4bit
95 .type   gcm_ghash_4bit,@function
96 .align  16
97 gcm_ghash_4bit:
98         pushq   %rbx
99         pushq   %rbp
100         pushq   %r12
101         pushq   %r13
102         pushq   %r14
103         pushq   %r15
104         subq    $280,%rsp
105 .Lghash_prologue:
106         movq    %rdx,%r14
107         movq    %rcx,%r15
108         subq    $-128,%rsi
109         leaq    16+128(%rsp),%rbp
110         xorl    %edx,%edx
111         movq    0+0-128(%rsi),%r8
112         movq    0+8-128(%rsi),%rax
113         movb    %al,%dl
114         shrq    $4,%rax
115         movq    %r8,%r10
116         shrq    $4,%r8
117         movq    16+0-128(%rsi),%r9
118         shlb    $4,%dl
119         movq    16+8-128(%rsi),%rbx
120         shlq    $60,%r10
121         movb    %dl,0(%rsp)
122         orq     %r10,%rax
123         movb    %bl,%dl
124         shrq    $4,%rbx
125         movq    %r9,%r10
126         shrq    $4,%r9
127         movq    %r8,0(%rbp)
128         movq    32+0-128(%rsi),%r8
129         shlb    $4,%dl
130         movq    %rax,0-128(%rbp)
131         movq    32+8-128(%rsi),%rax
132         shlq    $60,%r10
133         movb    %dl,1(%rsp)
134         orq     %r10,%rbx
135         movb    %al,%dl
136         shrq    $4,%rax
137         movq    %r8,%r10
138         shrq    $4,%r8
139         movq    %r9,8(%rbp)
140         movq    48+0-128(%rsi),%r9
141         shlb    $4,%dl
142         movq    %rbx,8-128(%rbp)
143         movq    48+8-128(%rsi),%rbx
144         shlq    $60,%r10
145         movb    %dl,2(%rsp)
146         orq     %r10,%rax
147         movb    %bl,%dl
148         shrq    $4,%rbx
149         movq    %r9,%r10
150         shrq    $4,%r9
151         movq    %r8,16(%rbp)
152         movq    64+0-128(%rsi),%r8
153         shlb    $4,%dl
154         movq    %rax,16-128(%rbp)
155         movq    64+8-128(%rsi),%rax
156         shlq    $60,%r10
157         movb    %dl,3(%rsp)
158         orq     %r10,%rbx
159         movb    %al,%dl
160         shrq    $4,%rax
161         movq    %r8,%r10
162         shrq    $4,%r8
163         movq    %r9,24(%rbp)
164         movq    80+0-128(%rsi),%r9
165         shlb    $4,%dl
166         movq    %rbx,24-128(%rbp)
167         movq    80+8-128(%rsi),%rbx
168         shlq    $60,%r10
169         movb    %dl,4(%rsp)
170         orq     %r10,%rax
171         movb    %bl,%dl
172         shrq    $4,%rbx
173         movq    %r9,%r10
174         shrq    $4,%r9
175         movq    %r8,32(%rbp)
176         movq    96+0-128(%rsi),%r8
177         shlb    $4,%dl
178         movq    %rax,32-128(%rbp)
179         movq    96+8-128(%rsi),%rax
180         shlq    $60,%r10
181         movb    %dl,5(%rsp)
182         orq     %r10,%rbx
183         movb    %al,%dl
184         shrq    $4,%rax
185         movq    %r8,%r10
186         shrq    $4,%r8
187         movq    %r9,40(%rbp)
188         movq    112+0-128(%rsi),%r9
189         shlb    $4,%dl
190         movq    %rbx,40-128(%rbp)
191         movq    112+8-128(%rsi),%rbx
192         shlq    $60,%r10
193         movb    %dl,6(%rsp)
194         orq     %r10,%rax
195         movb    %bl,%dl
196         shrq    $4,%rbx
197         movq    %r9,%r10
198         shrq    $4,%r9
199         movq    %r8,48(%rbp)
200         movq    128+0-128(%rsi),%r8
201         shlb    $4,%dl
202         movq    %rax,48-128(%rbp)
203         movq    128+8-128(%rsi),%rax
204         shlq    $60,%r10
205         movb    %dl,7(%rsp)
206         orq     %r10,%rbx
207         movb    %al,%dl
208         shrq    $4,%rax
209         movq    %r8,%r10
210         shrq    $4,%r8
211         movq    %r9,56(%rbp)
212         movq    144+0-128(%rsi),%r9
213         shlb    $4,%dl
214         movq    %rbx,56-128(%rbp)
215         movq    144+8-128(%rsi),%rbx
216         shlq    $60,%r10
217         movb    %dl,8(%rsp)
218         orq     %r10,%rax
219         movb    %bl,%dl
220         shrq    $4,%rbx
221         movq    %r9,%r10
222         shrq    $4,%r9
223         movq    %r8,64(%rbp)
224         movq    160+0-128(%rsi),%r8
225         shlb    $4,%dl
226         movq    %rax,64-128(%rbp)
227         movq    160+8-128(%rsi),%rax
228         shlq    $60,%r10
229         movb    %dl,9(%rsp)
230         orq     %r10,%rbx
231         movb    %al,%dl
232         shrq    $4,%rax
233         movq    %r8,%r10
234         shrq    $4,%r8
235         movq    %r9,72(%rbp)
236         movq    176+0-128(%rsi),%r9
237         shlb    $4,%dl
238         movq    %rbx,72-128(%rbp)
239         movq    176+8-128(%rsi),%rbx
240         shlq    $60,%r10
241         movb    %dl,10(%rsp)
242         orq     %r10,%rax
243         movb    %bl,%dl
244         shrq    $4,%rbx
245         movq    %r9,%r10
246         shrq    $4,%r9
247         movq    %r8,80(%rbp)
248         movq    192+0-128(%rsi),%r8
249         shlb    $4,%dl
250         movq    %rax,80-128(%rbp)
251         movq    192+8-128(%rsi),%rax
252         shlq    $60,%r10
253         movb    %dl,11(%rsp)
254         orq     %r10,%rbx
255         movb    %al,%dl
256         shrq    $4,%rax
257         movq    %r8,%r10
258         shrq    $4,%r8
259         movq    %r9,88(%rbp)
260         movq    208+0-128(%rsi),%r9
261         shlb    $4,%dl
262         movq    %rbx,88-128(%rbp)
263         movq    208+8-128(%rsi),%rbx
264         shlq    $60,%r10
265         movb    %dl,12(%rsp)
266         orq     %r10,%rax
267         movb    %bl,%dl
268         shrq    $4,%rbx
269         movq    %r9,%r10
270         shrq    $4,%r9
271         movq    %r8,96(%rbp)
272         movq    224+0-128(%rsi),%r8
273         shlb    $4,%dl
274         movq    %rax,96-128(%rbp)
275         movq    224+8-128(%rsi),%rax
276         shlq    $60,%r10
277         movb    %dl,13(%rsp)
278         orq     %r10,%rbx
279         movb    %al,%dl
280         shrq    $4,%rax
281         movq    %r8,%r10
282         shrq    $4,%r8
283         movq    %r9,104(%rbp)
284         movq    240+0-128(%rsi),%r9
285         shlb    $4,%dl
286         movq    %rbx,104-128(%rbp)
287         movq    240+8-128(%rsi),%rbx
288         shlq    $60,%r10
289         movb    %dl,14(%rsp)
290         orq     %r10,%rax
291         movb    %bl,%dl
292         shrq    $4,%rbx
293         movq    %r9,%r10
294         shrq    $4,%r9
295         movq    %r8,112(%rbp)
296         shlb    $4,%dl
297         movq    %rax,112-128(%rbp)
298         shlq    $60,%r10
299         movb    %dl,15(%rsp)
300         orq     %r10,%rbx
301         movq    %r9,120(%rbp)
302         movq    %rbx,120-128(%rbp)
303         addq    $-128,%rsi
304         movq    8(%rdi),%r8
305         movq    0(%rdi),%r9
306         addq    %r14,%r15
307         leaq    .Lrem_8bit(%rip),%r11
308         jmp     .Louter_loop
309 .align  16
310 .Louter_loop:
311         xorq    (%r14),%r9
312         movq    8(%r14),%rdx
313         leaq    16(%r14),%r14
314         xorq    %r8,%rdx
315         movq    %r9,(%rdi)
316         movq    %rdx,8(%rdi)
317         shrq    $32,%rdx
318         xorq    %rax,%rax
319         roll    $8,%edx
320         movb    %dl,%al
321         movzbl  %dl,%ebx
322         shlb    $4,%al
323         shrl    $4,%ebx
324         roll    $8,%edx
325         movq    8(%rsi,%rax,1),%r8
326         movq    (%rsi,%rax,1),%r9
327         movb    %dl,%al
328         movzbl  %dl,%ecx
329         shlb    $4,%al
330         movzbq  (%rsp,%rbx,1),%r12
331         shrl    $4,%ecx
332         xorq    %r8,%r12
333         movq    %r9,%r10
334         shrq    $8,%r8
335         movzbq  %r12b,%r12
336         shrq    $8,%r9
337         xorq    -128(%rbp,%rbx,8),%r8
338         shlq    $56,%r10
339         xorq    (%rbp,%rbx,8),%r9
340         roll    $8,%edx
341         xorq    8(%rsi,%rax,1),%r8
342         xorq    (%rsi,%rax,1),%r9
343         movb    %dl,%al
344         xorq    %r10,%r8
345         movzwq  (%r11,%r12,2),%r12
346         movzbl  %dl,%ebx
347         shlb    $4,%al
348         movzbq  (%rsp,%rcx,1),%r13
349         shrl    $4,%ebx
350         shlq    $48,%r12
351         xorq    %r8,%r13
352         movq    %r9,%r10
353         xorq    %r12,%r9
354         shrq    $8,%r8
355         movzbq  %r13b,%r13
356         shrq    $8,%r9
357         xorq    -128(%rbp,%rcx,8),%r8
358         shlq    $56,%r10
359         xorq    (%rbp,%rcx,8),%r9
360         roll    $8,%edx
361         xorq    8(%rsi,%rax,1),%r8
362         xorq    (%rsi,%rax,1),%r9
363         movb    %dl,%al
364         xorq    %r10,%r8
365         movzwq  (%r11,%r13,2),%r13
366         movzbl  %dl,%ecx
367         shlb    $4,%al
368         movzbq  (%rsp,%rbx,1),%r12
369         shrl    $4,%ecx
370         shlq    $48,%r13
371         xorq    %r8,%r12
372         movq    %r9,%r10
373         xorq    %r13,%r9
374         shrq    $8,%r8
375         movzbq  %r12b,%r12
376         movl    8(%rdi),%edx
377         shrq    $8,%r9
378         xorq    -128(%rbp,%rbx,8),%r8
379         shlq    $56,%r10
380         xorq    (%rbp,%rbx,8),%r9
381         roll    $8,%edx
382         xorq    8(%rsi,%rax,1),%r8
383         xorq    (%rsi,%rax,1),%r9
384         movb    %dl,%al
385         xorq    %r10,%r8
386         movzwq  (%r11,%r12,2),%r12
387         movzbl  %dl,%ebx
388         shlb    $4,%al
389         movzbq  (%rsp,%rcx,1),%r13
390         shrl    $4,%ebx
391         shlq    $48,%r12
392         xorq    %r8,%r13
393         movq    %r9,%r10
394         xorq    %r12,%r9
395         shrq    $8,%r8
396         movzbq  %r13b,%r13
397         shrq    $8,%r9
398         xorq    -128(%rbp,%rcx,8),%r8
399         shlq    $56,%r10
400         xorq    (%rbp,%rcx,8),%r9
401         roll    $8,%edx
402         xorq    8(%rsi,%rax,1),%r8
403         xorq    (%rsi,%rax,1),%r9
404         movb    %dl,%al
405         xorq    %r10,%r8
406         movzwq  (%r11,%r13,2),%r13
407         movzbl  %dl,%ecx
408         shlb    $4,%al
409         movzbq  (%rsp,%rbx,1),%r12
410         shrl    $4,%ecx
411         shlq    $48,%r13
412         xorq    %r8,%r12
413         movq    %r9,%r10
414         xorq    %r13,%r9
415         shrq    $8,%r8
416         movzbq  %r12b,%r12
417         shrq    $8,%r9
418         xorq    -128(%rbp,%rbx,8),%r8
419         shlq    $56,%r10
420         xorq    (%rbp,%rbx,8),%r9
421         roll    $8,%edx
422         xorq    8(%rsi,%rax,1),%r8
423         xorq    (%rsi,%rax,1),%r9
424         movb    %dl,%al
425         xorq    %r10,%r8
426         movzwq  (%r11,%r12,2),%r12
427         movzbl  %dl,%ebx
428         shlb    $4,%al
429         movzbq  (%rsp,%rcx,1),%r13
430         shrl    $4,%ebx
431         shlq    $48,%r12
432         xorq    %r8,%r13
433         movq    %r9,%r10
434         xorq    %r12,%r9
435         shrq    $8,%r8
436         movzbq  %r13b,%r13
437         shrq    $8,%r9
438         xorq    -128(%rbp,%rcx,8),%r8
439         shlq    $56,%r10
440         xorq    (%rbp,%rcx,8),%r9
441         roll    $8,%edx
442         xorq    8(%rsi,%rax,1),%r8
443         xorq    (%rsi,%rax,1),%r9
444         movb    %dl,%al
445         xorq    %r10,%r8
446         movzwq  (%r11,%r13,2),%r13
447         movzbl  %dl,%ecx
448         shlb    $4,%al
449         movzbq  (%rsp,%rbx,1),%r12
450         shrl    $4,%ecx
451         shlq    $48,%r13
452         xorq    %r8,%r12
453         movq    %r9,%r10
454         xorq    %r13,%r9
455         shrq    $8,%r8
456         movzbq  %r12b,%r12
457         movl    4(%rdi),%edx
458         shrq    $8,%r9
459         xorq    -128(%rbp,%rbx,8),%r8
460         shlq    $56,%r10
461         xorq    (%rbp,%rbx,8),%r9
462         roll    $8,%edx
463         xorq    8(%rsi,%rax,1),%r8
464         xorq    (%rsi,%rax,1),%r9
465         movb    %dl,%al
466         xorq    %r10,%r8
467         movzwq  (%r11,%r12,2),%r12
468         movzbl  %dl,%ebx
469         shlb    $4,%al
470         movzbq  (%rsp,%rcx,1),%r13
471         shrl    $4,%ebx
472         shlq    $48,%r12
473         xorq    %r8,%r13
474         movq    %r9,%r10
475         xorq    %r12,%r9
476         shrq    $8,%r8
477         movzbq  %r13b,%r13
478         shrq    $8,%r9
479         xorq    -128(%rbp,%rcx,8),%r8
480         shlq    $56,%r10
481         xorq    (%rbp,%rcx,8),%r9
482         roll    $8,%edx
483         xorq    8(%rsi,%rax,1),%r8
484         xorq    (%rsi,%rax,1),%r9
485         movb    %dl,%al
486         xorq    %r10,%r8
487         movzwq  (%r11,%r13,2),%r13
488         movzbl  %dl,%ecx
489         shlb    $4,%al
490         movzbq  (%rsp,%rbx,1),%r12
491         shrl    $4,%ecx
492         shlq    $48,%r13
493         xorq    %r8,%r12
494         movq    %r9,%r10
495         xorq    %r13,%r9
496         shrq    $8,%r8
497         movzbq  %r12b,%r12
498         shrq    $8,%r9
499         xorq    -128(%rbp,%rbx,8),%r8
500         shlq    $56,%r10
501         xorq    (%rbp,%rbx,8),%r9
502         roll    $8,%edx
503         xorq    8(%rsi,%rax,1),%r8
504         xorq    (%rsi,%rax,1),%r9
505         movb    %dl,%al
506         xorq    %r10,%r8
507         movzwq  (%r11,%r12,2),%r12
508         movzbl  %dl,%ebx
509         shlb    $4,%al
510         movzbq  (%rsp,%rcx,1),%r13
511         shrl    $4,%ebx
512         shlq    $48,%r12
513         xorq    %r8,%r13
514         movq    %r9,%r10
515         xorq    %r12,%r9
516         shrq    $8,%r8
517         movzbq  %r13b,%r13
518         shrq    $8,%r9
519         xorq    -128(%rbp,%rcx,8),%r8
520         shlq    $56,%r10
521         xorq    (%rbp,%rcx,8),%r9
522         roll    $8,%edx
523         xorq    8(%rsi,%rax,1),%r8
524         xorq    (%rsi,%rax,1),%r9
525         movb    %dl,%al
526         xorq    %r10,%r8
527         movzwq  (%r11,%r13,2),%r13
528         movzbl  %dl,%ecx
529         shlb    $4,%al
530         movzbq  (%rsp,%rbx,1),%r12
531         shrl    $4,%ecx
532         shlq    $48,%r13
533         xorq    %r8,%r12
534         movq    %r9,%r10
535         xorq    %r13,%r9
536         shrq    $8,%r8
537         movzbq  %r12b,%r12
538         movl    0(%rdi),%edx
539         shrq    $8,%r9
540         xorq    -128(%rbp,%rbx,8),%r8
541         shlq    $56,%r10
542         xorq    (%rbp,%rbx,8),%r9
543         roll    $8,%edx
544         xorq    8(%rsi,%rax,1),%r8
545         xorq    (%rsi,%rax,1),%r9
546         movb    %dl,%al
547         xorq    %r10,%r8
548         movzwq  (%r11,%r12,2),%r12
549         movzbl  %dl,%ebx
550         shlb    $4,%al
551         movzbq  (%rsp,%rcx,1),%r13
552         shrl    $4,%ebx
553         shlq    $48,%r12
554         xorq    %r8,%r13
555         movq    %r9,%r10
556         xorq    %r12,%r9
557         shrq    $8,%r8
558         movzbq  %r13b,%r13
559         shrq    $8,%r9
560         xorq    -128(%rbp,%rcx,8),%r8
561         shlq    $56,%r10
562         xorq    (%rbp,%rcx,8),%r9
563         roll    $8,%edx
564         xorq    8(%rsi,%rax,1),%r8
565         xorq    (%rsi,%rax,1),%r9
566         movb    %dl,%al
567         xorq    %r10,%r8
568         movzwq  (%r11,%r13,2),%r13
569         movzbl  %dl,%ecx
570         shlb    $4,%al
571         movzbq  (%rsp,%rbx,1),%r12
572         shrl    $4,%ecx
573         shlq    $48,%r13
574         xorq    %r8,%r12
575         movq    %r9,%r10
576         xorq    %r13,%r9
577         shrq    $8,%r8
578         movzbq  %r12b,%r12
579         shrq    $8,%r9
580         xorq    -128(%rbp,%rbx,8),%r8
581         shlq    $56,%r10
582         xorq    (%rbp,%rbx,8),%r9
583         roll    $8,%edx
584         xorq    8(%rsi,%rax,1),%r8
585         xorq    (%rsi,%rax,1),%r9
586         movb    %dl,%al
587         xorq    %r10,%r8
588         movzwq  (%r11,%r12,2),%r12
589         movzbl  %dl,%ebx
590         shlb    $4,%al
591         movzbq  (%rsp,%rcx,1),%r13
592         shrl    $4,%ebx
593         shlq    $48,%r12
594         xorq    %r8,%r13
595         movq    %r9,%r10
596         xorq    %r12,%r9
597         shrq    $8,%r8
598         movzbq  %r13b,%r13
599         shrq    $8,%r9
600         xorq    -128(%rbp,%rcx,8),%r8
601         shlq    $56,%r10
602         xorq    (%rbp,%rcx,8),%r9
603         roll    $8,%edx
604         xorq    8(%rsi,%rax,1),%r8
605         xorq    (%rsi,%rax,1),%r9
606         movb    %dl,%al
607         xorq    %r10,%r8
608         movzwq  (%r11,%r13,2),%r13
609         movzbl  %dl,%ecx
610         shlb    $4,%al
611         movzbq  (%rsp,%rbx,1),%r12
612         andl    $240,%ecx
613         shlq    $48,%r13
614         xorq    %r8,%r12
615         movq    %r9,%r10
616         xorq    %r13,%r9
617         shrq    $8,%r8
618         movzbq  %r12b,%r12
619         movl    -4(%rdi),%edx
620         shrq    $8,%r9
621         xorq    -128(%rbp,%rbx,8),%r8
622         shlq    $56,%r10
623         xorq    (%rbp,%rbx,8),%r9
624         movzwq  (%r11,%r12,2),%r12
625         xorq    8(%rsi,%rax,1),%r8
626         xorq    (%rsi,%rax,1),%r9
627         shlq    $48,%r12
628         xorq    %r10,%r8
629         xorq    %r12,%r9
630         movzbq  %r8b,%r13
631         shrq    $4,%r8
632         movq    %r9,%r10
633         shlb    $4,%r13b
634         shrq    $4,%r9
635         xorq    8(%rsi,%rcx,1),%r8
636         movzwq  (%r11,%r13,2),%r13
637         shlq    $60,%r10
638         xorq    (%rsi,%rcx,1),%r9
639         xorq    %r10,%r8
640         shlq    $48,%r13
641         bswapq  %r8
642         xorq    %r13,%r9
643         bswapq  %r9
644         cmpq    %r15,%r14
645         jb      .Louter_loop
646         movq    %r8,8(%rdi)
647         movq    %r9,(%rdi)
648
649         leaq    280(%rsp),%rsi
650         movq    0(%rsi),%r15
651         movq    8(%rsi),%r14
652         movq    16(%rsi),%r13
653         movq    24(%rsi),%r12
654         movq    32(%rsi),%rbp
655         movq    40(%rsi),%rbx
656         leaq    48(%rsi),%rsp
657 .Lghash_epilogue:
658         .byte   0xf3,0xc3
659 .size   gcm_ghash_4bit,.-gcm_ghash_4bit
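/*
 * gcm_init_clmul(u128 Htable[16], const u64 Xi[2]): %rdi -> Htable (out),
 * %rsi -> H (the hash subkey).  Precomputes H, H^2, H^3, H^4 and their
 * Karatsuba hi^lo halves at offsets 0..80 for the CLMUL routines below.
 * The ".byte 102,15,58,68,..." sequences are hand-encoded pclmulqdq
 * (66 0F 3A 44) and "102,15,56,0,..." is pshufb (66 0F 38 00), emitted
 * as raw bytes for assemblers that lack those mnemonics.
 */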
660 .globl  gcm_init_clmul
661 .type   gcm_init_clmul,@function
662 .align  16
663 gcm_init_clmul:
664 .L_init_clmul:
665         movdqu  (%rsi),%xmm2
666         pshufd  $78,%xmm2,%xmm2
667
668
669         pshufd  $255,%xmm2,%xmm4
670         movdqa  %xmm2,%xmm3
671         psllq   $1,%xmm2
672         pxor    %xmm5,%xmm5
673         psrlq   $63,%xmm3
674         pcmpgtd %xmm4,%xmm5
675         pslldq  $8,%xmm3
676         por     %xmm3,%xmm2
677
678
679         pand    .L0x1c2_polynomial(%rip),%xmm5
680         pxor    %xmm5,%xmm2
681
682
683         pshufd  $78,%xmm2,%xmm6
684         movdqa  %xmm2,%xmm0
685         pxor    %xmm2,%xmm6
686         movdqa  %xmm0,%xmm1
687         pshufd  $78,%xmm0,%xmm3
688         pxor    %xmm0,%xmm3
689 .byte   102,15,58,68,194,0
690 .byte   102,15,58,68,202,17
691 .byte   102,15,58,68,222,0
692         pxor    %xmm0,%xmm3
693         pxor    %xmm1,%xmm3
694
695         movdqa  %xmm3,%xmm4
696         psrldq  $8,%xmm3
697         pslldq  $8,%xmm4
698         pxor    %xmm3,%xmm1
699         pxor    %xmm4,%xmm0
700
701         movdqa  %xmm0,%xmm4
702         movdqa  %xmm0,%xmm3
703         psllq   $5,%xmm0
704         pxor    %xmm0,%xmm3
705         psllq   $1,%xmm0
706         pxor    %xmm3,%xmm0
707         psllq   $57,%xmm0
708         movdqa  %xmm0,%xmm3
709         pslldq  $8,%xmm0
710         psrldq  $8,%xmm3
711         pxor    %xmm4,%xmm0
712         pxor    %xmm3,%xmm1
713
714
715         movdqa  %xmm0,%xmm4
716         psrlq   $1,%xmm0
717         pxor    %xmm4,%xmm1
718         pxor    %xmm0,%xmm4
719         psrlq   $5,%xmm0
720         pxor    %xmm4,%xmm0
721         psrlq   $1,%xmm0
722         pxor    %xmm1,%xmm0
723         pshufd  $78,%xmm2,%xmm3
724         pshufd  $78,%xmm0,%xmm4
725         pxor    %xmm2,%xmm3
726         movdqu  %xmm2,0(%rdi)
727         pxor    %xmm0,%xmm4
728         movdqu  %xmm0,16(%rdi)
729 .byte   102,15,58,15,227,8
730         movdqu  %xmm4,32(%rdi)
731         movdqa  %xmm0,%xmm1
732         pshufd  $78,%xmm0,%xmm3
733         pxor    %xmm0,%xmm3
734 .byte   102,15,58,68,194,0
735 .byte   102,15,58,68,202,17
736 .byte   102,15,58,68,222,0
737         pxor    %xmm0,%xmm3
738         pxor    %xmm1,%xmm3
739
740         movdqa  %xmm3,%xmm4
741         psrldq  $8,%xmm3
742         pslldq  $8,%xmm4
743         pxor    %xmm3,%xmm1
744         pxor    %xmm4,%xmm0
745
746         movdqa  %xmm0,%xmm4
747         movdqa  %xmm0,%xmm3
748         psllq   $5,%xmm0
749         pxor    %xmm0,%xmm3
750         psllq   $1,%xmm0
751         pxor    %xmm3,%xmm0
752         psllq   $57,%xmm0
753         movdqa  %xmm0,%xmm3
754         pslldq  $8,%xmm0
755         psrldq  $8,%xmm3
756         pxor    %xmm4,%xmm0
757         pxor    %xmm3,%xmm1
758
759
760         movdqa  %xmm0,%xmm4
761         psrlq   $1,%xmm0
762         pxor    %xmm4,%xmm1
763         pxor    %xmm0,%xmm4
764         psrlq   $5,%xmm0
765         pxor    %xmm4,%xmm0
766         psrlq   $1,%xmm0
767         pxor    %xmm1,%xmm0
768         movdqa  %xmm0,%xmm5
769         movdqa  %xmm0,%xmm1
770         pshufd  $78,%xmm0,%xmm3
771         pxor    %xmm0,%xmm3
772 .byte   102,15,58,68,194,0
773 .byte   102,15,58,68,202,17
774 .byte   102,15,58,68,222,0
775         pxor    %xmm0,%xmm3
776         pxor    %xmm1,%xmm3
777
778         movdqa  %xmm3,%xmm4
779         psrldq  $8,%xmm3
780         pslldq  $8,%xmm4
781         pxor    %xmm3,%xmm1
782         pxor    %xmm4,%xmm0
783
784         movdqa  %xmm0,%xmm4
785         movdqa  %xmm0,%xmm3
786         psllq   $5,%xmm0
787         pxor    %xmm0,%xmm3
788         psllq   $1,%xmm0
789         pxor    %xmm3,%xmm0
790         psllq   $57,%xmm0
791         movdqa  %xmm0,%xmm3
792         pslldq  $8,%xmm0
793         psrldq  $8,%xmm3
794         pxor    %xmm4,%xmm0
795         pxor    %xmm3,%xmm1
796
797
798         movdqa  %xmm0,%xmm4
799         psrlq   $1,%xmm0
800         pxor    %xmm4,%xmm1
801         pxor    %xmm0,%xmm4
802         psrlq   $5,%xmm0
803         pxor    %xmm4,%xmm0
804         psrlq   $1,%xmm0
805         pxor    %xmm1,%xmm0
806         pshufd  $78,%xmm5,%xmm3
807         pshufd  $78,%xmm0,%xmm4
808         pxor    %xmm5,%xmm3
809         movdqu  %xmm5,48(%rdi)
810         pxor    %xmm0,%xmm4
811         movdqu  %xmm0,64(%rdi)
812 .byte   102,15,58,15,227,8
813         movdqu  %xmm4,80(%rdi)
814         .byte   0xf3,0xc3
815 .size   gcm_init_clmul,.-gcm_init_clmul
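/*
 * gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]): single-block
 * Xi <- Xi * H via PCLMULQDQ.  %rdi -> Xi, %rsi -> Htable as laid out
 * by gcm_init_clmul.
 */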
816 .globl  gcm_gmult_clmul
817 .type   gcm_gmult_clmul,@function
818 .align  16
819 gcm_gmult_clmul:
820 .L_gmult_clmul:
821         movdqu  (%rdi),%xmm0
822         movdqa  .Lbswap_mask(%rip),%xmm5
823         movdqu  (%rsi),%xmm2
824         movdqu  32(%rsi),%xmm4
825 .byte   102,15,56,0,197
826         movdqa  %xmm0,%xmm1
827         pshufd  $78,%xmm0,%xmm3
828         pxor    %xmm0,%xmm3
829 .byte   102,15,58,68,194,0
830 .byte   102,15,58,68,202,17
831 .byte   102,15,58,68,220,0
832         pxor    %xmm0,%xmm3
833         pxor    %xmm1,%xmm3
834
835         movdqa  %xmm3,%xmm4
836         psrldq  $8,%xmm3
837         pslldq  $8,%xmm4
838         pxor    %xmm3,%xmm1
839         pxor    %xmm4,%xmm0
840
841         movdqa  %xmm0,%xmm4
842         movdqa  %xmm0,%xmm3
843         psllq   $5,%xmm0
844         pxor    %xmm0,%xmm3
845         psllq   $1,%xmm0
846         pxor    %xmm3,%xmm0
847         psllq   $57,%xmm0
848         movdqa  %xmm0,%xmm3
849         pslldq  $8,%xmm0
850         psrldq  $8,%xmm3
851         pxor    %xmm4,%xmm0
852         pxor    %xmm3,%xmm1
853
854
855         movdqa  %xmm0,%xmm4
856         psrlq   $1,%xmm0
857         pxor    %xmm4,%xmm1
858         pxor    %xmm0,%xmm4
859         psrlq   $5,%xmm0
860         pxor    %xmm4,%xmm0
861         psrlq   $1,%xmm0
862         pxor    %xmm1,%xmm0
863 .byte   102,15,56,0,197
864         movdqu  %xmm0,(%rdi)
865         .byte   0xf3,0xc3
866 .size   gcm_gmult_clmul,.-gcm_gmult_clmul
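/*
 * gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len):
 * PCLMULQDQ bulk GHASH.  %rdi -> Xi, %rsi -> Htable, %rdx -> inp,
 * %rcx = len (multiple of 16).  The main path (.Lmod4_loop) handles four
 * blocks per iteration, gated on a capability check against
 * OPENSSL_ia32cap_P; otherwise two blocks per iteration (.Lmod_loop),
 * with .Leven_tail/.Lodd_tail finishing the remainder.
 */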
867 .globl  gcm_ghash_clmul
868 .type   gcm_ghash_clmul,@function
869 .align  32
870 gcm_ghash_clmul:
871 .L_ghash_clmul:
872         movdqa  .Lbswap_mask(%rip),%xmm10
873
874         movdqu  (%rdi),%xmm0
875         movdqu  (%rsi),%xmm2
876         movdqu  32(%rsi),%xmm7
877 .byte   102,65,15,56,0,194
878
879         subq    $0x10,%rcx
880         jz      .Lodd_tail
881
882         movdqu  16(%rsi),%xmm6
883         movl    OPENSSL_ia32cap_P+4(%rip),%eax
884         cmpq    $0x30,%rcx
885         jb      .Lskip4x
886
887         andl    $71303168,%eax
888         cmpl    $4194304,%eax
889         je      .Lskip4x
890
891         subq    $0x30,%rcx
892         movq    $0xA040608020C0E000,%rax
893         movdqu  48(%rsi),%xmm14
894         movdqu  64(%rsi),%xmm15
895
896
897
898
899         movdqu  48(%rdx),%xmm3
900         movdqu  32(%rdx),%xmm11
901 .byte   102,65,15,56,0,218
902 .byte   102,69,15,56,0,218
903         movdqa  %xmm3,%xmm5
904         pshufd  $78,%xmm3,%xmm4
905         pxor    %xmm3,%xmm4
906 .byte   102,15,58,68,218,0
907 .byte   102,15,58,68,234,17
908 .byte   102,15,58,68,231,0
909
910         movdqa  %xmm11,%xmm13
911         pshufd  $78,%xmm11,%xmm12
912         pxor    %xmm11,%xmm12
913 .byte   102,68,15,58,68,222,0
914 .byte   102,68,15,58,68,238,17
915 .byte   102,68,15,58,68,231,16
916         xorps   %xmm11,%xmm3
917         xorps   %xmm13,%xmm5
918         movups  80(%rsi),%xmm7
919         xorps   %xmm12,%xmm4
920
921         movdqu  16(%rdx),%xmm11
922         movdqu  0(%rdx),%xmm8
923 .byte   102,69,15,56,0,218
924 .byte   102,69,15,56,0,194
925         movdqa  %xmm11,%xmm13
926         pshufd  $78,%xmm11,%xmm12
927         pxor    %xmm8,%xmm0
928         pxor    %xmm11,%xmm12
929 .byte   102,69,15,58,68,222,0
930         movdqa  %xmm0,%xmm1
931         pshufd  $78,%xmm0,%xmm8
932         pxor    %xmm0,%xmm8
933 .byte   102,69,15,58,68,238,17
934 .byte   102,68,15,58,68,231,0
935         xorps   %xmm11,%xmm3
936         xorps   %xmm13,%xmm5
937
938         leaq    64(%rdx),%rdx
939         subq    $0x40,%rcx
940         jc      .Ltail4x
941
942         jmp     .Lmod4_loop
943 .align  32
944 .Lmod4_loop:
945 .byte   102,65,15,58,68,199,0
946         xorps   %xmm12,%xmm4
947         movdqu  48(%rdx),%xmm11
948 .byte   102,69,15,56,0,218
949 .byte   102,65,15,58,68,207,17
950         xorps   %xmm3,%xmm0
951         movdqu  32(%rdx),%xmm3
952         movdqa  %xmm11,%xmm13
953 .byte   102,68,15,58,68,199,16
954         pshufd  $78,%xmm11,%xmm12
955         xorps   %xmm5,%xmm1
956         pxor    %xmm11,%xmm12
957 .byte   102,65,15,56,0,218
958         movups  32(%rsi),%xmm7
959         xorps   %xmm4,%xmm8
960 .byte   102,68,15,58,68,218,0
961         pshufd  $78,%xmm3,%xmm4
962
963         pxor    %xmm0,%xmm8
964         movdqa  %xmm3,%xmm5
965         pxor    %xmm1,%xmm8
966         pxor    %xmm3,%xmm4
967         movdqa  %xmm8,%xmm9
968 .byte   102,68,15,58,68,234,17
969         pslldq  $8,%xmm8
970         psrldq  $8,%xmm9
971         pxor    %xmm8,%xmm0
972         movdqa  .L7_mask(%rip),%xmm8
973         pxor    %xmm9,%xmm1
974 .byte   102,76,15,110,200
975
976         pand    %xmm0,%xmm8
977 .byte   102,69,15,56,0,200
978         pxor    %xmm0,%xmm9
979 .byte   102,68,15,58,68,231,0
980         psllq   $57,%xmm9
981         movdqa  %xmm9,%xmm8
982         pslldq  $8,%xmm9
983 .byte   102,15,58,68,222,0
984         psrldq  $8,%xmm8
985         pxor    %xmm9,%xmm0
986         pxor    %xmm8,%xmm1
987         movdqu  0(%rdx),%xmm8
988
989         movdqa  %xmm0,%xmm9
990         psrlq   $1,%xmm0
991 .byte   102,15,58,68,238,17
992         xorps   %xmm11,%xmm3
993         movdqu  16(%rdx),%xmm11
994 .byte   102,69,15,56,0,218
995 .byte   102,15,58,68,231,16
996         xorps   %xmm13,%xmm5
997         movups  80(%rsi),%xmm7
998 .byte   102,69,15,56,0,194
999         pxor    %xmm9,%xmm1
1000         pxor    %xmm0,%xmm9
1001         psrlq   $5,%xmm0
1002
1003         movdqa  %xmm11,%xmm13
1004         pxor    %xmm12,%xmm4
1005         pshufd  $78,%xmm11,%xmm12
1006         pxor    %xmm9,%xmm0
1007         pxor    %xmm8,%xmm1
1008         pxor    %xmm11,%xmm12
1009 .byte   102,69,15,58,68,222,0
1010         psrlq   $1,%xmm0
1011         pxor    %xmm1,%xmm0
1012         movdqa  %xmm0,%xmm1
1013 .byte   102,69,15,58,68,238,17
1014         xorps   %xmm11,%xmm3
1015         pshufd  $78,%xmm0,%xmm8
1016         pxor    %xmm0,%xmm8
1017
1018 .byte   102,68,15,58,68,231,0
1019         xorps   %xmm13,%xmm5
1020
1021         leaq    64(%rdx),%rdx
1022         subq    $0x40,%rcx
1023         jnc     .Lmod4_loop
1024
1025 .Ltail4x:
1026 .byte   102,65,15,58,68,199,0
1027 .byte   102,65,15,58,68,207,17
1028 .byte   102,68,15,58,68,199,16
1029         xorps   %xmm12,%xmm4
1030         xorps   %xmm3,%xmm0
1031         xorps   %xmm5,%xmm1
1032         pxor    %xmm0,%xmm1
1033         pxor    %xmm4,%xmm8
1034
1035         pxor    %xmm1,%xmm8
1036         pxor    %xmm0,%xmm1
1037
1038         movdqa  %xmm8,%xmm9
1039         psrldq  $8,%xmm8
1040         pslldq  $8,%xmm9
1041         pxor    %xmm8,%xmm1
1042         pxor    %xmm9,%xmm0
1043
1044         movdqa  %xmm0,%xmm4
1045         movdqa  %xmm0,%xmm3
1046         psllq   $5,%xmm0
1047         pxor    %xmm0,%xmm3
1048         psllq   $1,%xmm0
1049         pxor    %xmm3,%xmm0
1050         psllq   $57,%xmm0
1051         movdqa  %xmm0,%xmm3
1052         pslldq  $8,%xmm0
1053         psrldq  $8,%xmm3
1054         pxor    %xmm4,%xmm0
1055         pxor    %xmm3,%xmm1
1056
1057
1058         movdqa  %xmm0,%xmm4
1059         psrlq   $1,%xmm0
1060         pxor    %xmm4,%xmm1
1061         pxor    %xmm0,%xmm4
1062         psrlq   $5,%xmm0
1063         pxor    %xmm4,%xmm0
1064         psrlq   $1,%xmm0
1065         pxor    %xmm1,%xmm0
1066         addq    $0x40,%rcx
1067         jz      .Ldone
1068         movdqu  32(%rsi),%xmm7
1069         subq    $0x10,%rcx
1070         jz      .Lodd_tail
1071 .Lskip4x:
1072
1073
1074
1075
1076
1077         movdqu  (%rdx),%xmm8
1078         movdqu  16(%rdx),%xmm3
1079 .byte   102,69,15,56,0,194
1080 .byte   102,65,15,56,0,218
1081         pxor    %xmm8,%xmm0
1082
1083         movdqa  %xmm3,%xmm5
1084         pshufd  $78,%xmm3,%xmm4
1085         pxor    %xmm3,%xmm4
1086 .byte   102,15,58,68,218,0
1087 .byte   102,15,58,68,234,17
1088 .byte   102,15,58,68,231,0
1089
1090         leaq    32(%rdx),%rdx
1091         nop
1092         subq    $0x20,%rcx
1093         jbe     .Leven_tail
1094         nop
1095         jmp     .Lmod_loop
1096
1097 .align  32
1098 .Lmod_loop:
1099         movdqa  %xmm0,%xmm1
1100         movdqa  %xmm4,%xmm8
1101         pshufd  $78,%xmm0,%xmm4
1102         pxor    %xmm0,%xmm4
1103
1104 .byte   102,15,58,68,198,0
1105 .byte   102,15,58,68,206,17
1106 .byte   102,15,58,68,231,16
1107
1108         pxor    %xmm3,%xmm0
1109         pxor    %xmm5,%xmm1
1110         movdqu  (%rdx),%xmm9
1111         pxor    %xmm0,%xmm8
1112 .byte   102,69,15,56,0,202
1113         movdqu  16(%rdx),%xmm3
1114
1115         pxor    %xmm1,%xmm8
1116         pxor    %xmm9,%xmm1
1117         pxor    %xmm8,%xmm4
1118 .byte   102,65,15,56,0,218
1119         movdqa  %xmm4,%xmm8
1120         psrldq  $8,%xmm8
1121         pslldq  $8,%xmm4
1122         pxor    %xmm8,%xmm1
1123         pxor    %xmm4,%xmm0
1124
1125         movdqa  %xmm3,%xmm5
1126
1127         movdqa  %xmm0,%xmm9
1128         movdqa  %xmm0,%xmm8
1129         psllq   $5,%xmm0
1130         pxor    %xmm0,%xmm8
1131 .byte   102,15,58,68,218,0
1132         psllq   $1,%xmm0
1133         pxor    %xmm8,%xmm0
1134         psllq   $57,%xmm0
1135         movdqa  %xmm0,%xmm8
1136         pslldq  $8,%xmm0
1137         psrldq  $8,%xmm8
1138         pxor    %xmm9,%xmm0
1139         pshufd  $78,%xmm5,%xmm4
1140         pxor    %xmm8,%xmm1
1141         pxor    %xmm5,%xmm4
1142
1143         movdqa  %xmm0,%xmm9
1144         psrlq   $1,%xmm0
1145 .byte   102,15,58,68,234,17
1146         pxor    %xmm9,%xmm1
1147         pxor    %xmm0,%xmm9
1148         psrlq   $5,%xmm0
1149         pxor    %xmm9,%xmm0
1150         leaq    32(%rdx),%rdx
1151         psrlq   $1,%xmm0
1152 .byte   102,15,58,68,231,0
1153         pxor    %xmm1,%xmm0
1154
1155         subq    $0x20,%rcx
1156         ja      .Lmod_loop
1157
1158 .Leven_tail:
1159         movdqa  %xmm0,%xmm1
1160         movdqa  %xmm4,%xmm8
1161         pshufd  $78,%xmm0,%xmm4
1162         pxor    %xmm0,%xmm4
1163
1164 .byte   102,15,58,68,198,0
1165 .byte   102,15,58,68,206,17
1166 .byte   102,15,58,68,231,16
1167
1168         pxor    %xmm3,%xmm0
1169         pxor    %xmm5,%xmm1
1170         pxor    %xmm0,%xmm8
1171         pxor    %xmm1,%xmm8
1172         pxor    %xmm8,%xmm4
1173         movdqa  %xmm4,%xmm8
1174         psrldq  $8,%xmm8
1175         pslldq  $8,%xmm4
1176         pxor    %xmm8,%xmm1
1177         pxor    %xmm4,%xmm0
1178
1179         movdqa  %xmm0,%xmm4
1180         movdqa  %xmm0,%xmm3
1181         psllq   $5,%xmm0
1182         pxor    %xmm0,%xmm3
1183         psllq   $1,%xmm0
1184         pxor    %xmm3,%xmm0
1185         psllq   $57,%xmm0
1186         movdqa  %xmm0,%xmm3
1187         pslldq  $8,%xmm0
1188         psrldq  $8,%xmm3
1189         pxor    %xmm4,%xmm0
1190         pxor    %xmm3,%xmm1
1191
1192
1193         movdqa  %xmm0,%xmm4
1194         psrlq   $1,%xmm0
1195         pxor    %xmm4,%xmm1
1196         pxor    %xmm0,%xmm4
1197         psrlq   $5,%xmm0
1198         pxor    %xmm4,%xmm0
1199         psrlq   $1,%xmm0
1200         pxor    %xmm1,%xmm0
1201         testq   %rcx,%rcx
1202         jnz     .Ldone
1203
1204 .Lodd_tail:
1205         movdqu  (%rdx),%xmm8
1206 .byte   102,69,15,56,0,194
1207         pxor    %xmm8,%xmm0
1208         movdqa  %xmm0,%xmm1
1209         pshufd  $78,%xmm0,%xmm3
1210         pxor    %xmm0,%xmm3
1211 .byte   102,15,58,68,194,0
1212 .byte   102,15,58,68,202,17
1213 .byte   102,15,58,68,223,0
1214         pxor    %xmm0,%xmm3
1215         pxor    %xmm1,%xmm3
1216
1217         movdqa  %xmm3,%xmm4
1218         psrldq  $8,%xmm3
1219         pslldq  $8,%xmm4
1220         pxor    %xmm3,%xmm1
1221         pxor    %xmm4,%xmm0
1222
1223         movdqa  %xmm0,%xmm4
1224         movdqa  %xmm0,%xmm3
1225         psllq   $5,%xmm0
1226         pxor    %xmm0,%xmm3
1227         psllq   $1,%xmm0
1228         pxor    %xmm3,%xmm0
1229         psllq   $57,%xmm0
1230         movdqa  %xmm0,%xmm3
1231         pslldq  $8,%xmm0
1232         psrldq  $8,%xmm3
1233         pxor    %xmm4,%xmm0
1234         pxor    %xmm3,%xmm1
1235
1236
1237         movdqa  %xmm0,%xmm4
1238         psrlq   $1,%xmm0
1239         pxor    %xmm4,%xmm1
1240         pxor    %xmm0,%xmm4
1241         psrlq   $5,%xmm0
1242         pxor    %xmm4,%xmm0
1243         psrlq   $1,%xmm0
1244         pxor    %xmm1,%xmm0
1245 .Ldone:
1246 .byte   102,65,15,56,0,194
1247         movdqu  %xmm0,(%rdi)
1248         .byte   0xf3,0xc3
1249 .size   gcm_ghash_clmul,.-gcm_ghash_clmul
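/*
 * gcm_init_avx: AVX counterpart of gcm_init_clmul, same arguments
 * (%rdi -> Htable, %rsi -> H).  The loop below runs four times and
 * emits two powers of H per pass, so the table ends up holding
 * H^1..H^8 plus their Karatsuba halves for gcm_ghash_avx.
 */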
1250 .globl  gcm_init_avx
1251 .type   gcm_init_avx,@function
1252 .align  32
1253 gcm_init_avx:
1254         vzeroupper
1255
1256         vmovdqu (%rsi),%xmm2
1257         vpshufd $78,%xmm2,%xmm2
1258
1259
1260         vpshufd $255,%xmm2,%xmm4
1261         vpsrlq  $63,%xmm2,%xmm3
1262         vpsllq  $1,%xmm2,%xmm2
1263         vpxor   %xmm5,%xmm5,%xmm5
1264         vpcmpgtd        %xmm4,%xmm5,%xmm5
1265         vpslldq $8,%xmm3,%xmm3
1266         vpor    %xmm3,%xmm2,%xmm2
1267
1268
1269         vpand   .L0x1c2_polynomial(%rip),%xmm5,%xmm5
1270         vpxor   %xmm5,%xmm2,%xmm2
1271
1272         vpunpckhqdq     %xmm2,%xmm2,%xmm6
1273         vmovdqa %xmm2,%xmm0
1274         vpxor   %xmm2,%xmm6,%xmm6
1275         movq    $4,%r10
1276         jmp     .Linit_start_avx
1277 .align  32
1278 .Linit_loop_avx:
1279         vpalignr        $8,%xmm3,%xmm4,%xmm5
1280         vmovdqu %xmm5,-16(%rdi)
1281         vpunpckhqdq     %xmm0,%xmm0,%xmm3
1282         vpxor   %xmm0,%xmm3,%xmm3
1283         vpclmulqdq      $0x11,%xmm2,%xmm0,%xmm1
1284         vpclmulqdq      $0x00,%xmm2,%xmm0,%xmm0
1285         vpclmulqdq      $0x00,%xmm6,%xmm3,%xmm3
1286         vpxor   %xmm0,%xmm1,%xmm4
1287         vpxor   %xmm4,%xmm3,%xmm3
1288
1289         vpslldq $8,%xmm3,%xmm4
1290         vpsrldq $8,%xmm3,%xmm3
1291         vpxor   %xmm4,%xmm0,%xmm0
1292         vpxor   %xmm3,%xmm1,%xmm1
1293         vpsllq  $57,%xmm0,%xmm3
1294         vpsllq  $62,%xmm0,%xmm4
1295         vpxor   %xmm3,%xmm4,%xmm4
1296         vpsllq  $63,%xmm0,%xmm3
1297         vpxor   %xmm3,%xmm4,%xmm4
1298         vpslldq $8,%xmm4,%xmm3
1299         vpsrldq $8,%xmm4,%xmm4
1300         vpxor   %xmm3,%xmm0,%xmm0
1301         vpxor   %xmm4,%xmm1,%xmm1
1302
1303         vpsrlq  $1,%xmm0,%xmm4
1304         vpxor   %xmm0,%xmm1,%xmm1
1305         vpxor   %xmm4,%xmm0,%xmm0
1306         vpsrlq  $5,%xmm4,%xmm4
1307         vpxor   %xmm4,%xmm0,%xmm0
1308         vpsrlq  $1,%xmm0,%xmm0
1309         vpxor   %xmm1,%xmm0,%xmm0
1310 .Linit_start_avx:
1311         vmovdqa %xmm0,%xmm5
1312         vpunpckhqdq     %xmm0,%xmm0,%xmm3
1313         vpxor   %xmm0,%xmm3,%xmm3
1314         vpclmulqdq      $0x11,%xmm2,%xmm0,%xmm1
1315         vpclmulqdq      $0x00,%xmm2,%xmm0,%xmm0
1316         vpclmulqdq      $0x00,%xmm6,%xmm3,%xmm3
1317         vpxor   %xmm0,%xmm1,%xmm4
1318         vpxor   %xmm4,%xmm3,%xmm3
1319
1320         vpslldq $8,%xmm3,%xmm4
1321         vpsrldq $8,%xmm3,%xmm3
1322         vpxor   %xmm4,%xmm0,%xmm0
1323         vpxor   %xmm3,%xmm1,%xmm1
1324         vpsllq  $57,%xmm0,%xmm3
1325         vpsllq  $62,%xmm0,%xmm4
1326         vpxor   %xmm3,%xmm4,%xmm4
1327         vpsllq  $63,%xmm0,%xmm3
1328         vpxor   %xmm3,%xmm4,%xmm4
1329         vpslldq $8,%xmm4,%xmm3
1330         vpsrldq $8,%xmm4,%xmm4
1331         vpxor   %xmm3,%xmm0,%xmm0
1332         vpxor   %xmm4,%xmm1,%xmm1
1333
1334         vpsrlq  $1,%xmm0,%xmm4
1335         vpxor   %xmm0,%xmm1,%xmm1
1336         vpxor   %xmm4,%xmm0,%xmm0
1337         vpsrlq  $5,%xmm4,%xmm4
1338         vpxor   %xmm4,%xmm0,%xmm0
1339         vpsrlq  $1,%xmm0,%xmm0
1340         vpxor   %xmm1,%xmm0,%xmm0
1341         vpshufd $78,%xmm5,%xmm3
1342         vpshufd $78,%xmm0,%xmm4
1343         vpxor   %xmm5,%xmm3,%xmm3
1344         vmovdqu %xmm5,0(%rdi)
1345         vpxor   %xmm0,%xmm4,%xmm4
1346         vmovdqu %xmm0,16(%rdi)
1347         leaq    48(%rdi),%rdi
1348         subq    $1,%r10
1349         jnz     .Linit_loop_avx
1350
1351         vpalignr        $8,%xmm4,%xmm3,%xmm5
1352         vmovdqu %xmm5,-16(%rdi)
1353
1354         vzeroupper
1355         .byte   0xf3,0xc3
1356 .size   gcm_init_avx,.-gcm_init_avx
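/*
 * gcm_gmult_avx: no separate AVX single-block path; simply jumps into
 * the CLMUL code at .L_gmult_clmul.
 */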
1357 .globl  gcm_gmult_avx
1358 .type   gcm_gmult_avx,@function
1359 .align  32
1360 gcm_gmult_avx:
1361         jmp     .L_gmult_clmul
1362 .size   gcm_gmult_avx,.-gcm_gmult_avx
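/*
 * gcm_ghash_avx: AVX bulk GHASH, same arguments as gcm_ghash_clmul but
 * expecting the Htable layout produced by gcm_init_avx.  .Loop8x_avx
 * consumes eight 16-byte blocks per iteration; shorter inputs and
 * leftovers go through .Lshort_avx / .Ltail_avx.
 */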
1363 .globl  gcm_ghash_avx
1364 .type   gcm_ghash_avx,@function
1365 .align  32
1366 gcm_ghash_avx:
1367         vzeroupper
1368
1369         vmovdqu (%rdi),%xmm10
1370         leaq    .L0x1c2_polynomial(%rip),%r10
1371         leaq    64(%rsi),%rsi
1372         vmovdqu .Lbswap_mask(%rip),%xmm13
1373         vpshufb %xmm13,%xmm10,%xmm10
1374         cmpq    $0x80,%rcx
1375         jb      .Lshort_avx
1376         subq    $0x80,%rcx
1377
1378         vmovdqu 112(%rdx),%xmm14
1379         vmovdqu 0-64(%rsi),%xmm6
1380         vpshufb %xmm13,%xmm14,%xmm14
1381         vmovdqu 32-64(%rsi),%xmm7
1382
1383         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1384         vmovdqu 96(%rdx),%xmm15
1385         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1386         vpxor   %xmm14,%xmm9,%xmm9
1387         vpshufb %xmm13,%xmm15,%xmm15
1388         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1389         vmovdqu 16-64(%rsi),%xmm6
1390         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1391         vmovdqu 80(%rdx),%xmm14
1392         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1393         vpxor   %xmm15,%xmm8,%xmm8
1394
1395         vpshufb %xmm13,%xmm14,%xmm14
1396         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1397         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1398         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1399         vmovdqu 48-64(%rsi),%xmm6
1400         vpxor   %xmm14,%xmm9,%xmm9
1401         vmovdqu 64(%rdx),%xmm15
1402         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1403         vmovdqu 80-64(%rsi),%xmm7
1404
1405         vpshufb %xmm13,%xmm15,%xmm15
1406         vpxor   %xmm0,%xmm3,%xmm3
1407         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1408         vpxor   %xmm1,%xmm4,%xmm4
1409         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1410         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1411         vmovdqu 64-64(%rsi),%xmm6
1412         vpxor   %xmm2,%xmm5,%xmm5
1413         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1414         vpxor   %xmm15,%xmm8,%xmm8
1415
1416         vmovdqu 48(%rdx),%xmm14
1417         vpxor   %xmm3,%xmm0,%xmm0
1418         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1419         vpxor   %xmm4,%xmm1,%xmm1
1420         vpshufb %xmm13,%xmm14,%xmm14
1421         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1422         vmovdqu 96-64(%rsi),%xmm6
1423         vpxor   %xmm5,%xmm2,%xmm2
1424         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1425         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1426         vmovdqu 128-64(%rsi),%xmm7
1427         vpxor   %xmm14,%xmm9,%xmm9
1428
1429         vmovdqu 32(%rdx),%xmm15
1430         vpxor   %xmm0,%xmm3,%xmm3
1431         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1432         vpxor   %xmm1,%xmm4,%xmm4
1433         vpshufb %xmm13,%xmm15,%xmm15
1434         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1435         vmovdqu 112-64(%rsi),%xmm6
1436         vpxor   %xmm2,%xmm5,%xmm5
1437         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1438         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1439         vpxor   %xmm15,%xmm8,%xmm8
1440
1441         vmovdqu 16(%rdx),%xmm14
1442         vpxor   %xmm3,%xmm0,%xmm0
1443         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1444         vpxor   %xmm4,%xmm1,%xmm1
1445         vpshufb %xmm13,%xmm14,%xmm14
1446         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1447         vmovdqu 144-64(%rsi),%xmm6
1448         vpxor   %xmm5,%xmm2,%xmm2
1449         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1450         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1451         vmovdqu 176-64(%rsi),%xmm7
1452         vpxor   %xmm14,%xmm9,%xmm9
1453
1454         vmovdqu (%rdx),%xmm15
1455         vpxor   %xmm0,%xmm3,%xmm3
1456         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1457         vpxor   %xmm1,%xmm4,%xmm4
1458         vpshufb %xmm13,%xmm15,%xmm15
1459         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1460         vmovdqu 160-64(%rsi),%xmm6
1461         vpxor   %xmm2,%xmm5,%xmm5
1462         vpclmulqdq      $0x10,%xmm7,%xmm9,%xmm2
1463
1464         leaq    128(%rdx),%rdx
1465         cmpq    $0x80,%rcx
1466         jb      .Ltail_avx
1467
1468         vpxor   %xmm10,%xmm15,%xmm15
1469         subq    $0x80,%rcx
1470         jmp     .Loop8x_avx
1471
1472 .align  32
1473 .Loop8x_avx:
1474         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1475         vmovdqu 112(%rdx),%xmm14
1476         vpxor   %xmm0,%xmm3,%xmm3
1477         vpxor   %xmm15,%xmm8,%xmm8
1478         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm10
1479         vpshufb %xmm13,%xmm14,%xmm14
1480         vpxor   %xmm1,%xmm4,%xmm4
1481         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm11
1482         vmovdqu 0-64(%rsi),%xmm6
1483         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1484         vpxor   %xmm2,%xmm5,%xmm5
1485         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm12
1486         vmovdqu 32-64(%rsi),%xmm7
1487         vpxor   %xmm14,%xmm9,%xmm9
1488
1489         vmovdqu 96(%rdx),%xmm15
1490         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1491         vpxor   %xmm3,%xmm10,%xmm10
1492         vpshufb %xmm13,%xmm15,%xmm15
1493         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1494         vxorps  %xmm4,%xmm11,%xmm11
1495         vmovdqu 16-64(%rsi),%xmm6
1496         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1497         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1498         vpxor   %xmm5,%xmm12,%xmm12
1499         vxorps  %xmm15,%xmm8,%xmm8
1500
1501         vmovdqu 80(%rdx),%xmm14
1502         vpxor   %xmm10,%xmm12,%xmm12
1503         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1504         vpxor   %xmm11,%xmm12,%xmm12
1505         vpslldq $8,%xmm12,%xmm9
1506         vpxor   %xmm0,%xmm3,%xmm3
1507         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1508         vpsrldq $8,%xmm12,%xmm12
1509         vpxor   %xmm9,%xmm10,%xmm10
1510         vmovdqu 48-64(%rsi),%xmm6
1511         vpshufb %xmm13,%xmm14,%xmm14
1512         vxorps  %xmm12,%xmm11,%xmm11
1513         vpxor   %xmm1,%xmm4,%xmm4
1514         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1515         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1516         vmovdqu 80-64(%rsi),%xmm7
1517         vpxor   %xmm14,%xmm9,%xmm9
1518         vpxor   %xmm2,%xmm5,%xmm5
1519
1520         vmovdqu 64(%rdx),%xmm15
1521         vpalignr        $8,%xmm10,%xmm10,%xmm12
1522         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1523         vpshufb %xmm13,%xmm15,%xmm15
1524         vpxor   %xmm3,%xmm0,%xmm0
1525         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1526         vmovdqu 64-64(%rsi),%xmm6
1527         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1528         vpxor   %xmm4,%xmm1,%xmm1
1529         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1530         vxorps  %xmm15,%xmm8,%xmm8
1531         vpxor   %xmm5,%xmm2,%xmm2
1532
1533         vmovdqu 48(%rdx),%xmm14
1534         vpclmulqdq      $0x10,(%r10),%xmm10,%xmm10
1535         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1536         vpshufb %xmm13,%xmm14,%xmm14
1537         vpxor   %xmm0,%xmm3,%xmm3
1538         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1539         vmovdqu 96-64(%rsi),%xmm6
1540         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1541         vpxor   %xmm1,%xmm4,%xmm4
1542         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1543         vmovdqu 128-64(%rsi),%xmm7
1544         vpxor   %xmm14,%xmm9,%xmm9
1545         vpxor   %xmm2,%xmm5,%xmm5
1546
1547         vmovdqu 32(%rdx),%xmm15
1548         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1549         vpshufb %xmm13,%xmm15,%xmm15
1550         vpxor   %xmm3,%xmm0,%xmm0
1551         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1552         vmovdqu 112-64(%rsi),%xmm6
1553         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1554         vpxor   %xmm4,%xmm1,%xmm1
1555         vpclmulqdq      $0x00,%xmm7,%xmm9,%xmm2
1556         vpxor   %xmm15,%xmm8,%xmm8
1557         vpxor   %xmm5,%xmm2,%xmm2
1558         vxorps  %xmm12,%xmm10,%xmm10
1559
1560         vmovdqu 16(%rdx),%xmm14
1561         vpalignr        $8,%xmm10,%xmm10,%xmm12
1562         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm3
1563         vpshufb %xmm13,%xmm14,%xmm14
1564         vpxor   %xmm0,%xmm3,%xmm3
1565         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm4
1566         vmovdqu 144-64(%rsi),%xmm6
1567         vpclmulqdq      $0x10,(%r10),%xmm10,%xmm10
1568         vxorps  %xmm11,%xmm12,%xmm12
1569         vpunpckhqdq     %xmm14,%xmm14,%xmm9
1570         vpxor   %xmm1,%xmm4,%xmm4
1571         vpclmulqdq      $0x10,%xmm7,%xmm8,%xmm5
1572         vmovdqu 176-64(%rsi),%xmm7
1573         vpxor   %xmm14,%xmm9,%xmm9
1574         vpxor   %xmm2,%xmm5,%xmm5
1575
1576         vmovdqu (%rdx),%xmm15
1577         vpclmulqdq      $0x00,%xmm6,%xmm14,%xmm0
1578         vpshufb %xmm13,%xmm15,%xmm15
1579         vpclmulqdq      $0x11,%xmm6,%xmm14,%xmm1
1580         vmovdqu 160-64(%rsi),%xmm6
1581         vpxor   %xmm12,%xmm15,%xmm15
1582         vpclmulqdq      $0x10,%xmm7,%xmm9,%xmm2
1583         vpxor   %xmm10,%xmm15,%xmm15
1584
1585         leaq    128(%rdx),%rdx
1586         subq    $0x80,%rcx
1587         jnc     .Loop8x_avx
1588
1589         addq    $0x80,%rcx
1590         jmp     .Ltail_no_xor_avx
1591
1592 .align  32
1593 .Lshort_avx:
1594         vmovdqu -16(%rdx,%rcx,1),%xmm14
1595         leaq    (%rdx,%rcx,1),%rdx
1596         vmovdqu 0-64(%rsi),%xmm6
1597         vmovdqu 32-64(%rsi),%xmm7
1598         vpshufb %xmm13,%xmm14,%xmm15
1599
1600         vmovdqa %xmm0,%xmm3
1601         vmovdqa %xmm1,%xmm4
1602         vmovdqa %xmm2,%xmm5
1603         subq    $0x10,%rcx
1604         jz      .Ltail_avx
1605
1606         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1607         vpxor   %xmm0,%xmm3,%xmm3
1608         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1609         vpxor   %xmm15,%xmm8,%xmm8
1610         vmovdqu -32(%rdx),%xmm14
1611         vpxor   %xmm1,%xmm4,%xmm4
1612         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1613         vmovdqu 16-64(%rsi),%xmm6
1614         vpshufb %xmm13,%xmm14,%xmm15
1615         vpxor   %xmm2,%xmm5,%xmm5
1616         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1617         vpsrldq $8,%xmm7,%xmm7
1618         subq    $0x10,%rcx
1619         jz      .Ltail_avx
1620
1621         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1622         vpxor   %xmm0,%xmm3,%xmm3
1623         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1624         vpxor   %xmm15,%xmm8,%xmm8
1625         vmovdqu -48(%rdx),%xmm14
1626         vpxor   %xmm1,%xmm4,%xmm4
1627         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1628         vmovdqu 48-64(%rsi),%xmm6
1629         vpshufb %xmm13,%xmm14,%xmm15
1630         vpxor   %xmm2,%xmm5,%xmm5
1631         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1632         vmovdqu 80-64(%rsi),%xmm7
1633         subq    $0x10,%rcx
1634         jz      .Ltail_avx
1635
1636         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1637         vpxor   %xmm0,%xmm3,%xmm3
1638         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1639         vpxor   %xmm15,%xmm8,%xmm8
1640         vmovdqu -64(%rdx),%xmm14
1641         vpxor   %xmm1,%xmm4,%xmm4
1642         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1643         vmovdqu 64-64(%rsi),%xmm6
1644         vpshufb %xmm13,%xmm14,%xmm15
1645         vpxor   %xmm2,%xmm5,%xmm5
1646         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1647         vpsrldq $8,%xmm7,%xmm7
1648         subq    $0x10,%rcx
1649         jz      .Ltail_avx
1650
1651         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1652         vpxor   %xmm0,%xmm3,%xmm3
1653         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1654         vpxor   %xmm15,%xmm8,%xmm8
1655         vmovdqu -80(%rdx),%xmm14
1656         vpxor   %xmm1,%xmm4,%xmm4
1657         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1658         vmovdqu 96-64(%rsi),%xmm6
1659         vpshufb %xmm13,%xmm14,%xmm15
1660         vpxor   %xmm2,%xmm5,%xmm5
1661         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1662         vmovdqu 128-64(%rsi),%xmm7
1663         subq    $0x10,%rcx
1664         jz      .Ltail_avx
1665
1666         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1667         vpxor   %xmm0,%xmm3,%xmm3
1668         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1669         vpxor   %xmm15,%xmm8,%xmm8
1670         vmovdqu -96(%rdx),%xmm14
1671         vpxor   %xmm1,%xmm4,%xmm4
1672         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1673         vmovdqu 112-64(%rsi),%xmm6
1674         vpshufb %xmm13,%xmm14,%xmm15
1675         vpxor   %xmm2,%xmm5,%xmm5
1676         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1677         vpsrldq $8,%xmm7,%xmm7
1678         subq    $0x10,%rcx
1679         jz      .Ltail_avx
1680
1681         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1682         vpxor   %xmm0,%xmm3,%xmm3
1683         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1684         vpxor   %xmm15,%xmm8,%xmm8
1685         vmovdqu -112(%rdx),%xmm14
1686         vpxor   %xmm1,%xmm4,%xmm4
1687         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1688         vmovdqu 144-64(%rsi),%xmm6
1689         vpshufb %xmm13,%xmm14,%xmm15
1690         vpxor   %xmm2,%xmm5,%xmm5
1691         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1692         vmovq   184-64(%rsi),%xmm7
1693         subq    $0x10,%rcx
1694         jmp     .Ltail_avx
1695
1696 .align  32
1697 .Ltail_avx:
1698         vpxor   %xmm10,%xmm15,%xmm15
1699 .Ltail_no_xor_avx:
1700         vpunpckhqdq     %xmm15,%xmm15,%xmm8
1701         vpxor   %xmm0,%xmm3,%xmm3
1702         vpclmulqdq      $0x00,%xmm6,%xmm15,%xmm0
1703         vpxor   %xmm15,%xmm8,%xmm8
1704         vpxor   %xmm1,%xmm4,%xmm4
1705         vpclmulqdq      $0x11,%xmm6,%xmm15,%xmm1
1706         vpxor   %xmm2,%xmm5,%xmm5
1707         vpclmulqdq      $0x00,%xmm7,%xmm8,%xmm2
1708
1709         vmovdqu (%r10),%xmm12
1710
1711         vpxor   %xmm0,%xmm3,%xmm10
1712         vpxor   %xmm1,%xmm4,%xmm11
1713         vpxor   %xmm2,%xmm5,%xmm5
1714
1715         vpxor   %xmm10,%xmm5,%xmm5
1716         vpxor   %xmm11,%xmm5,%xmm5
1717         vpslldq $8,%xmm5,%xmm9
1718         vpsrldq $8,%xmm5,%xmm5
1719         vpxor   %xmm9,%xmm10,%xmm10
1720         vpxor   %xmm5,%xmm11,%xmm11
1721
1722         vpclmulqdq      $0x10,%xmm12,%xmm10,%xmm9
1723         vpalignr        $8,%xmm10,%xmm10,%xmm10
1724         vpxor   %xmm9,%xmm10,%xmm10
1725
1726         vpclmulqdq      $0x10,%xmm12,%xmm10,%xmm9
1727         vpalignr        $8,%xmm10,%xmm10,%xmm10
1728         vpxor   %xmm11,%xmm10,%xmm10
1729         vpxor   %xmm9,%xmm10,%xmm10
1730
1731         cmpq    $0,%rcx
1732         jne     .Lshort_avx
1733
1734         vpshufb %xmm13,%xmm10,%xmm10
1735         vmovdqu %xmm10,(%rdi)
1736         vzeroupper
1737         .byte   0xf3,0xc3
1738 .size   gcm_ghash_avx,.-gcm_ghash_avx
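/*
 * Constant data: .Lbswap_mask is a pshufb mask that byte-reverses a
 * 128-bit lane; .L0x1c2_polynomial encodes the GHASH field polynomial
 * (x^128 + x^7 + x^2 + x + 1) in the bit-reflected form used by the
 * CLMUL/AVX reduction; .L7_mask and .L7_mask_poly are auxiliary
 * reduction masks; .Lrem_4bit and .Lrem_8bit are the per-nibble and
 * per-byte remainder tables used by the 4-bit code above.
 */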
1739 .align  64
1740 .Lbswap_mask:
1741 .byte   15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
1742 .L0x1c2_polynomial:
1743 .byte   1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
1744 .L7_mask:
1745 .long   7,0,7,0
1746 .L7_mask_poly:
1747 .long   7,0,450,0
1748 .align  64
1749 .type   .Lrem_4bit,@object
1750 .Lrem_4bit:
1751 .long   0,0,0,471859200,0,943718400,0,610271232
1752 .long   0,1887436800,0,1822425088,0,1220542464,0,1423966208
1753 .long   0,3774873600,0,4246732800,0,3644850176,0,3311403008
1754 .long   0,2441084928,0,2376073216,0,2847932416,0,3051356160
1755 .type   .Lrem_8bit,@object
1756 .Lrem_8bit:
1757 .value  0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
1758 .value  0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
1759 .value  0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
1760 .value  0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
1761 .value  0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
1762 .value  0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
1763 .value  0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
1764 .value  0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
1765 .value  0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
1766 .value  0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
1767 .value  0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
1768 .value  0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
1769 .value  0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
1770 .value  0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
1771 .value  0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
1772 .value  0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
1773 .value  0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
1774 .value  0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
1775 .value  0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
1776 .value  0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
1777 .value  0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
1778 .value  0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
1779 .value  0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
1780 .value  0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
1781 .value  0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
1782 .value  0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
1783 .value  0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
1784 .value  0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
1785 .value  0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
1786 .value  0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
1787 .value  0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
1788 .value  0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
1789
1790 .byte   71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1791 .align  64