# $FreeBSD$
# Do not modify. This file is auto-generated from ghash-x86_64.pl.
.text

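# void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
# (standard OpenSSL GCM interface: %rdi -> Xi, %rsi -> Htable)
#
# One GHASH multiplication, Xi = Xi * H in GF(2^128), using the 4-bit
# "Shoup" table method: each step folds a 4-bit nibble of Xi through
# Htable, with reduction constants taken from .Lrem_4bit below.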
.globl  gcm_gmult_4bit
.type   gcm_gmult_4bit,@function
.align  16
gcm_gmult_4bit:
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
.Lgmult_prologue:

        movzbq  15(%rdi),%r8
        leaq    .Lrem_4bit(%rip),%r11
        xorq    %rax,%rax
        xorq    %rbx,%rbx
        movb    %r8b,%al
        movb    %r8b,%bl
        shlb    $4,%al
        movq    $14,%rcx
        movq    8(%rsi,%rax,1),%r8
        movq    (%rsi,%rax,1),%r9
        andb    $240,%bl
        movq    %r8,%rdx
        jmp     .Loop1

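# Each pass through .Loop1 folds two nibbles of Xi (low via %rax,
# high via %rbx) into the %r9:%r8 accumulator, shifting right four
# bits at a time and reducing via .Lrem_4bit; %rcx walks the Xi
# bytes from 14 down to 0.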
.align  16
.Loop1:
        shrq    $4,%r8
        andq    $15,%rdx
        movq    %r9,%r10
        movb    (%rdi,%rcx,1),%al
        shrq    $4,%r9
        xorq    8(%rsi,%rbx,1),%r8
        shlq    $60,%r10
        xorq    (%rsi,%rbx,1),%r9
        movb    %al,%bl
        xorq    (%r11,%rdx,8),%r9
        movq    %r8,%rdx
        shlb    $4,%al
        xorq    %r10,%r8
        decq    %rcx
        js      .Lbreak1

        shrq    $4,%r8
        andq    $15,%rdx
        movq    %r9,%r10
        shrq    $4,%r9
        xorq    8(%rsi,%rax,1),%r8
        shlq    $60,%r10
        xorq    (%rsi,%rax,1),%r9
        andb    $240,%bl
        xorq    (%r11,%rdx,8),%r9
        movq    %r8,%rdx
        xorq    %r10,%r8
        jmp     .Loop1

.align  16
.Lbreak1:
        shrq    $4,%r8
        andq    $15,%rdx
        movq    %r9,%r10
        shrq    $4,%r9
        xorq    8(%rsi,%rax,1),%r8
        shlq    $60,%r10
        xorq    (%rsi,%rax,1),%r9
        andb    $240,%bl
        xorq    (%r11,%rdx,8),%r9
        movq    %r8,%rdx
        xorq    %r10,%r8

        shrq    $4,%r8
        andq    $15,%rdx
        movq    %r9,%r10
        shrq    $4,%r9
        xorq    8(%rsi,%rbx,1),%r8
        shlq    $60,%r10
        xorq    (%rsi,%rbx,1),%r9
        xorq    %r10,%r8
        xorq    (%r11,%rdx,8),%r9

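# Swap the accumulator back to big-endian byte order and store to Xi.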
        bswapq  %r8
        bswapq  %r9
        movq    %r8,8(%rdi)
        movq    %r9,(%rdi)

        movq    16(%rsp),%rbx
        leaq    24(%rsp),%rsp
.Lgmult_epilogue:
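# 0xf3,0xc3 encodes "rep ret", emitted as .byte throughout this file
# (a two-byte return that some AMD branch predictors handle better
# than a bare ret).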
        .byte   0xf3,0xc3
.size   gcm_gmult_4bit,.-gcm_gmult_4bit
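# void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
#                     const u8 *inp, size_t len)
# (%rdi -> Xi, %rsi -> Htable, %rdx -> input, %rcx = length in bytes)
#
# Bulk GHASH: Xi = (Xi ^ block) * H for each 16-byte input block.
# This flavour pairs the 4-bit tables with an 8-bit remainder table
# (.Lrem_8bit) and stack-resident helper tables built below.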
.globl  gcm_ghash_4bit
.type   gcm_ghash_4bit,@function
.align  16
gcm_ghash_4bit:
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15
        subq    $280,%rsp
.Lghash_prologue:
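# Straight-line setup: this block appears to copy Htable into a
# second, 4-bit-right-shifted table based at %rbp, recording the
# sixteen shifted-out nibbles as bytes at 0..15(%rsp), so the
# unrolled loop can consume Xi a byte at a time.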
        movq    %rdx,%r14
        movq    %rcx,%r15
        subq    $-128,%rsi
        leaq    16+128(%rsp),%rbp
        xorl    %edx,%edx
        movq    0+0-128(%rsi),%r8
        movq    0+8-128(%rsi),%rax
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    16+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    16+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,0(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,0(%rbp)
        movq    32+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,0-128(%rbp)
        movq    32+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,1(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,8(%rbp)
        movq    48+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,8-128(%rbp)
        movq    48+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,2(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,16(%rbp)
        movq    64+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,16-128(%rbp)
        movq    64+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,3(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,24(%rbp)
        movq    80+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,24-128(%rbp)
        movq    80+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,4(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,32(%rbp)
        movq    96+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,32-128(%rbp)
        movq    96+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,5(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,40(%rbp)
        movq    112+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,40-128(%rbp)
        movq    112+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,6(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,48(%rbp)
        movq    128+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,48-128(%rbp)
        movq    128+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,7(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,56(%rbp)
        movq    144+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,56-128(%rbp)
        movq    144+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,8(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,64(%rbp)
        movq    160+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,64-128(%rbp)
        movq    160+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,9(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,72(%rbp)
        movq    176+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,72-128(%rbp)
        movq    176+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,10(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,80(%rbp)
        movq    192+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,80-128(%rbp)
        movq    192+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,11(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,88(%rbp)
        movq    208+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,88-128(%rbp)
        movq    208+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,12(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,96(%rbp)
        movq    224+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,96-128(%rbp)
        movq    224+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,13(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,104(%rbp)
        movq    240+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,104-128(%rbp)
        movq    240+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,14(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,112(%rbp)
        shlb    $4,%dl
        movq    %rax,112-128(%rbp)
        shlq    $60,%r10
        movb    %dl,15(%rsp)
        orq     %r10,%rbx
        movq    %r9,120(%rbp)
        movq    %rbx,120-128(%rbp)
        addq    $-128,%rsi
        movq    8(%rdi),%r8
        movq    0(%rdi),%r9
        addq    %r14,%r15
        leaq    .Lrem_8bit(%rip),%r11
        jmp     .Louter_loop
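# .Louter_loop handles one 16-byte block per iteration: XOR the block
# into Xi, then run sixteen fully unrolled byte-sized table-lookup
# rounds, folding remainders through .Lrem_8bit.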
.align  16
.Louter_loop:
        xorq    (%r14),%r9
        movq    8(%r14),%rdx
        leaq    16(%r14),%r14
        xorq    %r8,%rdx
        movq    %r9,(%rdi)
        movq    %rdx,8(%rdi)
        shrq    $32,%rdx
        xorq    %rax,%rax
        roll    $8,%edx
        movb    %dl,%al
        movzbl  %dl,%ebx
        shlb    $4,%al
        shrl    $4,%ebx
        roll    $8,%edx
        movq    8(%rsi,%rax,1),%r8
        movq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        xorq    %r8,%r12
        movq    %r9,%r10
        shrq    $8,%r8
        movzbq  %r12b,%r12
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        movl    8(%rdi),%edx
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        movl    4(%rdi),%edx
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        movl    0(%rdi),%edx
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        andl    $240,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        movl    -4(%rdi),%edx
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        movzwq  (%r11,%r12,2),%r12
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        shlq    $48,%r12
        xorq    %r10,%r8
        xorq    %r12,%r9
        movzbq  %r8b,%r13
        shrq    $4,%r8
        movq    %r9,%r10
        shlb    $4,%r13b
        shrq    $4,%r9
        xorq    8(%rsi,%rcx,1),%r8
        movzwq  (%r11,%r13,2),%r13
        shlq    $60,%r10
        xorq    (%rsi,%rcx,1),%r9
        xorq    %r10,%r8
        shlq    $48,%r13
        bswapq  %r8
        xorq    %r13,%r9
        bswapq  %r9
        cmpq    %r15,%r14
        jb      .Louter_loop
        movq    %r8,8(%rdi)
        movq    %r9,(%rdi)

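# Unwind the 280-byte frame, restore callee-saved registers, return.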
        leaq    280(%rsp),%rsi
        movq    0(%rsi),%r15
        movq    8(%rsi),%r14
        movq    16(%rsi),%r13
        movq    24(%rsi),%r12
        movq    32(%rsi),%rbp
        movq    40(%rsi),%rbx
        leaq    48(%rsi),%rsp
.Lghash_epilogue:
        .byte   0xf3,0xc3
.size   gcm_ghash_4bit,.-gcm_ghash_4bit
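# void gcm_init_clmul(u128 Htable[16], const u64 Xi[2])
# (%rdi -> Htable, %rsi -> H, the hash key)
#
# Key setup for the PCLMULQDQ path: H is shifted left one bit and
# reduced modulo the GHASH polynomial (the pcmpgtd/pand pair folds
# the carried-out bit back in via .L0x1c2_polynomial), then squared
# with the carry-less-multiply sequence below; H lands at (%rdi) and
# H^2 at 16(%rdi).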
.globl  gcm_init_clmul
.type   gcm_init_clmul,@function
.align  16
gcm_init_clmul:
        movdqu  (%rsi),%xmm2
        pshufd  $78,%xmm2,%xmm2


        pshufd  $255,%xmm2,%xmm4
        movdqa  %xmm2,%xmm3
        psllq   $1,%xmm2
        pxor    %xmm5,%xmm5
        psrlq   $63,%xmm3
        pcmpgtd %xmm4,%xmm5
        pslldq  $8,%xmm3
        por     %xmm3,%xmm2


        pand    .L0x1c2_polynomial(%rip),%xmm5
        pxor    %xmm5,%xmm2


        movdqa  %xmm2,%xmm0
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pshufd  $78,%xmm2,%xmm4
        pxor    %xmm0,%xmm3
        pxor    %xmm2,%xmm4
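# Hand-assembled carry-less multiplies, kept as .byte for assemblers
# that predate PCLMULQDQ; this Karatsuba triple recurs throughout:
#   102,15,58,68,194,0  = pclmulqdq $0x00,%xmm2,%xmm0  (low  halves)
#   102,15,58,68,202,17 = pclmulqdq $0x11,%xmm2,%xmm1  (high halves)
#   102,15,58,68,220,0  = pclmulqdq $0x00,%xmm4,%xmm3  (middle term)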
.byte   102,15,58,68,194,0
.byte   102,15,58,68,202,17
.byte   102,15,58,68,220,0
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

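# First reduction phase: the chained psllq 1/5/57 multiplies the low
# half by x^57 + x^62 + x^63 and folds it upward.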
        movdqa  %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $5,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm4
        pslldq  $8,%xmm0
        psrldq  $8,%xmm4
        pxor    %xmm3,%xmm0
        pxor    %xmm4,%xmm1


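# Second reduction phase: the chained psrlq 5/1/1 shifts by x^1, x^2
# and x^7, completing reduction modulo x^128 + x^7 + x^2 + x + 1.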
        movdqa  %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm0
        pxor    %xmm1,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm0
        movdqu  %xmm2,(%rdi)
        movdqu  %xmm0,16(%rdi)
        .byte   0xf3,0xc3
.size   gcm_init_clmul,.-gcm_init_clmul
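# void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16])
#
# One multiplication Xi = Xi * H via PCLMULQDQ; Xi is byte-swapped on
# the way in and out with pshufb against .Lbswap_mask.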
.globl  gcm_gmult_clmul
.type   gcm_gmult_clmul,@function
.align  16
gcm_gmult_clmul:
        movdqu  (%rdi),%xmm0
        movdqa  .Lbswap_mask(%rip),%xmm5
        movdqu  (%rsi),%xmm2
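# 102,15,56,0,197 = pshufb %xmm5,%xmm0 (hand-encoded byte swap)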
.byte   102,15,56,0,197
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pshufd  $78,%xmm2,%xmm4
        pxor    %xmm0,%xmm3
        pxor    %xmm2,%xmm4
.byte   102,15,58,68,194,0
.byte   102,15,58,68,202,17
.byte   102,15,58,68,220,0
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $5,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm4
        pslldq  $8,%xmm0
        psrldq  $8,%xmm4
        pxor    %xmm3,%xmm0
        pxor    %xmm4,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm0
        pxor    %xmm1,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm0
.byte   102,15,56,0,197
        movdqu  %xmm0,(%rdi)
        .byte   0xf3,0xc3
.size   gcm_gmult_clmul,.-gcm_gmult_clmul
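# void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16],
#                      const u8 *inp, size_t len)
# (%rdi -> Xi, %rsi -> Htable, %rdx -> input, %rcx = length)
#
# Bulk GHASH via PCLMULQDQ.  When at least two blocks remain, the
# accumulator is multiplied by H^2 (loaded from 16(%rsi) into %xmm8)
# while the next input block is multiplied by H, so two blocks are
# retired per .Lmod_loop iteration.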
.globl  gcm_ghash_clmul
.type   gcm_ghash_clmul,@function
.align  16
gcm_ghash_clmul:
        movdqa  .Lbswap_mask(%rip),%xmm5

        movdqu  (%rdi),%xmm0
        movdqu  (%rsi),%xmm2
.byte   102,15,56,0,197

        subq    $16,%rcx
        jz      .Lodd_tail

        movdqu  16(%rsi),%xmm8




        movdqu  (%rdx),%xmm3
        movdqu  16(%rdx),%xmm6
.byte   102,15,56,0,221
.byte   102,15,56,0,245
        pxor    %xmm3,%xmm0
        movdqa  %xmm6,%xmm7
        pshufd  $78,%xmm6,%xmm3
        pshufd  $78,%xmm2,%xmm4
        pxor    %xmm6,%xmm3
        pxor    %xmm2,%xmm4
.byte   102,15,58,68,242,0
.byte   102,15,58,68,250,17
.byte   102,15,58,68,220,0
        pxor    %xmm6,%xmm3
        pxor    %xmm7,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm7
        pxor    %xmm4,%xmm6
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pshufd  $78,%xmm8,%xmm4
        pxor    %xmm0,%xmm3
        pxor    %xmm8,%xmm4

        leaq    32(%rdx),%rdx
        subq    $32,%rcx
        jbe     .Leven_tail

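# Main loop, two blocks per iteration; the H^2 multiply of the
# accumulator is interleaved with the reduction and the H multiply
# of the incoming block, apparently to keep the pclmulqdq unit busy.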
.Lmod_loop:
.byte   102,65,15,58,68,192,0
.byte   102,65,15,58,68,200,17
.byte   102,15,58,68,220,0
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0
        movdqu  (%rdx),%xmm3
        pxor    %xmm6,%xmm0
        pxor    %xmm7,%xmm1

        movdqu  16(%rdx),%xmm6
.byte   102,15,56,0,221
.byte   102,15,56,0,245

        movdqa  %xmm6,%xmm7
        pshufd  $78,%xmm6,%xmm9
        pshufd  $78,%xmm2,%xmm10
        pxor    %xmm6,%xmm9
        pxor    %xmm2,%xmm10
        pxor    %xmm3,%xmm1

        movdqa  %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $5,%xmm0
        pxor    %xmm3,%xmm0
.byte   102,15,58,68,242,0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm4
        pslldq  $8,%xmm0
        psrldq  $8,%xmm4
        pxor    %xmm3,%xmm0
        pxor    %xmm4,%xmm1

.byte   102,15,58,68,250,17
        movdqa  %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm0
        pxor    %xmm1,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm0

.byte   102,69,15,58,68,202,0
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pshufd  $78,%xmm8,%xmm4
        pxor    %xmm0,%xmm3
        pxor    %xmm8,%xmm4

        pxor    %xmm6,%xmm9
        pxor    %xmm7,%xmm9
        movdqa  %xmm9,%xmm10
        psrldq  $8,%xmm9
        pslldq  $8,%xmm10
        pxor    %xmm9,%xmm7
        pxor    %xmm10,%xmm6

        leaq    32(%rdx),%rdx
        subq    $32,%rcx
        ja      .Lmod_loop

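# .Leven_tail: final double-block multiply when the block count was
# even.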
.Leven_tail:
.byte   102,65,15,58,68,192,0
.byte   102,65,15,58,68,200,17
.byte   102,15,58,68,220,0
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0
        pxor    %xmm6,%xmm0
        pxor    %xmm7,%xmm1

        movdqa  %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $5,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm4
        pslldq  $8,%xmm0
        psrldq  $8,%xmm4
        pxor    %xmm3,%xmm0
        pxor    %xmm4,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm0
        pxor    %xmm1,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm0
        testq   %rcx,%rcx
        jnz     .Ldone

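# .Lodd_tail: one last single-block multiply when the total length
# was an odd number of blocks.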
.Lodd_tail:
        movdqu  (%rdx),%xmm3
.byte   102,15,56,0,221
        pxor    %xmm3,%xmm0
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pshufd  $78,%xmm2,%xmm4
        pxor    %xmm0,%xmm3
        pxor    %xmm2,%xmm4
.byte   102,15,58,68,194,0
.byte   102,15,58,68,202,17
.byte   102,15,58,68,220,0
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $5,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm4
        pslldq  $8,%xmm0
        psrldq  $8,%xmm4
        pxor    %xmm3,%xmm0
        pxor    %xmm4,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm0
        pxor    %xmm1,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm0
.Ldone:
.byte   102,15,56,0,197
        movdqu  %xmm0,(%rdi)
        .byte   0xf3,0xc3
.LSEH_end_gcm_ghash_clmul:
.size   gcm_ghash_clmul,.-gcm_ghash_clmul
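# Constants: .Lbswap_mask feeds pshufb for endianness conversion,
# .L0x1c2_polynomial is the GHASH reduction polynomial (0xc2 in the
# top byte), and .Lrem_4bit/.Lrem_8bit are the remainder tables used
# by the table-driven code above.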
.align  64
.Lbswap_mask:
.byte   15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte   1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.align  64
.type   .Lrem_4bit,@object
.Lrem_4bit:
.long   0,0,0,471859200,0,943718400,0,610271232
.long   0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long   0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long   0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type   .Lrem_8bit,@object
.Lrem_8bit:
.value  0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value  0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value  0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value  0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value  0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value  0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value  0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value  0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value  0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value  0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value  0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value  0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value  0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value  0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value  0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value  0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value  0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value  0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value  0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value  0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value  0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value  0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value  0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value  0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value  0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value  0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value  0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value  0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value  0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value  0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value  0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value  0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

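# ASCII tag: "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"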
.byte   71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align  64