CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - secure/lib/libcrypto/amd64/x86_64-gf2m.S
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / secure / lib / libcrypto / amd64 / x86_64-gf2m.S
1         # $FreeBSD$
2 .text   
3
# _mul_1x1 - 64x64 -> 128-bit carry-less multiply (shifts and XORs only),
# driven by a 16-entry lookup table built on the stack. File-local helper
# (no .globl) with a private register contract, not a standard-ABI function.
#   In:   %rax = multiplicand a
#         %rbp = multiplier b
#         %r8  = digit mask (bn_GF2m_mul_2x2 in this file passes 15)
#   Out:  %rdx:%rax = 128-bit product (high:low)
#   Uses: %rbx %rcx %rsi %rdi %rbp %r9 %r10 %r11 %r12 %r13 %r14 %xmm0 %xmm1
#         and the flags are overwritten; %r8 is only read.
#   Stack: 136 bytes are reserved below the return address for the table.
4 .type   _mul_1x1,@function
5 .align  16
6 _mul_1x1:
# Reserve room for the table: sixteen 8-byte entries plus 8 bytes of padding.
7         subq    $128+8,%rsp
# Two interleaved threads follow.
#  (1) a1 = a with its top three bits cleared (%r9), then a2 = a1<<1 (%r10),
#      a4 = a1<<2 (%r11) and a8 = a<<3 (%r12, same low 64 bits as a1<<3).
#      With the top bits cleared, a1 times any 4-bit value fits in 64 bits.
#  (2) The three cleared bits are handled directly: sarq $63 on a, 2*a and
#      4*a broadcasts bits 63, 62 and 61 of a into all-ones/all-zero masks
#      (%rax, %rsi, %rdi), which are then ANDed with b.
8         movq    $-1,%r9
9         leaq    (%rax,%rax,1),%rsi
10         shrq    $3,%r9
11         leaq    (,%rax,4),%rdi
12         andq    %rax,%r9
13         leaq    (,%rax,8),%r12
14         sarq    $63,%rax
15         leaq    (%r9,%r9,1),%r10
16         sarq    $63,%rsi
17         leaq    (,%r9,4),%r11
18         andq    %rbp,%rax
19         sarq    $63,%rdi
# Seed the accumulator %rdx:%rax with the masked copies of b shifted left by
# 63, 62 and 61 bits; each shift is split into a low half (shlq $s) and a
# high half (shrq $(64-s)).
20         movq    %rax,%rdx
21         shlq    $63,%rax
22         andq    %rbp,%rsi
23         shrq    $1,%rdx
24         movq    %rsi,%rcx
25         shlq    $62,%rsi
26         andq    %rbp,%rdi
27         shrq    $2,%rcx
28         xorq    %rsi,%rax
29         movq    %rdi,%rbx
30         shlq    $61,%rdi
31         xorq    %rcx,%rdx
32         shrq    $3,%rbx
33         xorq    %rdi,%rax
34         xorq    %rbx,%rdx
35
# Build tab[i] = carry-less a1*i for i = 0..15 at 8*i(%rsp), composing each
# entry from a1, a2, a4, a8 by XOR. Stores are interleaved with the XORs that
# prepare later entries. Entries 0..3: 0, a1, a2, a1^a2.
36         movq    %r9,%r13
37         movq    $0,0(%rsp)
38         xorq    %r10,%r13
39         movq    %r9,8(%rsp)
40         movq    %r11,%r14
41         movq    %r10,16(%rsp)
42         xorq    %r12,%r14
43         movq    %r13,24(%rsp)
44
# Entries 4..7: a4, a1^a4, a2^a4, a1^a2^a4. %r14 holds a4^a8 and is used to
# flip the running values over to the a8 variants.
45         xorq    %r11,%r9
46         movq    %r11,32(%rsp)
47         xorq    %r11,%r10
48         movq    %r9,40(%rsp)
49         xorq    %r11,%r13
50         movq    %r10,48(%rsp)
51         xorq    %r14,%r9
52         movq    %r13,56(%rsp)
53         xorq    %r14,%r10
54
# Entries 8..11: a8, a1^a8, a2^a8, a1^a2^a8.
55         movq    %r12,64(%rsp)
56         xorq    %r14,%r13
57         movq    %r9,72(%rsp)
58         xorq    %r11,%r9
59         movq    %r10,80(%rsp)
60         xorq    %r11,%r10
61         movq    %r13,88(%rsp)
62
# Entries 12..15: a4^a8, a1^a4^a8, a2^a4^a8, a1^a2^a4^a8. Interleaved with
# these stores, digit extraction from b begins: index = mask & b, then
# b >>= 4; %rsi and %rdi alternate as index registers (digit 0 in %rsi,
# digit 1 in %rdi).
63         xorq    %r11,%r13
64         movq    %r14,96(%rsp)
65         movq    %r8,%rsi
66         movq    %r9,104(%rsp)
67         andq    %rbp,%rsi
68         movq    %r10,112(%rsp)
69         shrq    $4,%rbp
70         movq    %r13,120(%rsp)
71         movq    %r8,%rdi
72         andq    %rbp,%rdi
73         shrq    $4,%rbp
74
# Digit 0: tab[digit] is loaded into %xmm0. Even-numbered digits accumulate
# in %xmm0 via byte shifts (pslldq $k moves by 8*k bits); odd-numbered digits
# go through %rcx/%rbx with shlq $s / shrq $(64-s), s = 4, 12, ..., 60, and
# are XORed straight into %rdx:%rax.
75         movq    (%rsp,%rsi,8),%xmm0
76         movq    %r8,%rsi
77         andq    %rbp,%rsi
78         shrq    $4,%rbp
# Digit 1 (bit offset 4, integer path) and digit 2 (bit offset 8, SSE path).
79         movq    (%rsp,%rdi,8),%rcx
80         movq    %r8,%rdi
81         movq    %rcx,%rbx
82         shlq    $4,%rcx
83         andq    %rbp,%rdi
84         movq    (%rsp,%rsi,8),%xmm1
85         shrq    $60,%rbx
86         xorq    %rcx,%rax
87         pslldq  $1,%xmm1
88         movq    %r8,%rsi
89         shrq    $4,%rbp
90         xorq    %rbx,%rdx
91         andq    %rbp,%rsi
92         shrq    $4,%rbp
93         pxor    %xmm1,%xmm0
# Digit 3 (bit offset 12) and digit 4 (bit offset 16).
94         movq    (%rsp,%rdi,8),%rcx
95         movq    %r8,%rdi
96         movq    %rcx,%rbx
97         shlq    $12,%rcx
98         andq    %rbp,%rdi
99         movq    (%rsp,%rsi,8),%xmm1
100         shrq    $52,%rbx
101         xorq    %rcx,%rax
102         pslldq  $2,%xmm1
103         movq    %r8,%rsi
104         shrq    $4,%rbp
105         xorq    %rbx,%rdx
106         andq    %rbp,%rsi
107         shrq    $4,%rbp
108         pxor    %xmm1,%xmm0
# Digit 5 (bit offset 20) and digit 6 (bit offset 24).
109         movq    (%rsp,%rdi,8),%rcx
110         movq    %r8,%rdi
111         movq    %rcx,%rbx
112         shlq    $20,%rcx
113         andq    %rbp,%rdi
114         movq    (%rsp,%rsi,8),%xmm1
115         shrq    $44,%rbx
116         xorq    %rcx,%rax
117         pslldq  $3,%xmm1
118         movq    %r8,%rsi
119         shrq    $4,%rbp
120         xorq    %rbx,%rdx
121         andq    %rbp,%rsi
122         shrq    $4,%rbp
123         pxor    %xmm1,%xmm0
# Digit 7 (bit offset 28) and digit 8 (bit offset 32).
124         movq    (%rsp,%rdi,8),%rcx
125         movq    %r8,%rdi
126         movq    %rcx,%rbx
127         shlq    $28,%rcx
128         andq    %rbp,%rdi
129         movq    (%rsp,%rsi,8),%xmm1
130         shrq    $36,%rbx
131         xorq    %rcx,%rax
132         pslldq  $4,%xmm1
133         movq    %r8,%rsi
134         shrq    $4,%rbp
135         xorq    %rbx,%rdx
136         andq    %rbp,%rsi
137         shrq    $4,%rbp
138         pxor    %xmm1,%xmm0
# Digit 9 (bit offset 36) and digit 10 (bit offset 40).
139         movq    (%rsp,%rdi,8),%rcx
140         movq    %r8,%rdi
141         movq    %rcx,%rbx
142         shlq    $36,%rcx
143         andq    %rbp,%rdi
144         movq    (%rsp,%rsi,8),%xmm1
145         shrq    $28,%rbx
146         xorq    %rcx,%rax
147         pslldq  $5,%xmm1
148         movq    %r8,%rsi
149         shrq    $4,%rbp
150         xorq    %rbx,%rdx
151         andq    %rbp,%rsi
152         shrq    $4,%rbp
153         pxor    %xmm1,%xmm0
# Digit 11 (bit offset 44) and digit 12 (bit offset 48).
154         movq    (%rsp,%rdi,8),%rcx
155         movq    %r8,%rdi
156         movq    %rcx,%rbx
157         shlq    $44,%rcx
158         andq    %rbp,%rdi
159         movq    (%rsp,%rsi,8),%xmm1
160         shrq    $20,%rbx
161         xorq    %rcx,%rax
162         pslldq  $6,%xmm1
163         movq    %r8,%rsi
164         shrq    $4,%rbp
165         xorq    %rbx,%rdx
166         andq    %rbp,%rsi
167         shrq    $4,%rbp
168         pxor    %xmm1,%xmm0
# Digit 13 (bit offset 52) and digit 14 (bit offset 56). The index computed
# into %rsi at the end of this group is never used: %rsi is overwritten by
# the movq from %xmm0 further down.
169         movq    (%rsp,%rdi,8),%rcx
170         movq    %r8,%rdi
171         movq    %rcx,%rbx
172         shlq    $52,%rcx
173         andq    %rbp,%rdi
174         movq    (%rsp,%rsi,8),%xmm1
175         shrq    $12,%rbx
176         xorq    %rcx,%rax
177         pslldq  $7,%xmm1
178         movq    %r8,%rsi
179         shrq    $4,%rbp
180         xorq    %rbx,%rdx
181         andq    %rbp,%rsi
182         shrq    $4,%rbp
183         pxor    %xmm1,%xmm0
# Digit 15 (bit offset 60), then fold the SSE accumulator into the result:
# the low quadword of %xmm0 is XORed into %rax and, after psrldq $8, the high
# quadword into %rdx.
184         movq    (%rsp,%rdi,8),%rcx
185         movq    %rcx,%rbx
186         shlq    $60,%rcx
# Hand-encoded movq %xmm0,%rsi (66 48 0F 7E C6).
187 .byte   102,72,15,126,198
188         shrq    $4,%rbx
189         xorq    %rcx,%rax
190         psrldq  $8,%xmm0
191         xorq    %rbx,%rdx
# Hand-encoded movq %xmm0,%rdi (66 48 0F 7E C7).
192 .byte   102,72,15,126,199
193         xorq    %rsi,%rax
194         xorq    %rdi,%rdx
195
# Release the table area and return; 0xf3,0xc3 is the two-byte rep ret form.
196         addq    $128+8,%rsp
197         .byte   0xf3,0xc3
198 .Lend_mul_1x1:
199 .size   _mul_1x1,.-_mul_1x1
200
# bn_GF2m_mul_2x2 - 128x128 -> 256-bit carry-less multiply assembled from
# three 64x64 products (Karatsuba style): high*high, low*low and
# (high^low)*(high^low).
#   In:   %rdi = destination; four 64-bit words are written at 0..31(%rdi),
#                least significant word first
#         %rsi, %rdx = high and low words of the first operand
#         %rcx, %r8  = high and low words of the second operand
#   Out:  nothing is returned in a register by design; the result is in memory.
# NOTE(review): the register roles above are read off the code below; they
# match System V argument order for a C signature of the shape
# (r, a1, a0, b1, b0) - confirm against the C prototype.
201 .globl  bn_GF2m_mul_2x2
202 .type   bn_GF2m_mul_2x2,@function
203 .align  16
204 bn_GF2m_mul_2x2:
# Test bit 33 of the capability vector and fall back to the table-driven
# path when it is clear.
# NOTE(review): bit 33 is presumably the PCLMULQDQ feature flag, since the
# fast path below consists of hand-encoded pclmulqdq - confirm against the
# definition of OPENSSL_ia32cap_P.
205         movq    OPENSSL_ia32cap_P(%rip),%rax
206         btq     $33,%rax
207         jnc     .Lvanilla_mul_2x2
208
# Hand-encoded moves of the four operand words into SSE registers:
#   movq %rsi,%xmm0   movq %rcx,%xmm1   movq %rdx,%xmm2   movq %r8,%xmm3
209 .byte   102,72,15,110,198
210 .byte   102,72,15,110,201
211 .byte   102,72,15,110,210
212 .byte   102,73,15,110,216
213         movdqa  %xmm0,%xmm4
214         movdqa  %xmm1,%xmm5
# pclmulqdq $0,%xmm1,%xmm0 : %xmm0 = high*high product.
215 .byte   102,15,58,68,193,0
# %xmm4 = high^low of the first operand, %xmm5 = high^low of the second.
216         pxor    %xmm2,%xmm4
217         pxor    %xmm3,%xmm5
# pclmulqdq $0,%xmm3,%xmm2 : %xmm2 = low*low product.
218 .byte   102,15,58,68,211,0
# pclmulqdq $0,%xmm5,%xmm4 : %xmm4 = product of the two XORed halves.
219 .byte   102,15,58,68,229,0
# Middle term = %xmm4 ^ high product ^ low product. Its low quadword is
# shifted up into the top half of the low 128 bits (%xmm2) and its high
# quadword down into the bottom half of the high 128 bits (%xmm0).
220         xorps   %xmm0,%xmm4
221         xorps   %xmm2,%xmm4
222         movdqa  %xmm4,%xmm5
223         pslldq  $8,%xmm4
224         psrldq  $8,%xmm5
225         pxor    %xmm4,%xmm2
226         pxor    %xmm5,%xmm0
# Unaligned stores: low 128 bits at 0(%rdi), high 128 bits at 16(%rdi).
227         movdqu  %xmm2,0(%rdi)
228         movdqu  %xmm0,16(%rdi)
229         .byte   0xf3,0xc3
230
231 .align  16
232 .Lvanilla_mul_2x2:
# Table-driven path. Frame layout (136 bytes, allocated with leaq):
#   0,8(%rsp)     low*low product (low, high word)
#   16,24(%rsp)   high*high product (low, high word)
#   32..64(%rsp)  incoming %rdi, %rsi, %rdx, %rcx, %r8
#   80..112(%rsp) saved %r14, %r13, %r12, %rbp, %rbx, all of which
#                 _mul_1x1 overwrites
# NOTE(review): assuming the usual SysV entry alignment, the 136-byte frame
# leaves %rsp 16-byte aligned at each call below.
233         leaq    -136(%rsp),%rsp
234         movq    %r14,80(%rsp)
235         movq    %r13,88(%rsp)
236         movq    %r12,96(%rsp)
237         movq    %rbp,104(%rsp)
238         movq    %rbx,112(%rsp)
239 .Lbody_mul_2x2:
# Spill the arguments; the registers holding them are reused by _mul_1x1.
240         movq    %rdi,32(%rsp)
241         movq    %rsi,40(%rsp)
242         movq    %rdx,48(%rsp)
243         movq    %rcx,56(%rsp)
244         movq    %r8,64(%rsp)
245
# %r8 = 15 is the 4-bit digit mask expected by _mul_1x1; it is only read
# there, so it stays valid for all three calls.
# First product: high word * high word -> 16,24(%rsp).
246         movq    $15,%r8
247         movq    %rsi,%rax
248         movq    %rcx,%rbp
249         call    _mul_1x1                
250         movq    %rax,16(%rsp)
251         movq    %rdx,24(%rsp)
252
# Second product: low word * low word -> 0,8(%rsp).
253         movq    48(%rsp),%rax
254         movq    64(%rsp),%rbp
255         call    _mul_1x1                
256         movq    %rax,0(%rsp)
257         movq    %rdx,8(%rsp)
258
# Third product: (high^low) * (high^low), left in %rdx:%rax.
259         movq    40(%rsp),%rax
260         movq    56(%rsp),%rbp
261         xorq    48(%rsp),%rax
262         xorq    64(%rsp),%rbp
263         call    _mul_1x1                
# Reload the partial products: %rcx:%rbx = low*low, %rsi:%rdi = high*high,
# and %rbp = destination pointer.
264         movq    0(%rsp),%rbx
265         movq    8(%rsp),%rcx
266         movq    16(%rsp),%rdi
267         movq    24(%rsp),%rsi
268         movq    32(%rsp),%rbp
269
# Recombine (lo = low*low, hi = high*high, mid = third product):
#   word 0 = lo.low
#   word 1 = mid.low  ^ lo.low  ^ lo.high ^ hi.low
#   word 2 = mid.high ^ lo.high ^ hi.low  ^ hi.high
#   word 3 = hi.high
270         xorq    %rdx,%rax
271         xorq    %rcx,%rdx
272         xorq    %rbx,%rax
273         movq    %rbx,0(%rbp)
274         xorq    %rdi,%rdx
275         movq    %rsi,24(%rbp)
276         xorq    %rsi,%rax
277         xorq    %rsi,%rdx
278         xorq    %rdx,%rax
279         movq    %rdx,16(%rbp)
280         movq    %rax,8(%rbp)
281
# Restore the saved registers, drop the frame and return (rep ret encoding).
282         movq    80(%rsp),%r14
283         movq    88(%rsp),%r13
284         movq    96(%rsp),%r12
285         movq    104(%rsp),%rbp
286         movq    112(%rsp),%rbx
287         leaq    136(%rsp),%rsp
288         .byte   0xf3,0xc3
289 .Lend_mul_2x2:
290 .size   bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
# NUL-terminated ASCII identification string embedded after the code:
# "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
291 .byte   71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
# Pad the location counter to the next multiple of 16 after the string.
292 .align  16