CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S
MFC: r325328
[FreeBSD/FreeBSD.git] / secure / lib / libcrypto / amd64 / ecp_nistz256-x86_64.S
1 /* $FreeBSD$ */
2 /* Do not modify. This file is auto-generated from ecp_nistz256-x86_64.pl. */
/*
 * Constant tables.  They follow the .text directive and start on a
 * 64-byte boundary.  Multi-quadword values are stored least significant
 * quadword first: the arithmetic routines in this file begin their
 * carry chains at offset 0.
 */
3 .text   
4
5
6
7 .align  64
/* .Lpoly: the 256-bit modulus, 2^256 - 2^224 + 2^192 + 2^96 - 1. */
8 .Lpoly:
9 .quad   0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
10
11
/*
 * .LRR: second multiplicand that ecp_nistz256_to_mont hands to the
 * Montgomery multiply.  Presumably 2^512 mod .Lpoly -- TODO confirm
 * against the generator script.
 */
12 .LRR:
13 .quad   0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd
14
/*
 * Eight 32-bit lanes each of 1, 2 and 3.  .LOne initialises and steps
 * the entry counter in the SSE select loops; .LTwo and .LThree are not
 * referenced in the part of the file visible here.
 */
15 .LOne:
16 .long   1,1,1,1,1,1,1,1
17 .LTwo:
18 .long   2,2,2,2,2,2,2,2
19 .LThree:
20 .long   3,3,3,3,3,3,3,3
/*
 * .LONE_mont: 2^256 - .Lpoly (the two's complement of .Lpoly), i.e.
 * 2^256 mod .Lpoly.  Not referenced in the part of the file visible here.
 */
21 .LONE_mont:
22 .quad   0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
23
/*
 * ecp_nistz256_mul_by_2: store (2 * a) reduced by .Lpoly.
 *   %rdi - destination, four quadwords written at offsets 0..24
 *   %rsi - source a, four quadwords read at offsets 0..24 (offset 0 is
 *          the least significant limb)
 * The doubled value is kept twice: as-is and with .Lpoly subtracted.
 * The borrow out of the five-limb subtraction picks between the two
 * with cmovc, so there is no data-dependent branch.
 * Saves/restores %r12 and %r13; overwrites %rax, %rcx, %rdx, %rsi,
 * %r8-%r11 and the flags.
 */
24 .globl  ecp_nistz256_mul_by_2
25 .type   ecp_nistz256_mul_by_2,@function
26 .align  64
27 ecp_nistz256_mul_by_2:
28         pushq   %r12
29         pushq   %r13
30
31         movq    0(%rsi),%r8
32         xorq    %r13,%r13               /* %r13 = 0, later receives the carry out of limb 3 */
33         movq    8(%rsi),%r9
34         addq    %r8,%r8                 /* a + a; the movq loads in between leave CF intact */
35         movq    16(%rsi),%r10
36         adcq    %r9,%r9
37         movq    24(%rsi),%r11
38         leaq    .Lpoly(%rip),%rsi       /* source pointer no longer needed; %rsi = &.Lpoly */
39         movq    %r8,%rax                /* %rax,%rdx,%rcx,%r12 = copy of the unreduced sum */
40         adcq    %r10,%r10
41         adcq    %r11,%r11
42         movq    %r9,%rdx
43         adcq    $0,%r13                 /* fifth limb = carry out */
44
45         subq    0(%rsi),%r8             /* trial subtraction of .Lpoly */
46         movq    %r10,%rcx
47         sbbq    8(%rsi),%r9
48         sbbq    16(%rsi),%r10
49         movq    %r11,%r12
50         sbbq    24(%rsi),%r11
51         sbbq    $0,%r13                 /* CF = 1 when (carry:sum) < .Lpoly */
52
53         cmovcq  %rax,%r8                /* on borrow keep the unreduced sum */
54         cmovcq  %rdx,%r9
55         movq    %r8,0(%rdi)
56         cmovcq  %rcx,%r10
57         movq    %r9,8(%rdi)
58         cmovcq  %r12,%r11
59         movq    %r10,16(%rdi)
60         movq    %r11,24(%rdi)
61
62         popq    %r13
63         popq    %r12
64         .byte   0xf3,0xc3               /* encodes "rep ret" */
65 .size   ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
66
67
68
/*
 * ecp_nistz256_div_by_2: halve a modulo .Lpoly.
 *   %rdi - destination, four quadwords written at offsets 0..24
 *   %rsi - source a, four quadwords read at offsets 0..24
 * a + .Lpoly is formed as a five-limb value.  When bit 0 of a is clear
 * the original a (with a zero fifth limb) is used instead.  The chosen
 * value is then shifted right by one bit across all limbs.  The choice
 * is made with cmovz, so there is no data-dependent branch.
 * Saves/restores %r12 and %r13; overwrites %rax, %rcx, %rdx, %rsi,
 * %r8-%r11 and the flags.
 */
69 .globl  ecp_nistz256_div_by_2
70 .type   ecp_nistz256_div_by_2,@function
71 .align  32
72 ecp_nistz256_div_by_2:
73         pushq   %r12
74         pushq   %r13
75
76         movq    0(%rsi),%r8
77         movq    8(%rsi),%r9
78         movq    16(%rsi),%r10
79         movq    %r8,%rax                /* %rax,%rdx,%rcx,%r12 = copy of a */
80         movq    24(%rsi),%r11
81         leaq    .Lpoly(%rip),%rsi
82
83         movq    %r9,%rdx
84         xorq    %r13,%r13
85         addq    0(%rsi),%r8             /* %r13:%r11..%r8 = a + .Lpoly */
86         movq    %r10,%rcx
87         adcq    8(%rsi),%r9
88         adcq    16(%rsi),%r10
89         movq    %r11,%r12
90         adcq    24(%rsi),%r11
91         adcq    $0,%r13
92         xorq    %rsi,%rsi               /* %rsi = 0, the fifth limb used when a is even */
93         testq   $1,%rax                 /* ZF = 1 when a is even */
94
95         cmovzq  %rax,%r8                /* a even: take a itself, no modulus added */
96         cmovzq  %rdx,%r9
97         cmovzq  %rcx,%r10
98         cmovzq  %r12,%r11
99         cmovzq  %rsi,%r13
100
101         movq    %r9,%rax               /* shift right by 1: each limb takes bit 0 of the next limb as its bit 63 */
102         shrq    $1,%r8
103         shlq    $63,%rax
104         movq    %r10,%rdx
105         shrq    $1,%r9
106         orq     %rax,%r8
107         shlq    $63,%rdx
108         movq    %r11,%rcx
109         shrq    $1,%r10
110         orq     %rdx,%r9
111         shlq    $63,%rcx
112         shrq    $1,%r11
113         shlq    $63,%r13               /* carry limb supplies bit 63 of the top limb */
114         orq     %rcx,%r10
115         orq     %r13,%r11
116
117         movq    %r8,0(%rdi)
118         movq    %r9,8(%rdi)
119         movq    %r10,16(%rdi)
120         movq    %r11,24(%rdi)
121
122         popq    %r13
123         popq    %r12
124         .byte   0xf3,0xc3              /* encodes "rep ret" */
125 .size   ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2
126
127
128
/*
 * ecp_nistz256_mul_by_3: store (3 * a) reduced by .Lpoly.
 *   %rdi - destination, four quadwords written at offsets 0..24
 *   %rsi - source a, four quadwords read at offsets 0..24; %rsi is not
 *          modified, because a is read a second time for the final add
 * Two passes: a + a with a conditional subtraction of .Lpoly, then
 * that result + a with a second conditional subtraction.  The modulus
 * limbs are supplied as immediates $-1 (limb 0) and $0 (limb 2) and as
 * .Lpoly+8 / .Lpoly+24 memory operands (limbs 1 and 3).
 * Saves/restores %r12 and %r13; overwrites %rax, %rcx, %rdx, %r8-%r11
 * and the flags.
 */
129 .globl  ecp_nistz256_mul_by_3
130 .type   ecp_nistz256_mul_by_3,@function
131 .align  32
132 ecp_nistz256_mul_by_3:
133         pushq   %r12
134         pushq   %r13
135
136         movq    0(%rsi),%r8
137         xorq    %r13,%r13
138         movq    8(%rsi),%r9
139         addq    %r8,%r8                /* pass 1: a + a */
140         movq    16(%rsi),%r10
141         adcq    %r9,%r9
142         movq    24(%rsi),%r11
143         movq    %r8,%rax               /* %rax,%rdx,%rcx,%r12 = unreduced copy */
144         adcq    %r10,%r10
145         adcq    %r11,%r11
146         movq    %r9,%rdx
147         adcq    $0,%r13
148
149         subq    $-1,%r8                /* subtract .Lpoly limb 0 (all ones) */
150         movq    %r10,%rcx
151         sbbq    .Lpoly+8(%rip),%r9
152         sbbq    $0,%r10                /* .Lpoly limb 2 is zero */
153         movq    %r11,%r12
154         sbbq    .Lpoly+24(%rip),%r11
155         sbbq    $0,%r13                /* CF = 1 when the value was below .Lpoly */
156
157         cmovcq  %rax,%r8               /* on borrow keep the unreduced value */
158         cmovcq  %rdx,%r9
159         cmovcq  %rcx,%r10
160         cmovcq  %r12,%r11
161
162         xorq    %r13,%r13
163         addq    0(%rsi),%r8            /* pass 2: (2a reduced) + a */
164         adcq    8(%rsi),%r9
165         movq    %r8,%rax
166         adcq    16(%rsi),%r10
167         adcq    24(%rsi),%r11
168         movq    %r9,%rdx
169         adcq    $0,%r13
170
171         subq    $-1,%r8                /* same trial subtraction as above */
172         movq    %r10,%rcx
173         sbbq    .Lpoly+8(%rip),%r9
174         sbbq    $0,%r10
175         movq    %r11,%r12
176         sbbq    .Lpoly+24(%rip),%r11
177         sbbq    $0,%r13
178
179         cmovcq  %rax,%r8
180         cmovcq  %rdx,%r9
181         movq    %r8,0(%rdi)
182         cmovcq  %rcx,%r10
183         movq    %r9,8(%rdi)
184         cmovcq  %r12,%r11
185         movq    %r10,16(%rdi)
186         movq    %r11,24(%rdi)
187
188         popq    %r13
189         popq    %r12
190         .byte   0xf3,0xc3              /* encodes "rep ret" */
191 .size   ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3
192
193
194
/*
 * ecp_nistz256_add: store (a + b) reduced by .Lpoly.
 *   %rdi - destination, four quadwords written at offsets 0..24
 *   %rsi - source a, four quadwords read at offsets 0..24
 *   %rdx - source b, four quadwords read at offsets 0..24
 * The sum is formed with a fifth carry limb, .Lpoly is subtracted on
 * trial, and the borrow selects (cmovc) between the raw and the
 * subtracted value.
 * Saves/restores %r12 and %r13; overwrites %rax, %rcx, %rdx, %rsi,
 * %r8-%r11 and the flags.
 */
195 .globl  ecp_nistz256_add
196 .type   ecp_nistz256_add,@function
197 .align  32
198 ecp_nistz256_add:
199         pushq   %r12
200         pushq   %r13
201
202         movq    0(%rsi),%r8
203         xorq    %r13,%r13
204         movq    8(%rsi),%r9
205         movq    16(%rsi),%r10
206         movq    24(%rsi),%r11
207         leaq    .Lpoly(%rip),%rsi      /* a is fully loaded; %rsi = &.Lpoly */
208
209         addq    0(%rdx),%r8            /* a + b */
210         adcq    8(%rdx),%r9
211         movq    %r8,%rax               /* %rax,%rdx,%rcx,%r12 = raw sum */
212         adcq    16(%rdx),%r10
213         adcq    24(%rdx),%r11
214         movq    %r9,%rdx               /* last read through %rdx was on the previous line */
215         adcq    $0,%r13
216
217         subq    0(%rsi),%r8            /* trial subtraction of .Lpoly */
218         movq    %r10,%rcx
219         sbbq    8(%rsi),%r9
220         sbbq    16(%rsi),%r10
221         movq    %r11,%r12
222         sbbq    24(%rsi),%r11
223         sbbq    $0,%r13                /* CF = 1 when (carry:sum) < .Lpoly */
224
225         cmovcq  %rax,%r8               /* on borrow keep the raw sum */
226         cmovcq  %rdx,%r9
227         movq    %r8,0(%rdi)
228         cmovcq  %rcx,%r10
229         movq    %r9,8(%rdi)
230         cmovcq  %r12,%r11
231         movq    %r10,16(%rdi)
232         movq    %r11,24(%rdi)
233
234         popq    %r13
235         popq    %r12
236         .byte   0xf3,0xc3              /* encodes "rep ret" */
237 .size   ecp_nistz256_add,.-ecp_nistz256_add
238
239
240
/*
 * ecp_nistz256_sub: store (a - b), with .Lpoly added back on borrow.
 *   %rdi - destination, four quadwords written at offsets 0..24
 *   %rsi - minuend a, four quadwords read at offsets 0..24
 *   %rdx - subtrahend b, four quadwords read at offsets 0..24
 * %r13 ends up 0 when a - b did not borrow and all ones when it did.
 * difference + .Lpoly is always computed, and cmovz falls back to the
 * raw difference when %r13 is zero.
 * Saves/restores %r12 and %r13; overwrites %rax, %rcx, %rdx, %rsi,
 * %r8-%r11 and the flags.
 */
241 .globl  ecp_nistz256_sub
242 .type   ecp_nistz256_sub,@function
243 .align  32
244 ecp_nistz256_sub:
245         pushq   %r12
246         pushq   %r13
247
248         movq    0(%rsi),%r8
249         xorq    %r13,%r13
250         movq    8(%rsi),%r9
251         movq    16(%rsi),%r10
252         movq    24(%rsi),%r11
253         leaq    .Lpoly(%rip),%rsi      /* a is fully loaded; %rsi = &.Lpoly */
254
255         subq    0(%rdx),%r8            /* a - b */
256         sbbq    8(%rdx),%r9
257         movq    %r8,%rax               /* %rax,%rdx,%rcx,%r12 = raw difference */
258         sbbq    16(%rdx),%r10
259         sbbq    24(%rdx),%r11
260         movq    %r9,%rdx               /* last read through %rdx was on the previous line */
261         sbbq    $0,%r13                /* %r13 = 0 - borrow */
262
263         addq    0(%rsi),%r8            /* difference + .Lpoly */
264         movq    %r10,%rcx
265         adcq    8(%rsi),%r9
266         adcq    16(%rsi),%r10
267         movq    %r11,%r12
268         adcq    24(%rsi),%r11
269         testq   %r13,%r13              /* ZF = 1 when there was no borrow */
270
271         cmovzq  %rax,%r8               /* no borrow: keep the raw difference */
272         cmovzq  %rdx,%r9
273         movq    %r8,0(%rdi)
274         cmovzq  %rcx,%r10
275         movq    %r9,8(%rdi)
276         cmovzq  %r12,%r11
277         movq    %r10,16(%rdi)
278         movq    %r11,24(%rdi)
279
280         popq    %r13
281         popq    %r12
282         .byte   0xf3,0xc3              /* encodes "rep ret" */
283 .size   ecp_nistz256_sub,.-ecp_nistz256_sub
284
285
286
/*
 * ecp_nistz256_neg: store (0 - a), with .Lpoly added back on borrow.
 *   %rdi - destination, four quadwords written at offsets 0..24
 *   %rsi - source a, four quadwords read at offsets 0..24
 * Same structure as a subtraction whose minuend is zero.  When a is
 * zero there is no borrow and the stored result is zero.
 * Saves/restores %r12 and %r13; overwrites %rax, %rcx, %rdx, %rsi,
 * %r8-%r11 and the flags.
 */
287 .globl  ecp_nistz256_neg
288 .type   ecp_nistz256_neg,@function
289 .align  32
290 ecp_nistz256_neg:
291         pushq   %r12
292         pushq   %r13
293
294         xorq    %r8,%r8                /* minuend = 0 */
295         xorq    %r9,%r9
296         xorq    %r10,%r10
297         xorq    %r11,%r11
298         xorq    %r13,%r13
299
300         subq    0(%rsi),%r8            /* 0 - a */
301         sbbq    8(%rsi),%r9
302         sbbq    16(%rsi),%r10
303         movq    %r8,%rax               /* %rax,%rdx,%rcx,%r12 = raw difference */
304         sbbq    24(%rsi),%r11
305         leaq    .Lpoly(%rip),%rsi      /* a is fully consumed; %rsi = &.Lpoly */
306         movq    %r9,%rdx
307         sbbq    $0,%r13                /* %r13 = 0 - borrow */
308
309         addq    0(%rsi),%r8            /* difference + .Lpoly */
310         movq    %r10,%rcx
311         adcq    8(%rsi),%r9
312         adcq    16(%rsi),%r10
313         movq    %r11,%r12
314         adcq    24(%rsi),%r11
315         testq   %r13,%r13              /* ZF = 1 when there was no borrow (a == 0) */
316
317         cmovzq  %rax,%r8               /* no borrow: keep the raw difference */
318         cmovzq  %rdx,%r9
319         movq    %r8,0(%rdi)
320         cmovzq  %rcx,%r10
321         movq    %r9,8(%rdi)
322         cmovzq  %r12,%r11
323         movq    %r10,16(%rdi)
324         movq    %r11,24(%rdi)
325
326         popq    %r13
327         popq    %r12
328         .byte   0xf3,0xc3              /* encodes "rep ret" */
329 .size   ecp_nistz256_neg,.-ecp_nistz256_neg
330
331
332
333
/*
 * ecp_nistz256_to_mont: Montgomery-multiply the input by .LRR.
 *   %rdi - destination, passed through unchanged
 *   %rsi - source, passed through unchanged
 * Performs the same capability masking as the first two instructions
 * of ecp_nistz256_mul_mont, points %rdx at .LRR as the second
 * multiplicand, and tail-jumps to .Lmul_mont inside that function.
 * The return to the caller happens from there.
 */
334 .globl  ecp_nistz256_to_mont
335 .type   ecp_nistz256_to_mont,@function
336 .align  32
337 ecp_nistz256_to_mont:
338         movl    $0x80100,%ecx
339         andl    OPENSSL_ia32cap_P+8(%rip),%ecx  /* %ecx = capability bits 8 and 19 */
340         leaq    .LRR(%rip),%rdx                 /* second multiplicand = .LRR */
341         jmp     .Lmul_mont
342 .size   ecp_nistz256_to_mont,.-ecp_nistz256_to_mont
343
344
345
346
347
348
349
/*
 * ecp_nistz256_mul_mont: Montgomery multiplication wrapper.
 *   %rdi - destination, four quadwords (written by the helper)
 *   %rsi - multiplicand a, four quadwords
 *   %rdx - multiplicand b, four quadwords
 * Dispatches on bits 8 and 19 of the dword at OPENSSL_ia32cap_P+8.
 * When both are set the mulx/adcx/adox helper is used, otherwise the
 * mulq helper.  NOTE(review): these are presumably the BMI2 and ADX
 * feature bits -- confirm against the OPENSSL_ia32cap_P layout.
 * .Lmul_mont is also entered from ecp_nistz256_to_mont with %ecx
 * already masked and %rdx pointing at .LRR.
 * Saves/restores %rbp, %rbx and %r12-%r15 around the helper call.
 */
350 .globl  ecp_nistz256_mul_mont
351 .type   ecp_nistz256_mul_mont,@function
352 .align  32
353 ecp_nistz256_mul_mont:
354         movl    $0x80100,%ecx
355         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
356 .Lmul_mont:
357         pushq   %rbp
358         pushq   %rbx
359         pushq   %r12
360         pushq   %r13
361         pushq   %r14
362         pushq   %r15
363         cmpl    $0x80100,%ecx          /* both capability bits set? */
364         je      .Lmul_montx
365         movq    %rdx,%rbx              /* mulq helper: %rbx = b, %rax = b[0], %r9-%r12 = a[0..3] */
366         movq    0(%rdx),%rax
367         movq    0(%rsi),%r9
368         movq    8(%rsi),%r10
369         movq    16(%rsi),%r11
370         movq    24(%rsi),%r12
371
372         call    __ecp_nistz256_mul_montq
373         jmp     .Lmul_mont_done
374
375 .align  32
376 .Lmul_montx:
377         movq    %rdx,%rbx              /* mulx helper: %rbx = b, %rdx = b[0], %r9-%r12 = a[0..3] */
378         movq    0(%rdx),%rdx
379         movq    0(%rsi),%r9
380         movq    8(%rsi),%r10
381         movq    16(%rsi),%r11
382         movq    24(%rsi),%r12
383         leaq    -128(%rsi),%rsi        /* the mulx helper addresses a[] as N+128(%rsi) */
384
385         call    __ecp_nistz256_mul_montx
386 .Lmul_mont_done:
387         popq    %r15
388         popq    %r14
389         popq    %r13
390         popq    %r12
391         popq    %rbx
392         popq    %rbp
393         .byte   0xf3,0xc3              /* encodes "rep ret" */
394 .size   ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
395
/*
 * __ecp_nistz256_mul_montq: Montgomery multiplication using mulq.
 * Internal helper with a private register interface:
 *   In:  %rax      = b[0]
 *        %rbx      -> b (b[1..3] are read at 8, 16, 24(%rbx))
 *        %rsi      -> a (re-read from memory for b[1..3])
 *        %r9-%r12  = a[0..3]
 *        %rdi      -> destination, four quadwords written
 *   Overwrites %rax, %rbx, %rcx, %rdx, %rbp, %r8-%r15 and the flags.
 *
 * The product is accumulated one b[i] at a time in a window of
 * registers that rotates through %r8-%r13.  Each multiply pass is
 * followed by one reduction step that retires the lowest limb m:
 * because .Lpoly = 2^192 * .Lpoly[3] + 2^96 - 1, adding m * .Lpoly
 * cancels the lowest limb and otherwise amounts to adding m << 32 to
 * the next limb, m >> 32 to the one after, and the 128-bit product
 * m * .Lpoly[3] (held in %r15) to the two limbs after that.
 * After four steps the five-limb result gets a trial subtraction of
 * .Lpoly, selected by cmovc on the borrow.
 */
396 .type   __ecp_nistz256_mul_montq,@function
397 .align  32
398 __ecp_nistz256_mul_montq:
399
400
401         movq    %rax,%rbp              /* keep b[0]; mulq overwrites %rax and %rdx */
402         mulq    %r9                    /* a[0] * b[0] */
403         movq    .Lpoly+8(%rip),%r14    /* %r14 = .Lpoly[1], used in the final subtraction */
404         movq    %rax,%r8
405         movq    %rbp,%rax
406         movq    %rdx,%r9
407
408         mulq    %r10                   /* a[1] * b[0] */
409         movq    .Lpoly+24(%rip),%r15   /* %r15 = .Lpoly[3], used by every reduction step */
410         addq    %rax,%r9
411         movq    %rbp,%rax
412         adcq    $0,%rdx
413         movq    %rdx,%r10
414
415         mulq    %r11                   /* a[2] * b[0] */
416         addq    %rax,%r10
417         movq    %rbp,%rax
418         adcq    $0,%rdx
419         movq    %rdx,%r11
420
421         mulq    %r12                   /* a[3] * b[0] */
422         addq    %rax,%r11
423         movq    %r8,%rax               /* lowest limb is the multiplier for the reduction step */
424         adcq    $0,%rdx
425         xorq    %r13,%r13
426         movq    %rdx,%r12              /* accumulator is now %r8 (low) .. %r12, %r13 = 0 */
427
428
429
430
431
432
433
434
435
436
437         movq    %r8,%rbp               /* reduction step 1: retire %r8 */
438         shlq    $32,%r8
439         mulq    %r15                   /* %rdx:%rax = m * .Lpoly[3] */
440         shrq    $32,%rbp
441         addq    %r8,%r9                /* += m << 32 */
442         adcq    %rbp,%r10              /* += m >> 32 */
443         adcq    %rax,%r11
444         movq    8(%rbx),%rax           /* fetch b[1] for the next pass */
445         adcq    %rdx,%r12
446         adcq    $0,%r13
447         xorq    %r8,%r8                /* %r8 is reused as the new top limb */
448
449
450
451         movq    %rax,%rbp              /* accumulate a * b[1] into %r9..%r13, carry into %r8 */
452         mulq    0(%rsi)
453         addq    %rax,%r9
454         movq    %rbp,%rax
455         adcq    $0,%rdx
456         movq    %rdx,%rcx
457
458         mulq    8(%rsi)
459         addq    %rcx,%r10
460         adcq    $0,%rdx
461         addq    %rax,%r10
462         movq    %rbp,%rax
463         adcq    $0,%rdx
464         movq    %rdx,%rcx
465
466         mulq    16(%rsi)
467         addq    %rcx,%r11
468         adcq    $0,%rdx
469         addq    %rax,%r11
470         movq    %rbp,%rax
471         adcq    $0,%rdx
472         movq    %rdx,%rcx
473
474         mulq    24(%rsi)
475         addq    %rcx,%r12
476         adcq    $0,%rdx
477         addq    %rax,%r12
478         movq    %r9,%rax
479         adcq    %rdx,%r13
480         adcq    $0,%r8
481
482
483
484         movq    %r9,%rbp               /* reduction step 2: retire %r9 */
485         shlq    $32,%r9
486         mulq    %r15
487         shrq    $32,%rbp
488         addq    %r9,%r10
489         adcq    %rbp,%r11
490         adcq    %rax,%r12
491         movq    16(%rbx),%rax          /* fetch b[2] for the next pass */
492         adcq    %rdx,%r13
493         adcq    $0,%r8
494         xorq    %r9,%r9                /* %r9 is reused as the new top limb */
495
496
497
498         movq    %rax,%rbp              /* accumulate a * b[2] into %r10..%r8, carry into %r9 */
499         mulq    0(%rsi)
500         addq    %rax,%r10
501         movq    %rbp,%rax
502         adcq    $0,%rdx
503         movq    %rdx,%rcx
504
505         mulq    8(%rsi)
506         addq    %rcx,%r11
507         adcq    $0,%rdx
508         addq    %rax,%r11
509         movq    %rbp,%rax
510         adcq    $0,%rdx
511         movq    %rdx,%rcx
512
513         mulq    16(%rsi)
514         addq    %rcx,%r12
515         adcq    $0,%rdx
516         addq    %rax,%r12
517         movq    %rbp,%rax
518         adcq    $0,%rdx
519         movq    %rdx,%rcx
520
521         mulq    24(%rsi)
522         addq    %rcx,%r13
523         adcq    $0,%rdx
524         addq    %rax,%r13
525         movq    %r10,%rax
526         adcq    %rdx,%r8
527         adcq    $0,%r9
528
529
530
531         movq    %r10,%rbp              /* reduction step 3: retire %r10 */
532         shlq    $32,%r10
533         mulq    %r15
534         shrq    $32,%rbp
535         addq    %r10,%r11
536         adcq    %rbp,%r12
537         adcq    %rax,%r13
538         movq    24(%rbx),%rax          /* fetch b[3] for the last pass */
539         adcq    %rdx,%r8
540         adcq    $0,%r9
541         xorq    %r10,%r10              /* %r10 is reused as the new top limb */
542
543
544
545         movq    %rax,%rbp              /* accumulate a * b[3] into %r11..%r9, carry into %r10 */
546         mulq    0(%rsi)
547         addq    %rax,%r11
548         movq    %rbp,%rax
549         adcq    $0,%rdx
550         movq    %rdx,%rcx
551
552         mulq    8(%rsi)
553         addq    %rcx,%r12
554         adcq    $0,%rdx
555         addq    %rax,%r12
556         movq    %rbp,%rax
557         adcq    $0,%rdx
558         movq    %rdx,%rcx
559
560         mulq    16(%rsi)
561         addq    %rcx,%r13
562         adcq    $0,%rdx
563         addq    %rax,%r13
564         movq    %rbp,%rax
565         adcq    $0,%rdx
566         movq    %rdx,%rcx
567
568         mulq    24(%rsi)
569         addq    %rcx,%r8
570         adcq    $0,%rdx
571         addq    %rax,%r8
572         movq    %r11,%rax
573         adcq    %rdx,%r9
574         adcq    $0,%r10
575
576
577
578         movq    %r11,%rbp              /* reduction step 4: retire %r11 */
579         shlq    $32,%r11
580         mulq    %r15
581         shrq    $32,%rbp
582         addq    %r11,%r12
583         adcq    %rbp,%r13
584         movq    %r12,%rcx              /* %rcx,%rbp,%rbx,%rdx = copy of the unsubtracted result */
585         adcq    %rax,%r8
586         adcq    %rdx,%r9
587         movq    %r13,%rbp
588         adcq    $0,%r10                /* result = %r12,%r13,%r8,%r9 plus carry limb %r10 */
589
590
591
592         subq    $-1,%r12               /* trial subtraction of .Lpoly (limb 0 is all ones) */
593         movq    %r8,%rbx
594         sbbq    %r14,%r13
595         sbbq    $0,%r8                 /* .Lpoly limb 2 is zero */
596         movq    %r9,%rdx
597         sbbq    %r15,%r9
598         sbbq    $0,%r10                /* CF = 1 when the result was below .Lpoly */
599
600         cmovcq  %rcx,%r12              /* on borrow keep the unsubtracted result */
601         cmovcq  %rbp,%r13
602         movq    %r12,0(%rdi)
603         cmovcq  %rbx,%r8
604         movq    %r13,8(%rdi)
605         cmovcq  %rdx,%r9
606         movq    %r8,16(%rdi)
607         movq    %r9,24(%rdi)
608
609         .byte   0xf3,0xc3              /* encodes "rep ret" */
610 .size   __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
611
612
613
614
615
616
617
618
/*
 * ecp_nistz256_sqr_mont: Montgomery squaring wrapper.
 *   %rdi - destination, four quadwords (written by the helper)
 *   %rsi - source a, four quadwords
 * Dispatches on bits 8 and 19 of the dword at OPENSSL_ia32cap_P+8.
 * When both are set the mulx/adcx/adox helper is used, otherwise the
 * mulq helper.  NOTE(review): these are presumably the BMI2 and ADX
 * feature bits -- confirm against the OPENSSL_ia32cap_P layout.
 * Saves/restores %rbp, %rbx and %r12-%r15 around the helper call.
 */
619 .globl  ecp_nistz256_sqr_mont
620 .type   ecp_nistz256_sqr_mont,@function
621 .align  32
622 ecp_nistz256_sqr_mont:
623         movl    $0x80100,%ecx
624         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
625         pushq   %rbp
626         pushq   %rbx
627         pushq   %r12
628         pushq   %r13
629         pushq   %r14
630         pushq   %r15
631         cmpl    $0x80100,%ecx          /* both capability bits set? */
632         je      .Lsqr_montx
633         movq    0(%rsi),%rax           /* mulq helper: %rax, %r14, %r15, %r8 = a[0..3] */
634         movq    8(%rsi),%r14
635         movq    16(%rsi),%r15
636         movq    24(%rsi),%r8
637
638         call    __ecp_nistz256_sqr_montq
639         jmp     .Lsqr_mont_done
640
641 .align  32
642 .Lsqr_montx:
643         movq    0(%rsi),%rdx           /* mulx helper: %rdx, %r14, %r15, %r8 = a[0..3] */
644         movq    8(%rsi),%r14
645         movq    16(%rsi),%r15
646         movq    24(%rsi),%r8
647         leaq    -128(%rsi),%rsi        /* the mulx helper addresses a[] as N+128(%rsi) */
648
649         call    __ecp_nistz256_sqr_montx
650 .Lsqr_mont_done:
651         popq    %r15
652         popq    %r14
653         popq    %r13
654         popq    %r12
655         popq    %rbx
656         popq    %rbp
657         .byte   0xf3,0xc3              /* encodes "rep ret" */
658 .size   ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
659
/*
 * __ecp_nistz256_sqr_montq: Montgomery squaring using mulq.
 * Internal helper with a private register interface:
 *   In:  %rax = a[0], %r14 = a[1], %r15 = a[2], %r8 = a[3]
 *        %rsi -> a (re-read from memory for the diagonal squares)
 *        %rdi -> destination, four quadwords written
 *   Overwrites %rax, %rcx, %rdx, %rbp, %rsi, %r8-%r15 and the flags.
 *
 * Steps: (1) sum the six off-diagonal products a[i]*a[j], i < j, into
 * %r9..%r14; (2) double that sum, carry into %r15; (3) add the four
 * squares a[i]^2, giving the eight-limb square in %r8..%r15; (4) run
 * four reduction steps over the low half, each retiring one limb m by
 * adding m << 32, m >> 32 and m * .Lpoly[3] to the limbs above it;
 * (5) add the reduced low half to the high half and do a trial
 * subtraction of .Lpoly, selected by cmovc on the borrow.
 */
660 .type   __ecp_nistz256_sqr_montq,@function
661 .align  32
662 __ecp_nistz256_sqr_montq:
663         movq    %rax,%r13              /* %r13 = a[0] */
664         mulq    %r14                   /* a[0] * a[1] */
665         movq    %rax,%r9
666         movq    %r15,%rax
667         movq    %rdx,%r10
668
669         mulq    %r13                   /* a[2] * a[0] */
670         addq    %rax,%r10
671         movq    %r8,%rax
672         adcq    $0,%rdx
673         movq    %rdx,%r11
674
675         mulq    %r13                   /* a[3] * a[0] */
676         addq    %rax,%r11
677         movq    %r15,%rax
678         adcq    $0,%rdx
679         movq    %rdx,%r12
680
681
682         mulq    %r14                   /* a[2] * a[1] */
683         addq    %rax,%r11
684         movq    %r8,%rax
685         adcq    $0,%rdx
686         movq    %rdx,%rbp
687
688         mulq    %r14                   /* a[3] * a[1] */
689         addq    %rax,%r12
690         movq    %r8,%rax
691         adcq    $0,%rdx
692         addq    %rbp,%r12
693         movq    %rdx,%r13
694         adcq    $0,%r13
695
696
697         mulq    %r15                   /* a[3] * a[2] */
698         xorq    %r15,%r15
699         addq    %rax,%r13
700         movq    0(%rsi),%rax           /* reload a[0] for the squares below */
701         movq    %rdx,%r14
702         adcq    $0,%r14
703
704         addq    %r9,%r9                /* double the off-diagonal sum, carry into %r15 */
705         adcq    %r10,%r10
706         adcq    %r11,%r11
707         adcq    %r12,%r12
708         adcq    %r13,%r13
709         adcq    %r14,%r14
710         adcq    $0,%r15
711
712         mulq    %rax                   /* a[0]^2 */
713         movq    %rax,%r8
714         movq    8(%rsi),%rax
715         movq    %rdx,%rcx
716
717         mulq    %rax                   /* a[1]^2 */
718         addq    %rcx,%r9
719         adcq    %rax,%r10
720         movq    16(%rsi),%rax
721         adcq    $0,%rdx
722         movq    %rdx,%rcx
723
724         mulq    %rax                   /* a[2]^2 */
725         addq    %rcx,%r11
726         adcq    %rax,%r12
727         movq    24(%rsi),%rax
728         adcq    $0,%rdx
729         movq    %rdx,%rcx
730
731         mulq    %rax                   /* a[3]^2 */
732         addq    %rcx,%r13
733         adcq    %rax,%r14
734         movq    %r8,%rax               /* lowest limb is the first reduction multiplier */
735         adcq    %rdx,%r15
736
737         movq    .Lpoly+8(%rip),%rsi    /* source pointer is dead; %rsi = .Lpoly[1] */
738         movq    .Lpoly+24(%rip),%rbp   /* %rbp = .Lpoly[3] */
739
740
741
742
743         movq    %r8,%rcx               /* reduction step 1: retire %r8 */
744         shlq    $32,%r8
745         mulq    %rbp
746         shrq    $32,%rcx
747         addq    %r8,%r9
748         adcq    %rcx,%r10
749         adcq    %rax,%r11
750         movq    %r9,%rax
751         adcq    $0,%rdx                /* %rdx = top limb, moved into %r8 below */
752
753
754
755         movq    %r9,%rcx               /* reduction step 2: retire %r9 */
756         shlq    $32,%r9
757         movq    %rdx,%r8
758         mulq    %rbp
759         shrq    $32,%rcx
760         addq    %r9,%r10
761         adcq    %rcx,%r11
762         adcq    %rax,%r8
763         movq    %r10,%rax
764         adcq    $0,%rdx
765
766
767
768         movq    %r10,%rcx              /* reduction step 3: retire %r10 */
769         shlq    $32,%r10
770         movq    %rdx,%r9
771         mulq    %rbp
772         shrq    $32,%rcx
773         addq    %r10,%r11
774         adcq    %rcx,%r8
775         adcq    %rax,%r9
776         movq    %r11,%rax
777         adcq    $0,%rdx
778
779
780
781         movq    %r11,%rcx              /* reduction step 4: retire %r11 */
782         shlq    $32,%r11
783         movq    %rdx,%r10
784         mulq    %rbp
785         shrq    $32,%rcx
786         addq    %r11,%r8
787         adcq    %rcx,%r9
788         adcq    %rax,%r10
789         adcq    $0,%rdx
790         xorq    %r11,%r11              /* %r11 = 0, receives the carry of the addition below */
791
792
793
794         addq    %r8,%r12               /* high half += reduced low half (%r8,%r9,%r10,%rdx) */
795         adcq    %r9,%r13
796         movq    %r12,%r8               /* %r8,%r9,%r10,%rcx = copy of the unsubtracted result */
797         adcq    %r10,%r14
798         adcq    %rdx,%r15
799         movq    %r13,%r9
800         adcq    $0,%r11
801
802         subq    $-1,%r12               /* trial subtraction of .Lpoly (limb 0 is all ones) */
803         movq    %r14,%r10
804         sbbq    %rsi,%r13
805         sbbq    $0,%r14                /* .Lpoly limb 2 is zero */
806         movq    %r15,%rcx
807         sbbq    %rbp,%r15
808         sbbq    $0,%r11                /* CF = 1 when the result was below .Lpoly */
809
810         cmovcq  %r8,%r12               /* on borrow keep the unsubtracted result */
811         cmovcq  %r9,%r13
812         movq    %r12,0(%rdi)
813         cmovcq  %r10,%r14
814         movq    %r13,8(%rdi)
815         cmovcq  %rcx,%r15
816         movq    %r14,16(%rdi)
817         movq    %r15,24(%rdi)
818
819         .byte   0xf3,0xc3              /* encodes "rep ret" */
820 .size   __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
/*
 * __ecp_nistz256_mul_montx: Montgomery multiplication using
 * mulx/adcx/adox/shlx/shrx.  Internal helper with a private register
 * interface:
 *   In:  %rdx      = b[0] (implicit multiplicand of mulx)
 *        %rbx      -> b (b[1..3] are read at 8, 16, 24(%rbx))
 *        %rsi      -> a biased by -128 (a[i] is read at 8*i+128(%rsi))
 *        %r9-%r12  = a[0..3]
 *        %rdi      -> destination, four quadwords written
 *   Overwrites %rax, %rbx, %rcx, %rdx, %rbp, %r8-%r15 and the flags.
 *
 * Same schedule as the mulq variant: one multiply pass per b[i], each
 * followed by a reduction step that retires the lowest limb m by
 * adding m << 32, m >> 32 and m * .Lpoly[3] to the limbs above it.
 * In passes 2-4 the low product halves ride the CF chain (adcx) and
 * the high halves ride the OF chain (adox).  The xorq that precedes
 * each pass clears both flags as well as the register.
 */
821 .type   __ecp_nistz256_mul_montx,@function
822 .align  32
823 __ecp_nistz256_mul_montx:
824
825
826         mulxq   %r9,%r8,%r9            /* %r9:%r8 = a[0] * b[0] */
827         mulxq   %r10,%rcx,%r10
828         movq    $32,%r14               /* shift count for shlx/shrx */
829         xorq    %r13,%r13              /* %r13 = 0; also clears CF for the adcq chain */
830         mulxq   %r11,%rbp,%r11
831         movq    .Lpoly+24(%rip),%r15   /* %r15 = .Lpoly[3] */
832         adcq    %rcx,%r9
833         mulxq   %r12,%rcx,%r12
834         movq    %r8,%rdx               /* lowest limb becomes the mulx multiplicand for the reduction */
835         adcq    %rbp,%r10
836         shlxq   %r14,%r8,%rbp          /* %rbp = m << 32 (shlx/shrx leave the flags alone) */
837         adcq    %rcx,%r11
838         shrxq   %r14,%r8,%rcx          /* %rcx = m >> 32 */
839         adcq    $0,%r12
840
841
842
843         addq    %rbp,%r9               /* reduction step 1: retire %r8 */
844         adcq    %rcx,%r10
845
846         mulxq   %r15,%rcx,%rbp         /* %rbp:%rcx = m * .Lpoly[3] */
847         movq    8(%rbx),%rdx           /* fetch b[1] for the next pass */
848         adcq    %rcx,%r11
849         adcq    %rbp,%r12
850         adcq    $0,%r13
851         xorq    %r8,%r8                /* new top limb; clears CF and OF */
852
853
854
855         mulxq   0+128(%rsi),%rcx,%rbp  /* accumulate a * b[1] */
856         adcxq   %rcx,%r9
857         adoxq   %rbp,%r10
858
859         mulxq   8+128(%rsi),%rcx,%rbp
860         adcxq   %rcx,%r10
861         adoxq   %rbp,%r11
862
863         mulxq   16+128(%rsi),%rcx,%rbp
864         adcxq   %rcx,%r11
865         adoxq   %rbp,%r12
866
867         mulxq   24+128(%rsi),%rcx,%rbp
868         movq    %r9,%rdx               /* next reduction multiplier */
869         adcxq   %rcx,%r12
870         shlxq   %r14,%r9,%rcx
871         adoxq   %rbp,%r13
872         shrxq   %r14,%r9,%rbp
873
874         adcxq   %r8,%r13               /* fold the final CF into %r13 (%r8 is zero) */
875         adoxq   %r8,%r8                /* %r8 = final OF */
876         adcq    $0,%r8                 /* plus the carry out of the adcx above */
877
878
879
880         addq    %rcx,%r10              /* reduction step 2: retire %r9 */
881         adcq    %rbp,%r11
882
883         mulxq   %r15,%rcx,%rbp
884         movq    16(%rbx),%rdx          /* fetch b[2] for the next pass */
885         adcq    %rcx,%r12
886         adcq    %rbp,%r13
887         adcq    $0,%r8
888         xorq    %r9,%r9                /* new top limb; clears CF and OF */
889
890
891
892         mulxq   0+128(%rsi),%rcx,%rbp  /* accumulate a * b[2] */
893         adcxq   %rcx,%r10
894         adoxq   %rbp,%r11
895
896         mulxq   8+128(%rsi),%rcx,%rbp
897         adcxq   %rcx,%r11
898         adoxq   %rbp,%r12
899
900         mulxq   16+128(%rsi),%rcx,%rbp
901         adcxq   %rcx,%r12
902         adoxq   %rbp,%r13
903
904         mulxq   24+128(%rsi),%rcx,%rbp
905         movq    %r10,%rdx              /* next reduction multiplier */
906         adcxq   %rcx,%r13
907         shlxq   %r14,%r10,%rcx
908         adoxq   %rbp,%r8
909         shrxq   %r14,%r10,%rbp
910
911         adcxq   %r9,%r8
912         adoxq   %r9,%r9
913         adcq    $0,%r9
914
915
916
917         addq    %rcx,%r11              /* reduction step 3: retire %r10 */
918         adcq    %rbp,%r12
919
920         mulxq   %r15,%rcx,%rbp
921         movq    24(%rbx),%rdx          /* fetch b[3] for the last pass */
922         adcq    %rcx,%r13
923         adcq    %rbp,%r8
924         adcq    $0,%r9
925         xorq    %r10,%r10              /* new top limb; clears CF and OF */
926
927
928
929         mulxq   0+128(%rsi),%rcx,%rbp  /* accumulate a * b[3] */
930         adcxq   %rcx,%r11
931         adoxq   %rbp,%r12
932
933         mulxq   8+128(%rsi),%rcx,%rbp
934         adcxq   %rcx,%r12
935         adoxq   %rbp,%r13
936
937         mulxq   16+128(%rsi),%rcx,%rbp
938         adcxq   %rcx,%r13
939         adoxq   %rbp,%r8
940
941         mulxq   24+128(%rsi),%rcx,%rbp
942         movq    %r11,%rdx              /* last reduction multiplier */
943         adcxq   %rcx,%r8
944         shlxq   %r14,%r11,%rcx
945         adoxq   %rbp,%r9
946         shrxq   %r14,%r11,%rbp
947
948         adcxq   %r10,%r9
949         adoxq   %r10,%r10
950         adcq    $0,%r10
951
952
953
954         addq    %rcx,%r12              /* reduction step 4: retire %r11 */
955         adcq    %rbp,%r13
956
957         mulxq   %r15,%rcx,%rbp
958         movq    %r12,%rbx              /* %rbx,%rdx,%rcx,%rbp = copy of the unsubtracted result */
959         movq    .Lpoly+8(%rip),%r14    /* shift count no longer needed; %r14 = .Lpoly[1] */
960         adcq    %rcx,%r8
961         movq    %r13,%rdx
962         adcq    %rbp,%r9
963         adcq    $0,%r10                /* result = %r12,%r13,%r8,%r9 plus carry limb %r10 */
964
965
966
967         xorl    %eax,%eax              /* clears CF so the sbbq below starts with no borrow */
968         movq    %r8,%rcx
969         sbbq    $-1,%r12               /* trial subtraction of .Lpoly (limb 0 is all ones) */
970         sbbq    %r14,%r13
971         sbbq    $0,%r8                 /* .Lpoly limb 2 is zero */
972         movq    %r9,%rbp
973         sbbq    %r15,%r9
974         sbbq    $0,%r10                /* CF = 1 when the result was below .Lpoly */
975
976         cmovcq  %rbx,%r12              /* on borrow keep the unsubtracted result */
977         cmovcq  %rdx,%r13
978         movq    %r12,0(%rdi)
979         cmovcq  %rcx,%r8
980         movq    %r13,8(%rdi)
981         cmovcq  %rbp,%r9
982         movq    %r8,16(%rdi)
983         movq    %r9,24(%rdi)
984
985         .byte   0xf3,0xc3              /* encodes "rep ret" */
986 .size   __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx
987
/*
 * __ecp_nistz256_sqr_montx: Montgomery squaring using
 * mulx/adcx/adox/shlx/shrx.  Internal helper with a private register
 * interface:
 *   In:  %rdx = a[0], %r14 = a[1], %r15 = a[2], %r8 = a[3]
 *        %rsi -> a biased by -128 (a[i] is read at 8*i+128(%rsi))
 *        %rdi -> destination, four quadwords written
 *   Overwrites %rax, %rcx, %rdx, %rbp, %rsi, %r8-%r15 and the flags.
 *
 * Steps: sum the off-diagonal products into %r9..%r14; double them on
 * the CF chain (adcx) while the squares a[i]^2 are added on the OF
 * chain (adox); run four reduction steps over the low half
 * %r8..%r11; add the reduced low half to the high half %r12..%r15 and
 * do a trial subtraction of .Lpoly, selected by cmovc on the borrow.
 */
988 .type   __ecp_nistz256_sqr_montx,@function
989 .align  32
990 __ecp_nistz256_sqr_montx:
991         mulxq   %r14,%r9,%r10          /* a[0] * a[1] */
992         mulxq   %r15,%rcx,%r11         /* a[0] * a[2] */
993         xorl    %eax,%eax              /* clears CF for the adcq chain */
994         adcq    %rcx,%r10
995         mulxq   %r8,%rbp,%r12          /* a[0] * a[3] */
996         movq    %r14,%rdx              /* multiplicand = a[1] */
997         adcq    %rbp,%r11
998         adcq    $0,%r12
999         xorq    %r13,%r13              /* %r13 = 0; clears CF and OF */
1000
1001
1002         mulxq   %r15,%rcx,%rbp        /* a[1] * a[2] */
1003         adcxq   %rcx,%r11
1004         adoxq   %rbp,%r12
1005
1006         mulxq   %r8,%rcx,%rbp         /* a[1] * a[3] */
1007         movq    %r15,%rdx             /* multiplicand = a[2] */
1008         adcxq   %rcx,%r12
1009         adoxq   %rbp,%r13
1010         adcq    $0,%r13
1011
1012
1013         mulxq   %r8,%rcx,%r14         /* a[2] * a[3] */
1014         movq    0+128(%rsi),%rdx      /* reload a[0] for the squares */
1015         xorq    %r15,%r15             /* %r15 = 0; clears CF and OF */
1016         adcxq   %r9,%r9               /* CF chain: double the off-diagonal sum */
1017         adoxq   %rcx,%r13             /* OF chain: finish a[2]*a[3], then add the squares */
1018         adcxq   %r10,%r10
1019         adoxq   %r15,%r14
1020
1021         mulxq   %rdx,%r8,%rbp         /* a[0]^2 */
1022         movq    8+128(%rsi),%rdx
1023         adcxq   %r11,%r11
1024         adoxq   %rbp,%r9
1025         adcxq   %r12,%r12
1026         mulxq   %rdx,%rcx,%rax        /* a[1]^2 */
1027         movq    16+128(%rsi),%rdx
1028         adcxq   %r13,%r13
1029         adoxq   %rcx,%r10
1030         adcxq   %r14,%r14
1031 .byte   0x67                          /* NOTE(review): prefix byte ahead of the next mulx, presumably padding -- confirm */
1032         mulxq   %rdx,%rcx,%rbp        /* a[2]^2 */
1033         movq    24+128(%rsi),%rdx
1034         adoxq   %rax,%r11
1035         adcxq   %r15,%r15
1036         adoxq   %rcx,%r12
1037         movq    $32,%rsi              /* source pointer is dead; %rsi = shift count */
1038         adoxq   %rbp,%r13
1039 .byte   0x67,0x67                     /* NOTE(review): prefix bytes ahead of the next mulx, presumably padding -- confirm */
1040         mulxq   %rdx,%rcx,%rax        /* a[3]^2 */
1041         movq    .Lpoly+24(%rip),%rdx  /* %rdx = .Lpoly[3], multiplicand for the reduction */
1042         adoxq   %rcx,%r14
1043         shlxq   %rsi,%r8,%rcx
1044         adoxq   %rax,%r15
1045         shrxq   %rsi,%r8,%rax
1046         movq    %rdx,%rbp             /* keep .Lpoly[3] for the final subtraction */
1047
1048
1049         addq    %rcx,%r9              /* reduction step 1: retire %r8 */
1050         adcq    %rax,%r10
1051
1052         mulxq   %r8,%rcx,%r8          /* %r8:%rcx = m * .Lpoly[3]; %r8 becomes the new top limb */
1053         adcq    %rcx,%r11
1054         shlxq   %rsi,%r9,%rcx
1055         adcq    $0,%r8
1056         shrxq   %rsi,%r9,%rax
1057
1058
1059         addq    %rcx,%r10             /* reduction step 2: retire %r9 */
1060         adcq    %rax,%r11
1061
1062         mulxq   %r9,%rcx,%r9
1063         adcq    %rcx,%r8
1064         shlxq   %rsi,%r10,%rcx
1065         adcq    $0,%r9
1066         shrxq   %rsi,%r10,%rax
1067
1068
1069         addq    %rcx,%r11             /* reduction step 3: retire %r10 */
1070         adcq    %rax,%r8
1071
1072         mulxq   %r10,%rcx,%r10
1073         adcq    %rcx,%r9
1074         shlxq   %rsi,%r11,%rcx
1075         adcq    $0,%r10
1076         shrxq   %rsi,%r11,%rax
1077
1078
1079         addq    %rcx,%r8              /* reduction step 4: retire %r11 */
1080         adcq    %rax,%r9
1081
1082         mulxq   %r11,%rcx,%r11
1083         adcq    %rcx,%r10
1084         adcq    $0,%r11
1085
1086         xorq    %rdx,%rdx             /* %rdx = 0, receives the carry of the addition below */
1087         addq    %r8,%r12              /* high half += reduced low half */
1088         movq    .Lpoly+8(%rip),%rsi   /* %rsi = .Lpoly[1] */
1089         adcq    %r9,%r13
1090         movq    %r12,%r8              /* %r8-%r11 = copy of the unsubtracted result */
1091         adcq    %r10,%r14
1092         adcq    %r11,%r15
1093         movq    %r13,%r9
1094         adcq    $0,%rdx
1095
1096         subq    $-1,%r12              /* trial subtraction of .Lpoly (limb 0 is all ones) */
1097         movq    %r14,%r10
1098         sbbq    %rsi,%r13
1099         sbbq    $0,%r14               /* .Lpoly limb 2 is zero */
1100         movq    %r15,%r11
1101         sbbq    %rbp,%r15
1102         sbbq    $0,%rdx               /* CF = 1 when the result was below .Lpoly */
1103
1104         cmovcq  %r8,%r12              /* on borrow keep the unsubtracted result */
1105         cmovcq  %r9,%r13
1106         movq    %r12,0(%rdi)
1107         cmovcq  %r10,%r14
1108         movq    %r13,8(%rdi)
1109         cmovcq  %r11,%r15
1110         movq    %r14,16(%rdi)
1111         movq    %r15,24(%rdi)
1112
1113         .byte   0xf3,0xc3             /* encodes "rep ret" */
1114 .size   __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
1115
1116
1117
1118
1119
1120
/*
 * ecp_nistz256_from_mont: run the four Montgomery reduction steps on
 * a alone (no multiplication), i.e. a Montgomery multiplication by 1.
 *   %rdi - destination, four quadwords written at offsets 0..24
 *   %rsi - source a, four quadwords read at offsets 0..24
 * Each step retires the lowest limb m by adding m << 32, m >> 32 and
 * m * .Lpoly[3] to the limbs above it.  The four-limb result then gets
 * a trial subtraction of .Lpoly; the borrow is turned into a mask and
 * cmovnz/cmovz pick the final limbs.
 * Saves/restores %r12 and %r13; overwrites %rax, %rcx, %rdx, %rsi,
 * %r8-%r11 and the flags.
 */
1121 .globl  ecp_nistz256_from_mont
1122 .type   ecp_nistz256_from_mont,@function
1123 .align  32
1124 ecp_nistz256_from_mont:
1125         pushq   %r12
1126         pushq   %r13
1127
1128         movq    0(%rsi),%rax
1129         movq    .Lpoly+24(%rip),%r13  /* %r13 = .Lpoly[3] */
1130         movq    8(%rsi),%r9
1131         movq    16(%rsi),%r10
1132         movq    24(%rsi),%r11
1133         movq    %rax,%r8
1134         movq    .Lpoly+8(%rip),%r12   /* %r12 = .Lpoly[1] */
1135
1136
1137
1138         movq    %rax,%rcx             /* reduction step 1: retire a[0] */
1139         shlq    $32,%r8
1140         mulq    %r13
1141         shrq    $32,%rcx
1142         addq    %r8,%r9
1143         adcq    %rcx,%r10
1144         adcq    %rax,%r11
1145         movq    %r9,%rax
1146         adcq    $0,%rdx               /* %rdx = top limb, moved into %r8 below */
1147
1148
1149
1150         movq    %r9,%rcx              /* reduction step 2: retire %r9 */
1151         shlq    $32,%r9
1152         movq    %rdx,%r8
1153         mulq    %r13
1154         shrq    $32,%rcx
1155         addq    %r9,%r10
1156         adcq    %rcx,%r11
1157         adcq    %rax,%r8
1158         movq    %r10,%rax
1159         adcq    $0,%rdx
1160
1161
1162
1163         movq    %r10,%rcx             /* reduction step 3: retire %r10 */
1164         shlq    $32,%r10
1165         movq    %rdx,%r9
1166         mulq    %r13
1167         shrq    $32,%rcx
1168         addq    %r10,%r11
1169         adcq    %rcx,%r8
1170         adcq    %rax,%r9
1171         movq    %r11,%rax
1172         adcq    $0,%rdx
1173
1174
1175
1176         movq    %r11,%rcx             /* reduction step 4: retire %r11 */
1177         shlq    $32,%r11
1178         movq    %rdx,%r10
1179         mulq    %r13
1180         shrq    $32,%rcx
1181         addq    %r11,%r8
1182         adcq    %rcx,%r9
1183         movq    %r8,%rcx              /* %rcx,%rsi,%rax,%r11 = copy of the unsubtracted result */
1184         adcq    %rax,%r10
1185         movq    %r9,%rsi
1186         adcq    $0,%rdx               /* result = %r8,%r9,%r10,%rdx */
1187
1188
1189
1190         subq    $-1,%r8               /* trial subtraction of .Lpoly (limb 0 is all ones) */
1191         movq    %r10,%rax
1192         sbbq    %r12,%r9
1193         sbbq    $0,%r10               /* .Lpoly limb 2 is zero */
1194         movq    %rdx,%r11
1195         sbbq    %r13,%rdx
1196         sbbq    %r13,%r13             /* %r13 = 0 - borrow; ZF = 1 when there was no borrow */
1197
1198         cmovnzq %rcx,%r8              /* borrow: restore the unsubtracted low three limbs */
1199         cmovnzq %rsi,%r9
1200         movq    %r8,0(%rdi)
1201         cmovnzq %rax,%r10
1202         movq    %r9,8(%rdi)
1203         cmovzq  %rdx,%r11             /* no borrow: take the subtracted top limb instead of the saved one */
1204         movq    %r10,16(%rdi)
1205         movq    %r11,24(%rdi)
1206
1207         popq    %r13
1208         popq    %r12
1209         .byte   0xf3,0xc3             /* encodes "rep ret" */
1210 .size   ecp_nistz256_from_mont,.-ecp_nistz256_from_mont
1211
1212
/*
 * ecp_nistz256_select_w5: copy one 96-byte entry out of a 16-entry
 * table without an index-dependent memory access pattern.
 *   %rdi - destination, 96 bytes written with unaligned stores
 *   %rsi - table base, 16 entries of 96 bytes, read with movdqa (so
 *          the table must be 16-byte aligned)
 *   %edx - index; the entry counter starts at 1, so index i selects
 *          the i-th entry counting from 1, and a value that matches no
 *          counter (such as 0) yields an all-zero result
 * Every entry is loaded and ANDed with an all-ones/all-zeros compare
 * mask, then ORed into the accumulators.
 * When bit 5 of the dword at OPENSSL_ia32cap_P+8 is set, control goes
 * to .Lavx2_select_w5, which is outside the part of the file visible
 * here.
 * Overwrites %rax, %rsi, %xmm0-%xmm15 and the flags.
 */
1213 .globl  ecp_nistz256_select_w5
1214 .type   ecp_nistz256_select_w5,@function
1215 .align  32
1216 ecp_nistz256_select_w5:
1217         movl    OPENSSL_ia32cap_P+8(%rip),%eax
1218         testl   $32,%eax
1219         jnz     .Lavx2_select_w5
1220         movdqa  .LOne(%rip),%xmm0     /* %xmm0 = per-lane increment of 1 */
1221         movd    %edx,%xmm1
1222
1223         pxor    %xmm2,%xmm2           /* %xmm2-%xmm7 = 96-byte accumulator, initially zero */
1224         pxor    %xmm3,%xmm3
1225         pxor    %xmm4,%xmm4
1226         pxor    %xmm5,%xmm5
1227         pxor    %xmm6,%xmm6
1228         pxor    %xmm7,%xmm7
1229
1230         movdqa  %xmm0,%xmm8           /* %xmm8 = entry counter, starts at 1 */
1231         pshufd  $0,%xmm1,%xmm1        /* broadcast the index to all four lanes */
1232
1233         movq    $16,%rax              /* number of table entries */
1234 .Lselect_loop_sse_w5:
1235
1236         movdqa  %xmm8,%xmm15
1237         paddd   %xmm0,%xmm8           /* advance the counter for the next iteration */
1238         pcmpeqd %xmm1,%xmm15          /* %xmm15 = all ones when counter == index, else zero */
1239
1240         movdqa  0(%rsi),%xmm9
1241         movdqa  16(%rsi),%xmm10
1242         movdqa  32(%rsi),%xmm11
1243         movdqa  48(%rsi),%xmm12
1244         movdqa  64(%rsi),%xmm13
1245         movdqa  80(%rsi),%xmm14
1246         leaq    96(%rsi),%rsi         /* step to the next entry */
1247
1248         pand    %xmm15,%xmm9          /* keep the entry only when the mask is set */
1249         pand    %xmm15,%xmm10
1250         por     %xmm9,%xmm2
1251         pand    %xmm15,%xmm11
1252         por     %xmm10,%xmm3
1253         pand    %xmm15,%xmm12
1254         por     %xmm11,%xmm4
1255         pand    %xmm15,%xmm13
1256         por     %xmm12,%xmm5
1257         pand    %xmm15,%xmm14
1258         por     %xmm13,%xmm6
1259         por     %xmm14,%xmm7
1260
1261         decq    %rax
1262         jnz     .Lselect_loop_sse_w5
1263
1264         movdqu  %xmm2,0(%rdi)
1265         movdqu  %xmm3,16(%rdi)
1266         movdqu  %xmm4,32(%rdi)
1267         movdqu  %xmm5,48(%rdi)
1268         movdqu  %xmm6,64(%rdi)
1269         movdqu  %xmm7,80(%rdi)
1270         .byte   0xf3,0xc3             /* encodes "rep ret" */
1271 .size   ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
1272
1273
1274
/*
 * ecp_nistz256_select_w7: copy one 64-byte entry out of a 64-entry table.
 *   %rdi -> destination, 64 bytes, written with unaligned stores
 *   %rsi -> table base; read with movdqa, so it must be 16-byte aligned
 *   %edx  = entry number, compared against a counter that runs 1..64;
 *           a value outside that range (e.g. 0) yields 64 zero bytes
 * All 64 entries are loaded and masked on every call; no branch or load
 * address depends on %edx.
 * If bit 5 (mask 32) of the dword at OPENSSL_ia32cap_P+8 is set, control
 * goes to .Lavx2_select_w7 instead of the SSE loop below.
 * The SSE path clobbers %rax, %rsi, %xmm0-%xmm5, %xmm8-%xmm12, %xmm15
 * and the flags.
 */
1275 .globl  ecp_nistz256_select_w7
1276 .type   ecp_nistz256_select_w7,@function
1277 .align  32
1278 ecp_nistz256_select_w7:
1279         movl    OPENSSL_ia32cap_P+8(%rip),%eax
1280         testl   $32,%eax
1281         jnz     .Lavx2_select_w7
1282         movdqa  .LOne(%rip),%xmm8       /* xmm8 = running entry counter, starts at 1 */
1283         movd    %edx,%xmm1
1284
1285         pxor    %xmm2,%xmm2             /* xmm2..xmm5 accumulate the selected entry */
1286         pxor    %xmm3,%xmm3
1287         pxor    %xmm4,%xmm4
1288         pxor    %xmm5,%xmm5
1289
1290         movdqa  %xmm8,%xmm0             /* xmm0 = {1,1,1,1}: counter increment */
1291         pshufd  $0,%xmm1,%xmm1          /* broadcast the requested number to all lanes */
1292         movq    $64,%rax                /* 64 table entries */
1293
1294 .Lselect_loop_sse_w7:
1295         movdqa  %xmm8,%xmm15
1296         paddd   %xmm0,%xmm8
1297         movdqa  0(%rsi),%xmm9
1298         movdqa  16(%rsi),%xmm10
1299         pcmpeqd %xmm1,%xmm15            /* xmm15 = all-ones iff counter == requested */
1300         movdqa  32(%rsi),%xmm11
1301         movdqa  48(%rsi),%xmm12
1302         leaq    64(%rsi),%rsi           /* advance to the next 64-byte entry */
1303
1304         pand    %xmm15,%xmm9
1305         pand    %xmm15,%xmm10
1306         por     %xmm9,%xmm2
1307         pand    %xmm15,%xmm11
1308         por     %xmm10,%xmm3
1309         pand    %xmm15,%xmm12
1310         por     %xmm11,%xmm4
1311         prefetcht0      255(%rsi)       /* prefetch hint for table data further ahead */
1312         por     %xmm12,%xmm5
1313
1314         decq    %rax
1315         jnz     .Lselect_loop_sse_w7
1316
1317         movdqu  %xmm2,0(%rdi)
1318         movdqu  %xmm3,16(%rdi)
1319         movdqu  %xmm4,32(%rdi)
1320         movdqu  %xmm5,48(%rdi)
1321         .byte   0xf3,0xc3               /* rep ret */
1322 .size   ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
1323
1324
/*
 * ecp_nistz256_avx2_select_w5: 256-bit-vector variant of the 96-byte,
 * 16-entry table lookup.  No .globl is emitted for it; within this chunk
 * it is reached through the .Lavx2_select_w5 jump in ecp_nistz256_select_w5.
 *   %rdi -> destination, 96 bytes, written with unaligned stores
 *   %rsi -> table base; read with vmovdqa on 32-byte vectors, so it must
 *           be 32-byte aligned
 *   %edx  = entry number, compared against counters that run 1..16;
 *           a value outside that range (e.g. 0) yields 96 zero bytes
 * Two entries are processed per iteration (8 iterations).  Every entry is
 * loaded and masked; no branch or load address depends on %edx.
 * Clobbers %rax, %rsi, %ymm0-%ymm14 and the flags.
 */
1325 .type   ecp_nistz256_avx2_select_w5,@function
1326 .align  32
1327 ecp_nistz256_avx2_select_w5:
1328 .Lavx2_select_w5:
1329         vzeroupper
1330         vmovdqa .LTwo(%rip),%ymm0       /* ymm0 = {2,...}: counter increment per iteration */
1331
1332         vpxor   %ymm2,%ymm2,%ymm2       /* ymm2..ymm4 accumulate the selected entry */
1333         vpxor   %ymm3,%ymm3,%ymm3
1334         vpxor   %ymm4,%ymm4,%ymm4
1335
1336         vmovdqa .LOne(%rip),%ymm5       /* counter for the first entry of each pair: 1,3,5,... */
1337         vmovdqa .LTwo(%rip),%ymm10      /* counter for the second entry of each pair: 2,4,6,... */
1338
1339         vmovd   %edx,%xmm1
1340         vpermd  %ymm1,%ymm2,%ymm1       /* ymm2 is all-zero indices: broadcast dword 0 of ymm1 */
1341
1342         movq    $8,%rax                 /* 8 iterations x 2 entries */
1343 .Lselect_loop_avx2_w5:
1344
1345         vmovdqa 0(%rsi),%ymm6
1346         vmovdqa 32(%rsi),%ymm7
1347         vmovdqa 64(%rsi),%ymm8
1348
1349         vmovdqa 96(%rsi),%ymm11
1350         vmovdqa 128(%rsi),%ymm12
1351         vmovdqa 160(%rsi),%ymm13
1352
1353         vpcmpeqd        %ymm1,%ymm5,%ymm9       /* mask for the first entry of the pair */
1354         vpcmpeqd        %ymm1,%ymm10,%ymm14     /* mask for the second entry of the pair */
1355
1356         vpaddd  %ymm0,%ymm5,%ymm5
1357         vpaddd  %ymm0,%ymm10,%ymm10
1358         leaq    192(%rsi),%rsi          /* advance past two 96-byte entries */
1359
1360         vpand   %ymm9,%ymm6,%ymm6
1361         vpand   %ymm9,%ymm7,%ymm7
1362         vpand   %ymm9,%ymm8,%ymm8
1363         vpand   %ymm14,%ymm11,%ymm11
1364         vpand   %ymm14,%ymm12,%ymm12
1365         vpand   %ymm14,%ymm13,%ymm13
1366
1367         vpxor   %ymm6,%ymm2,%ymm2       /* the counters are distinct, so at most one masked */
1368         vpxor   %ymm7,%ymm3,%ymm3       /* entry is non-zero and xor merges like or */
1369         vpxor   %ymm8,%ymm4,%ymm4
1370         vpxor   %ymm11,%ymm2,%ymm2
1371         vpxor   %ymm12,%ymm3,%ymm3
1372         vpxor   %ymm13,%ymm4,%ymm4
1373
1374         decq    %rax
1375         jnz     .Lselect_loop_avx2_w5
1376
1377         vmovdqu %ymm2,0(%rdi)
1378         vmovdqu %ymm3,32(%rdi)
1379         vmovdqu %ymm4,64(%rdi)
1380         vzeroupper
1381         .byte   0xf3,0xc3               /* rep ret */
1382 .size   ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5
1383
1384
1385
/*
 * ecp_nistz256_avx2_select_w7: 256-bit-vector variant of the 64-byte,
 * 64-entry table lookup.  Exported with .globl, and also reached through
 * the .Lavx2_select_w7 jump in ecp_nistz256_select_w7.
 *   %rdi -> destination, 64 bytes, written with unaligned stores
 *   %rsi -> table base; read with vmovdqa on 32-byte vectors, so it must
 *           be 32-byte aligned
 *   %edx  = entry number, compared against counters that run 1..64;
 *           a value outside that range (e.g. 0) yields 64 zero bytes
 * The loop handles three entries per iteration (21 iterations = 63
 * entries); the 64th entry is handled by the straight-line tail.
 * Every entry is loaded and masked; no branch or load address depends
 * on %edx.
 * Clobbers %rax, %rsi, %ymm0-%ymm15 and the flags.
 */
1386 .globl  ecp_nistz256_avx2_select_w7
1387 .type   ecp_nistz256_avx2_select_w7,@function
1388 .align  32
1389 ecp_nistz256_avx2_select_w7:
1390 .Lavx2_select_w7:
1391         vzeroupper
1392         vmovdqa .LThree(%rip),%ymm0     /* ymm0 = {3,...}: counter increment per iteration */
1393
1394         vpxor   %ymm2,%ymm2,%ymm2       /* ymm2, ymm3 accumulate the selected entry */
1395         vpxor   %ymm3,%ymm3,%ymm3
1396
1397         vmovdqa .LOne(%rip),%ymm4       /* counters for the three entries of each group: */
1398         vmovdqa .LTwo(%rip),%ymm8       /* 1,4,7,... / 2,5,8,... / 3,6,9,... */
1399         vmovdqa .LThree(%rip),%ymm12
1400
1401         vmovd   %edx,%xmm1
1402         vpermd  %ymm1,%ymm2,%ymm1       /* ymm2 is all-zero indices: broadcast dword 0 of ymm1 */
1403
1404
1405         movq    $21,%rax                /* 21 iterations x 3 entries = 63 entries */
1406 .Lselect_loop_avx2_w7:
1407
1408         vmovdqa 0(%rsi),%ymm5
1409         vmovdqa 32(%rsi),%ymm6
1410
1411         vmovdqa 64(%rsi),%ymm9
1412         vmovdqa 96(%rsi),%ymm10
1413
1414         vmovdqa 128(%rsi),%ymm13
1415         vmovdqa 160(%rsi),%ymm14
1416
1417         vpcmpeqd        %ymm1,%ymm4,%ymm7       /* per-entry masks: all-ones iff counter == requested */
1418         vpcmpeqd        %ymm1,%ymm8,%ymm11
1419         vpcmpeqd        %ymm1,%ymm12,%ymm15
1420
1421         vpaddd  %ymm0,%ymm4,%ymm4
1422         vpaddd  %ymm0,%ymm8,%ymm8
1423         vpaddd  %ymm0,%ymm12,%ymm12
1424         leaq    192(%rsi),%rsi          /* advance past three 64-byte entries */
1425
1426         vpand   %ymm7,%ymm5,%ymm5
1427         vpand   %ymm7,%ymm6,%ymm6
1428         vpand   %ymm11,%ymm9,%ymm9
1429         vpand   %ymm11,%ymm10,%ymm10
1430         vpand   %ymm15,%ymm13,%ymm13
1431         vpand   %ymm15,%ymm14,%ymm14
1432
1433         vpxor   %ymm5,%ymm2,%ymm2       /* at most one masked entry is non-zero, so xor */
1434         vpxor   %ymm6,%ymm3,%ymm3       /* merges like or */
1435         vpxor   %ymm9,%ymm2,%ymm2
1436         vpxor   %ymm10,%ymm3,%ymm3
1437         vpxor   %ymm13,%ymm2,%ymm2
1438         vpxor   %ymm14,%ymm3,%ymm3
1439
1440         decq    %rax
1441         jnz     .Lselect_loop_avx2_w7
1442
1443
1444         vmovdqa 0(%rsi),%ymm5           /* tail: 64th entry; ymm4 is now 1 + 21*3 = 64 */
1445         vmovdqa 32(%rsi),%ymm6
1446
1447         vpcmpeqd        %ymm1,%ymm4,%ymm7
1448
1449         vpand   %ymm7,%ymm5,%ymm5
1450         vpand   %ymm7,%ymm6,%ymm6
1451
1452         vpxor   %ymm5,%ymm2,%ymm2
1453         vpxor   %ymm6,%ymm3,%ymm3
1454
1455         vmovdqu %ymm2,0(%rdi)
1456         vmovdqu %ymm3,32(%rdi)
1457         vzeroupper
1458         .byte   0xf3,0xc3               /* rep ret */
1459 .size   ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
/*
 * __ecp_nistz256_add_toq: 256-bit addition followed by one conditional
 * subtraction of the modulus (internal helper, register calling convention).
 *   in:   %r12,%r13,%r8,%r9 = first operand, least-significant limb first
 *         %rbx             -> second operand, four 64-bit limbs
 *         %r14,%r15         = modulus limbs 1 and 3 (ecp_nistz256_point_double
 *                             loads them from .Lpoly+8 and .Lpoly+24); limbs
 *                             0 and 2 are the immediates -1 and 0 used below
 *         %rdi             -> destination, four 64-bit limbs
 *   out:  the sum, minus the modulus when that subtraction does not borrow,
 *         in %r12,%r13,%r8,%r9 and stored at (%rdi)
 *   clobbers: %rax,%rbp,%rcx,%r10,%r11, flags
 * The choice between the two candidates is made with cmov, not a branch.
 */
1460 .type   __ecp_nistz256_add_toq,@function
1461 .align  32
1462 __ecp_nistz256_add_toq:
1463         xorq    %r11,%r11
1464         addq    0(%rbx),%r12
1465         adcq    8(%rbx),%r13
1466         movq    %r12,%rax               /* rax,rbp,rcx,r10 keep the unreduced sum */
1467         adcq    16(%rbx),%r8
1468         adcq    24(%rbx),%r9
1469         movq    %r13,%rbp
1470         adcq    $0,%r11                 /* r11 = carry out of the 256-bit add */
1471
1472         subq    $-1,%r12                /* subtract the modulus limb by limb */
1473         movq    %r8,%rcx
1474         sbbq    %r14,%r13
1475         sbbq    $0,%r8
1476         movq    %r9,%r10
1477         sbbq    %r15,%r9
1478         sbbq    $0,%r11                 /* CF set iff carry:sum was below the modulus */
1479
1480         cmovcq  %rax,%r12               /* borrow: fall back to the unreduced sum */
1481         cmovcq  %rbp,%r13
1482         movq    %r12,0(%rdi)
1483         cmovcq  %rcx,%r8
1484         movq    %r13,8(%rdi)
1485         cmovcq  %r10,%r9
1486         movq    %r8,16(%rdi)
1487         movq    %r9,24(%rdi)
1488
1489         .byte   0xf3,0xc3               /* rep ret */
1490 .size   __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
1491
/*
 * __ecp_nistz256_sub_fromq: 256-bit subtraction of a memory operand from
 * the register operand, adding the modulus back when the subtraction
 * borrows (internal helper, register calling convention).
 *   in:   %r12,%r13,%r8,%r9 = minuend, least-significant limb first
 *         %rbx             -> subtrahend, four 64-bit limbs
 *         %r14,%r15         = modulus limbs 1 and 3; limbs 0 and 2 are the
 *                             immediates -1 and 0 used below
 *         %rdi             -> destination, four 64-bit limbs
 *   out:  result in %r12,%r13,%r8,%r9 and stored at (%rdi)
 *   clobbers: %rax,%rbp,%rcx,%r10,%r11, flags
 * The choice between the two candidates is made with cmov, not a branch.
 */
1492 .type   __ecp_nistz256_sub_fromq,@function
1493 .align  32
1494 __ecp_nistz256_sub_fromq:
1495         subq    0(%rbx),%r12
1496         sbbq    8(%rbx),%r13
1497         movq    %r12,%rax               /* rax,rbp,rcx,r10 keep the raw difference */
1498         sbbq    16(%rbx),%r8
1499         sbbq    24(%rbx),%r9
1500         movq    %r13,%rbp
1501         sbbq    %r11,%r11               /* r11 = -1 if the subtraction borrowed, else 0 */
1502
1503         addq    $-1,%r12                /* add the modulus limb by limb */
1504         movq    %r8,%rcx
1505         adcq    %r14,%r13
1506         adcq    $0,%r8
1507         movq    %r9,%r10
1508         adcq    %r15,%r9
1509         testq   %r11,%r11
1510
1511         cmovzq  %rax,%r12               /* no borrow: keep the raw difference */
1512         cmovzq  %rbp,%r13
1513         movq    %r12,0(%rdi)
1514         cmovzq  %rcx,%r8
1515         movq    %r13,8(%rdi)
1516         cmovzq  %r10,%r9
1517         movq    %r8,16(%rdi)
1518         movq    %r9,24(%rdi)
1519
1520         .byte   0xf3,0xc3               /* rep ret */
1521 .size   __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
1522
/*
 * __ecp_nistz256_subq: register-only 256-bit subtraction, adding the
 * modulus back when the subtraction borrows.  Nothing is stored to
 * memory and %rdi is not used here.
 *   in:   %rax,%rbp,%rcx,%r10 = minuend, least-significant limb first
 *         %r12,%r13,%r8,%r9   = subtrahend, least-significant limb first
 *         %r14,%r15           = modulus limbs 1 and 3; limbs 0 and 2 are
 *                               the immediates -1 and 0 used below
 *   out:  result in %r12,%r13,%r8,%r9
 *   clobbers: %rax,%rbp,%rcx,%r10,%r11, flags
 * The choice between the two candidates is made with cmov, not a branch.
 */
1523 .type   __ecp_nistz256_subq,@function
1524 .align  32
1525 __ecp_nistz256_subq:
1526         subq    %r12,%rax
1527         sbbq    %r13,%rbp
1528         movq    %rax,%r12               /* r12,r13,r8,r9 receive the raw difference */
1529         sbbq    %r8,%rcx
1530         sbbq    %r9,%r10
1531         movq    %rbp,%r13
1532         sbbq    %r11,%r11               /* r11 = -1 if the subtraction borrowed, else 0 */
1533
1534         addq    $-1,%rax                /* rax,rbp,rcx,r10 = raw difference + modulus */
1535         movq    %rcx,%r8
1536         adcq    %r14,%rbp
1537         adcq    $0,%rcx
1538         movq    %r10,%r9
1539         adcq    %r15,%r10
1540         testq   %r11,%r11
1541
1542         cmovnzq %rax,%r12               /* borrow: take the corrected value */
1543         cmovnzq %rbp,%r13
1544         cmovnzq %rcx,%r8
1545         cmovnzq %r10,%r9
1546
1547         .byte   0xf3,0xc3               /* rep ret */
1548 .size   __ecp_nistz256_subq,.-__ecp_nistz256_subq
1549
/*
 * __ecp_nistz256_mul_by_2q: double a 256-bit value, then subtract the
 * modulus once when that subtraction does not borrow (internal helper,
 * register calling convention).
 *   in:   %r12,%r13,%r8,%r9 = operand, least-significant limb first
 *         %r14,%r15         = modulus limbs 1 and 3; limbs 0 and 2 are the
 *                             immediates -1 and 0 used below
 *         %rdi             -> destination, four 64-bit limbs
 *   out:  result in %r12,%r13,%r8,%r9 and stored at (%rdi)
 *   clobbers: %rax,%rbp,%rcx,%r10,%r11, flags
 * The choice between the two candidates is made with cmov, not a branch.
 */
1550 .type   __ecp_nistz256_mul_by_2q,@function
1551 .align  32
1552 __ecp_nistz256_mul_by_2q:
1553         xorq    %r11,%r11
1554         addq    %r12,%r12
1555         adcq    %r13,%r13
1556         movq    %r12,%rax               /* rax,rbp,rcx,r10 keep the unreduced double */
1557         adcq    %r8,%r8
1558         adcq    %r9,%r9
1559         movq    %r13,%rbp
1560         adcq    $0,%r11                 /* r11 = bit shifted out of the top limb */
1561
1562         subq    $-1,%r12                /* subtract the modulus limb by limb */
1563         movq    %r8,%rcx
1564         sbbq    %r14,%r13
1565         sbbq    $0,%r8
1566         movq    %r9,%r10
1567         sbbq    %r15,%r9
1568         sbbq    $0,%r11                 /* CF set iff carry:double was below the modulus */
1569
1570         cmovcq  %rax,%r12               /* borrow: fall back to the unreduced double */
1571         cmovcq  %rbp,%r13
1572         movq    %r12,0(%rdi)
1573         cmovcq  %rcx,%r8
1574         movq    %r13,8(%rdi)
1575         cmovcq  %r10,%r9
1576         movq    %r8,16(%rdi)
1577         movq    %r9,24(%rdi)
1578
1579         .byte   0xf3,0xc3               /* rep ret */
1580 .size   __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
/*
 * ecp_nistz256_point_double
 *   %rdi -> result, 96 bytes written
 *   %rsi -> input, 96 bytes read
 * The comments below call input bytes 0..31 / 32..63 / 64..95 "X", "Y" and
 * "Z" -- NOTE(review): this assumes the point is laid out as three 32-byte
 * coordinates in that order; confirm against the C declaration.
 *
 * If (dword at OPENSSL_ia32cap_P+8) & 0x80100 == 0x80100, the work is done
 * at .Lpoint_doublex instead (not part of this chunk).
 *
 * Stack frame, 160+8 bytes below six saved registers (slot names are
 * reviewer labels, not symbols):
 *     0(%rsp) S    32(%rsp) M    64(%rsp) Zsqr    96(%rsp) copy of X
 *   128(%rsp) tmp
 * On the usual SysV entry alignment, six pushes plus 160+8 leave %rsp
 * 16-byte aligned, which the movdqa stores to 96(%rsp) need.
 *
 * %xmm0/%xmm1/%xmm2 carry result+0 / result+32 / result+64 and are relied
 * on to survive every helper call.  %r14/%r15 are loaded from .Lpoly+8 and
 * .Lpoly+24 for the add/sub/mul_by_2 helpers.
 *
 * __ecp_nistz256_sqr_montq and __ecp_nistz256_mul_montq are defined outside
 * this chunk.  Comments of the form "dst = a * b" / "dst = a^2" presume
 * they are Montgomery multiply / square taking operands as loaded here and
 * storing at (%rdi); the register state they leave behind is inferred from
 * how this code consumes it -- TODO confirm against their definitions.
 *
 * .Lpoint_double_shortcutq is also a jump target of ecp_nistz256_point_add
 * (.Ladd_doubleq), entered with an equivalent frame already set up.
 */
1581 .globl  ecp_nistz256_point_double
1582 .type   ecp_nistz256_point_double,@function
1583 .align  32
1584 ecp_nistz256_point_double:
1585         movl    $0x80100,%ecx
1586         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
1587         cmpl    $0x80100,%ecx
1588         je      .Lpoint_doublex
1589         pushq   %rbp
1590         pushq   %rbx
1591         pushq   %r12
1592         pushq   %r13
1593         pushq   %r14
1594         pushq   %r15
1595         subq    $160+8,%rsp
1596
1597 .Lpoint_double_shortcutq:
1598         movdqu  0(%rsi),%xmm0
1599         movq    %rsi,%rbx               /* rbx keeps the input pointer */
1600         movdqu  16(%rsi),%xmm1
1601         movq    32+0(%rsi),%r12         /* r12,r13,r8,r9 = Y */
1602         movq    32+8(%rsi),%r13
1603         movq    32+16(%rsi),%r8
1604         movq    32+24(%rsi),%r9
1605         movq    .Lpoly+8(%rip),%r14
1606         movq    .Lpoly+24(%rip),%r15
1607         movdqa  %xmm0,96(%rsp)          /* stack copy of X */
1608         movdqa  %xmm1,96+16(%rsp)
1609         leaq    32(%rdi),%r10
1610         leaq    64(%rdi),%r11
1611 .byte   102,72,15,110,199               /* movq %rdi,%xmm0 : result+0  */
1612 .byte   102,73,15,110,202               /* movq %r10,%xmm1 : result+32 */
1613 .byte   102,73,15,110,211               /* movq %r11,%xmm2 : result+64 */
1614
1615         leaq    0(%rsp),%rdi
1616         call    __ecp_nistz256_mul_by_2q        /* S = 2*Y */
1617
1618         movq    64+0(%rsi),%rax
1619         movq    64+8(%rsi),%r14
1620         movq    64+16(%rsi),%r15
1621         movq    64+24(%rsi),%r8
1622         leaq    64-0(%rsi),%rsi
1623         leaq    64(%rsp),%rdi
1624         call    __ecp_nistz256_sqr_montq        /* Zsqr = Z^2 */
1625
1626         movq    0+0(%rsp),%rax
1627         movq    8+0(%rsp),%r14
1628         leaq    0+0(%rsp),%rsi
1629         movq    16+0(%rsp),%r15
1630         movq    24+0(%rsp),%r8
1631         leaq    0(%rsp),%rdi
1632         call    __ecp_nistz256_sqr_montq        /* S = S^2 */
1633
1634         movq    32(%rbx),%rax
1635         movq    64+0(%rbx),%r9
1636         movq    64+8(%rbx),%r10
1637         movq    64+16(%rbx),%r11
1638         movq    64+24(%rbx),%r12
1639         leaq    64-0(%rbx),%rsi
1640         leaq    32(%rbx),%rbx
1641 .byte   102,72,15,126,215               /* movq %xmm2,%rdi : rdi = result+64 */
1642         call    __ecp_nistz256_mul_montq        /* result Z = Z * Y */
1643         call    __ecp_nistz256_mul_by_2q        /* result Z doubled; rdi is not reloaded in between */
1644
1645         movq    96+0(%rsp),%r12
1646         movq    96+8(%rsp),%r13
1647         leaq    64(%rsp),%rbx
1648         movq    96+16(%rsp),%r8
1649         movq    96+24(%rsp),%r9
1650         leaq    32(%rsp),%rdi
1651         call    __ecp_nistz256_add_toq          /* M = X + Zsqr */
1652
1653         movq    96+0(%rsp),%r12
1654         movq    96+8(%rsp),%r13
1655         leaq    64(%rsp),%rbx
1656         movq    96+16(%rsp),%r8
1657         movq    96+24(%rsp),%r9
1658         leaq    64(%rsp),%rdi
1659         call    __ecp_nistz256_sub_fromq        /* Zsqr = X - Zsqr */
1660
1661         movq    0+0(%rsp),%rax
1662         movq    8+0(%rsp),%r14
1663         leaq    0+0(%rsp),%rsi
1664         movq    16+0(%rsp),%r15
1665         movq    24+0(%rsp),%r8
1666 .byte   102,72,15,126,207               /* movq %xmm1,%rdi : rdi = result+32 */
1667         call    __ecp_nistz256_sqr_montq        /* S^2, consumed below from r12,r13,r14,r15 */
/*
 * Inline halving: add the four-limb value {-1, %rsi, 0, %rbp} (presumably
 * the modulus, with %rsi/%rbp left holding its limbs 1 and 3 by sqr_montq),
 * keep that sum only when the original value is odd, then shift the
 * 257-bit quantity %r9:%r15:%r14:%r13:%r12 right by one bit and store it
 * at result+32.
 */
1668         xorq    %r9,%r9
1669         movq    %r12,%rax
1670         addq    $-1,%r12
1671         movq    %r13,%r10
1672         adcq    %rsi,%r13
1673         movq    %r14,%rcx
1674         adcq    $0,%r14
1675         movq    %r15,%r8
1676         adcq    %rbp,%r15
1677         adcq    $0,%r9                  /* r9 = carry out of the addition */
1678         xorq    %rsi,%rsi
1679         testq   $1,%rax                 /* ZF set iff the original value is even */
1680
1681         cmovzq  %rax,%r12               /* even: restore the original limbs ... */
1682         cmovzq  %r10,%r13
1683         cmovzq  %rcx,%r14
1684         cmovzq  %r8,%r15
1685         cmovzq  %rsi,%r9                /* ... and clear the carry limb */
1686
1687         movq    %r13,%rax
1688         shrq    $1,%r12
1689         shlq    $63,%rax
1690         movq    %r14,%r10
1691         shrq    $1,%r13
1692         orq     %rax,%r12
1693         shlq    $63,%r10
1694         movq    %r15,%rcx
1695         shrq    $1,%r14
1696         orq     %r10,%r13
1697         shlq    $63,%rcx
1698         movq    %r12,0(%rdi)
1699         shrq    $1,%r15
1700         movq    %r13,8(%rdi)
1701         shlq    $63,%r9
1702         orq     %rcx,%r14
1703         orq     %r9,%r15
1704         movq    %r14,16(%rdi)
1705         movq    %r15,24(%rdi)
1706         movq    64(%rsp),%rax
1707         leaq    64(%rsp),%rbx
1708         movq    0+32(%rsp),%r9
1709         movq    8+32(%rsp),%r10
1710         leaq    0+32(%rsp),%rsi
1711         movq    16+32(%rsp),%r11
1712         movq    24+32(%rsp),%r12
1713         leaq    32(%rsp),%rdi
1714         call    __ecp_nistz256_mul_montq        /* M = M * Zsqr */
1715
1716         leaq    128(%rsp),%rdi
1717         call    __ecp_nistz256_mul_by_2q        /* tmp = 2*M */
1718
1719         leaq    32(%rsp),%rbx
1720         leaq    32(%rsp),%rdi
1721         call    __ecp_nistz256_add_toq          /* M = 2*M + M */
1722
1723         movq    96(%rsp),%rax
1724         leaq    96(%rsp),%rbx
1725         movq    0+0(%rsp),%r9
1726         movq    8+0(%rsp),%r10
1727         leaq    0+0(%rsp),%rsi
1728         movq    16+0(%rsp),%r11
1729         movq    24+0(%rsp),%r12
1730         leaq    0(%rsp),%rdi
1731         call    __ecp_nistz256_mul_montq        /* S = S * X */
1732
1733         leaq    128(%rsp),%rdi
1734         call    __ecp_nistz256_mul_by_2q        /* tmp = 2*S */
1735
1736         movq    0+32(%rsp),%rax
1737         movq    8+32(%rsp),%r14
1738         leaq    0+32(%rsp),%rsi
1739         movq    16+32(%rsp),%r15
1740         movq    24+32(%rsp),%r8
1741 .byte   102,72,15,126,199               /* movq %xmm0,%rdi : rdi = result+0 */
1742         call    __ecp_nistz256_sqr_montq        /* M^2 */
1743
1744         leaq    128(%rsp),%rbx
1745         movq    %r14,%r8                /* move the upper two result limbs and the two */
1746         movq    %r15,%r9                /* modulus limbs into the registers that */
1747         movq    %rsi,%r14               /* sub_fromq reads */
1748         movq    %rbp,%r15
1749         call    __ecp_nistz256_sub_fromq        /* result X = M^2 - tmp */
1750
1751         movq    0+0(%rsp),%rax
1752         movq    0+8(%rsp),%rbp
1753         movq    0+16(%rsp),%rcx
1754         movq    0+24(%rsp),%r10
1755         leaq    0(%rsp),%rdi
1756         call    __ecp_nistz256_subq             /* r12,r13,r8,r9 = S - result X (nothing stored) */
1757
1758         movq    32(%rsp),%rax
1759         leaq    32(%rsp),%rbx
1760         movq    %r12,%r14
1761         xorl    %ecx,%ecx               /* sets ZF; mov/lea below leave it set, so the */
1762         movq    %r12,0+0(%rsp)          /* two cmovz act as plain register moves */
1763         movq    %r13,%r10
1764         movq    %r13,0+8(%rsp)
1765         cmovzq  %r8,%r11
1766         movq    %r8,0+16(%rsp)
1767         leaq    0-0(%rsp),%rsi
1768         cmovzq  %r9,%r12
1769         movq    %r9,0+24(%rsp)
1770         movq    %r14,%r9
1771         leaq    0(%rsp),%rdi
1772         call    __ecp_nistz256_mul_montq        /* S = (S - result X) * M */
1773
1774 .byte   102,72,15,126,203               /* movq %xmm1,%rbx : rbx = result+32 */
1775 .byte   102,72,15,126,207               /* movq %xmm1,%rdi : rdi = result+32 */
1776         call    __ecp_nistz256_sub_fromq        /* result Y = S - result Y */
1777
1778         addq    $160+8,%rsp
1779         popq    %r15
1780         popq    %r14
1781         popq    %r13
1782         popq    %r12
1783         popq    %rbx
1784         popq    %rbp
1785         .byte   0xf3,0xc3               /* rep ret */
1786 .size   ecp_nistz256_point_double,.-ecp_nistz256_point_double
/*
 * ecp_nistz256_point_add
 *   %rdi -> result, 96 bytes written
 *   %rsi -> first input ("in1"), 96 bytes read
 *   %rdx -> second input ("in2"), 96 bytes read
 * The comments below call bytes 0..31 / 32..63 / 64..95 of a point "x",
 * "y" and "z" -- NOTE(review): this assumes three 32-byte coordinates in
 * that order; confirm against the C declaration.
 *
 * If (dword at OPENSSL_ia32cap_P+8) & 0x80100 == 0x80100, the work is done
 * at .Lpoint_addx instead (not part of this chunk).
 *
 * Stack frame, 576+8 bytes below six saved registers (slot names are
 * reviewer labels, not symbols):
 *     0 H      32 Z1sqr, later Hsqr    64 R      96 Z2sqr, later Rsqr
 *   128 Hcub  160 U1                  192 U2    224 S1
 *   256 S2    288 res_x               320 res_y 352 res_z
 *   384 in1_x 416 in1_y               448 in1_z
 *   480 in2_x 512 in2_y               544 in2_z
 * On the usual SysV entry alignment, six pushes plus 576+8 leave %rsp
 * 16-byte aligned, which the movdqa/pand/pandn stack accesses need.
 *
 * Values relied on to survive every helper call:
 *   %xmm0 = result pointer, %xmm1 = in1 pointer,
 *   %xmm5 = all-ones iff in1_z is all zero bytes,
 *   %xmm4 = all-ones iff in2_z is all zero bytes.
 *
 * __ecp_nistz256_sqr_montq and __ecp_nistz256_mul_montq are defined outside
 * this chunk.  Comments of the form "dst = a * b" / "dst = a^2" presume
 * they are Montgomery multiply / square taking operands as loaded here,
 * storing at (%rdi), and leaving the result limbs in %r12,%r13,%r8,%r9 and
 * modulus limbs in %r14/%r15 for the sub helpers that follow -- TODO
 * confirm against their definitions.
 *
 * Unlike the table lookups in this file, this routine branches on whether
 * H and R are zero and on the z-is-zero masks (see .Ladd_proceedq /
 * .Ladd_doubleq).
 */
1787 .globl  ecp_nistz256_point_add
1788 .type   ecp_nistz256_point_add,@function
1789 .align  32
1790 ecp_nistz256_point_add:
1791         movl    $0x80100,%ecx
1792         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
1793         cmpl    $0x80100,%ecx
1794         je      .Lpoint_addx
1795         pushq   %rbp
1796         pushq   %rbx
1797         pushq   %r12
1798         pushq   %r13
1799         pushq   %r14
1800         pushq   %r15
1801         subq    $576+8,%rsp
1802
1803         movdqu  0(%rsi),%xmm0
1804         movdqu  16(%rsi),%xmm1
1805         movdqu  32(%rsi),%xmm2
1806         movdqu  48(%rsi),%xmm3
1807         movdqu  64(%rsi),%xmm4
1808         movdqu  80(%rsi),%xmm5
1809         movq    %rsi,%rbx               /* rbx = in1 pointer */
1810         movq    %rdx,%rsi               /* rsi = in2 pointer */
1811         movdqa  %xmm0,384(%rsp)         /* stack copy of in1 */
1812         movdqa  %xmm1,384+16(%rsp)
1813         movdqa  %xmm2,416(%rsp)
1814         movdqa  %xmm3,416+16(%rsp)
1815         movdqa  %xmm4,448(%rsp)
1816         movdqa  %xmm5,448+16(%rsp)
1817         por     %xmm4,%xmm5             /* start OR-folding in1_z into one dword */
1818
1819         movdqu  0(%rsi),%xmm0
1820         pshufd  $0xb1,%xmm5,%xmm3
1821         movdqu  16(%rsi),%xmm1
1822         movdqu  32(%rsi),%xmm2
1823         por     %xmm3,%xmm5
1824         movdqu  48(%rsi),%xmm3
1825         movq    64+0(%rsi),%rax
1826         movq    64+8(%rsi),%r14
1827         movq    64+16(%rsi),%r15
1828         movq    64+24(%rsi),%r8
1829         movdqa  %xmm0,480(%rsp)         /* stack copy of in2_x, in2_y */
1830         pshufd  $0x1e,%xmm5,%xmm4
1831         movdqa  %xmm1,480+16(%rsp)
1832         movdqu  64(%rsi),%xmm0
1833         movdqu  80(%rsi),%xmm1
1834         movdqa  %xmm2,512(%rsp)
1835         movdqa  %xmm3,512+16(%rsp)
1836         por     %xmm4,%xmm5             /* dword 0 of xmm5 = OR of all in1_z dwords */
1837         pxor    %xmm4,%xmm4
1838         por     %xmm0,%xmm1             /* start OR-folding in2_z */
1839 .byte   102,72,15,110,199               /* movq %rdi,%xmm0 : save the result pointer */
1840
1841         leaq    64-0(%rsi),%rsi
1842         movq    %rax,544+0(%rsp)        /* stack copy of in2_z */
1843         movq    %r14,544+8(%rsp)
1844         movq    %r15,544+16(%rsp)
1845         movq    %r8,544+24(%rsp)
1846         leaq    96(%rsp),%rdi
1847         call    __ecp_nistz256_sqr_montq        /* Z2sqr = in2_z^2 */
1848
1849         pcmpeqd %xmm4,%xmm5
1850         pshufd  $0xb1,%xmm1,%xmm4
1851         por     %xmm1,%xmm4
1852         pshufd  $0,%xmm5,%xmm5          /* xmm5 = all-ones iff in1_z is all zero */
1853         pshufd  $0x1e,%xmm4,%xmm3
1854         por     %xmm3,%xmm4
1855         pxor    %xmm3,%xmm3
1856         pcmpeqd %xmm3,%xmm4
1857         pshufd  $0,%xmm4,%xmm4          /* xmm4 = all-ones iff in2_z is all zero */
1858         movq    64+0(%rbx),%rax
1859         movq    64+8(%rbx),%r14
1860         movq    64+16(%rbx),%r15
1861         movq    64+24(%rbx),%r8
1862 .byte   102,72,15,110,203               /* movq %rbx,%xmm1 : save the in1 pointer */
1863
1864         leaq    64-0(%rbx),%rsi
1865         leaq    32(%rsp),%rdi
1866         call    __ecp_nistz256_sqr_montq        /* Z1sqr = in1_z^2 */
1867
1868         movq    544(%rsp),%rax
1869         leaq    544(%rsp),%rbx
1870         movq    0+96(%rsp),%r9
1871         movq    8+96(%rsp),%r10
1872         leaq    0+96(%rsp),%rsi
1873         movq    16+96(%rsp),%r11
1874         movq    24+96(%rsp),%r12
1875         leaq    224(%rsp),%rdi
1876         call    __ecp_nistz256_mul_montq        /* S1 = Z2sqr * in2_z */
1877
1878         movq    448(%rsp),%rax
1879         leaq    448(%rsp),%rbx
1880         movq    0+32(%rsp),%r9
1881         movq    8+32(%rsp),%r10
1882         leaq    0+32(%rsp),%rsi
1883         movq    16+32(%rsp),%r11
1884         movq    24+32(%rsp),%r12
1885         leaq    256(%rsp),%rdi
1886         call    __ecp_nistz256_mul_montq        /* S2 = Z1sqr * in1_z */
1887
1888         movq    416(%rsp),%rax
1889         leaq    416(%rsp),%rbx
1890         movq    0+224(%rsp),%r9
1891         movq    8+224(%rsp),%r10
1892         leaq    0+224(%rsp),%rsi
1893         movq    16+224(%rsp),%r11
1894         movq    24+224(%rsp),%r12
1895         leaq    224(%rsp),%rdi
1896         call    __ecp_nistz256_mul_montq        /* S1 = S1 * in1_y */
1897
1898         movq    512(%rsp),%rax
1899         leaq    512(%rsp),%rbx
1900         movq    0+256(%rsp),%r9
1901         movq    8+256(%rsp),%r10
1902         leaq    0+256(%rsp),%rsi
1903         movq    16+256(%rsp),%r11
1904         movq    24+256(%rsp),%r12
1905         leaq    256(%rsp),%rdi
1906         call    __ecp_nistz256_mul_montq        /* S2 = S2 * in2_y */
1907
1908         leaq    224(%rsp),%rbx
1909         leaq    64(%rsp),%rdi
1910         call    __ecp_nistz256_sub_fromq        /* R = S2 - S1 */
1911
1912         orq     %r13,%r12               /* r12 = OR of the four limbs of R */
1913         movdqa  %xmm4,%xmm2
1914         orq     %r8,%r12
1915         orq     %r9,%r12
1916         por     %xmm5,%xmm2             /* xmm2 = (in1_z all zero) | (in2_z all zero) */
1917 .byte   102,73,15,110,220               /* movq %r12,%xmm3 : zero iff R == 0 */
1918
1919         movq    384(%rsp),%rax
1920         leaq    384(%rsp),%rbx
1921         movq    0+96(%rsp),%r9
1922         movq    8+96(%rsp),%r10
1923         leaq    0+96(%rsp),%rsi
1924         movq    16+96(%rsp),%r11
1925         movq    24+96(%rsp),%r12
1926         leaq    160(%rsp),%rdi
1927         call    __ecp_nistz256_mul_montq        /* U1 = Z2sqr * in1_x */
1928
1929         movq    480(%rsp),%rax
1930         leaq    480(%rsp),%rbx
1931         movq    0+32(%rsp),%r9
1932         movq    8+32(%rsp),%r10
1933         leaq    0+32(%rsp),%rsi
1934         movq    16+32(%rsp),%r11
1935         movq    24+32(%rsp),%r12
1936         leaq    192(%rsp),%rdi
1937         call    __ecp_nistz256_mul_montq        /* U2 = Z1sqr * in2_x */
1938
1939         leaq    160(%rsp),%rbx
1940         leaq    0(%rsp),%rdi
1941         call    __ecp_nistz256_sub_fromq        /* H = U2 - U1 */
1942
1943         orq     %r13,%r12               /* r12 = OR of the four limbs of H */
1944         orq     %r8,%r12
1945         orq     %r9,%r12
1946
1947 .byte   0x3e                            /* prefix byte for the jnz below (DS override; presumably a legacy branch-taken hint) */
1948         jnz     .Ladd_proceedq          /* H != 0: general addition */
1949 .byte   102,73,15,126,208               /* movq %xmm2,%r8 : either-z-is-zero mask */
1950 .byte   102,73,15,126,217               /* movq %xmm3,%r9 : OR of the limbs of R */
1951         testq   %r8,%r8
1952         jnz     .Ladd_proceedq          /* an input has all-zero z: the final blend picks the other */
1953         testq   %r9,%r9
1954         jz      .Ladd_doubleq           /* H == 0 and R == 0: hand over to the doubling code */
1955
/* H == 0 but R != 0: the result is 96 zero bytes. */
1956 .byte   102,72,15,126,199               /* movq %xmm0,%rdi : restore the result pointer */
1957         pxor    %xmm0,%xmm0
1958         movdqu  %xmm0,0(%rdi)
1959         movdqu  %xmm0,16(%rdi)
1960         movdqu  %xmm0,32(%rdi)
1961         movdqu  %xmm0,48(%rdi)
1962         movdqu  %xmm0,64(%rdi)
1963         movdqu  %xmm0,80(%rdi)
1964         jmp     .Ladd_doneq
1965
1966 .align  32
1967 .Ladd_doubleq:
1968 .byte   102,72,15,126,206               /* movq %xmm1,%rsi : rsi = in1 pointer */
1969 .byte   102,72,15,126,199               /* movq %xmm0,%rdi : rdi = result pointer */
1970         addq    $416,%rsp               /* 576+8-416 = 160+8: shrink to point_double's frame; same six pushes, so its epilogue returns for us */
1971         jmp     .Lpoint_double_shortcutq
1972
1973 .align  32
1974 .Ladd_proceedq:
1975         movq    0+64(%rsp),%rax
1976         movq    8+64(%rsp),%r14
1977         leaq    0+64(%rsp),%rsi
1978         movq    16+64(%rsp),%r15
1979         movq    24+64(%rsp),%r8
1980         leaq    96(%rsp),%rdi
1981         call    __ecp_nistz256_sqr_montq        /* Rsqr = R^2 */
1982
1983         movq    448(%rsp),%rax
1984         leaq    448(%rsp),%rbx
1985         movq    0+0(%rsp),%r9
1986         movq    8+0(%rsp),%r10
1987         leaq    0+0(%rsp),%rsi
1988         movq    16+0(%rsp),%r11
1989         movq    24+0(%rsp),%r12
1990         leaq    352(%rsp),%rdi
1991         call    __ecp_nistz256_mul_montq        /* res_z = H * in1_z */
1992
1993         movq    0+0(%rsp),%rax
1994         movq    8+0(%rsp),%r14
1995         leaq    0+0(%rsp),%rsi
1996         movq    16+0(%rsp),%r15
1997         movq    24+0(%rsp),%r8
1998         leaq    32(%rsp),%rdi
1999         call    __ecp_nistz256_sqr_montq        /* Hsqr = H^2 */
2000
2001         movq    544(%rsp),%rax
2002         leaq    544(%rsp),%rbx
2003         movq    0+352(%rsp),%r9
2004         movq    8+352(%rsp),%r10
2005         leaq    0+352(%rsp),%rsi
2006         movq    16+352(%rsp),%r11
2007         movq    24+352(%rsp),%r12
2008         leaq    352(%rsp),%rdi
2009         call    __ecp_nistz256_mul_montq        /* res_z = res_z * in2_z */
2010
2011         movq    0(%rsp),%rax
2012         leaq    0(%rsp),%rbx
2013         movq    0+32(%rsp),%r9
2014         movq    8+32(%rsp),%r10
2015         leaq    0+32(%rsp),%rsi
2016         movq    16+32(%rsp),%r11
2017         movq    24+32(%rsp),%r12
2018         leaq    128(%rsp),%rdi
2019         call    __ecp_nistz256_mul_montq        /* Hcub = Hsqr * H */
2020
2021         movq    160(%rsp),%rax
2022         leaq    160(%rsp),%rbx
2023         movq    0+32(%rsp),%r9
2024         movq    8+32(%rsp),%r10
2025         leaq    0+32(%rsp),%rsi
2026         movq    16+32(%rsp),%r11
2027         movq    24+32(%rsp),%r12
2028         leaq    192(%rsp),%rdi
2029         call    __ecp_nistz256_mul_montq        /* U2 = Hsqr * U1 */
2030
2031
2032
/*
 * Inline copy of the __ecp_nistz256_mul_by_2q arithmetic without the store:
 * doubles the value in %r12,%r13,%r8,%r9 (presumably U2 as just returned
 * by mul_montq) while loading Rsqr into %rax,%rbp,%rcx,%r10 for the
 * subtraction that follows.
 */
2033
2034         xorq    %r11,%r11
2035         addq    %r12,%r12
2036         leaq    96(%rsp),%rsi
2037         adcq    %r13,%r13
2038         movq    %r12,%rax
2039         adcq    %r8,%r8
2040         adcq    %r9,%r9
2041         movq    %r13,%rbp
2042         adcq    $0,%r11
2043
2044         subq    $-1,%r12
2045         movq    %r8,%rcx
2046         sbbq    %r14,%r13
2047         sbbq    $0,%r8
2048         movq    %r9,%r10
2049         sbbq    %r15,%r9
2050         sbbq    $0,%r11
2051
2052         cmovcq  %rax,%r12
2053         movq    0(%rsi),%rax
2054         cmovcq  %rbp,%r13
2055         movq    8(%rsi),%rbp
2056         cmovcq  %rcx,%r8
2057         movq    16(%rsi),%rcx
2058         cmovcq  %r10,%r9
2059         movq    24(%rsi),%r10
2060
2061         call    __ecp_nistz256_subq             /* r12,r13,r8,r9 = Rsqr - 2*U2 (nothing stored) */
2062
2063         leaq    128(%rsp),%rbx
2064         leaq    288(%rsp),%rdi
2065         call    __ecp_nistz256_sub_fromq        /* res_x = Rsqr - 2*U2 - Hcub */
2066
2067         movq    192+0(%rsp),%rax
2068         movq    192+8(%rsp),%rbp
2069         movq    192+16(%rsp),%rcx
2070         movq    192+24(%rsp),%r10
2071         leaq    320(%rsp),%rdi
2072
2073         call    __ecp_nistz256_subq             /* r12,r13,r8,r9 = U2 - res_x */
2074
2075         movq    %r12,0(%rdi)            /* subq does not store: write res_y = U2 - res_x here */
2076         movq    %r13,8(%rdi)
2077         movq    %r8,16(%rdi)
2078         movq    %r9,24(%rdi)
2079         movq    128(%rsp),%rax
2080         leaq    128(%rsp),%rbx
2081         movq    0+224(%rsp),%r9
2082         movq    8+224(%rsp),%r10
2083         leaq    0+224(%rsp),%rsi
2084         movq    16+224(%rsp),%r11
2085         movq    24+224(%rsp),%r12
2086         leaq    256(%rsp),%rdi
2087         call    __ecp_nistz256_mul_montq        /* S2 = S1 * Hcub */
2088
2089         movq    320(%rsp),%rax
2090         leaq    320(%rsp),%rbx
2091         movq    0+64(%rsp),%r9
2092         movq    8+64(%rsp),%r10
2093         leaq    0+64(%rsp),%rsi
2094         movq    16+64(%rsp),%r11
2095         movq    24+64(%rsp),%r12
2096         leaq    320(%rsp),%rdi
2097         call    __ecp_nistz256_mul_montq        /* res_y = R * res_y */
2098
2099         leaq    256(%rsp),%rbx
2100         leaq    320(%rsp),%rdi
2101         call    __ecp_nistz256_sub_fromq        /* res_y = res_y - S2 */
2102
2103 .byte   102,72,15,126,199               /* movq %xmm0,%rdi : restore the result pointer */
2104
/*
 * Branch-free output selection, one coordinate at a time:
 *   if xmm5 (in1_z all zero) take the in2 coordinate, else the computed one;
 *   then if xmm4 (in2_z all zero) take the in1 coordinate instead.
 */
2105         movdqa  %xmm5,%xmm0
2106         movdqa  %xmm5,%xmm1
2107         pandn   352(%rsp),%xmm0         /* res_z where xmm5 is clear */
2108         movdqa  %xmm5,%xmm2
2109         pandn   352+16(%rsp),%xmm1
2110         movdqa  %xmm5,%xmm3
2111         pand    544(%rsp),%xmm2         /* in2_z where xmm5 is set */
2112         pand    544+16(%rsp),%xmm3
2113         por     %xmm0,%xmm2
2114         por     %xmm1,%xmm3
2115
2116         movdqa  %xmm4,%xmm0
2117         movdqa  %xmm4,%xmm1
2118         pandn   %xmm2,%xmm0
2119         movdqa  %xmm4,%xmm2
2120         pandn   %xmm3,%xmm1
2121         movdqa  %xmm4,%xmm3
2122         pand    448(%rsp),%xmm2         /* in1_z where xmm4 is set */
2123         pand    448+16(%rsp),%xmm3
2124         por     %xmm0,%xmm2
2125         por     %xmm1,%xmm3
2126         movdqu  %xmm2,64(%rdi)          /* result bytes 64..95 */
2127         movdqu  %xmm3,80(%rdi)
2128
2129         movdqa  %xmm5,%xmm0
2130         movdqa  %xmm5,%xmm1
2131         pandn   288(%rsp),%xmm0         /* res_x where xmm5 is clear */
2132         movdqa  %xmm5,%xmm2
2133         pandn   288+16(%rsp),%xmm1
2134         movdqa  %xmm5,%xmm3
2135         pand    480(%rsp),%xmm2         /* in2_x where xmm5 is set */
2136         pand    480+16(%rsp),%xmm3
2137         por     %xmm0,%xmm2
2138         por     %xmm1,%xmm3
2139
2140         movdqa  %xmm4,%xmm0
2141         movdqa  %xmm4,%xmm1
2142         pandn   %xmm2,%xmm0
2143         movdqa  %xmm4,%xmm2
2144         pandn   %xmm3,%xmm1
2145         movdqa  %xmm4,%xmm3
2146         pand    384(%rsp),%xmm2         /* in1_x where xmm4 is set */
2147         pand    384+16(%rsp),%xmm3
2148         por     %xmm0,%xmm2
2149         por     %xmm1,%xmm3
2150         movdqu  %xmm2,0(%rdi)           /* result bytes 0..31 */
2151         movdqu  %xmm3,16(%rdi)
2152
2153         movdqa  %xmm5,%xmm0
2154         movdqa  %xmm5,%xmm1
2155         pandn   320(%rsp),%xmm0         /* res_y where xmm5 is clear */
2156         movdqa  %xmm5,%xmm2
2157         pandn   320+16(%rsp),%xmm1
2158         movdqa  %xmm5,%xmm3
2159         pand    512(%rsp),%xmm2         /* in2_y where xmm5 is set */
2160         pand    512+16(%rsp),%xmm3
2161         por     %xmm0,%xmm2
2162         por     %xmm1,%xmm3
2163
2164         movdqa  %xmm4,%xmm0
2165         movdqa  %xmm4,%xmm1
2166         pandn   %xmm2,%xmm0
2167         movdqa  %xmm4,%xmm2
2168         pandn   %xmm3,%xmm1
2169         movdqa  %xmm4,%xmm3
2170         pand    416(%rsp),%xmm2         /* in1_y where xmm4 is set */
2171         pand    416+16(%rsp),%xmm3
2172         por     %xmm0,%xmm2
2173         por     %xmm1,%xmm3
2174         movdqu  %xmm2,32(%rdi)          /* result bytes 32..63 */
2175         movdqu  %xmm3,48(%rdi)
2176
2177 .Ladd_doneq:
2178         addq    $576+8,%rsp
2179         popq    %r15
2180         popq    %r14
2181         popq    %r13
2182         popq    %r12
2183         popq    %rbx
2184         popq    %rbp
2185         .byte   0xf3,0xc3               /* rep ret */
2186 .size   ecp_nistz256_point_add,.-ecp_nistz256_point_add
2187 .globl  ecp_nistz256_point_add_affine
2188 .type   ecp_nistz256_point_add_affine,@function
2189 .align  32
2190 ecp_nistz256_point_add_affine:
2191         movl    $0x80100,%ecx
2192         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
2193         cmpl    $0x80100,%ecx
2194         je      .Lpoint_add_affinex
2195         pushq   %rbp
2196         pushq   %rbx
2197         pushq   %r12
2198         pushq   %r13
2199         pushq   %r14
2200         pushq   %r15
2201         subq    $480+8,%rsp
2202
2203         movdqu  0(%rsi),%xmm0
2204         movq    %rdx,%rbx
2205         movdqu  16(%rsi),%xmm1
2206         movdqu  32(%rsi),%xmm2
2207         movdqu  48(%rsi),%xmm3
2208         movdqu  64(%rsi),%xmm4
2209         movdqu  80(%rsi),%xmm5
2210         movq    64+0(%rsi),%rax
2211         movq    64+8(%rsi),%r14
2212         movq    64+16(%rsi),%r15
2213         movq    64+24(%rsi),%r8
2214         movdqa  %xmm0,320(%rsp)
2215         movdqa  %xmm1,320+16(%rsp)
2216         movdqa  %xmm2,352(%rsp)
2217         movdqa  %xmm3,352+16(%rsp)
2218         movdqa  %xmm4,384(%rsp)
2219         movdqa  %xmm5,384+16(%rsp)
2220         por     %xmm4,%xmm5
2221
2222         movdqu  0(%rbx),%xmm0
2223         pshufd  $0xb1,%xmm5,%xmm3
2224         movdqu  16(%rbx),%xmm1
2225         movdqu  32(%rbx),%xmm2
2226         por     %xmm3,%xmm5
2227         movdqu  48(%rbx),%xmm3
2228         movdqa  %xmm0,416(%rsp)
2229         pshufd  $0x1e,%xmm5,%xmm4
2230         movdqa  %xmm1,416+16(%rsp)
2231         por     %xmm0,%xmm1
2232 .byte   102,72,15,110,199
2233         movdqa  %xmm2,448(%rsp)
2234         movdqa  %xmm3,448+16(%rsp)
2235         por     %xmm2,%xmm3
2236         por     %xmm4,%xmm5
2237         pxor    %xmm4,%xmm4
2238         por     %xmm1,%xmm3
2239
2240         leaq    64-0(%rsi),%rsi
2241         leaq    32(%rsp),%rdi
2242         call    __ecp_nistz256_sqr_montq
2243
2244         pcmpeqd %xmm4,%xmm5
2245         pshufd  $0xb1,%xmm3,%xmm4
2246         movq    0(%rbx),%rax
2247
2248         movq    %r12,%r9
2249         por     %xmm3,%xmm4
2250         pshufd  $0,%xmm5,%xmm5
2251         pshufd  $0x1e,%xmm4,%xmm3
2252         movq    %r13,%r10
2253         por     %xmm3,%xmm4
2254         pxor    %xmm3,%xmm3
2255         movq    %r14,%r11
2256         pcmpeqd %xmm3,%xmm4
2257         pshufd  $0,%xmm4,%xmm4
2258
2259         leaq    32-0(%rsp),%rsi
2260         movq    %r15,%r12
2261         leaq    0(%rsp),%rdi
2262         call    __ecp_nistz256_mul_montq
2263
2264         leaq    320(%rsp),%rbx
2265         leaq    64(%rsp),%rdi
2266         call    __ecp_nistz256_sub_fromq
2267
2268         movq    384(%rsp),%rax
2269         leaq    384(%rsp),%rbx
2270         movq    0+32(%rsp),%r9
2271         movq    8+32(%rsp),%r10
2272         leaq    0+32(%rsp),%rsi
2273         movq    16+32(%rsp),%r11
2274         movq    24+32(%rsp),%r12
2275         leaq    32(%rsp),%rdi
2276         call    __ecp_nistz256_mul_montq
2277
2278         movq    384(%rsp),%rax
2279         leaq    384(%rsp),%rbx
2280         movq    0+64(%rsp),%r9
2281         movq    8+64(%rsp),%r10
2282         leaq    0+64(%rsp),%rsi
2283         movq    16+64(%rsp),%r11
2284         movq    24+64(%rsp),%r12
2285         leaq    288(%rsp),%rdi
2286         call    __ecp_nistz256_mul_montq
2287
2288         movq    448(%rsp),%rax
2289         leaq    448(%rsp),%rbx
2290         movq    0+32(%rsp),%r9
2291         movq    8+32(%rsp),%r10
2292         leaq    0+32(%rsp),%rsi
2293         movq    16+32(%rsp),%r11
2294         movq    24+32(%rsp),%r12
2295         leaq    32(%rsp),%rdi
2296         call    __ecp_nistz256_mul_montq
2297
2298         leaq    352(%rsp),%rbx
2299         leaq    96(%rsp),%rdi
2300         call    __ecp_nistz256_sub_fromq
2301
2302         movq    0+64(%rsp),%rax
2303         movq    8+64(%rsp),%r14
2304         leaq    0+64(%rsp),%rsi
2305         movq    16+64(%rsp),%r15
2306         movq    24+64(%rsp),%r8
2307         leaq    128(%rsp),%rdi
2308         call    __ecp_nistz256_sqr_montq
2309
2310         movq    0+96(%rsp),%rax
2311         movq    8+96(%rsp),%r14
2312         leaq    0+96(%rsp),%rsi
2313         movq    16+96(%rsp),%r15
2314         movq    24+96(%rsp),%r8
2315         leaq    192(%rsp),%rdi
2316         call    __ecp_nistz256_sqr_montq
2317
2318         movq    128(%rsp),%rax
2319         leaq    128(%rsp),%rbx
2320         movq    0+64(%rsp),%r9
2321         movq    8+64(%rsp),%r10
2322         leaq    0+64(%rsp),%rsi
2323         movq    16+64(%rsp),%r11
2324         movq    24+64(%rsp),%r12
2325         leaq    160(%rsp),%rdi
2326         call    __ecp_nistz256_mul_montq
2327
2328         movq    320(%rsp),%rax
2329         leaq    320(%rsp),%rbx
2330         movq    0+128(%rsp),%r9
2331         movq    8+128(%rsp),%r10
2332         leaq    0+128(%rsp),%rsi
2333         movq    16+128(%rsp),%r11
2334         movq    24+128(%rsp),%r12
2335         leaq    0(%rsp),%rdi
2336         call    __ecp_nistz256_mul_montq
2337
2338
2339
2340
2341         xorq    %r11,%r11
2342         addq    %r12,%r12
2343         leaq    192(%rsp),%rsi
2344         adcq    %r13,%r13
2345         movq    %r12,%rax
2346         adcq    %r8,%r8
2347         adcq    %r9,%r9
2348         movq    %r13,%rbp
2349         adcq    $0,%r11
2350
2351         subq    $-1,%r12
2352         movq    %r8,%rcx
2353         sbbq    %r14,%r13
2354         sbbq    $0,%r8
2355         movq    %r9,%r10
2356         sbbq    %r15,%r9
2357         sbbq    $0,%r11
2358
2359         cmovcq  %rax,%r12
2360         movq    0(%rsi),%rax
2361         cmovcq  %rbp,%r13
2362         movq    8(%rsi),%rbp
2363         cmovcq  %rcx,%r8
2364         movq    16(%rsi),%rcx
2365         cmovcq  %r10,%r9
2366         movq    24(%rsi),%r10
2367
2368         call    __ecp_nistz256_subq
2369
2370         leaq    160(%rsp),%rbx
2371         leaq    224(%rsp),%rdi
2372         call    __ecp_nistz256_sub_fromq
2373
2374         movq    0+0(%rsp),%rax
2375         movq    0+8(%rsp),%rbp
2376         movq    0+16(%rsp),%rcx
2377         movq    0+24(%rsp),%r10
2378         leaq    64(%rsp),%rdi
2379
2380         call    __ecp_nistz256_subq
2381
2382         movq    %r12,0(%rdi)
2383         movq    %r13,8(%rdi)
2384         movq    %r8,16(%rdi)
2385         movq    %r9,24(%rdi)
2386         movq    352(%rsp),%rax
2387         leaq    352(%rsp),%rbx
2388         movq    0+160(%rsp),%r9
2389         movq    8+160(%rsp),%r10
2390         leaq    0+160(%rsp),%rsi
2391         movq    16+160(%rsp),%r11
2392         movq    24+160(%rsp),%r12
2393         leaq    32(%rsp),%rdi
2394         call    __ecp_nistz256_mul_montq
2395
2396         movq    96(%rsp),%rax
2397         leaq    96(%rsp),%rbx
2398         movq    0+64(%rsp),%r9
2399         movq    8+64(%rsp),%r10
2400         leaq    0+64(%rsp),%rsi
2401         movq    16+64(%rsp),%r11
2402         movq    24+64(%rsp),%r12
2403         leaq    64(%rsp),%rdi
2404         call    __ecp_nistz256_mul_montq
2405
2406         leaq    32(%rsp),%rbx
2407         leaq    256(%rsp),%rdi
2408         call    __ecp_nistz256_sub_fromq
2409
2410 .byte   102,72,15,126,199
2411
2412         movdqa  %xmm5,%xmm0
2413         movdqa  %xmm5,%xmm1
2414         pandn   288(%rsp),%xmm0
2415         movdqa  %xmm5,%xmm2
2416         pandn   288+16(%rsp),%xmm1
2417         movdqa  %xmm5,%xmm3
2418         pand    .LONE_mont(%rip),%xmm2
2419         pand    .LONE_mont+16(%rip),%xmm3
2420         por     %xmm0,%xmm2
2421         por     %xmm1,%xmm3
2422
2423         movdqa  %xmm4,%xmm0
2424         movdqa  %xmm4,%xmm1
2425         pandn   %xmm2,%xmm0
2426         movdqa  %xmm4,%xmm2
2427         pandn   %xmm3,%xmm1
2428         movdqa  %xmm4,%xmm3
2429         pand    384(%rsp),%xmm2
2430         pand    384+16(%rsp),%xmm3
2431         por     %xmm0,%xmm2
2432         por     %xmm1,%xmm3
2433         movdqu  %xmm2,64(%rdi)
2434         movdqu  %xmm3,80(%rdi)
2435
2436         movdqa  %xmm5,%xmm0
2437         movdqa  %xmm5,%xmm1
2438         pandn   224(%rsp),%xmm0
2439         movdqa  %xmm5,%xmm2
2440         pandn   224+16(%rsp),%xmm1
2441         movdqa  %xmm5,%xmm3
2442         pand    416(%rsp),%xmm2
2443         pand    416+16(%rsp),%xmm3
2444         por     %xmm0,%xmm2
2445         por     %xmm1,%xmm3
2446
2447         movdqa  %xmm4,%xmm0
2448         movdqa  %xmm4,%xmm1
2449         pandn   %xmm2,%xmm0
2450         movdqa  %xmm4,%xmm2
2451         pandn   %xmm3,%xmm1
2452         movdqa  %xmm4,%xmm3
2453         pand    320(%rsp),%xmm2
2454         pand    320+16(%rsp),%xmm3
2455         por     %xmm0,%xmm2
2456         por     %xmm1,%xmm3
2457         movdqu  %xmm2,0(%rdi)
2458         movdqu  %xmm3,16(%rdi)
2459
2460         movdqa  %xmm5,%xmm0
2461         movdqa  %xmm5,%xmm1
2462         pandn   256(%rsp),%xmm0
2463         movdqa  %xmm5,%xmm2
2464         pandn   256+16(%rsp),%xmm1
2465         movdqa  %xmm5,%xmm3
2466         pand    448(%rsp),%xmm2
2467         pand    448+16(%rsp),%xmm3
2468         por     %xmm0,%xmm2
2469         por     %xmm1,%xmm3
2470
2471         movdqa  %xmm4,%xmm0
2472         movdqa  %xmm4,%xmm1
2473         pandn   %xmm2,%xmm0
2474         movdqa  %xmm4,%xmm2
2475         pandn   %xmm3,%xmm1
2476         movdqa  %xmm4,%xmm3
2477         pand    352(%rsp),%xmm2
2478         pand    352+16(%rsp),%xmm3
2479         por     %xmm0,%xmm2
2480         por     %xmm1,%xmm3
2481         movdqu  %xmm2,32(%rdi)
2482         movdqu  %xmm3,48(%rdi)
2483
2484         addq    $480+8,%rsp
2485         popq    %r15
2486         popq    %r14
2487         popq    %r13
2488         popq    %r12
2489         popq    %rbx
2490         popq    %rbp
2491         .byte   0xf3,0xc3
2492 .size   ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
/*
 * __ecp_nistz256_add_tox: 256-bit addition followed by one conditional
 * subtraction of the modulus.
 *
 * In:   r12,r13,r8,r9 = first operand, r12 holding the lowest qword
 *       rbx           = address of the 4-qword second operand
 *       r14,r15       = modulus qwords 1 and 3 (ecp_nistz256_point_doublex
 *                       loads them from .Lpoly+8 and .Lpoly+24); qwords 0
 *                       and 2 are hard-coded below as -1 and 0
 *       rdi           = address of the 4-qword result
 * Out:  r12,r13,r8,r9 = result, also stored at 0..24(%rdi)
 * Uses: rax, rbp, rcx, r10, r11 and the flags as scratch
 */
2493 .type   __ecp_nistz256_add_tox,@function
2494 .align  32
2495 __ecp_nistz256_add_tox:
             /* xor clears CF so the adc chain starts with no carry-in; r11 receives the carry out of the top qword. */
2496         xorq    %r11,%r11
2497         adcq    0(%rbx),%r12
2498         adcq    8(%rbx),%r13
             /* The interleaved movq copies keep the unreduced sum in rax,rbp,rcx,r10; movq leaves the flags alone. */
2499         movq    %r12,%rax
2500         adcq    16(%rbx),%r8
2501         adcq    24(%rbx),%r9
2502         movq    %r13,%rbp
2503         adcq    $0,%r11
2504
             /* Trial subtraction of (-1, r14, 0, r15) from the 5-qword sum r11:r9:r8:r13:r12; xor again clears CF first. */
2505         xorq    %r10,%r10
2506         sbbq    $-1,%r12
2507         movq    %r8,%rcx
2508         sbbq    %r14,%r13
2509         sbbq    $0,%r8
2510         movq    %r9,%r10
2511         sbbq    %r15,%r9
2512         sbbq    $0,%r11
2513
             /* CF set means the trial subtraction borrowed, so the saved unreduced sum is restored. */
2514         cmovcq  %rax,%r12
2515         cmovcq  %rbp,%r13
2516         movq    %r12,0(%rdi)
2517         cmovcq  %rcx,%r8
2518         movq    %r13,8(%rdi)
2519         cmovcq  %r10,%r9
2520         movq    %r8,16(%rdi)
2521         movq    %r9,24(%rdi)
2522
             /* 0xf3,0xc3 encodes "rep ret". */
2523         .byte   0xf3,0xc3
2524 .size   __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox
2525
/*
 * __ecp_nistz256_sub_fromx: 256-bit subtraction of a memory operand from
 * the register operand, followed by a conditional add-back of the modulus.
 *
 * In:   r12,r13,r8,r9 = minuend, r12 holding the lowest qword
 *       rbx           = address of the 4-qword subtrahend
 *       r14,r15       = modulus qwords 1 and 3; qwords 0 and 2 are
 *                       hard-coded below as -1 and 0
 *       rdi           = address of the 4-qword result
 * Out:  r12,r13,r8,r9 = result, also stored at 0..24(%rdi)
 * Uses: rax, rbp, rcx, r10, r11 and the flags as scratch
 */
2526 .type   __ecp_nistz256_sub_fromx,@function
2527 .align  32
2528 __ecp_nistz256_sub_fromx:
             /* xor clears CF so the sbb chain starts with no borrow-in; the final sbb leaves r11 = 0 or -1. */
2529         xorq    %r11,%r11
2530         sbbq    0(%rbx),%r12
2531         sbbq    8(%rbx),%r13
             /* rax,rbp,rcx,r10 keep copies of the raw difference. */
2532         movq    %r12,%rax
2533         sbbq    16(%rbx),%r8
2534         sbbq    24(%rbx),%r9
2535         movq    %r13,%rbp
2536         sbbq    $0,%r11
2537
             /* Unconditionally add (-1, r14, 0, r15) to the difference; the choice between the two values happens below. */
2538         xorq    %r10,%r10
2539         adcq    $-1,%r12
2540         movq    %r8,%rcx
2541         adcq    %r14,%r13
2542         adcq    $0,%r8
2543         movq    %r9,%r10
2544         adcq    %r15,%r9
2545
             /* bt copies bit 0 of r11 into CF: CF clear means no borrow occurred, so the raw difference is restored. */
2546         btq     $0,%r11
2547         cmovncq %rax,%r12
2548         cmovncq %rbp,%r13
2549         movq    %r12,0(%rdi)
2550         cmovncq %rcx,%r8
2551         movq    %r13,8(%rdi)
2552         cmovncq %r10,%r9
2553         movq    %r8,16(%rdi)
2554         movq    %r9,24(%rdi)
2555
             /* 0xf3,0xc3 encodes "rep ret". */
2556         .byte   0xf3,0xc3
2557 .size   __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx
2558
/*
 * __ecp_nistz256_subx: register-only 256-bit subtraction with a
 * conditional add-back of the modulus. Nothing is written to memory and
 * rdi is not touched.
 *
 * In:   rax,rbp,rcx,r10 = minuend, rax holding the lowest qword
 *       r12,r13,r8,r9   = subtrahend, r12 holding the lowest qword
 *       r14,r15         = modulus qwords 1 and 3; qwords 0 and 2 are
 *                         hard-coded below as -1 and 0
 * Out:  r12,r13,r8,r9   = result
 * Uses: rax, rbp, rcx, r10, r11 and the flags are overwritten
 */
2559 .type   __ecp_nistz256_subx,@function
2560 .align  32
2561 __ecp_nistz256_subx:
             /* xor clears CF so the sbb chain starts with no borrow-in; the final sbb leaves r11 = 0 or -1. */
2562         xorq    %r11,%r11
2563         sbbq    %r12,%rax
2564         sbbq    %r13,%rbp
             /* The raw difference is moved into the output registers as soon as each subtrahend qword has been consumed. */
2565         movq    %rax,%r12
2566         sbbq    %r8,%rcx
2567         sbbq    %r9,%r10
2568         movq    %rbp,%r13
2569         sbbq    $0,%r11
2570
             /* Add (-1, r14, 0, r15) to the difference held in rax,rbp,rcx,r10. */
2571         xorq    %r9,%r9
2572         adcq    $-1,%rax
2573         movq    %rcx,%r8
2574         adcq    %r14,%rbp
2575         adcq    $0,%rcx
2576         movq    %r10,%r9
2577         adcq    %r15,%r10
2578
             /* bt copies bit 0 of r11 into CF: CF set means the subtraction borrowed, so the added-back value is selected. */
2579         btq     $0,%r11
2580         cmovcq  %rax,%r12
2581         cmovcq  %rbp,%r13
2582         cmovcq  %rcx,%r8
2583         cmovcq  %r10,%r9
2584
             /* 0xf3,0xc3 encodes "rep ret". */
2585         .byte   0xf3,0xc3
2586 .size   __ecp_nistz256_subx,.-__ecp_nistz256_subx
2587
/*
 * __ecp_nistz256_mul_by_2x: doubles a 256-bit value and performs one
 * conditional subtraction of the modulus.
 *
 * In:   r12,r13,r8,r9 = operand, r12 holding the lowest qword
 *       r14,r15       = modulus qwords 1 and 3; qwords 0 and 2 are
 *                       hard-coded below as -1 and 0
 *       rdi           = address of the 4-qword result
 * Out:  r12,r13,r8,r9 = result, also stored at 0..24(%rdi)
 * Uses: rax, rbp, rcx, r10, r11 and the flags as scratch
 */
2588 .type   __ecp_nistz256_mul_by_2x,@function
2589 .align  32
2590 __ecp_nistz256_mul_by_2x:
             /* xor clears CF; each register is then added to itself with carry, and r11 receives the bit shifted out of the top. */
2591         xorq    %r11,%r11
2592         adcq    %r12,%r12
2593         adcq    %r13,%r13
             /* rax,rbp,rcx,r10 keep copies of the unreduced doubled value. */
2594         movq    %r12,%rax
2595         adcq    %r8,%r8
2596         adcq    %r9,%r9
2597         movq    %r13,%rbp
2598         adcq    $0,%r11
2599
             /* Trial subtraction of (-1, r14, 0, r15) from the 5-qword value r11:r9:r8:r13:r12. */
2600         xorq    %r10,%r10
2601         sbbq    $-1,%r12
2602         movq    %r8,%rcx
2603         sbbq    %r14,%r13
2604         sbbq    $0,%r8
2605         movq    %r9,%r10
2606         sbbq    %r15,%r9
2607         sbbq    $0,%r11
2608
             /* CF set means the trial subtraction borrowed, so the saved unreduced value is restored. */
2609         cmovcq  %rax,%r12
2610         cmovcq  %rbp,%r13
2611         movq    %r12,0(%rdi)
2612         cmovcq  %rcx,%r8
2613         movq    %r13,8(%rdi)
2614         cmovcq  %r10,%r9
2615         movq    %r8,16(%rdi)
2616         movq    %r9,24(%rdi)
2617
             /* 0xf3,0xc3 encodes "rep ret". */
2618         .byte   0xf3,0xc3
2619 .size   __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
/*
 * ecp_nistz256_point_doublex
 *
 * In:   rdi = address of the 96-byte result
 *       rsi = address of the 96-byte input
 *
 * The input is read as three 32-byte field elements at offsets 0, 32 and
 * 64, and the result is written as three 32-byte field elements at the
 * same offsets (presumably the X, Y and Z coordinates of a Jacobian
 * point; confirm against the C prototype, which is not in this file).
 *
 * rbp, rbx and r12-r15 are saved and restored. The frame is 160+8 bytes;
 * the +8 makes rsp 16-byte aligned for the movdqa stores, assuming rsp
 * is 8 modulo 16 on entry as the SysV ABI specifies.
 *
 * .Lpoint_double_shortcutx is also reached by a jmp from .Ladd_doublex in
 * ecp_nistz256_point_addx, which first shrinks its own frame to this
 * size; on that path the prologue below is not executed.
 *
 * The raw .byte sequences are movq transfers between general-purpose and
 * xmm registers and are decoded where they appear. xmm0, xmm1 and xmm2
 * hold the result pointer, result+32 and result+64 across the helper
 * calls.
 *
 * NOTE(review): __ecp_nistz256_mul_montx and __ecp_nistz256_sqr_montx are
 * defined outside the part of the file reviewed here. Statements about
 * what they return are inferred from how this routine uses the registers
 * afterwards and should be confirmed against the helpers.
 */
2620 .type   ecp_nistz256_point_doublex,@function
2621 .align  32
2622 ecp_nistz256_point_doublex:
2623 .Lpoint_doublex:
2624         pushq   %rbp
2625         pushq   %rbx
2626         pushq   %r12
2627         pushq   %r13
2628         pushq   %r14
2629         pushq   %r15
2630         subq    $160+8,%rsp
2631
2632 .Lpoint_double_shortcutx:
             /* Copy in[0..31] to 96(%rsp), load in[32..63] into r12,r13,r8,r9, keep the input pointer in rbx. */
2633         movdqu  0(%rsi),%xmm0
2634         movq    %rsi,%rbx
2635         movdqu  16(%rsi),%xmm1
2636         movq    32+0(%rsi),%r12
2637         movq    32+8(%rsi),%r13
2638         movq    32+16(%rsi),%r8
2639         movq    32+24(%rsi),%r9
             /* r14,r15 = modulus qwords 1 and 3, as consumed by the add/sub/mul_by_2 helpers. */
2640         movq    .Lpoly+8(%rip),%r14
2641         movq    .Lpoly+24(%rip),%r15
2642         movdqa  %xmm0,96(%rsp)
2643         movdqa  %xmm1,96+16(%rsp)
2644         leaq    32(%rdi),%r10
2645         leaq    64(%rdi),%r11
             /* movq %rdi,%xmm0 ; movq %r10,%xmm1 ; movq %r11,%xmm2 : stash result, result+32, result+64. */
2646 .byte   102,72,15,110,199
2647 .byte   102,73,15,110,202
2648 .byte   102,73,15,110,211
2649
             /* 0(%rsp) = 2 * in[32..63] */
2650         leaq    0(%rsp),%rdi
2651         call    __ecp_nistz256_mul_by_2x
2652
             /* Square in[64..95] with destination 64(%rsp). rsi is passed as the operand address minus 128, as at every *_montx call here. */
2653         movq    64+0(%rsi),%rdx
2654         movq    64+8(%rsi),%r14
2655         movq    64+16(%rsi),%r15
2656         movq    64+24(%rsi),%r8
2657         leaq    64-128(%rsi),%rsi
2658         leaq    64(%rsp),%rdi
2659         call    __ecp_nistz256_sqr_montx
2660
             /* Square 0(%rsp) in place. */
2661         movq    0+0(%rsp),%rdx
2662         movq    8+0(%rsp),%r14
2663         leaq    -128+0(%rsp),%rsi
2664         movq    16+0(%rsp),%r15
2665         movq    24+0(%rsp),%r8
2666         leaq    0(%rsp),%rdi
2667         call    __ecp_nistz256_sqr_montx
2668
             /* Multiply in[64..95] (qwords in r9..r12, rsi) by in[32..63] (rdx, rbx), then double; mul_by_2x stores at (%rdi). */
2669         movq    32(%rbx),%rdx
2670         movq    64+0(%rbx),%r9
2671         movq    64+8(%rbx),%r10
2672         movq    64+16(%rbx),%r11
2673         movq    64+24(%rbx),%r12
2674         leaq    64-128(%rbx),%rsi
2675         leaq    32(%rbx),%rbx
             /* movq %xmm2,%rdi : destination is result+64. */
2676 .byte   102,72,15,126,215
2677         call    __ecp_nistz256_mul_montx
             /* NOTE(review): this back-to-back call assumes mul_montx leaves its product in r12,r13,r8,r9 with r14,r15 and rdi still valid. */
2678         call    __ecp_nistz256_mul_by_2x
2679
             /* 32(%rsp) = 96(%rsp) + 64(%rsp) */
2680         movq    96+0(%rsp),%r12
2681         movq    96+8(%rsp),%r13
2682         leaq    64(%rsp),%rbx
2683         movq    96+16(%rsp),%r8
2684         movq    96+24(%rsp),%r9
2685         leaq    32(%rsp),%rdi
2686         call    __ecp_nistz256_add_tox
2687
             /* 64(%rsp) = 96(%rsp) - 64(%rsp) */
2688         movq    96+0(%rsp),%r12
2689         movq    96+8(%rsp),%r13
2690         leaq    64(%rsp),%rbx
2691         movq    96+16(%rsp),%r8
2692         movq    96+24(%rsp),%r9
2693         leaq    64(%rsp),%rdi
2694         call    __ecp_nistz256_sub_fromx
2695
             /* Square 0(%rsp) with destination result+32. */
2696         movq    0+0(%rsp),%rdx
2697         movq    8+0(%rsp),%r14
2698         leaq    -128+0(%rsp),%rsi
2699         movq    16+0(%rsp),%r15
2700         movq    24+0(%rsp),%r8
             /* movq %xmm1,%rdi */
2701 .byte   102,72,15,126,207
2702         call    __ecp_nistz256_sqr_montx
             /* Halve the value in r12,r13,r14,r15: build value + (-1, rsi, 0, rbp) with the carry in r9, keeping the original in rax,r10,rcx,r8. */
             /* NOTE(review): presumably sqr_montx returns its result in r12..r15 and the modulus qwords 1 and 3 in rsi and rbp; confirm. */
2703         xorq    %r9,%r9
2704         movq    %r12,%rax
2705         addq    $-1,%r12
2706         movq    %r13,%r10
2707         adcq    %rsi,%r13
2708         movq    %r14,%rcx
2709         adcq    $0,%r14
2710         movq    %r15,%r8
2711         adcq    %rbp,%r15
2712         adcq    $0,%r9
2713         xorq    %rsi,%rsi
             /* If the original value is even (ZF set), take it unchanged and clear the carry word; otherwise keep the sum, which has bit 0 clear. */
2714         testq   $1,%rax
2715
2716         cmovzq  %rax,%r12
2717         cmovzq  %r10,%r13
2718         cmovzq  %rcx,%r14
2719         cmovzq  %r8,%r15
2720         cmovzq  %rsi,%r9
2721
             /* Shift the 5-qword value r9:r15:r14:r13:r12 right by one bit and store the low four qwords at (%rdi). */
2722         movq    %r13,%rax
2723         shrq    $1,%r12
2724         shlq    $63,%rax
2725         movq    %r14,%r10
2726         shrq    $1,%r13
2727         orq     %rax,%r12
2728         shlq    $63,%r10
2729         movq    %r15,%rcx
2730         shrq    $1,%r14
2731         orq     %r10,%r13
2732         shlq    $63,%rcx
2733         movq    %r12,0(%rdi)
2734         shrq    $1,%r15
2735         movq    %r13,8(%rdi)
2736         shlq    $63,%r9
2737         orq     %rcx,%r14
2738         orq     %r9,%r15
2739         movq    %r14,16(%rdi)
2740         movq    %r15,24(%rdi)
             /* 32(%rsp) = 32(%rsp) * 64(%rsp) */
2741         movq    64(%rsp),%rdx
2742         leaq    64(%rsp),%rbx
2743         movq    0+32(%rsp),%r9
2744         movq    8+32(%rsp),%r10
2745         leaq    -128+32(%rsp),%rsi
2746         movq    16+32(%rsp),%r11
2747         movq    24+32(%rsp),%r12
2748         leaq    32(%rsp),%rdi
2749         call    __ecp_nistz256_mul_montx
2750
             /* 128(%rsp) = 2 * (value left in r12,r13,r8,r9) */
2751         leaq    128(%rsp),%rdi
2752         call    __ecp_nistz256_mul_by_2x
2753
             /* 32(%rsp) = doubled value (still in registers) + 32(%rsp), i.e. three times the product. */
2754         leaq    32(%rsp),%rbx
2755         leaq    32(%rsp),%rdi
2756         call    __ecp_nistz256_add_tox
2757
             /* 0(%rsp) = 0(%rsp) * 96(%rsp) */
2758         movq    96(%rsp),%rdx
2759         leaq    96(%rsp),%rbx
2760         movq    0+0(%rsp),%r9
2761         movq    8+0(%rsp),%r10
2762         leaq    -128+0(%rsp),%rsi
2763         movq    16+0(%rsp),%r11
2764         movq    24+0(%rsp),%r12
2765         leaq    0(%rsp),%rdi
2766         call    __ecp_nistz256_mul_montx
2767
             /* 128(%rsp) = 2 * (value left in r12,r13,r8,r9) */
2768         leaq    128(%rsp),%rdi
2769         call    __ecp_nistz256_mul_by_2x
2770
             /* Square 32(%rsp) with destination result+0. */
2771         movq    0+32(%rsp),%rdx
2772         movq    8+32(%rsp),%r14
2773         leaq    -128+32(%rsp),%rsi
2774         movq    16+32(%rsp),%r15
2775         movq    24+32(%rsp),%r8
             /* movq %xmm0,%rdi */
2776 .byte   102,72,15,126,199
2777         call    __ecp_nistz256_sqr_montx
2778
             /* Move the square's upper qwords into r8,r9 and put rsi,rbp back into r14,r15 so the value matches the add/sub helper layout. */
2779         leaq    128(%rsp),%rbx
2780         movq    %r14,%r8
2781         movq    %r15,%r9
2782         movq    %rsi,%r14
2783         movq    %rbp,%r15
             /* (%rdi) = square - 128(%rsp) */
2784         call    __ecp_nistz256_sub_fromx
2785
             /* r12,r13,r8,r9 = 0(%rsp) - (value just computed); subx writes registers only. */
2786         movq    0+0(%rsp),%rax
2787         movq    0+8(%rsp),%rbp
2788         movq    0+16(%rsp),%rcx
2789         movq    0+24(%rsp),%r10
2790         leaq    0(%rsp),%rdi
2791         call    __ecp_nistz256_subx
2792
             /* Store the difference at 0(%rsp) and at the same time move it into r9,r10,r11,r12 for the multiply. */
2793         movq    32(%rsp),%rdx
2794         leaq    32(%rsp),%rbx
2795         movq    %r12,%r14
             /* xorl sets ZF, so both cmovz instructions below always perform the move. */
2796         xorl    %ecx,%ecx
2797         movq    %r12,0+0(%rsp)
2798         movq    %r13,%r10
2799         movq    %r13,0+8(%rsp)
2800         cmovzq  %r8,%r11
2801         movq    %r8,0+16(%rsp)
2802         leaq    0-128(%rsp),%rsi
2803         cmovzq  %r9,%r12
2804         movq    %r9,0+24(%rsp)
2805         movq    %r14,%r9
             /* 0(%rsp) = 0(%rsp) * 32(%rsp) */
2806         leaq    0(%rsp),%rdi
2807         call    __ecp_nistz256_mul_montx
2808
             /* movq %xmm1,%rbx ; movq %xmm1,%rdi : result+32 = product - (value already stored at result+32). */
2809 .byte   102,72,15,126,203
2810 .byte   102,72,15,126,207
2811         call    __ecp_nistz256_sub_fromx
2812
2813         addq    $160+8,%rsp
2814         popq    %r15
2815         popq    %r14
2816         popq    %r13
2817         popq    %r12
2818         popq    %rbx
2819         popq    %rbp
             /* 0xf3,0xc3 encodes "rep ret". */
2820         .byte   0xf3,0xc3
2821 .size   ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
/*
 * ecp_nistz256_point_addx
 *
 * In:   rdi = address of the 96-byte result
 *       rsi = address of the first 96-byte input  ("in1" below)
 *       rdx = address of the second 96-byte input ("in2" below)
 *
 * Each input is read as three 32-byte field elements at offsets 0, 32
 * and 64, and the result is written in the same layout (presumably the
 * X, Y and Z coordinates of Jacobian points; confirm against the C
 * prototype, which is not in this file).
 *
 * rbp, rbx and r12-r15 are saved and restored. The frame is 576+8 bytes.
 * Frame slots written on entry:
 *   384(%rsp) in1[0..31]    416(%rsp) in1[32..63]   448(%rsp) in1[64..95]
 *   480(%rsp) in2[0..31]    512(%rsp) in2[32..63]   544(%rsp) in2[64..95]
 *
 * Values held in xmm registers across the helper calls:
 *   xmm0 = result pointer, xmm1 = in1 pointer,
 *   xmm5 = all-ones if in1[64..95] is zero, else zero,
 *   xmm4 = all-ones if in2[64..95] is zero, else zero.
 * The final stores select in2 when xmm5 is set and in1 when xmm4 is set
 * (xmm4 takes precedence), otherwise the computed value.
 *
 * When both differences computed before .Ladd_proceedx are zero and
 * neither mask is set, control transfers to .Lpoint_double_shortcutx in
 * ecp_nistz256_point_doublex. When only the second difference is zero,
 * the result is written as 96 zero bytes.
 *
 * The raw .byte sequences are movq transfers between general-purpose and
 * xmm registers (plus one 0x3e prefix) and are decoded where they appear.
 *
 * NOTE(review): __ecp_nistz256_mul_montx and __ecp_nistz256_sqr_montx are
 * defined outside the part of the file reviewed here. Where a helper
 * call directly follows one of them without reloading r12,r13,r8,r9, the
 * code relies on the product being left in those registers; confirm
 * against the helpers.
 */
2822 .type   ecp_nistz256_point_addx,@function
2823 .align  32
2824 ecp_nistz256_point_addx:
2825 .Lpoint_addx:
2826         pushq   %rbp
2827         pushq   %rbx
2828         pushq   %r12
2829         pushq   %r13
2830         pushq   %r14
2831         pushq   %r15
2832         subq    $576+8,%rsp
2833
             /* Copy all 96 bytes of in1 to 384..479(%rsp); rbx keeps the in1 pointer and rsi becomes the in2 pointer. */
2834         movdqu  0(%rsi),%xmm0
2835         movdqu  16(%rsi),%xmm1
2836         movdqu  32(%rsi),%xmm2
2837         movdqu  48(%rsi),%xmm3
2838         movdqu  64(%rsi),%xmm4
2839         movdqu  80(%rsi),%xmm5
2840         movq    %rsi,%rbx
2841         movq    %rdx,%rsi
2842         movdqa  %xmm0,384(%rsp)
2843         movdqa  %xmm1,384+16(%rsp)
2844         movdqa  %xmm2,416(%rsp)
2845         movdqa  %xmm3,416+16(%rsp)
2846         movdqa  %xmm4,448(%rsp)
2847         movdqa  %xmm5,448+16(%rsp)
             /* Start folding in1[64..95] into one dword of xmm5 by OR; the fold is interleaved with the in2 loads. */
2848         por     %xmm4,%xmm5
2849
2850         movdqu  0(%rsi),%xmm0
2851         pshufd  $0xb1,%xmm5,%xmm3
2852         movdqu  16(%rsi),%xmm1
2853         movdqu  32(%rsi),%xmm2
2854         por     %xmm3,%xmm5
2855         movdqu  48(%rsi),%xmm3
             /* rdx,r14,r15,r8 = in2[64..95], the operand of the first squaring. */
2856         movq    64+0(%rsi),%rdx
2857         movq    64+8(%rsi),%r14
2858         movq    64+16(%rsi),%r15
2859         movq    64+24(%rsi),%r8
2860         movdqa  %xmm0,480(%rsp)
2861         pshufd  $0x1e,%xmm5,%xmm4
2862         movdqa  %xmm1,480+16(%rsp)
2863         movdqu  64(%rsi),%xmm0
2864         movdqu  80(%rsi),%xmm1
2865         movdqa  %xmm2,512(%rsp)
2866         movdqa  %xmm3,512+16(%rsp)
             /* After this por, dword 0 of xmm5 is the OR of all eight dwords of in1[64..95]. */
2867         por     %xmm4,%xmm5
2868         pxor    %xmm4,%xmm4
             /* xmm1 = OR of the two halves of in2[64..95]; folded further after the call. */
2869         por     %xmm0,%xmm1
             /* movq %rdi,%xmm0 : stash the result pointer. */
2870 .byte   102,72,15,110,199
2871
             /* Save in2[64..95] at 544(%rsp) and square it with destination 96(%rsp). rsi is the operand address minus 128. */
2872         leaq    64-128(%rsi),%rsi
2873         movq    %rdx,544+0(%rsp)
2874         movq    %r14,544+8(%rsp)
2875         movq    %r15,544+16(%rsp)
2876         movq    %r8,544+24(%rsp)
2877         leaq    96(%rsp),%rdi
2878         call    __ecp_nistz256_sqr_montx
2879
             /* xmm5 = broadcast of (in1[64..95] == 0); xmm4 = broadcast of (in2[64..95] == 0). */
2880         pcmpeqd %xmm4,%xmm5
2881         pshufd  $0xb1,%xmm1,%xmm4
2882         por     %xmm1,%xmm4
2883         pshufd  $0,%xmm5,%xmm5
2884         pshufd  $0x1e,%xmm4,%xmm3
2885         por     %xmm3,%xmm4
2886         pxor    %xmm3,%xmm3
2887         pcmpeqd %xmm3,%xmm4
2888         pshufd  $0,%xmm4,%xmm4
             /* Square in1[64..95] with destination 32(%rsp). */
2889         movq    64+0(%rbx),%rdx
2890         movq    64+8(%rbx),%r14
2891         movq    64+16(%rbx),%r15
2892         movq    64+24(%rbx),%r8
             /* movq %rbx,%xmm1 : stash the in1 pointer for the .Ladd_doublex path. */
2893 .byte   102,72,15,110,203
2894
2895         leaq    64-128(%rbx),%rsi
2896         leaq    32(%rsp),%rdi
2897         call    __ecp_nistz256_sqr_montx
2898
             /* 224(%rsp) = 96(%rsp) * 544(%rsp) */
2899         movq    544(%rsp),%rdx
2900         leaq    544(%rsp),%rbx
2901         movq    0+96(%rsp),%r9
2902         movq    8+96(%rsp),%r10
2903         leaq    -128+96(%rsp),%rsi
2904         movq    16+96(%rsp),%r11
2905         movq    24+96(%rsp),%r12
2906         leaq    224(%rsp),%rdi
2907         call    __ecp_nistz256_mul_montx
2908
             /* 256(%rsp) = 32(%rsp) * 448(%rsp) */
2909         movq    448(%rsp),%rdx
2910         leaq    448(%rsp),%rbx
2911         movq    0+32(%rsp),%r9
2912         movq    8+32(%rsp),%r10
2913         leaq    -128+32(%rsp),%rsi
2914         movq    16+32(%rsp),%r11
2915         movq    24+32(%rsp),%r12
2916         leaq    256(%rsp),%rdi
2917         call    __ecp_nistz256_mul_montx
2918
             /* 224(%rsp) = 224(%rsp) * 416(%rsp) */
2919         movq    416(%rsp),%rdx
2920         leaq    416(%rsp),%rbx
2921         movq    0+224(%rsp),%r9
2922         movq    8+224(%rsp),%r10
2923         leaq    -128+224(%rsp),%rsi
2924         movq    16+224(%rsp),%r11
2925         movq    24+224(%rsp),%r12
2926         leaq    224(%rsp),%rdi
2927         call    __ecp_nistz256_mul_montx
2928
             /* 256(%rsp) = 256(%rsp) * 512(%rsp) */
2929         movq    512(%rsp),%rdx
2930         leaq    512(%rsp),%rbx
2931         movq    0+256(%rsp),%r9
2932         movq    8+256(%rsp),%r10
2933         leaq    -128+256(%rsp),%rsi
2934         movq    16+256(%rsp),%r11
2935         movq    24+256(%rsp),%r12
2936         leaq    256(%rsp),%rdi
2937         call    __ecp_nistz256_mul_montx
2938
             /* 64(%rsp) = (product left in registers) - 224(%rsp) */
2939         leaq    224(%rsp),%rbx
2940         leaq    64(%rsp),%rdi
2941         call    __ecp_nistz256_sub_fromx
2942
             /* r12 = OR of the four difference qwords (zero only if the difference is zero); xmm2 = xmm4 | xmm5. */
2943         orq     %r13,%r12
2944         movdqa  %xmm4,%xmm2
2945         orq     %r8,%r12
2946         orq     %r9,%r12
2947         por     %xmm5,%xmm2
             /* movq %r12,%xmm3 : keep the first zero-test for the branch logic below. */
2948 .byte   102,73,15,110,220
2949
             /* 160(%rsp) = 96(%rsp) * 384(%rsp) */
2950         movq    384(%rsp),%rdx
2951         leaq    384(%rsp),%rbx
2952         movq    0+96(%rsp),%r9
2953         movq    8+96(%rsp),%r10
2954         leaq    -128+96(%rsp),%rsi
2955         movq    16+96(%rsp),%r11
2956         movq    24+96(%rsp),%r12
2957         leaq    160(%rsp),%rdi
2958         call    __ecp_nistz256_mul_montx
2959
             /* 192(%rsp) = 32(%rsp) * 480(%rsp) */
2960         movq    480(%rsp),%rdx
2961         leaq    480(%rsp),%rbx
2962         movq    0+32(%rsp),%r9
2963         movq    8+32(%rsp),%r10
2964         leaq    -128+32(%rsp),%rsi
2965         movq    16+32(%rsp),%r11
2966         movq    24+32(%rsp),%r12
2967         leaq    192(%rsp),%rdi
2968         call    __ecp_nistz256_mul_montx
2969
             /* 0(%rsp) = (product left in registers) - 160(%rsp) */
2970         leaq    160(%rsp),%rbx
2971         leaq    0(%rsp),%rdi
2972         call    __ecp_nistz256_sub_fromx
2973
             /* ZF is set only if this second difference is zero. */
2974         orq     %r13,%r12
2975         orq     %r8,%r12
2976         orq     %r9,%r12
2977
             /* 0x3e is a prefix byte on the jnz; it acts as a static taken-branch hint on CPUs that honour it. */
2978 .byte   0x3e
2979         jnz     .Ladd_proceedx
             /* movq %xmm2,%r8 ; movq %xmm3,%r9 : r8 = either-input-is-zero mask, r9 = first difference zero-test. */
2980 .byte   102,73,15,126,208
2981 .byte   102,73,15,126,217
2982         testq   %r8,%r8
2983         jnz     .Ladd_proceedx
2984         testq   %r9,%r9
2985         jz      .Ladd_doublex
2986
             /* Second difference zero, first non-zero, neither mask set: write 96 zero bytes to the result. */
             /* movq %xmm0,%rdi */
2987 .byte   102,72,15,126,199
2988         pxor    %xmm0,%xmm0
2989         movdqu  %xmm0,0(%rdi)
2990         movdqu  %xmm0,16(%rdi)
2991         movdqu  %xmm0,32(%rdi)
2992         movdqu  %xmm0,48(%rdi)
2993         movdqu  %xmm0,64(%rdi)
2994         movdqu  %xmm0,80(%rdi)
2995         jmp     .Ladd_donex
2996
2997 .align  32
2998 .Ladd_doublex:
             /* movq %xmm1,%rsi ; movq %xmm0,%rdi : restore the in1 and result pointers as arguments for the doubling code. */
2999 .byte   102,72,15,126,206
3000 .byte   102,72,15,126,199
             /* 416 = (576+8) - (160+8): shrink this frame to the size ecp_nistz256_point_doublex releases in its epilogue. */
3001         addq    $416,%rsp
3002         jmp     .Lpoint_double_shortcutx
3003
3004 .align  32
3005 .Ladd_proceedx:
             /* 96(%rsp) = 64(%rsp) squared */
3006         movq    0+64(%rsp),%rdx
3007         movq    8+64(%rsp),%r14
3008         leaq    -128+64(%rsp),%rsi
3009         movq    16+64(%rsp),%r15
3010         movq    24+64(%rsp),%r8
3011         leaq    96(%rsp),%rdi
3012         call    __ecp_nistz256_sqr_montx
3013
             /* 352(%rsp) = 0(%rsp) * 448(%rsp) */
3014         movq    448(%rsp),%rdx
3015         leaq    448(%rsp),%rbx
3016         movq    0+0(%rsp),%r9
3017         movq    8+0(%rsp),%r10
3018         leaq    -128+0(%rsp),%rsi
3019         movq    16+0(%rsp),%r11
3020         movq    24+0(%rsp),%r12
3021         leaq    352(%rsp),%rdi
3022         call    __ecp_nistz256_mul_montx
3023
             /* 32(%rsp) = 0(%rsp) squared */
3024         movq    0+0(%rsp),%rdx
3025         movq    8+0(%rsp),%r14
3026         leaq    -128+0(%rsp),%rsi
3027         movq    16+0(%rsp),%r15
3028         movq    24+0(%rsp),%r8
3029         leaq    32(%rsp),%rdi
3030         call    __ecp_nistz256_sqr_montx
3031
             /* 352(%rsp) = 352(%rsp) * 544(%rsp) */
3032         movq    544(%rsp),%rdx
3033         leaq    544(%rsp),%rbx
3034         movq    0+352(%rsp),%r9
3035         movq    8+352(%rsp),%r10
3036         leaq    -128+352(%rsp),%rsi
3037         movq    16+352(%rsp),%r11
3038         movq    24+352(%rsp),%r12
3039         leaq    352(%rsp),%rdi
3040         call    __ecp_nistz256_mul_montx
3041
             /* 128(%rsp) = 32(%rsp) * 0(%rsp) */
3042         movq    0(%rsp),%rdx
3043         leaq    0(%rsp),%rbx
3044         movq    0+32(%rsp),%r9
3045         movq    8+32(%rsp),%r10
3046         leaq    -128+32(%rsp),%rsi
3047         movq    16+32(%rsp),%r11
3048         movq    24+32(%rsp),%r12
3049         leaq    128(%rsp),%rdi
3050         call    __ecp_nistz256_mul_montx
3051
             /* 192(%rsp) = 32(%rsp) * 160(%rsp) */
3052         movq    160(%rsp),%rdx
3053         leaq    160(%rsp),%rbx
3054         movq    0+32(%rsp),%r9
3055         movq    8+32(%rsp),%r10
3056         leaq    -128+32(%rsp),%rsi
3057         movq    16+32(%rsp),%r11
3058         movq    24+32(%rsp),%r12
3059         leaq    192(%rsp),%rdi
3060         call    __ecp_nistz256_mul_montx
3061
3062
3063
3064
             /* Inline doubling of the product left in r12,r13,r8,r9, with the same conditional modulus subtraction as the mul_by_2 helper. */
3065         xorq    %r11,%r11
3066         addq    %r12,%r12
3067         leaq    96(%rsp),%rsi
3068         adcq    %r13,%r13
3069         movq    %r12,%rax
3070         adcq    %r8,%r8
3071         adcq    %r9,%r9
3072         movq    %r13,%rbp
3073         adcq    $0,%r11
3074
3075         subq    $-1,%r12
3076         movq    %r8,%rcx
3077         sbbq    %r14,%r13
3078         sbbq    $0,%r8
3079         movq    %r9,%r10
3080         sbbq    %r15,%r9
3081         sbbq    $0,%r11
3082
             /* Each cmovc restores an unreduced qword on borrow; the scratch register it frees is reloaded at once with a qword of 96(%rsp). */
3083         cmovcq  %rax,%r12
3084         movq    0(%rsi),%rax
3085         cmovcq  %rbp,%r13
3086         movq    8(%rsi),%rbp
3087         cmovcq  %rcx,%r8
3088         movq    16(%rsi),%rcx
3089         cmovcq  %r10,%r9
3090         movq    24(%rsi),%r10
3091
             /* registers = 96(%rsp) - doubled value */
3092         call    __ecp_nistz256_subx
3093
             /* 288(%rsp) = registers - 128(%rsp) */
3094         leaq    128(%rsp),%rbx
3095         leaq    288(%rsp),%rdi
3096         call    __ecp_nistz256_sub_fromx
3097
             /* registers = 192(%rsp) - (value just stored at 288(%rsp)); subx does not store, so the result is written to 320(%rsp) by hand. */
3098         movq    192+0(%rsp),%rax
3099         movq    192+8(%rsp),%rbp
3100         movq    192+16(%rsp),%rcx
3101         movq    192+24(%rsp),%r10
3102         leaq    320(%rsp),%rdi
3103
3104         call    __ecp_nistz256_subx
3105
3106         movq    %r12,0(%rdi)
3107         movq    %r13,8(%rdi)
3108         movq    %r8,16(%rdi)
3109         movq    %r9,24(%rdi)
             /* 256(%rsp) = 224(%rsp) * 128(%rsp) */
3110         movq    128(%rsp),%rdx
3111         leaq    128(%rsp),%rbx
3112         movq    0+224(%rsp),%r9
3113         movq    8+224(%rsp),%r10
3114         leaq    -128+224(%rsp),%rsi
3115         movq    16+224(%rsp),%r11
3116         movq    24+224(%rsp),%r12
3117         leaq    256(%rsp),%rdi
3118         call    __ecp_nistz256_mul_montx
3119
             /* 320(%rsp) = 64(%rsp) * 320(%rsp) */
3120         movq    320(%rsp),%rdx
3121         leaq    320(%rsp),%rbx
3122         movq    0+64(%rsp),%r9
3123         movq    8+64(%rsp),%r10
3124         leaq    -128+64(%rsp),%rsi
3125         movq    16+64(%rsp),%r11
3126         movq    24+64(%rsp),%r12
3127         leaq    320(%rsp),%rdi
3128         call    __ecp_nistz256_mul_montx
3129
             /* 320(%rsp) = (product left in registers) - 256(%rsp) */
3130         leaq    256(%rsp),%rbx
3131         leaq    320(%rsp),%rdi
3132         call    __ecp_nistz256_sub_fromx
3133
             /* movq %xmm0,%rdi : recover the result pointer for the final stores. */
3134 .byte   102,72,15,126,199
3135
             /* result[64..95]: 352(%rsp) by default, in2[64..95] (544) if xmm5 is set, in1[64..95] (448) if xmm4 is set. */
3136         movdqa  %xmm5,%xmm0
3137         movdqa  %xmm5,%xmm1
3138         pandn   352(%rsp),%xmm0
3139         movdqa  %xmm5,%xmm2
3140         pandn   352+16(%rsp),%xmm1
3141         movdqa  %xmm5,%xmm3
3142         pand    544(%rsp),%xmm2
3143         pand    544+16(%rsp),%xmm3
3144         por     %xmm0,%xmm2
3145         por     %xmm1,%xmm3
3146
3147         movdqa  %xmm4,%xmm0
3148         movdqa  %xmm4,%xmm1
3149         pandn   %xmm2,%xmm0
3150         movdqa  %xmm4,%xmm2
3151         pandn   %xmm3,%xmm1
3152         movdqa  %xmm4,%xmm3
3153         pand    448(%rsp),%xmm2
3154         pand    448+16(%rsp),%xmm3
3155         por     %xmm0,%xmm2
3156         por     %xmm1,%xmm3
3157         movdqu  %xmm2,64(%rdi)
3158         movdqu  %xmm3,80(%rdi)
3159
             /* result[0..31]: 288(%rsp) by default, in2[0..31] (480) if xmm5 is set, in1[0..31] (384) if xmm4 is set. */
3160         movdqa  %xmm5,%xmm0
3161         movdqa  %xmm5,%xmm1
3162         pandn   288(%rsp),%xmm0
3163         movdqa  %xmm5,%xmm2
3164         pandn   288+16(%rsp),%xmm1
3165         movdqa  %xmm5,%xmm3
3166         pand    480(%rsp),%xmm2
3167         pand    480+16(%rsp),%xmm3
3168         por     %xmm0,%xmm2
3169         por     %xmm1,%xmm3
3170
3171         movdqa  %xmm4,%xmm0
3172         movdqa  %xmm4,%xmm1
3173         pandn   %xmm2,%xmm0
3174         movdqa  %xmm4,%xmm2
3175         pandn   %xmm3,%xmm1
3176         movdqa  %xmm4,%xmm3
3177         pand    384(%rsp),%xmm2
3178         pand    384+16(%rsp),%xmm3
3179         por     %xmm0,%xmm2
3180         por     %xmm1,%xmm3
3181         movdqu  %xmm2,0(%rdi)
3182         movdqu  %xmm3,16(%rdi)
3183
             /* result[32..63]: 320(%rsp) by default, in2[32..63] (512) if xmm5 is set, in1[32..63] (416) if xmm4 is set. */
3184         movdqa  %xmm5,%xmm0
3185         movdqa  %xmm5,%xmm1
3186         pandn   320(%rsp),%xmm0
3187         movdqa  %xmm5,%xmm2
3188         pandn   320+16(%rsp),%xmm1
3189         movdqa  %xmm5,%xmm3
3190         pand    512(%rsp),%xmm2
3191         pand    512+16(%rsp),%xmm3
3192         por     %xmm0,%xmm2
3193         por     %xmm1,%xmm3
3194
3195         movdqa  %xmm4,%xmm0
3196         movdqa  %xmm4,%xmm1
3197         pandn   %xmm2,%xmm0
3198         movdqa  %xmm4,%xmm2
3199         pandn   %xmm3,%xmm1
3200         movdqa  %xmm4,%xmm3
3201         pand    416(%rsp),%xmm2
3202         pand    416+16(%rsp),%xmm3
3203         por     %xmm0,%xmm2
3204         por     %xmm1,%xmm3
3205         movdqu  %xmm2,32(%rdi)
3206         movdqu  %xmm3,48(%rdi)
3207
3208 .Ladd_donex:
3209         addq    $576+8,%rsp
3210         popq    %r15
3211         popq    %r14
3212         popq    %r13
3213         popq    %r12
3214         popq    %rbx
3215         popq    %rbp
             /* 0xf3,0xc3 encodes "rep ret". */
3216         .byte   0xf3,0xc3
3217 .size   ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
3218 .type   ecp_nistz256_point_add_affinex,@function
3219 .align  32
3220 ecp_nistz256_point_add_affinex:
3221 .Lpoint_add_affinex:
3222         pushq   %rbp
3223         pushq   %rbx
3224         pushq   %r12
3225         pushq   %r13
3226         pushq   %r14
3227         pushq   %r15
3228         subq    $480+8,%rsp
3229
3230         movdqu  0(%rsi),%xmm0
3231         movq    %rdx,%rbx
3232         movdqu  16(%rsi),%xmm1
3233         movdqu  32(%rsi),%xmm2
3234         movdqu  48(%rsi),%xmm3
3235         movdqu  64(%rsi),%xmm4
3236         movdqu  80(%rsi),%xmm5
3237         movq    64+0(%rsi),%rdx
3238         movq    64+8(%rsi),%r14
3239         movq    64+16(%rsi),%r15
3240         movq    64+24(%rsi),%r8
3241         movdqa  %xmm0,320(%rsp)
3242         movdqa  %xmm1,320+16(%rsp)
3243         movdqa  %xmm2,352(%rsp)
3244         movdqa  %xmm3,352+16(%rsp)
3245         movdqa  %xmm4,384(%rsp)
3246         movdqa  %xmm5,384+16(%rsp)
3247         por     %xmm4,%xmm5
3248
3249         movdqu  0(%rbx),%xmm0
3250         pshufd  $0xb1,%xmm5,%xmm3
3251         movdqu  16(%rbx),%xmm1
3252         movdqu  32(%rbx),%xmm2
3253         por     %xmm3,%xmm5
3254         movdqu  48(%rbx),%xmm3
3255         movdqa  %xmm0,416(%rsp)
3256         pshufd  $0x1e,%xmm5,%xmm4
3257         movdqa  %xmm1,416+16(%rsp)
3258         por     %xmm0,%xmm1
3259 .byte   102,72,15,110,199
3260         movdqa  %xmm2,448(%rsp)
3261         movdqa  %xmm3,448+16(%rsp)
3262         por     %xmm2,%xmm3
3263         por     %xmm4,%xmm5
3264         pxor    %xmm4,%xmm4
3265         por     %xmm1,%xmm3
3266
3267         leaq    64-128(%rsi),%rsi
3268         leaq    32(%rsp),%rdi
3269         call    __ecp_nistz256_sqr_montx
3270
3271         pcmpeqd %xmm4,%xmm5
3272         pshufd  $0xb1,%xmm3,%xmm4
3273         movq    0(%rbx),%rdx
3274
3275         movq    %r12,%r9
3276         por     %xmm3,%xmm4
3277         pshufd  $0,%xmm5,%xmm5
3278         pshufd  $0x1e,%xmm4,%xmm3
3279         movq    %r13,%r10
3280         por     %xmm3,%xmm4
3281         pxor    %xmm3,%xmm3
3282         movq    %r14,%r11
3283         pcmpeqd %xmm3,%xmm4
3284         pshufd  $0,%xmm4,%xmm4
3285
3286         leaq    32-128(%rsp),%rsi
3287         movq    %r15,%r12
3288         leaq    0(%rsp),%rdi
3289         call    __ecp_nistz256_mul_montx
3290
3291         leaq    320(%rsp),%rbx
3292         leaq    64(%rsp),%rdi
3293         call    __ecp_nistz256_sub_fromx
3294
3295         movq    384(%rsp),%rdx
3296         leaq    384(%rsp),%rbx
3297         movq    0+32(%rsp),%r9
3298         movq    8+32(%rsp),%r10
3299         leaq    -128+32(%rsp),%rsi
3300         movq    16+32(%rsp),%r11
3301         movq    24+32(%rsp),%r12
3302         leaq    32(%rsp),%rdi
3303         call    __ecp_nistz256_mul_montx
3304
3305         movq    384(%rsp),%rdx
3306         leaq    384(%rsp),%rbx
3307         movq    0+64(%rsp),%r9
3308         movq    8+64(%rsp),%r10
3309         leaq    -128+64(%rsp),%rsi
3310         movq    16+64(%rsp),%r11
3311         movq    24+64(%rsp),%r12
3312         leaq    288(%rsp),%rdi
3313         call    __ecp_nistz256_mul_montx
3314
3315         movq    448(%rsp),%rdx
3316         leaq    448(%rsp),%rbx
3317         movq    0+32(%rsp),%r9
3318         movq    8+32(%rsp),%r10
3319         leaq    -128+32(%rsp),%rsi
3320         movq    16+32(%rsp),%r11
3321         movq    24+32(%rsp),%r12
3322         leaq    32(%rsp),%rdi
3323         call    __ecp_nistz256_mul_montx
3324
3325         leaq    352(%rsp),%rbx
3326         leaq    96(%rsp),%rdi
3327         call    __ecp_nistz256_sub_fromx
3328
3329         movq    0+64(%rsp),%rdx
3330         movq    8+64(%rsp),%r14
3331         leaq    -128+64(%rsp),%rsi
3332         movq    16+64(%rsp),%r15
3333         movq    24+64(%rsp),%r8
3334         leaq    128(%rsp),%rdi
3335         call    __ecp_nistz256_sqr_montx
3336
3337         movq    0+96(%rsp),%rdx
3338         movq    8+96(%rsp),%r14
3339         leaq    -128+96(%rsp),%rsi
3340         movq    16+96(%rsp),%r15
3341         movq    24+96(%rsp),%r8
3342         leaq    192(%rsp),%rdi
3343         call    __ecp_nistz256_sqr_montx
3344
3345         movq    128(%rsp),%rdx
3346         leaq    128(%rsp),%rbx
3347         movq    0+64(%rsp),%r9
3348         movq    8+64(%rsp),%r10
3349         leaq    -128+64(%rsp),%rsi
3350         movq    16+64(%rsp),%r11
3351         movq    24+64(%rsp),%r12
3352         leaq    160(%rsp),%rdi
3353         call    __ecp_nistz256_mul_montx
3354
3355         movq    320(%rsp),%rdx
3356         leaq    320(%rsp),%rbx
3357         movq    0+128(%rsp),%r9
3358         movq    8+128(%rsp),%r10
3359         leaq    -128+128(%rsp),%rsi
3360         movq    16+128(%rsp),%r11
3361         movq    24+128(%rsp),%r12
3362         leaq    0(%rsp),%rdi
3363         call    __ecp_nistz256_mul_montx
3364
3365
3366
3367
3368         xorq    %r11,%r11
3369         addq    %r12,%r12
3370         leaq    192(%rsp),%rsi
3371         adcq    %r13,%r13
3372         movq    %r12,%rax
3373         adcq    %r8,%r8
3374         adcq    %r9,%r9
3375         movq    %r13,%rbp
3376         adcq    $0,%r11
3377
3378         subq    $-1,%r12
3379         movq    %r8,%rcx
3380         sbbq    %r14,%r13
3381         sbbq    $0,%r8
3382         movq    %r9,%r10
3383         sbbq    %r15,%r9
3384         sbbq    $0,%r11
3385
3386         cmovcq  %rax,%r12
3387         movq    0(%rsi),%rax
3388         cmovcq  %rbp,%r13
3389         movq    8(%rsi),%rbp
3390         cmovcq  %rcx,%r8
3391         movq    16(%rsi),%rcx
3392         cmovcq  %r10,%r9
3393         movq    24(%rsi),%r10
3394
3395         call    __ecp_nistz256_subx
3396
3397         leaq    160(%rsp),%rbx
3398         leaq    224(%rsp),%rdi
3399         call    __ecp_nistz256_sub_fromx
3400
3401         movq    0+0(%rsp),%rax
3402         movq    0+8(%rsp),%rbp
3403         movq    0+16(%rsp),%rcx
3404         movq    0+24(%rsp),%r10
3405         leaq    64(%rsp),%rdi
3406
3407         call    __ecp_nistz256_subx
3408
3409         movq    %r12,0(%rdi)
3410         movq    %r13,8(%rdi)
3411         movq    %r8,16(%rdi)
3412         movq    %r9,24(%rdi)
3413         movq    352(%rsp),%rdx
3414         leaq    352(%rsp),%rbx
3415         movq    0+160(%rsp),%r9
3416         movq    8+160(%rsp),%r10
3417         leaq    -128+160(%rsp),%rsi
3418         movq    16+160(%rsp),%r11
3419         movq    24+160(%rsp),%r12
3420         leaq    32(%rsp),%rdi
3421         call    __ecp_nistz256_mul_montx
3422
3423         movq    96(%rsp),%rdx
3424         leaq    96(%rsp),%rbx
3425         movq    0+64(%rsp),%r9
3426         movq    8+64(%rsp),%r10
3427         leaq    -128+64(%rsp),%rsi
3428         movq    16+64(%rsp),%r11
3429         movq    24+64(%rsp),%r12
3430         leaq    64(%rsp),%rdi
3431         call    __ecp_nistz256_mul_montx
3432
3433         leaq    32(%rsp),%rbx
3434         leaq    256(%rsp),%rdi
3435         call    __ecp_nistz256_sub_fromx
3436
3437 .byte   102,72,15,126,199
3438
3439         movdqa  %xmm5,%xmm0
3440         movdqa  %xmm5,%xmm1
3441         pandn   288(%rsp),%xmm0
3442         movdqa  %xmm5,%xmm2
3443         pandn   288+16(%rsp),%xmm1
3444         movdqa  %xmm5,%xmm3
3445         pand    .LONE_mont(%rip),%xmm2
3446         pand    .LONE_mont+16(%rip),%xmm3
3447         por     %xmm0,%xmm2
3448         por     %xmm1,%xmm3
3449
3450         movdqa  %xmm4,%xmm0
3451         movdqa  %xmm4,%xmm1
3452         pandn   %xmm2,%xmm0
3453         movdqa  %xmm4,%xmm2
3454         pandn   %xmm3,%xmm1
3455         movdqa  %xmm4,%xmm3
3456         pand    384(%rsp),%xmm2
3457         pand    384+16(%rsp),%xmm3
3458         por     %xmm0,%xmm2
3459         por     %xmm1,%xmm3
3460         movdqu  %xmm2,64(%rdi)
3461         movdqu  %xmm3,80(%rdi)
3462
3463         movdqa  %xmm5,%xmm0
3464         movdqa  %xmm5,%xmm1
3465         pandn   224(%rsp),%xmm0
3466         movdqa  %xmm5,%xmm2
3467         pandn   224+16(%rsp),%xmm1
3468         movdqa  %xmm5,%xmm3
3469         pand    416(%rsp),%xmm2
3470         pand    416+16(%rsp),%xmm3
3471         por     %xmm0,%xmm2
3472         por     %xmm1,%xmm3
3473
3474         movdqa  %xmm4,%xmm0
3475         movdqa  %xmm4,%xmm1
3476         pandn   %xmm2,%xmm0
3477         movdqa  %xmm4,%xmm2
3478         pandn   %xmm3,%xmm1
3479         movdqa  %xmm4,%xmm3
3480         pand    320(%rsp),%xmm2
3481         pand    320+16(%rsp),%xmm3
3482         por     %xmm0,%xmm2
3483         por     %xmm1,%xmm3
3484         movdqu  %xmm2,0(%rdi)
3485         movdqu  %xmm3,16(%rdi)
3486
3487         movdqa  %xmm5,%xmm0
3488         movdqa  %xmm5,%xmm1
3489         pandn   256(%rsp),%xmm0
3490         movdqa  %xmm5,%xmm2
3491         pandn   256+16(%rsp),%xmm1
3492         movdqa  %xmm5,%xmm3
3493         pand    448(%rsp),%xmm2
3494         pand    448+16(%rsp),%xmm3
3495         por     %xmm0,%xmm2
3496         por     %xmm1,%xmm3
3497
3498         movdqa  %xmm4,%xmm0
3499         movdqa  %xmm4,%xmm1
3500         pandn   %xmm2,%xmm0
3501         movdqa  %xmm4,%xmm2
3502         pandn   %xmm3,%xmm1
3503         movdqa  %xmm4,%xmm3
3504         pand    352(%rsp),%xmm2
3505         pand    352+16(%rsp),%xmm3
3506         por     %xmm0,%xmm2
3507         por     %xmm1,%xmm3
3508         movdqu  %xmm2,32(%rdi)
3509         movdqu  %xmm3,48(%rdi)
3510
3511         addq    $480+8,%rsp
3512         popq    %r15
3513         popq    %r14
3514         popq    %r13
3515         popq    %r12
3516         popq    %rbx
3517         popq    %rbp
3518         .byte   0xf3,0xc3
3519 .size   ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex