1 /* $FreeBSD$ */
2 /* Do not modify. This file is auto-generated from ecp_nistz256-x86_64.pl. */
3 .text   
4
5
6
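/*
 * Constant-time field and point arithmetic for the NIST P-256 curve.
 * Field elements are four 64-bit little-endian limbs kept in Montgomery
 * form (R = 2^256).  Constants:
 *
 *   .Lpoly     - the P-256 prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1
 *   .LRR       - R^2 mod p (2^512 mod p), used to enter Montgomery form
 *   .LOne/.LTwo/.LThree - 32-bit broadcast vectors for the select loops
 *   .LONE_mont - 1 in Montgomery form (2^256 mod p)
 */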
7 .align  64
8 .Lpoly:
9 .quad   0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
10
11
12 .LRR:
13 .quad   0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd
14
15 .LOne:
16 .long   1,1,1,1,1,1,1,1
17 .LTwo:
18 .long   2,2,2,2,2,2,2,2
19 .LThree:
20 .long   3,3,3,3,3,3,3,3
21 .LONE_mont:
22 .quad   0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
23
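/*
 * ecp_nistz256_mul_by_2: res (%rdi) = 2*a (%rsi) mod p.  The doubled value
 * and its p-reduced copy are both computed and the result is picked with
 * cmov, so there are no secret-dependent branches.
 */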
24 .globl  ecp_nistz256_mul_by_2
25 .type   ecp_nistz256_mul_by_2,@function
26 .align  64
27 ecp_nistz256_mul_by_2:
28         pushq   %r12
29         pushq   %r13
30
31         movq    0(%rsi),%r8
32         xorq    %r13,%r13
33         movq    8(%rsi),%r9
34         addq    %r8,%r8
35         movq    16(%rsi),%r10
36         adcq    %r9,%r9
37         movq    24(%rsi),%r11
38         leaq    .Lpoly(%rip),%rsi
39         movq    %r8,%rax
40         adcq    %r10,%r10
41         adcq    %r11,%r11
42         movq    %r9,%rdx
43         adcq    $0,%r13
44
45         subq    0(%rsi),%r8
46         movq    %r10,%rcx
47         sbbq    8(%rsi),%r9
48         sbbq    16(%rsi),%r10
49         movq    %r11,%r12
50         sbbq    24(%rsi),%r11
51         sbbq    $0,%r13
52
53         cmovcq  %rax,%r8
54         cmovcq  %rdx,%r9
55         movq    %r8,0(%rdi)
56         cmovcq  %rcx,%r10
57         movq    %r9,8(%rdi)
58         cmovcq  %r12,%r11
59         movq    %r10,16(%rdi)
60         movq    %r11,24(%rdi)
61
62         popq    %r13
63         popq    %r12
64         .byte   0xf3,0xc3
65 .size   ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
66
67
68
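/*
 * ecp_nistz256_div_by_2: res (%rdi) = a (%rsi)/2 mod p.  a+p is computed
 * unconditionally, cmov falls back to a when a is even, and the 257-bit
 * value is then shifted right by one bit.
 */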
69 .globl  ecp_nistz256_div_by_2
70 .type   ecp_nistz256_div_by_2,@function
71 .align  32
72 ecp_nistz256_div_by_2:
73         pushq   %r12
74         pushq   %r13
75
76         movq    0(%rsi),%r8
77         movq    8(%rsi),%r9
78         movq    16(%rsi),%r10
79         movq    %r8,%rax
80         movq    24(%rsi),%r11
81         leaq    .Lpoly(%rip),%rsi
82
83         movq    %r9,%rdx
84         xorq    %r13,%r13
85         addq    0(%rsi),%r8
86         movq    %r10,%rcx
87         adcq    8(%rsi),%r9
88         adcq    16(%rsi),%r10
89         movq    %r11,%r12
90         adcq    24(%rsi),%r11
91         adcq    $0,%r13
92         xorq    %rsi,%rsi
93         testq   $1,%rax
94
95         cmovzq  %rax,%r8
96         cmovzq  %rdx,%r9
97         cmovzq  %rcx,%r10
98         cmovzq  %r12,%r11
99         cmovzq  %rsi,%r13
100
101         movq    %r9,%rax
102         shrq    $1,%r8
103         shlq    $63,%rax
104         movq    %r10,%rdx
105         shrq    $1,%r9
106         orq     %rax,%r8
107         shlq    $63,%rdx
108         movq    %r11,%rcx
109         shrq    $1,%r10
110         orq     %rdx,%r9
111         shlq    $63,%rcx
112         shrq    $1,%r11
113         shlq    $63,%r13
114         orq     %rcx,%r10
115         orq     %r13,%r11
116
117         movq    %r8,0(%rdi)
118         movq    %r9,8(%rdi)
119         movq    %r10,16(%rdi)
120         movq    %r11,24(%rdi)
121
122         popq    %r13
123         popq    %r12
124         .byte   0xf3,0xc3
125 .size   ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2
126
127
128
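/* ecp_nistz256_mul_by_3: res (%rdi) = 3*a (%rsi) mod p, as (2*a mod p) + a mod p. */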
129 .globl  ecp_nistz256_mul_by_3
130 .type   ecp_nistz256_mul_by_3,@function
131 .align  32
132 ecp_nistz256_mul_by_3:
133         pushq   %r12
134         pushq   %r13
135
136         movq    0(%rsi),%r8
137         xorq    %r13,%r13
138         movq    8(%rsi),%r9
139         addq    %r8,%r8
140         movq    16(%rsi),%r10
141         adcq    %r9,%r9
142         movq    24(%rsi),%r11
143         movq    %r8,%rax
144         adcq    %r10,%r10
145         adcq    %r11,%r11
146         movq    %r9,%rdx
147         adcq    $0,%r13
148
149         subq    $-1,%r8
150         movq    %r10,%rcx
151         sbbq    .Lpoly+8(%rip),%r9
152         sbbq    $0,%r10
153         movq    %r11,%r12
154         sbbq    .Lpoly+24(%rip),%r11
155         sbbq    $0,%r13
156
157         cmovcq  %rax,%r8
158         cmovcq  %rdx,%r9
159         cmovcq  %rcx,%r10
160         cmovcq  %r12,%r11
161
162         xorq    %r13,%r13
163         addq    0(%rsi),%r8
164         adcq    8(%rsi),%r9
165         movq    %r8,%rax
166         adcq    16(%rsi),%r10
167         adcq    24(%rsi),%r11
168         movq    %r9,%rdx
169         adcq    $0,%r13
170
171         subq    $-1,%r8
172         movq    %r10,%rcx
173         sbbq    .Lpoly+8(%rip),%r9
174         sbbq    $0,%r10
175         movq    %r11,%r12
176         sbbq    .Lpoly+24(%rip),%r11
177         sbbq    $0,%r13
178
179         cmovcq  %rax,%r8
180         cmovcq  %rdx,%r9
181         movq    %r8,0(%rdi)
182         cmovcq  %rcx,%r10
183         movq    %r9,8(%rdi)
184         cmovcq  %r12,%r11
185         movq    %r10,16(%rdi)
186         movq    %r11,24(%rdi)
187
188         popq    %r13
189         popq    %r12
190         .byte   0xf3,0xc3
191 .size   ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3
192
193
194
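/* ecp_nistz256_add: res (%rdi) = a (%rsi) + b (%rdx) mod p. */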
195 .globl  ecp_nistz256_add
196 .type   ecp_nistz256_add,@function
197 .align  32
198 ecp_nistz256_add:
199         pushq   %r12
200         pushq   %r13
201
202         movq    0(%rsi),%r8
203         xorq    %r13,%r13
204         movq    8(%rsi),%r9
205         movq    16(%rsi),%r10
206         movq    24(%rsi),%r11
207         leaq    .Lpoly(%rip),%rsi
208
209         addq    0(%rdx),%r8
210         adcq    8(%rdx),%r9
211         movq    %r8,%rax
212         adcq    16(%rdx),%r10
213         adcq    24(%rdx),%r11
214         movq    %r9,%rdx
215         adcq    $0,%r13
216
217         subq    0(%rsi),%r8
218         movq    %r10,%rcx
219         sbbq    8(%rsi),%r9
220         sbbq    16(%rsi),%r10
221         movq    %r11,%r12
222         sbbq    24(%rsi),%r11
223         sbbq    $0,%r13
224
225         cmovcq  %rax,%r8
226         cmovcq  %rdx,%r9
227         movq    %r8,0(%rdi)
228         cmovcq  %rcx,%r10
229         movq    %r9,8(%rdi)
230         cmovcq  %r12,%r11
231         movq    %r10,16(%rdi)
232         movq    %r11,24(%rdi)
233
234         popq    %r13
235         popq    %r12
236         .byte   0xf3,0xc3
237 .size   ecp_nistz256_add,.-ecp_nistz256_add
238
239
240
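/* ecp_nistz256_sub: res (%rdi) = a (%rsi) - b (%rdx) mod p (p is added back when the subtraction borrows). */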
241 .globl  ecp_nistz256_sub
242 .type   ecp_nistz256_sub,@function
243 .align  32
244 ecp_nistz256_sub:
245         pushq   %r12
246         pushq   %r13
247
248         movq    0(%rsi),%r8
249         xorq    %r13,%r13
250         movq    8(%rsi),%r9
251         movq    16(%rsi),%r10
252         movq    24(%rsi),%r11
253         leaq    .Lpoly(%rip),%rsi
254
255         subq    0(%rdx),%r8
256         sbbq    8(%rdx),%r9
257         movq    %r8,%rax
258         sbbq    16(%rdx),%r10
259         sbbq    24(%rdx),%r11
260         movq    %r9,%rdx
261         sbbq    $0,%r13
262
263         addq    0(%rsi),%r8
264         movq    %r10,%rcx
265         adcq    8(%rsi),%r9
266         adcq    16(%rsi),%r10
267         movq    %r11,%r12
268         adcq    24(%rsi),%r11
269         testq   %r13,%r13
270
271         cmovzq  %rax,%r8
272         cmovzq  %rdx,%r9
273         movq    %r8,0(%rdi)
274         cmovzq  %rcx,%r10
275         movq    %r9,8(%rdi)
276         cmovzq  %r12,%r11
277         movq    %r10,16(%rdi)
278         movq    %r11,24(%rdi)
279
280         popq    %r13
281         popq    %r12
282         .byte   0xf3,0xc3
283 .size   ecp_nistz256_sub,.-ecp_nistz256_sub
284
285
286
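/* ecp_nistz256_neg: res (%rdi) = -a (%rsi) mod p. */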
287 .globl  ecp_nistz256_neg
288 .type   ecp_nistz256_neg,@function
289 .align  32
290 ecp_nistz256_neg:
291         pushq   %r12
292         pushq   %r13
293
294         xorq    %r8,%r8
295         xorq    %r9,%r9
296         xorq    %r10,%r10
297         xorq    %r11,%r11
298         xorq    %r13,%r13
299
300         subq    0(%rsi),%r8
301         sbbq    8(%rsi),%r9
302         sbbq    16(%rsi),%r10
303         movq    %r8,%rax
304         sbbq    24(%rsi),%r11
305         leaq    .Lpoly(%rip),%rsi
306         movq    %r9,%rdx
307         sbbq    $0,%r13
308
309         addq    0(%rsi),%r8
310         movq    %r10,%rcx
311         adcq    8(%rsi),%r9
312         adcq    16(%rsi),%r10
313         movq    %r11,%r12
314         adcq    24(%rsi),%r11
315         testq   %r13,%r13
316
317         cmovzq  %rax,%r8
318         cmovzq  %rdx,%r9
319         movq    %r8,0(%rdi)
320         cmovzq  %rcx,%r10
321         movq    %r9,8(%rdi)
322         cmovzq  %r12,%r11
323         movq    %r10,16(%rdi)
324         movq    %r11,24(%rdi)
325
326         popq    %r13
327         popq    %r12
328         .byte   0xf3,0xc3
329 .size   ecp_nistz256_neg,.-ecp_nistz256_neg
330
331
332
333
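/*
 * ecp_nistz256_to_mont: res (%rdi) = a (%rsi)*R mod p, i.e. conversion into
 * Montgomery form, implemented as a Montgomery multiplication by
 * .LRR = R^2 mod p through the shared .Lmul_mont body below.
 */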
334 .globl  ecp_nistz256_to_mont
335 .type   ecp_nistz256_to_mont,@function
336 .align  32
337 ecp_nistz256_to_mont:
338         movl    $0x80100,%ecx
339         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
340         leaq    .LRR(%rip),%rdx
341         jmp     .Lmul_mont
342 .size   ecp_nistz256_to_mont,.-ecp_nistz256_to_mont
343
344
345
346
347
348
349
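/*
 * ecp_nistz256_mul_mont: res (%rdi) = a (%rsi)*b (%rdx)*R^-1 mod p.  The
 * 0x80100 test against OPENSSL_ia32cap_P+8 checks the BMI2 and ADX feature
 * bits and dispatches to the MULX/ADCX/ADOX path when both are present.
 */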
350 .globl  ecp_nistz256_mul_mont
351 .type   ecp_nistz256_mul_mont,@function
352 .align  32
353 ecp_nistz256_mul_mont:
354         movl    $0x80100,%ecx
355         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
356 .Lmul_mont:
357         pushq   %rbp
358         pushq   %rbx
359         pushq   %r12
360         pushq   %r13
361         pushq   %r14
362         pushq   %r15
363         cmpl    $0x80100,%ecx
364         je      .Lmul_montx
365         movq    %rdx,%rbx
366         movq    0(%rdx),%rax
367         movq    0(%rsi),%r9
368         movq    8(%rsi),%r10
369         movq    16(%rsi),%r11
370         movq    24(%rsi),%r12
371
372         call    __ecp_nistz256_mul_montq
373         jmp     .Lmul_mont_done
374
375 .align  32
376 .Lmul_montx:
377         movq    %rdx,%rbx
378         movq    0(%rdx),%rdx
379         movq    0(%rsi),%r9
380         movq    8(%rsi),%r10
381         movq    16(%rsi),%r11
382         movq    24(%rsi),%r12
383         leaq    -128(%rsi),%rsi
384
385         call    __ecp_nistz256_mul_montx
386 .Lmul_mont_done:
387         popq    %r15
388         popq    %r14
389         popq    %r13
390         popq    %r12
391         popq    %rbx
392         popq    %rbp
393         .byte   0xf3,0xc3
394 .size   ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
395
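/*
 * Scalar (MUL/ADC) Montgomery multiplication.  On entry: %rbx = b,
 * %rax = b[0], %r9..%r12 = a[0..3], %rsi = a, %rdi = res.
 *
 * Since -p^-1 mod 2^64 = 1 for this prime, each of the four reduction
 * steps just adds acc[0]*p, which clears the bottom limb.  Because
 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, that addition collapses to
 *
 *     acc += acc[0] << 96                        (the shl/shr $32 pair)
 *     acc += acc[0] * 0xffffffff00000001 << 192  (the mulq by .Lpoly+24)
 *
 * and the result is finished with one conditional subtraction of p.
 */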
396 .type   __ecp_nistz256_mul_montq,@function
397 .align  32
398 __ecp_nistz256_mul_montq:
399
400
401         movq    %rax,%rbp
402         mulq    %r9
403         movq    .Lpoly+8(%rip),%r14
404         movq    %rax,%r8
405         movq    %rbp,%rax
406         movq    %rdx,%r9
407
408         mulq    %r10
409         movq    .Lpoly+24(%rip),%r15
410         addq    %rax,%r9
411         movq    %rbp,%rax
412         adcq    $0,%rdx
413         movq    %rdx,%r10
414
415         mulq    %r11
416         addq    %rax,%r10
417         movq    %rbp,%rax
418         adcq    $0,%rdx
419         movq    %rdx,%r11
420
421         mulq    %r12
422         addq    %rax,%r11
423         movq    %r8,%rax
424         adcq    $0,%rdx
425         xorq    %r13,%r13
426         movq    %rdx,%r12
427
428
429
430
431
432
433
434
435
436
437         movq    %r8,%rbp
438         shlq    $32,%r8
439         mulq    %r15
440         shrq    $32,%rbp
441         addq    %r8,%r9
442         adcq    %rbp,%r10
443         adcq    %rax,%r11
444         movq    8(%rbx),%rax
445         adcq    %rdx,%r12
446         adcq    $0,%r13
447         xorq    %r8,%r8
448
449
450
451         movq    %rax,%rbp
452         mulq    0(%rsi)
453         addq    %rax,%r9
454         movq    %rbp,%rax
455         adcq    $0,%rdx
456         movq    %rdx,%rcx
457
458         mulq    8(%rsi)
459         addq    %rcx,%r10
460         adcq    $0,%rdx
461         addq    %rax,%r10
462         movq    %rbp,%rax
463         adcq    $0,%rdx
464         movq    %rdx,%rcx
465
466         mulq    16(%rsi)
467         addq    %rcx,%r11
468         adcq    $0,%rdx
469         addq    %rax,%r11
470         movq    %rbp,%rax
471         adcq    $0,%rdx
472         movq    %rdx,%rcx
473
474         mulq    24(%rsi)
475         addq    %rcx,%r12
476         adcq    $0,%rdx
477         addq    %rax,%r12
478         movq    %r9,%rax
479         adcq    %rdx,%r13
480         adcq    $0,%r8
481
482
483
484         movq    %r9,%rbp
485         shlq    $32,%r9
486         mulq    %r15
487         shrq    $32,%rbp
488         addq    %r9,%r10
489         adcq    %rbp,%r11
490         adcq    %rax,%r12
491         movq    16(%rbx),%rax
492         adcq    %rdx,%r13
493         adcq    $0,%r8
494         xorq    %r9,%r9
495
496
497
498         movq    %rax,%rbp
499         mulq    0(%rsi)
500         addq    %rax,%r10
501         movq    %rbp,%rax
502         adcq    $0,%rdx
503         movq    %rdx,%rcx
504
505         mulq    8(%rsi)
506         addq    %rcx,%r11
507         adcq    $0,%rdx
508         addq    %rax,%r11
509         movq    %rbp,%rax
510         adcq    $0,%rdx
511         movq    %rdx,%rcx
512
513         mulq    16(%rsi)
514         addq    %rcx,%r12
515         adcq    $0,%rdx
516         addq    %rax,%r12
517         movq    %rbp,%rax
518         adcq    $0,%rdx
519         movq    %rdx,%rcx
520
521         mulq    24(%rsi)
522         addq    %rcx,%r13
523         adcq    $0,%rdx
524         addq    %rax,%r13
525         movq    %r10,%rax
526         adcq    %rdx,%r8
527         adcq    $0,%r9
528
529
530
531         movq    %r10,%rbp
532         shlq    $32,%r10
533         mulq    %r15
534         shrq    $32,%rbp
535         addq    %r10,%r11
536         adcq    %rbp,%r12
537         adcq    %rax,%r13
538         movq    24(%rbx),%rax
539         adcq    %rdx,%r8
540         adcq    $0,%r9
541         xorq    %r10,%r10
542
543
544
545         movq    %rax,%rbp
546         mulq    0(%rsi)
547         addq    %rax,%r11
548         movq    %rbp,%rax
549         adcq    $0,%rdx
550         movq    %rdx,%rcx
551
552         mulq    8(%rsi)
553         addq    %rcx,%r12
554         adcq    $0,%rdx
555         addq    %rax,%r12
556         movq    %rbp,%rax
557         adcq    $0,%rdx
558         movq    %rdx,%rcx
559
560         mulq    16(%rsi)
561         addq    %rcx,%r13
562         adcq    $0,%rdx
563         addq    %rax,%r13
564         movq    %rbp,%rax
565         adcq    $0,%rdx
566         movq    %rdx,%rcx
567
568         mulq    24(%rsi)
569         addq    %rcx,%r8
570         adcq    $0,%rdx
571         addq    %rax,%r8
572         movq    %r11,%rax
573         adcq    %rdx,%r9
574         adcq    $0,%r10
575
576
577
578         movq    %r11,%rbp
579         shlq    $32,%r11
580         mulq    %r15
581         shrq    $32,%rbp
582         addq    %r11,%r12
583         adcq    %rbp,%r13
584         movq    %r12,%rcx
585         adcq    %rax,%r8
586         adcq    %rdx,%r9
587         movq    %r13,%rbp
588         adcq    $0,%r10
589
590
591
592         subq    $-1,%r12
593         movq    %r8,%rbx
594         sbbq    %r14,%r13
595         sbbq    $0,%r8
596         movq    %r9,%rdx
597         sbbq    %r15,%r9
598         sbbq    $0,%r10
599
600         cmovcq  %rcx,%r12
601         cmovcq  %rbp,%r13
602         movq    %r12,0(%rdi)
603         cmovcq  %rbx,%r8
604         movq    %r13,8(%rdi)
605         cmovcq  %rdx,%r9
606         movq    %r8,16(%rdi)
607         movq    %r9,24(%rdi)
608
609         .byte   0xf3,0xc3
610 .size   __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
611
612
613
614
615
616
617
618
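/*
 * ecp_nistz256_sqr_mont: res (%rdi) = a (%rsi)^2*R^-1 mod p, with the same
 * CPU-feature dispatch as ecp_nistz256_mul_mont.  The __ecp_nistz256_sqr_mont*
 * bodies form the off-diagonal products once, double them, add the squares,
 * then run the same four-step reduction as the multiplication.
 */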
619 .globl  ecp_nistz256_sqr_mont
620 .type   ecp_nistz256_sqr_mont,@function
621 .align  32
622 ecp_nistz256_sqr_mont:
623         movl    $0x80100,%ecx
624         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
625         pushq   %rbp
626         pushq   %rbx
627         pushq   %r12
628         pushq   %r13
629         pushq   %r14
630         pushq   %r15
631         cmpl    $0x80100,%ecx
632         je      .Lsqr_montx
633         movq    0(%rsi),%rax
634         movq    8(%rsi),%r14
635         movq    16(%rsi),%r15
636         movq    24(%rsi),%r8
637
638         call    __ecp_nistz256_sqr_montq
639         jmp     .Lsqr_mont_done
640
641 .align  32
642 .Lsqr_montx:
643         movq    0(%rsi),%rdx
644         movq    8(%rsi),%r14
645         movq    16(%rsi),%r15
646         movq    24(%rsi),%r8
647         leaq    -128(%rsi),%rsi
648
649         call    __ecp_nistz256_sqr_montx
650 .Lsqr_mont_done:
651         popq    %r15
652         popq    %r14
653         popq    %r13
654         popq    %r12
655         popq    %rbx
656         popq    %rbp
657         .byte   0xf3,0xc3
658 .size   ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
659
660 .type   __ecp_nistz256_sqr_montq,@function
661 .align  32
662 __ecp_nistz256_sqr_montq:
663         movq    %rax,%r13
664         mulq    %r14
665         movq    %rax,%r9
666         movq    %r15,%rax
667         movq    %rdx,%r10
668
669         mulq    %r13
670         addq    %rax,%r10
671         movq    %r8,%rax
672         adcq    $0,%rdx
673         movq    %rdx,%r11
674
675         mulq    %r13
676         addq    %rax,%r11
677         movq    %r15,%rax
678         adcq    $0,%rdx
679         movq    %rdx,%r12
680
681
682         mulq    %r14
683         addq    %rax,%r11
684         movq    %r8,%rax
685         adcq    $0,%rdx
686         movq    %rdx,%rbp
687
688         mulq    %r14
689         addq    %rax,%r12
690         movq    %r8,%rax
691         adcq    $0,%rdx
692         addq    %rbp,%r12
693         movq    %rdx,%r13
694         adcq    $0,%r13
695
696
697         mulq    %r15
698         xorq    %r15,%r15
699         addq    %rax,%r13
700         movq    0(%rsi),%rax
701         movq    %rdx,%r14
702         adcq    $0,%r14
703
704         addq    %r9,%r9
705         adcq    %r10,%r10
706         adcq    %r11,%r11
707         adcq    %r12,%r12
708         adcq    %r13,%r13
709         adcq    %r14,%r14
710         adcq    $0,%r15
711
712         mulq    %rax
713         movq    %rax,%r8
714         movq    8(%rsi),%rax
715         movq    %rdx,%rcx
716
717         mulq    %rax
718         addq    %rcx,%r9
719         adcq    %rax,%r10
720         movq    16(%rsi),%rax
721         adcq    $0,%rdx
722         movq    %rdx,%rcx
723
724         mulq    %rax
725         addq    %rcx,%r11
726         adcq    %rax,%r12
727         movq    24(%rsi),%rax
728         adcq    $0,%rdx
729         movq    %rdx,%rcx
730
731         mulq    %rax
732         addq    %rcx,%r13
733         adcq    %rax,%r14
734         movq    %r8,%rax
735         adcq    %rdx,%r15
736
737         movq    .Lpoly+8(%rip),%rsi
738         movq    .Lpoly+24(%rip),%rbp
739
740
741
742
743         movq    %r8,%rcx
744         shlq    $32,%r8
745         mulq    %rbp
746         shrq    $32,%rcx
747         addq    %r8,%r9
748         adcq    %rcx,%r10
749         adcq    %rax,%r11
750         movq    %r9,%rax
751         adcq    $0,%rdx
752
753
754
755         movq    %r9,%rcx
756         shlq    $32,%r9
757         movq    %rdx,%r8
758         mulq    %rbp
759         shrq    $32,%rcx
760         addq    %r9,%r10
761         adcq    %rcx,%r11
762         adcq    %rax,%r8
763         movq    %r10,%rax
764         adcq    $0,%rdx
765
766
767
768         movq    %r10,%rcx
769         shlq    $32,%r10
770         movq    %rdx,%r9
771         mulq    %rbp
772         shrq    $32,%rcx
773         addq    %r10,%r11
774         adcq    %rcx,%r8
775         adcq    %rax,%r9
776         movq    %r11,%rax
777         adcq    $0,%rdx
778
779
780
781         movq    %r11,%rcx
782         shlq    $32,%r11
783         movq    %rdx,%r10
784         mulq    %rbp
785         shrq    $32,%rcx
786         addq    %r11,%r8
787         adcq    %rcx,%r9
788         adcq    %rax,%r10
789         adcq    $0,%rdx
790         xorq    %r11,%r11
791
792
793
794         addq    %r8,%r12
795         adcq    %r9,%r13
796         movq    %r12,%r8
797         adcq    %r10,%r14
798         adcq    %rdx,%r15
799         movq    %r13,%r9
800         adcq    $0,%r11
801
802         subq    $-1,%r12
803         movq    %r14,%r10
804         sbbq    %rsi,%r13
805         sbbq    $0,%r14
806         movq    %r15,%rcx
807         sbbq    %rbp,%r15
808         sbbq    $0,%r11
809
810         cmovcq  %r8,%r12
811         cmovcq  %r9,%r13
812         movq    %r12,0(%rdi)
813         cmovcq  %r10,%r14
814         movq    %r13,8(%rdi)
815         cmovcq  %rcx,%r15
816         movq    %r14,16(%rdi)
817         movq    %r15,24(%rdi)
818
819         .byte   0xf3,0xc3
820 .size   __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
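/*
 * BMI2/ADX Montgomery multiplication: same computation as
 * __ecp_nistz256_mul_montq, but mulx feeds two carry chains kept apart with
 * adcx/adox.  On entry %rdx = b[0], %rbx = b, %r9..%r12 = a[0..3], and %rsi
 * points 128 bytes below a, so the a limbs are read at 0+128(%rsi) and up.
 */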
821 .type   __ecp_nistz256_mul_montx,@function
822 .align  32
823 __ecp_nistz256_mul_montx:
824
825
826         mulxq   %r9,%r8,%r9
827         mulxq   %r10,%rcx,%r10
828         movq    $32,%r14
829         xorq    %r13,%r13
830         mulxq   %r11,%rbp,%r11
831         movq    .Lpoly+24(%rip),%r15
832         adcq    %rcx,%r9
833         mulxq   %r12,%rcx,%r12
834         movq    %r8,%rdx
835         adcq    %rbp,%r10
836         shlxq   %r14,%r8,%rbp
837         adcq    %rcx,%r11
838         shrxq   %r14,%r8,%rcx
839         adcq    $0,%r12
840
841
842
843         addq    %rbp,%r9
844         adcq    %rcx,%r10
845
846         mulxq   %r15,%rcx,%rbp
847         movq    8(%rbx),%rdx
848         adcq    %rcx,%r11
849         adcq    %rbp,%r12
850         adcq    $0,%r13
851         xorq    %r8,%r8
852
853
854
855         mulxq   0+128(%rsi),%rcx,%rbp
856         adcxq   %rcx,%r9
857         adoxq   %rbp,%r10
858
859         mulxq   8+128(%rsi),%rcx,%rbp
860         adcxq   %rcx,%r10
861         adoxq   %rbp,%r11
862
863         mulxq   16+128(%rsi),%rcx,%rbp
864         adcxq   %rcx,%r11
865         adoxq   %rbp,%r12
866
867         mulxq   24+128(%rsi),%rcx,%rbp
868         movq    %r9,%rdx
869         adcxq   %rcx,%r12
870         shlxq   %r14,%r9,%rcx
871         adoxq   %rbp,%r13
872         shrxq   %r14,%r9,%rbp
873
874         adcxq   %r8,%r13
875         adoxq   %r8,%r8
876         adcq    $0,%r8
877
878
879
880         addq    %rcx,%r10
881         adcq    %rbp,%r11
882
883         mulxq   %r15,%rcx,%rbp
884         movq    16(%rbx),%rdx
885         adcq    %rcx,%r12
886         adcq    %rbp,%r13
887         adcq    $0,%r8
888         xorq    %r9,%r9
889
890
891
892         mulxq   0+128(%rsi),%rcx,%rbp
893         adcxq   %rcx,%r10
894         adoxq   %rbp,%r11
895
896         mulxq   8+128(%rsi),%rcx,%rbp
897         adcxq   %rcx,%r11
898         adoxq   %rbp,%r12
899
900         mulxq   16+128(%rsi),%rcx,%rbp
901         adcxq   %rcx,%r12
902         adoxq   %rbp,%r13
903
904         mulxq   24+128(%rsi),%rcx,%rbp
905         movq    %r10,%rdx
906         adcxq   %rcx,%r13
907         shlxq   %r14,%r10,%rcx
908         adoxq   %rbp,%r8
909         shrxq   %r14,%r10,%rbp
910
911         adcxq   %r9,%r8
912         adoxq   %r9,%r9
913         adcq    $0,%r9
914
915
916
917         addq    %rcx,%r11
918         adcq    %rbp,%r12
919
920         mulxq   %r15,%rcx,%rbp
921         movq    24(%rbx),%rdx
922         adcq    %rcx,%r13
923         adcq    %rbp,%r8
924         adcq    $0,%r9
925         xorq    %r10,%r10
926
927
928
929         mulxq   0+128(%rsi),%rcx,%rbp
930         adcxq   %rcx,%r11
931         adoxq   %rbp,%r12
932
933         mulxq   8+128(%rsi),%rcx,%rbp
934         adcxq   %rcx,%r12
935         adoxq   %rbp,%r13
936
937         mulxq   16+128(%rsi),%rcx,%rbp
938         adcxq   %rcx,%r13
939         adoxq   %rbp,%r8
940
941         mulxq   24+128(%rsi),%rcx,%rbp
942         movq    %r11,%rdx
943         adcxq   %rcx,%r8
944         shlxq   %r14,%r11,%rcx
945         adoxq   %rbp,%r9
946         shrxq   %r14,%r11,%rbp
947
948         adcxq   %r10,%r9
949         adoxq   %r10,%r10
950         adcq    $0,%r10
951
952
953
954         addq    %rcx,%r12
955         adcq    %rbp,%r13
956
957         mulxq   %r15,%rcx,%rbp
958         movq    %r12,%rbx
959         movq    .Lpoly+8(%rip),%r14
960         adcq    %rcx,%r8
961         movq    %r13,%rdx
962         adcq    %rbp,%r9
963         adcq    $0,%r10
964
965
966
967         xorl    %eax,%eax
968         movq    %r8,%rcx
969         sbbq    $-1,%r12
970         sbbq    %r14,%r13
971         sbbq    $0,%r8
972         movq    %r9,%rbp
973         sbbq    %r15,%r9
974         sbbq    $0,%r10
975
976         cmovcq  %rbx,%r12
977         cmovcq  %rdx,%r13
978         movq    %r12,0(%rdi)
979         cmovcq  %rcx,%r8
980         movq    %r13,8(%rdi)
981         cmovcq  %rbp,%r9
982         movq    %r8,16(%rdi)
983         movq    %r9,24(%rdi)
984
985         .byte   0xf3,0xc3
986 .size   __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx
987
988 .type   __ecp_nistz256_sqr_montx,@function
989 .align  32
990 __ecp_nistz256_sqr_montx:
991         mulxq   %r14,%r9,%r10
992         mulxq   %r15,%rcx,%r11
993         xorl    %eax,%eax
994         adcq    %rcx,%r10
995         mulxq   %r8,%rbp,%r12
996         movq    %r14,%rdx
997         adcq    %rbp,%r11
998         adcq    $0,%r12
999         xorq    %r13,%r13
1000
1001
1002         mulxq   %r15,%rcx,%rbp
1003         adcxq   %rcx,%r11
1004         adoxq   %rbp,%r12
1005
1006         mulxq   %r8,%rcx,%rbp
1007         movq    %r15,%rdx
1008         adcxq   %rcx,%r12
1009         adoxq   %rbp,%r13
1010         adcq    $0,%r13
1011
1012
1013         mulxq   %r8,%rcx,%r14
1014         movq    0+128(%rsi),%rdx
1015         xorq    %r15,%r15
1016         adcxq   %r9,%r9
1017         adoxq   %rcx,%r13
1018         adcxq   %r10,%r10
1019         adoxq   %r15,%r14
1020
1021         mulxq   %rdx,%r8,%rbp
1022         movq    8+128(%rsi),%rdx
1023         adcxq   %r11,%r11
1024         adoxq   %rbp,%r9
1025         adcxq   %r12,%r12
1026         mulxq   %rdx,%rcx,%rax
1027         movq    16+128(%rsi),%rdx
1028         adcxq   %r13,%r13
1029         adoxq   %rcx,%r10
1030         adcxq   %r14,%r14
1031 .byte   0x67
1032         mulxq   %rdx,%rcx,%rbp
1033         movq    24+128(%rsi),%rdx
1034         adoxq   %rax,%r11
1035         adcxq   %r15,%r15
1036         adoxq   %rcx,%r12
1037         movq    $32,%rsi
1038         adoxq   %rbp,%r13
1039 .byte   0x67,0x67
1040         mulxq   %rdx,%rcx,%rax
1041         movq    %r8,%rdx
1042         adoxq   %rcx,%r14
1043         shlxq   %rsi,%r8,%rcx
1044         adoxq   %rax,%r15
1045         shrxq   %rsi,%r8,%rax
1046         movq    .Lpoly+24(%rip),%rbp
1047
1048
1049         addq    %rcx,%r9
1050         adcq    %rax,%r10
1051
1052         mulxq   %rbp,%rcx,%r8
1053         movq    %r9,%rdx
1054         adcq    %rcx,%r11
1055         shlxq   %rsi,%r9,%rcx
1056         adcq    $0,%r8
1057         shrxq   %rsi,%r9,%rax
1058
1059
1060         addq    %rcx,%r10
1061         adcq    %rax,%r11
1062
1063         mulxq   %rbp,%rcx,%r9
1064         movq    %r10,%rdx
1065         adcq    %rcx,%r8
1066         shlxq   %rsi,%r10,%rcx
1067         adcq    $0,%r9
1068         shrxq   %rsi,%r10,%rax
1069
1070
1071         addq    %rcx,%r11
1072         adcq    %rax,%r8
1073
1074         mulxq   %rbp,%rcx,%r10
1075         movq    %r11,%rdx
1076         adcq    %rcx,%r9
1077         shlxq   %rsi,%r11,%rcx
1078         adcq    $0,%r10
1079         shrxq   %rsi,%r11,%rax
1080
1081
1082         addq    %rcx,%r8
1083         adcq    %rax,%r9
1084
1085         mulxq   %rbp,%rcx,%r11
1086         adcq    %rcx,%r10
1087         adcq    $0,%r11
1088
1089         xorq    %rdx,%rdx
1090         adcq    %r8,%r12
1091         movq    .Lpoly+8(%rip),%rsi
1092         adcq    %r9,%r13
1093         movq    %r12,%r8
1094         adcq    %r10,%r14
1095         adcq    %r11,%r15
1096         movq    %r13,%r9
1097         adcq    $0,%rdx
1098
1099         xorl    %eax,%eax
1100         sbbq    $-1,%r12
1101         movq    %r14,%r10
1102         sbbq    %rsi,%r13
1103         sbbq    $0,%r14
1104         movq    %r15,%r11
1105         sbbq    %rbp,%r15
1106         sbbq    $0,%rdx
1107
1108         cmovcq  %r8,%r12
1109         cmovcq  %r9,%r13
1110         movq    %r12,0(%rdi)
1111         cmovcq  %r10,%r14
1112         movq    %r13,8(%rdi)
1113         cmovcq  %r11,%r15
1114         movq    %r14,16(%rdi)
1115         movq    %r15,24(%rdi)
1116
1117         .byte   0xf3,0xc3
1118 .size   __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
1119
1120
1121
1122
1123
1124
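/*
 * ecp_nistz256_from_mont: res (%rdi) = a (%rsi)*R^-1 mod p, i.e. conversion
 * out of Montgomery form.  This is a Montgomery multiplication by 1, so only
 * the four reduction steps and the final conditional subtraction remain.
 */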
1125 .globl  ecp_nistz256_from_mont
1126 .type   ecp_nistz256_from_mont,@function
1127 .align  32
1128 ecp_nistz256_from_mont:
1129         pushq   %r12
1130         pushq   %r13
1131
1132         movq    0(%rsi),%rax
1133         movq    .Lpoly+24(%rip),%r13
1134         movq    8(%rsi),%r9
1135         movq    16(%rsi),%r10
1136         movq    24(%rsi),%r11
1137         movq    %rax,%r8
1138         movq    .Lpoly+8(%rip),%r12
1139
1140
1141
1142         movq    %rax,%rcx
1143         shlq    $32,%r8
1144         mulq    %r13
1145         shrq    $32,%rcx
1146         addq    %r8,%r9
1147         adcq    %rcx,%r10
1148         adcq    %rax,%r11
1149         movq    %r9,%rax
1150         adcq    $0,%rdx
1151
1152
1153
1154         movq    %r9,%rcx
1155         shlq    $32,%r9
1156         movq    %rdx,%r8
1157         mulq    %r13
1158         shrq    $32,%rcx
1159         addq    %r9,%r10
1160         adcq    %rcx,%r11
1161         adcq    %rax,%r8
1162         movq    %r10,%rax
1163         adcq    $0,%rdx
1164
1165
1166
1167         movq    %r10,%rcx
1168         shlq    $32,%r10
1169         movq    %rdx,%r9
1170         mulq    %r13
1171         shrq    $32,%rcx
1172         addq    %r10,%r11
1173         adcq    %rcx,%r8
1174         adcq    %rax,%r9
1175         movq    %r11,%rax
1176         adcq    $0,%rdx
1177
1178
1179
1180         movq    %r11,%rcx
1181         shlq    $32,%r11
1182         movq    %rdx,%r10
1183         mulq    %r13
1184         shrq    $32,%rcx
1185         addq    %r11,%r8
1186         adcq    %rcx,%r9
1187         movq    %r8,%rcx
1188         adcq    %rax,%r10
1189         movq    %r9,%rsi
1190         adcq    $0,%rdx
1191
1192
1193
1194         subq    $-1,%r8
1195         movq    %r10,%rax
1196         sbbq    %r12,%r9
1197         sbbq    $0,%r10
1198         movq    %rdx,%r11
1199         sbbq    %r13,%rdx
1200         sbbq    %r13,%r13
1201
1202         cmovnzq %rcx,%r8
1203         cmovnzq %rsi,%r9
1204         movq    %r8,0(%rdi)
1205         cmovnzq %rax,%r10
1206         movq    %r9,8(%rdi)
1207         cmovzq  %rdx,%r11
1208         movq    %r10,16(%rdi)
1209         movq    %r11,24(%rdi)
1210
1211         popq    %r13
1212         popq    %r12
1213         .byte   0xf3,0xc3
1214 .size   ecp_nistz256_from_mont,.-ecp_nistz256_from_mont
1215
1216
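/*
 * ecp_nistz256_select_w5: constant-time gather from the window-5 point
 * table.  Copies entry 'index' (%rdx, counted from 1; 0 yields all zeros)
 * of the 16-entry table at %rsi (96 bytes per entry: X, Y, Z in Montgomery
 * form) to %rdi.  Every entry is read and blended with pcmpeqd/pand/por,
 * so the memory access pattern is independent of the secret index.  Jumps
 * to the AVX2 variant when OPENSSL_ia32cap_P+8 has the AVX2 bit ($32) set.
 */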
1217 .globl  ecp_nistz256_select_w5
1218 .type   ecp_nistz256_select_w5,@function
1219 .align  32
1220 ecp_nistz256_select_w5:
1221         movl    OPENSSL_ia32cap_P+8(%rip),%eax
1222         testl   $32,%eax
1223         jnz     .Lavx2_select_w5
1224         movdqa  .LOne(%rip),%xmm0
1225         movd    %edx,%xmm1
1226
1227         pxor    %xmm2,%xmm2
1228         pxor    %xmm3,%xmm3
1229         pxor    %xmm4,%xmm4
1230         pxor    %xmm5,%xmm5
1231         pxor    %xmm6,%xmm6
1232         pxor    %xmm7,%xmm7
1233
1234         movdqa  %xmm0,%xmm8
1235         pshufd  $0,%xmm1,%xmm1
1236
1237         movq    $16,%rax
1238 .Lselect_loop_sse_w5:
1239
1240         movdqa  %xmm8,%xmm15
1241         paddd   %xmm0,%xmm8
1242         pcmpeqd %xmm1,%xmm15
1243
1244         movdqa  0(%rsi),%xmm9
1245         movdqa  16(%rsi),%xmm10
1246         movdqa  32(%rsi),%xmm11
1247         movdqa  48(%rsi),%xmm12
1248         movdqa  64(%rsi),%xmm13
1249         movdqa  80(%rsi),%xmm14
1250         leaq    96(%rsi),%rsi
1251
1252         pand    %xmm15,%xmm9
1253         pand    %xmm15,%xmm10
1254         por     %xmm9,%xmm2
1255         pand    %xmm15,%xmm11
1256         por     %xmm10,%xmm3
1257         pand    %xmm15,%xmm12
1258         por     %xmm11,%xmm4
1259         pand    %xmm15,%xmm13
1260         por     %xmm12,%xmm5
1261         pand    %xmm15,%xmm14
1262         por     %xmm13,%xmm6
1263         por     %xmm14,%xmm7
1264
1265         decq    %rax
1266         jnz     .Lselect_loop_sse_w5
1267
1268         movdqu  %xmm2,0(%rdi)
1269         movdqu  %xmm3,16(%rdi)
1270         movdqu  %xmm4,32(%rdi)
1271         movdqu  %xmm5,48(%rdi)
1272         movdqu  %xmm6,64(%rdi)
1273         movdqu  %xmm7,80(%rdi)
1274         .byte   0xf3,0xc3
1275 .size   ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
1276
1277
1278
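/*
 * ecp_nistz256_select_w7: same constant-time gather for the window-7 table
 * of 64 affine entries (64 bytes each: X and Y in Montgomery form).
 */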
1279 .globl  ecp_nistz256_select_w7
1280 .type   ecp_nistz256_select_w7,@function
1281 .align  32
1282 ecp_nistz256_select_w7:
1283         movl    OPENSSL_ia32cap_P+8(%rip),%eax
1284         testl   $32,%eax
1285         jnz     .Lavx2_select_w7
1286         movdqa  .LOne(%rip),%xmm8
1287         movd    %edx,%xmm1
1288
1289         pxor    %xmm2,%xmm2
1290         pxor    %xmm3,%xmm3
1291         pxor    %xmm4,%xmm4
1292         pxor    %xmm5,%xmm5
1293
1294         movdqa  %xmm8,%xmm0
1295         pshufd  $0,%xmm1,%xmm1
1296         movq    $64,%rax
1297
1298 .Lselect_loop_sse_w7:
1299         movdqa  %xmm8,%xmm15
1300         paddd   %xmm0,%xmm8
1301         movdqa  0(%rsi),%xmm9
1302         movdqa  16(%rsi),%xmm10
1303         pcmpeqd %xmm1,%xmm15
1304         movdqa  32(%rsi),%xmm11
1305         movdqa  48(%rsi),%xmm12
1306         leaq    64(%rsi),%rsi
1307
1308         pand    %xmm15,%xmm9
1309         pand    %xmm15,%xmm10
1310         por     %xmm9,%xmm2
1311         pand    %xmm15,%xmm11
1312         por     %xmm10,%xmm3
1313         pand    %xmm15,%xmm12
1314         por     %xmm11,%xmm4
1315         prefetcht0      255(%rsi)
1316         por     %xmm12,%xmm5
1317
1318         decq    %rax
1319         jnz     .Lselect_loop_sse_w7
1320
1321         movdqu  %xmm2,0(%rdi)
1322         movdqu  %xmm3,16(%rdi)
1323         movdqu  %xmm4,32(%rdi)
1324         movdqu  %xmm5,48(%rdi)
1325         .byte   0xf3,0xc3
1326 .size   ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
1327
1328
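/*
 * AVX2 versions of the two selectors: the w5 loop blends two 96-byte
 * entries per iteration (8 iterations), the w7 loop three 64-byte entries
 * per iteration (21 iterations plus one tail entry).
 */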
1329 .type   ecp_nistz256_avx2_select_w5,@function
1330 .align  32
1331 ecp_nistz256_avx2_select_w5:
1332 .Lavx2_select_w5:
1333         vzeroupper
1334         vmovdqa .LTwo(%rip),%ymm0
1335
1336         vpxor   %ymm2,%ymm2,%ymm2
1337         vpxor   %ymm3,%ymm3,%ymm3
1338         vpxor   %ymm4,%ymm4,%ymm4
1339
1340         vmovdqa .LOne(%rip),%ymm5
1341         vmovdqa .LTwo(%rip),%ymm10
1342
1343         vmovd   %edx,%xmm1
1344         vpermd  %ymm1,%ymm2,%ymm1
1345
1346         movq    $8,%rax
1347 .Lselect_loop_avx2_w5:
1348
1349         vmovdqa 0(%rsi),%ymm6
1350         vmovdqa 32(%rsi),%ymm7
1351         vmovdqa 64(%rsi),%ymm8
1352
1353         vmovdqa 96(%rsi),%ymm11
1354         vmovdqa 128(%rsi),%ymm12
1355         vmovdqa 160(%rsi),%ymm13
1356
1357         vpcmpeqd        %ymm1,%ymm5,%ymm9
1358         vpcmpeqd        %ymm1,%ymm10,%ymm14
1359
1360         vpaddd  %ymm0,%ymm5,%ymm5
1361         vpaddd  %ymm0,%ymm10,%ymm10
1362         leaq    192(%rsi),%rsi
1363
1364         vpand   %ymm9,%ymm6,%ymm6
1365         vpand   %ymm9,%ymm7,%ymm7
1366         vpand   %ymm9,%ymm8,%ymm8
1367         vpand   %ymm14,%ymm11,%ymm11
1368         vpand   %ymm14,%ymm12,%ymm12
1369         vpand   %ymm14,%ymm13,%ymm13
1370
1371         vpxor   %ymm6,%ymm2,%ymm2
1372         vpxor   %ymm7,%ymm3,%ymm3
1373         vpxor   %ymm8,%ymm4,%ymm4
1374         vpxor   %ymm11,%ymm2,%ymm2
1375         vpxor   %ymm12,%ymm3,%ymm3
1376         vpxor   %ymm13,%ymm4,%ymm4
1377
1378         decq    %rax
1379         jnz     .Lselect_loop_avx2_w5
1380
1381         vmovdqu %ymm2,0(%rdi)
1382         vmovdqu %ymm3,32(%rdi)
1383         vmovdqu %ymm4,64(%rdi)
1384         vzeroupper
1385         .byte   0xf3,0xc3
1386 .size   ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5
1387
1388
1389
1390 .globl  ecp_nistz256_avx2_select_w7
1391 .type   ecp_nistz256_avx2_select_w7,@function
1392 .align  32
1393 ecp_nistz256_avx2_select_w7:
1394 .Lavx2_select_w7:
1395         vzeroupper
1396         vmovdqa .LThree(%rip),%ymm0
1397
1398         vpxor   %ymm2,%ymm2,%ymm2
1399         vpxor   %ymm3,%ymm3,%ymm3
1400
1401         vmovdqa .LOne(%rip),%ymm4
1402         vmovdqa .LTwo(%rip),%ymm8
1403         vmovdqa .LThree(%rip),%ymm12
1404
1405         vmovd   %edx,%xmm1
1406         vpermd  %ymm1,%ymm2,%ymm1
1407
1408
1409         movq    $21,%rax
1410 .Lselect_loop_avx2_w7:
1411
1412         vmovdqa 0(%rsi),%ymm5
1413         vmovdqa 32(%rsi),%ymm6
1414
1415         vmovdqa 64(%rsi),%ymm9
1416         vmovdqa 96(%rsi),%ymm10
1417
1418         vmovdqa 128(%rsi),%ymm13
1419         vmovdqa 160(%rsi),%ymm14
1420
1421         vpcmpeqd        %ymm1,%ymm4,%ymm7
1422         vpcmpeqd        %ymm1,%ymm8,%ymm11
1423         vpcmpeqd        %ymm1,%ymm12,%ymm15
1424
1425         vpaddd  %ymm0,%ymm4,%ymm4
1426         vpaddd  %ymm0,%ymm8,%ymm8
1427         vpaddd  %ymm0,%ymm12,%ymm12
1428         leaq    192(%rsi),%rsi
1429
1430         vpand   %ymm7,%ymm5,%ymm5
1431         vpand   %ymm7,%ymm6,%ymm6
1432         vpand   %ymm11,%ymm9,%ymm9
1433         vpand   %ymm11,%ymm10,%ymm10
1434         vpand   %ymm15,%ymm13,%ymm13
1435         vpand   %ymm15,%ymm14,%ymm14
1436
1437         vpxor   %ymm5,%ymm2,%ymm2
1438         vpxor   %ymm6,%ymm3,%ymm3
1439         vpxor   %ymm9,%ymm2,%ymm2
1440         vpxor   %ymm10,%ymm3,%ymm3
1441         vpxor   %ymm13,%ymm2,%ymm2
1442         vpxor   %ymm14,%ymm3,%ymm3
1443
1444         decq    %rax
1445         jnz     .Lselect_loop_avx2_w7
1446
1447
1448         vmovdqa 0(%rsi),%ymm5
1449         vmovdqa 32(%rsi),%ymm6
1450
1451         vpcmpeqd        %ymm1,%ymm4,%ymm7
1452
1453         vpand   %ymm7,%ymm5,%ymm5
1454         vpand   %ymm7,%ymm6,%ymm6
1455
1456         vpxor   %ymm5,%ymm2,%ymm2
1457         vpxor   %ymm6,%ymm3,%ymm3
1458
1459         vmovdqu %ymm2,0(%rdi)
1460         vmovdqu %ymm3,32(%rdi)
1461         vzeroupper
1462         .byte   0xf3,0xc3
1463 .size   ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
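/*
 * Internal helpers shared by the point routines below.  They work on a
 * value held in %r12,%r13,%r8,%r9, with %r14/%r15 caching .Lpoly+8 and
 * .Lpoly+24: __ecp_nistz256_add_toq adds the four quadwords at (%rbx),
 * __ecp_nistz256_sub_fromq subtracts them, __ecp_nistz256_subq subtracts
 * %r12..%r9 from %rax,%rbp,%rcx,%r10, and __ecp_nistz256_mul_by_2q doubles,
 * all mod p.  Results are returned in %r12,%r13,%r8,%r9 and, except for
 * __ecp_nistz256_subq, also stored at (%rdi).
 */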
1464 .type   __ecp_nistz256_add_toq,@function
1465 .align  32
1466 __ecp_nistz256_add_toq:
1467         xorq    %r11,%r11
1468         addq    0(%rbx),%r12
1469         adcq    8(%rbx),%r13
1470         movq    %r12,%rax
1471         adcq    16(%rbx),%r8
1472         adcq    24(%rbx),%r9
1473         movq    %r13,%rbp
1474         adcq    $0,%r11
1475
1476         subq    $-1,%r12
1477         movq    %r8,%rcx
1478         sbbq    %r14,%r13
1479         sbbq    $0,%r8
1480         movq    %r9,%r10
1481         sbbq    %r15,%r9
1482         sbbq    $0,%r11
1483
1484         cmovcq  %rax,%r12
1485         cmovcq  %rbp,%r13
1486         movq    %r12,0(%rdi)
1487         cmovcq  %rcx,%r8
1488         movq    %r13,8(%rdi)
1489         cmovcq  %r10,%r9
1490         movq    %r8,16(%rdi)
1491         movq    %r9,24(%rdi)
1492
1493         .byte   0xf3,0xc3
1494 .size   __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
1495
1496 .type   __ecp_nistz256_sub_fromq,@function
1497 .align  32
1498 __ecp_nistz256_sub_fromq:
1499         subq    0(%rbx),%r12
1500         sbbq    8(%rbx),%r13
1501         movq    %r12,%rax
1502         sbbq    16(%rbx),%r8
1503         sbbq    24(%rbx),%r9
1504         movq    %r13,%rbp
1505         sbbq    %r11,%r11
1506
1507         addq    $-1,%r12
1508         movq    %r8,%rcx
1509         adcq    %r14,%r13
1510         adcq    $0,%r8
1511         movq    %r9,%r10
1512         adcq    %r15,%r9
1513         testq   %r11,%r11
1514
1515         cmovzq  %rax,%r12
1516         cmovzq  %rbp,%r13
1517         movq    %r12,0(%rdi)
1518         cmovzq  %rcx,%r8
1519         movq    %r13,8(%rdi)
1520         cmovzq  %r10,%r9
1521         movq    %r8,16(%rdi)
1522         movq    %r9,24(%rdi)
1523
1524         .byte   0xf3,0xc3
1525 .size   __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
1526
1527 .type   __ecp_nistz256_subq,@function
1528 .align  32
1529 __ecp_nistz256_subq:
1530         subq    %r12,%rax
1531         sbbq    %r13,%rbp
1532         movq    %rax,%r12
1533         sbbq    %r8,%rcx
1534         sbbq    %r9,%r10
1535         movq    %rbp,%r13
1536         sbbq    %r11,%r11
1537
1538         addq    $-1,%rax
1539         movq    %rcx,%r8
1540         adcq    %r14,%rbp
1541         adcq    $0,%rcx
1542         movq    %r10,%r9
1543         adcq    %r15,%r10
1544         testq   %r11,%r11
1545
1546         cmovnzq %rax,%r12
1547         cmovnzq %rbp,%r13
1548         cmovnzq %rcx,%r8
1549         cmovnzq %r10,%r9
1550
1551         .byte   0xf3,0xc3
1552 .size   __ecp_nistz256_subq,.-__ecp_nistz256_subq
1553
1554 .type   __ecp_nistz256_mul_by_2q,@function
1555 .align  32
1556 __ecp_nistz256_mul_by_2q:
1557         xorq    %r11,%r11
1558         addq    %r12,%r12
1559         adcq    %r13,%r13
1560         movq    %r12,%rax
1561         adcq    %r8,%r8
1562         adcq    %r9,%r9
1563         movq    %r13,%rbp
1564         adcq    $0,%r11
1565
1566         subq    $-1,%r12
1567         movq    %r8,%rcx
1568         sbbq    %r14,%r13
1569         sbbq    $0,%r8
1570         movq    %r9,%r10
1571         sbbq    %r15,%r9
1572         sbbq    $0,%r11
1573
1574         cmovcq  %rax,%r12
1575         cmovcq  %rbp,%r13
1576         movq    %r12,0(%rdi)
1577         cmovcq  %rcx,%r8
1578         movq    %r13,8(%rdi)
1579         cmovcq  %r10,%r9
1580         movq    %r8,16(%rdi)
1581         movq    %r9,24(%rdi)
1582
1583         .byte   0xf3,0xc3
1584 .size   __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
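/*
 * ecp_nistz256_point_double: Jacobian point doubling, out (%rdi) =
 * 2*in (%rsi).  Points are 96 bytes: X, Y, Z in Montgomery form.  A
 * 160-byte stack frame holds the intermediates; the BMI2/ADX build goes
 * through .Lpoint_doublex.  .Lpoint_double_shortcutq is entered from
 * ecp_nistz256_point_add when the two addends turn out to be equal.
 */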
1585 .globl  ecp_nistz256_point_double
1586 .type   ecp_nistz256_point_double,@function
1587 .align  32
1588 ecp_nistz256_point_double:
1589         movl    $0x80100,%ecx
1590         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
1591         cmpl    $0x80100,%ecx
1592         je      .Lpoint_doublex
1593         pushq   %rbp
1594         pushq   %rbx
1595         pushq   %r12
1596         pushq   %r13
1597         pushq   %r14
1598         pushq   %r15
1599         subq    $160+8,%rsp
1600
1601 .Lpoint_double_shortcutq:
1602         movdqu  0(%rsi),%xmm0
1603         movq    %rsi,%rbx
1604         movdqu  16(%rsi),%xmm1
1605         movq    32+0(%rsi),%r12
1606         movq    32+8(%rsi),%r13
1607         movq    32+16(%rsi),%r8
1608         movq    32+24(%rsi),%r9
1609         movq    .Lpoly+8(%rip),%r14
1610         movq    .Lpoly+24(%rip),%r15
1611         movdqa  %xmm0,96(%rsp)
1612         movdqa  %xmm1,96+16(%rsp)
1613         leaq    32(%rdi),%r10
1614         leaq    64(%rdi),%r11
1615 .byte   102,72,15,110,199
1616 .byte   102,73,15,110,202
1617 .byte   102,73,15,110,211
1618
1619         leaq    0(%rsp),%rdi
1620         call    __ecp_nistz256_mul_by_2q
1621
1622         movq    64+0(%rsi),%rax
1623         movq    64+8(%rsi),%r14
1624         movq    64+16(%rsi),%r15
1625         movq    64+24(%rsi),%r8
1626         leaq    64-0(%rsi),%rsi
1627         leaq    64(%rsp),%rdi
1628         call    __ecp_nistz256_sqr_montq
1629
1630         movq    0+0(%rsp),%rax
1631         movq    8+0(%rsp),%r14
1632         leaq    0+0(%rsp),%rsi
1633         movq    16+0(%rsp),%r15
1634         movq    24+0(%rsp),%r8
1635         leaq    0(%rsp),%rdi
1636         call    __ecp_nistz256_sqr_montq
1637
1638         movq    32(%rbx),%rax
1639         movq    64+0(%rbx),%r9
1640         movq    64+8(%rbx),%r10
1641         movq    64+16(%rbx),%r11
1642         movq    64+24(%rbx),%r12
1643         leaq    64-0(%rbx),%rsi
1644         leaq    32(%rbx),%rbx
1645 .byte   102,72,15,126,215
1646         call    __ecp_nistz256_mul_montq
1647         call    __ecp_nistz256_mul_by_2q
1648
1649         movq    96+0(%rsp),%r12
1650         movq    96+8(%rsp),%r13
1651         leaq    64(%rsp),%rbx
1652         movq    96+16(%rsp),%r8
1653         movq    96+24(%rsp),%r9
1654         leaq    32(%rsp),%rdi
1655         call    __ecp_nistz256_add_toq
1656
1657         movq    96+0(%rsp),%r12
1658         movq    96+8(%rsp),%r13
1659         leaq    64(%rsp),%rbx
1660         movq    96+16(%rsp),%r8
1661         movq    96+24(%rsp),%r9
1662         leaq    64(%rsp),%rdi
1663         call    __ecp_nistz256_sub_fromq
1664
1665         movq    0+0(%rsp),%rax
1666         movq    8+0(%rsp),%r14
1667         leaq    0+0(%rsp),%rsi
1668         movq    16+0(%rsp),%r15
1669         movq    24+0(%rsp),%r8
1670 .byte   102,72,15,126,207
1671         call    __ecp_nistz256_sqr_montq
1672         xorq    %r9,%r9
1673         movq    %r12,%rax
1674         addq    $-1,%r12
1675         movq    %r13,%r10
1676         adcq    %rsi,%r13
1677         movq    %r14,%rcx
1678         adcq    $0,%r14
1679         movq    %r15,%r8
1680         adcq    %rbp,%r15
1681         adcq    $0,%r9
1682         xorq    %rsi,%rsi
1683         testq   $1,%rax
1684
1685         cmovzq  %rax,%r12
1686         cmovzq  %r10,%r13
1687         cmovzq  %rcx,%r14
1688         cmovzq  %r8,%r15
1689         cmovzq  %rsi,%r9
1690
1691         movq    %r13,%rax
1692         shrq    $1,%r12
1693         shlq    $63,%rax
1694         movq    %r14,%r10
1695         shrq    $1,%r13
1696         orq     %rax,%r12
1697         shlq    $63,%r10
1698         movq    %r15,%rcx
1699         shrq    $1,%r14
1700         orq     %r10,%r13
1701         shlq    $63,%rcx
1702         movq    %r12,0(%rdi)
1703         shrq    $1,%r15
1704         movq    %r13,8(%rdi)
1705         shlq    $63,%r9
1706         orq     %rcx,%r14
1707         orq     %r9,%r15
1708         movq    %r14,16(%rdi)
1709         movq    %r15,24(%rdi)
1710         movq    64(%rsp),%rax
1711         leaq    64(%rsp),%rbx
1712         movq    0+32(%rsp),%r9
1713         movq    8+32(%rsp),%r10
1714         leaq    0+32(%rsp),%rsi
1715         movq    16+32(%rsp),%r11
1716         movq    24+32(%rsp),%r12
1717         leaq    32(%rsp),%rdi
1718         call    __ecp_nistz256_mul_montq
1719
1720         leaq    128(%rsp),%rdi
1721         call    __ecp_nistz256_mul_by_2q
1722
1723         leaq    32(%rsp),%rbx
1724         leaq    32(%rsp),%rdi
1725         call    __ecp_nistz256_add_toq
1726
1727         movq    96(%rsp),%rax
1728         leaq    96(%rsp),%rbx
1729         movq    0+0(%rsp),%r9
1730         movq    8+0(%rsp),%r10
1731         leaq    0+0(%rsp),%rsi
1732         movq    16+0(%rsp),%r11
1733         movq    24+0(%rsp),%r12
1734         leaq    0(%rsp),%rdi
1735         call    __ecp_nistz256_mul_montq
1736
1737         leaq    128(%rsp),%rdi
1738         call    __ecp_nistz256_mul_by_2q
1739
1740         movq    0+32(%rsp),%rax
1741         movq    8+32(%rsp),%r14
1742         leaq    0+32(%rsp),%rsi
1743         movq    16+32(%rsp),%r15
1744         movq    24+32(%rsp),%r8
1745 .byte   102,72,15,126,199
1746         call    __ecp_nistz256_sqr_montq
1747
1748         leaq    128(%rsp),%rbx
1749         movq    %r14,%r8
1750         movq    %r15,%r9
1751         movq    %rsi,%r14
1752         movq    %rbp,%r15
1753         call    __ecp_nistz256_sub_fromq
1754
1755         movq    0+0(%rsp),%rax
1756         movq    0+8(%rsp),%rbp
1757         movq    0+16(%rsp),%rcx
1758         movq    0+24(%rsp),%r10
1759         leaq    0(%rsp),%rdi
1760         call    __ecp_nistz256_subq
1761
1762         movq    32(%rsp),%rax
1763         leaq    32(%rsp),%rbx
1764         movq    %r12,%r14
1765         xorl    %ecx,%ecx
1766         movq    %r12,0+0(%rsp)
1767         movq    %r13,%r10
1768         movq    %r13,0+8(%rsp)
1769         cmovzq  %r8,%r11
1770         movq    %r8,0+16(%rsp)
1771         leaq    0-0(%rsp),%rsi
1772         cmovzq  %r9,%r12
1773         movq    %r9,0+24(%rsp)
1774         movq    %r14,%r9
1775         leaq    0(%rsp),%rdi
1776         call    __ecp_nistz256_mul_montq
1777
1778 .byte   102,72,15,126,203
1779 .byte   102,72,15,126,207
1780         call    __ecp_nistz256_sub_fromq
1781
1782         addq    $160+8,%rsp
1783         popq    %r15
1784         popq    %r14
1785         popq    %r13
1786         popq    %r12
1787         popq    %rbx
1788         popq    %rbp
1789         .byte   0xf3,0xc3
1790 .size   ecp_nistz256_point_double,.-ecp_nistz256_point_double
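/*
 * ecp_nistz256_point_add: full Jacobian addition, out (%rdi) =
 * a (%rsi) + b (%rdx), using a 576-byte stack frame.  Inputs at infinity
 * are handled by the SIMD masks used in the final selection.  When both
 * inputs are finite and share the same x coordinate, the code either
 * re-enters the doubling path (equal y) or stores the all-zero point at
 * infinity (opposite y).
 */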
1791 .globl  ecp_nistz256_point_add
1792 .type   ecp_nistz256_point_add,@function
1793 .align  32
1794 ecp_nistz256_point_add:
1795         movl    $0x80100,%ecx
1796         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
1797         cmpl    $0x80100,%ecx
1798         je      .Lpoint_addx
1799         pushq   %rbp
1800         pushq   %rbx
1801         pushq   %r12
1802         pushq   %r13
1803         pushq   %r14
1804         pushq   %r15
1805         subq    $576+8,%rsp
1806
1807         movdqu  0(%rsi),%xmm0
1808         movdqu  16(%rsi),%xmm1
1809         movdqu  32(%rsi),%xmm2
1810         movdqu  48(%rsi),%xmm3
1811         movdqu  64(%rsi),%xmm4
1812         movdqu  80(%rsi),%xmm5
1813         movq    %rsi,%rbx
1814         movq    %rdx,%rsi
1815         movdqa  %xmm0,384(%rsp)
1816         movdqa  %xmm1,384+16(%rsp)
1817         movdqa  %xmm2,416(%rsp)
1818         movdqa  %xmm3,416+16(%rsp)
1819         movdqa  %xmm4,448(%rsp)
1820         movdqa  %xmm5,448+16(%rsp)
1821         por     %xmm4,%xmm5
1822
1823         movdqu  0(%rsi),%xmm0
1824         pshufd  $0xb1,%xmm5,%xmm3
1825         movdqu  16(%rsi),%xmm1
1826         movdqu  32(%rsi),%xmm2
1827         por     %xmm3,%xmm5
1828         movdqu  48(%rsi),%xmm3
1829         movq    64+0(%rsi),%rax
1830         movq    64+8(%rsi),%r14
1831         movq    64+16(%rsi),%r15
1832         movq    64+24(%rsi),%r8
1833         movdqa  %xmm0,480(%rsp)
1834         pshufd  $0x1e,%xmm5,%xmm4
1835         movdqa  %xmm1,480+16(%rsp)
1836         movdqu  64(%rsi),%xmm0
1837         movdqu  80(%rsi),%xmm1
1838         movdqa  %xmm2,512(%rsp)
1839         movdqa  %xmm3,512+16(%rsp)
1840         por     %xmm4,%xmm5
1841         pxor    %xmm4,%xmm4
1842         por     %xmm0,%xmm1
1843 .byte   102,72,15,110,199
1844
1845         leaq    64-0(%rsi),%rsi
1846         movq    %rax,544+0(%rsp)
1847         movq    %r14,544+8(%rsp)
1848         movq    %r15,544+16(%rsp)
1849         movq    %r8,544+24(%rsp)
1850         leaq    96(%rsp),%rdi
1851         call    __ecp_nistz256_sqr_montq
1852
1853         pcmpeqd %xmm4,%xmm5
1854         pshufd  $0xb1,%xmm1,%xmm4
1855         por     %xmm1,%xmm4
1856         pshufd  $0,%xmm5,%xmm5
1857         pshufd  $0x1e,%xmm4,%xmm3
1858         por     %xmm3,%xmm4
1859         pxor    %xmm3,%xmm3
1860         pcmpeqd %xmm3,%xmm4
1861         pshufd  $0,%xmm4,%xmm4
1862         movq    64+0(%rbx),%rax
1863         movq    64+8(%rbx),%r14
1864         movq    64+16(%rbx),%r15
1865         movq    64+24(%rbx),%r8
1866 .byte   102,72,15,110,203
1867
1868         leaq    64-0(%rbx),%rsi
1869         leaq    32(%rsp),%rdi
1870         call    __ecp_nistz256_sqr_montq
1871
1872         movq    544(%rsp),%rax
1873         leaq    544(%rsp),%rbx
1874         movq    0+96(%rsp),%r9
1875         movq    8+96(%rsp),%r10
1876         leaq    0+96(%rsp),%rsi
1877         movq    16+96(%rsp),%r11
1878         movq    24+96(%rsp),%r12
1879         leaq    224(%rsp),%rdi
1880         call    __ecp_nistz256_mul_montq
1881
1882         movq    448(%rsp),%rax
1883         leaq    448(%rsp),%rbx
1884         movq    0+32(%rsp),%r9
1885         movq    8+32(%rsp),%r10
1886         leaq    0+32(%rsp),%rsi
1887         movq    16+32(%rsp),%r11
1888         movq    24+32(%rsp),%r12
1889         leaq    256(%rsp),%rdi
1890         call    __ecp_nistz256_mul_montq
1891
1892         movq    416(%rsp),%rax
1893         leaq    416(%rsp),%rbx
1894         movq    0+224(%rsp),%r9
1895         movq    8+224(%rsp),%r10
1896         leaq    0+224(%rsp),%rsi
1897         movq    16+224(%rsp),%r11
1898         movq    24+224(%rsp),%r12
1899         leaq    224(%rsp),%rdi
1900         call    __ecp_nistz256_mul_montq
1901
1902         movq    512(%rsp),%rax
1903         leaq    512(%rsp),%rbx
1904         movq    0+256(%rsp),%r9
1905         movq    8+256(%rsp),%r10
1906         leaq    0+256(%rsp),%rsi
1907         movq    16+256(%rsp),%r11
1908         movq    24+256(%rsp),%r12
1909         leaq    256(%rsp),%rdi
1910         call    __ecp_nistz256_mul_montq
1911
1912         leaq    224(%rsp),%rbx
1913         leaq    64(%rsp),%rdi
1914         call    __ecp_nistz256_sub_fromq
1915
1916         orq     %r13,%r12
1917         movdqa  %xmm4,%xmm2
1918         orq     %r8,%r12
1919         orq     %r9,%r12
1920         por     %xmm5,%xmm2
1921 .byte   102,73,15,110,220
1922
1923         movq    384(%rsp),%rax
1924         leaq    384(%rsp),%rbx
1925         movq    0+96(%rsp),%r9
1926         movq    8+96(%rsp),%r10
1927         leaq    0+96(%rsp),%rsi
1928         movq    16+96(%rsp),%r11
1929         movq    24+96(%rsp),%r12
1930         leaq    160(%rsp),%rdi
1931         call    __ecp_nistz256_mul_montq
1932
1933         movq    480(%rsp),%rax
1934         leaq    480(%rsp),%rbx
1935         movq    0+32(%rsp),%r9
1936         movq    8+32(%rsp),%r10
1937         leaq    0+32(%rsp),%rsi
1938         movq    16+32(%rsp),%r11
1939         movq    24+32(%rsp),%r12
1940         leaq    192(%rsp),%rdi
1941         call    __ecp_nistz256_mul_montq
1942
1943         leaq    160(%rsp),%rbx
1944         leaq    0(%rsp),%rdi
1945         call    __ecp_nistz256_sub_fromq
1946
1947         orq     %r13,%r12
1948         orq     %r8,%r12
1949         orq     %r9,%r12
1950
1951 .byte   0x3e
1952         jnz     .Ladd_proceedq
1953 .byte   102,73,15,126,208
1954 .byte   102,73,15,126,217
1955         testq   %r8,%r8
1956         jnz     .Ladd_proceedq
1957         testq   %r9,%r9
1958         jz      .Ladd_doubleq
1959
1960 .byte   102,72,15,126,199
1961         pxor    %xmm0,%xmm0
1962         movdqu  %xmm0,0(%rdi)
1963         movdqu  %xmm0,16(%rdi)
1964         movdqu  %xmm0,32(%rdi)
1965         movdqu  %xmm0,48(%rdi)
1966         movdqu  %xmm0,64(%rdi)
1967         movdqu  %xmm0,80(%rdi)
1968         jmp     .Ladd_doneq
1969
1970 .align  32
1971 .Ladd_doubleq:
1972 .byte   102,72,15,126,206
1973 .byte   102,72,15,126,199
1974         addq    $416,%rsp
1975         jmp     .Lpoint_double_shortcutq
1976
1977 .align  32
1978 .Ladd_proceedq:
1979         movq    0+64(%rsp),%rax
1980         movq    8+64(%rsp),%r14
1981         leaq    0+64(%rsp),%rsi
1982         movq    16+64(%rsp),%r15
1983         movq    24+64(%rsp),%r8
1984         leaq    96(%rsp),%rdi
1985         call    __ecp_nistz256_sqr_montq
1986
1987         movq    448(%rsp),%rax
1988         leaq    448(%rsp),%rbx
1989         movq    0+0(%rsp),%r9
1990         movq    8+0(%rsp),%r10
1991         leaq    0+0(%rsp),%rsi
1992         movq    16+0(%rsp),%r11
1993         movq    24+0(%rsp),%r12
1994         leaq    352(%rsp),%rdi
1995         call    __ecp_nistz256_mul_montq
1996
1997         movq    0+0(%rsp),%rax
1998         movq    8+0(%rsp),%r14
1999         leaq    0+0(%rsp),%rsi
2000         movq    16+0(%rsp),%r15
2001         movq    24+0(%rsp),%r8
2002         leaq    32(%rsp),%rdi
2003         call    __ecp_nistz256_sqr_montq
2004
2005         movq    544(%rsp),%rax
2006         leaq    544(%rsp),%rbx
2007         movq    0+352(%rsp),%r9
2008         movq    8+352(%rsp),%r10
2009         leaq    0+352(%rsp),%rsi
2010         movq    16+352(%rsp),%r11
2011         movq    24+352(%rsp),%r12
2012         leaq    352(%rsp),%rdi
2013         call    __ecp_nistz256_mul_montq
2014
2015         movq    0(%rsp),%rax
2016         leaq    0(%rsp),%rbx
2017         movq    0+32(%rsp),%r9
2018         movq    8+32(%rsp),%r10
2019         leaq    0+32(%rsp),%rsi
2020         movq    16+32(%rsp),%r11
2021         movq    24+32(%rsp),%r12
2022         leaq    128(%rsp),%rdi
2023         call    __ecp_nistz256_mul_montq
2024
2025         movq    160(%rsp),%rax
2026         leaq    160(%rsp),%rbx
2027         movq    0+32(%rsp),%r9
2028         movq    8+32(%rsp),%r10
2029         leaq    0+32(%rsp),%rsi
2030         movq    16+32(%rsp),%r11
2031         movq    24+32(%rsp),%r12
2032         leaq    192(%rsp),%rdi
2033         call    __ecp_nistz256_mul_montq
2034
2035
2036
2037
2038         xorq    %r11,%r11
2039         addq    %r12,%r12
2040         leaq    96(%rsp),%rsi
2041         adcq    %r13,%r13
2042         movq    %r12,%rax
2043         adcq    %r8,%r8
2044         adcq    %r9,%r9
2045         movq    %r13,%rbp
2046         adcq    $0,%r11
2047
2048         subq    $-1,%r12
2049         movq    %r8,%rcx
2050         sbbq    %r14,%r13
2051         sbbq    $0,%r8
2052         movq    %r9,%r10
2053         sbbq    %r15,%r9
2054         sbbq    $0,%r11
2055
2056         cmovcq  %rax,%r12
2057         movq    0(%rsi),%rax
2058         cmovcq  %rbp,%r13
2059         movq    8(%rsi),%rbp
2060         cmovcq  %rcx,%r8
2061         movq    16(%rsi),%rcx
2062         cmovcq  %r10,%r9
2063         movq    24(%rsi),%r10
2064
2065         call    __ecp_nistz256_subq
2066
2067         leaq    128(%rsp),%rbx
2068         leaq    288(%rsp),%rdi
2069         call    __ecp_nistz256_sub_fromq
2070
2071         movq    192+0(%rsp),%rax
2072         movq    192+8(%rsp),%rbp
2073         movq    192+16(%rsp),%rcx
2074         movq    192+24(%rsp),%r10
2075         leaq    320(%rsp),%rdi
2076
2077         call    __ecp_nistz256_subq
2078
2079         movq    %r12,0(%rdi)
2080         movq    %r13,8(%rdi)
2081         movq    %r8,16(%rdi)
2082         movq    %r9,24(%rdi)
2083         movq    128(%rsp),%rax
2084         leaq    128(%rsp),%rbx
2085         movq    0+224(%rsp),%r9
2086         movq    8+224(%rsp),%r10
2087         leaq    0+224(%rsp),%rsi
2088         movq    16+224(%rsp),%r11
2089         movq    24+224(%rsp),%r12
2090         leaq    256(%rsp),%rdi
2091         call    __ecp_nistz256_mul_montq
2092
2093         movq    320(%rsp),%rax
2094         leaq    320(%rsp),%rbx
2095         movq    0+64(%rsp),%r9
2096         movq    8+64(%rsp),%r10
2097         leaq    0+64(%rsp),%rsi
2098         movq    16+64(%rsp),%r11
2099         movq    24+64(%rsp),%r12
2100         leaq    320(%rsp),%rdi
2101         call    __ecp_nistz256_mul_montq
2102
2103         leaq    256(%rsp),%rbx
2104         leaq    320(%rsp),%rdi
2105         call    __ecp_nistz256_sub_fromq
2106
2107 .byte   102,72,15,126,199
2108
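/*
 * Branchless result selection.  The .byte above encodes
 * movq %xmm0,%rdi, restoring the result pointer; %xmm5 is an all-ones
 * mask when the first input was the point at infinity and %xmm4 when
 * the second one was.  For each of Z, X and Y the pandn/pand/por
 * cascade computes res = in1infty ? in2 : res, then
 * res = in2infty ? in1 : res, so the special cases are handled without
 * data-dependent branches.
 */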
2109         movdqa  %xmm5,%xmm0
2110         movdqa  %xmm5,%xmm1
2111         pandn   352(%rsp),%xmm0
2112         movdqa  %xmm5,%xmm2
2113         pandn   352+16(%rsp),%xmm1
2114         movdqa  %xmm5,%xmm3
2115         pand    544(%rsp),%xmm2
2116         pand    544+16(%rsp),%xmm3
2117         por     %xmm0,%xmm2
2118         por     %xmm1,%xmm3
2119
2120         movdqa  %xmm4,%xmm0
2121         movdqa  %xmm4,%xmm1
2122         pandn   %xmm2,%xmm0
2123         movdqa  %xmm4,%xmm2
2124         pandn   %xmm3,%xmm1
2125         movdqa  %xmm4,%xmm3
2126         pand    448(%rsp),%xmm2
2127         pand    448+16(%rsp),%xmm3
2128         por     %xmm0,%xmm2
2129         por     %xmm1,%xmm3
2130         movdqu  %xmm2,64(%rdi)
2131         movdqu  %xmm3,80(%rdi)
2132
2133         movdqa  %xmm5,%xmm0
2134         movdqa  %xmm5,%xmm1
2135         pandn   288(%rsp),%xmm0
2136         movdqa  %xmm5,%xmm2
2137         pandn   288+16(%rsp),%xmm1
2138         movdqa  %xmm5,%xmm3
2139         pand    480(%rsp),%xmm2
2140         pand    480+16(%rsp),%xmm3
2141         por     %xmm0,%xmm2
2142         por     %xmm1,%xmm3
2143
2144         movdqa  %xmm4,%xmm0
2145         movdqa  %xmm4,%xmm1
2146         pandn   %xmm2,%xmm0
2147         movdqa  %xmm4,%xmm2
2148         pandn   %xmm3,%xmm1
2149         movdqa  %xmm4,%xmm3
2150         pand    384(%rsp),%xmm2
2151         pand    384+16(%rsp),%xmm3
2152         por     %xmm0,%xmm2
2153         por     %xmm1,%xmm3
2154         movdqu  %xmm2,0(%rdi)
2155         movdqu  %xmm3,16(%rdi)
2156
2157         movdqa  %xmm5,%xmm0
2158         movdqa  %xmm5,%xmm1
2159         pandn   320(%rsp),%xmm0
2160         movdqa  %xmm5,%xmm2
2161         pandn   320+16(%rsp),%xmm1
2162         movdqa  %xmm5,%xmm3
2163         pand    512(%rsp),%xmm2
2164         pand    512+16(%rsp),%xmm3
2165         por     %xmm0,%xmm2
2166         por     %xmm1,%xmm3
2167
2168         movdqa  %xmm4,%xmm0
2169         movdqa  %xmm4,%xmm1
2170         pandn   %xmm2,%xmm0
2171         movdqa  %xmm4,%xmm2
2172         pandn   %xmm3,%xmm1
2173         movdqa  %xmm4,%xmm3
2174         pand    416(%rsp),%xmm2
2175         pand    416+16(%rsp),%xmm3
2176         por     %xmm0,%xmm2
2177         por     %xmm1,%xmm3
2178         movdqu  %xmm2,32(%rdi)
2179         movdqu  %xmm3,48(%rdi)
2180
2181 .Ladd_doneq:
2182         addq    $576+8,%rsp
2183         popq    %r15
2184         popq    %r14
2185         popq    %r13
2186         popq    %r12
2187         popq    %rbx
2188         popq    %rbp
2189         .byte   0xf3,0xc3
2190 .size   ecp_nistz256_point_add,.-ecp_nistz256_point_add
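/*
 * ecp_nistz256_point_add_affine(%rdi = result, %rsi = Jacobian point,
 * %rdx = affine point): mixed addition.  The second operand carries
 * only X and Y; its Z coordinate is implicitly 1 (.LONE_mont in
 * Montgomery form), which saves several multiplications compared with
 * ecp_nistz256_point_add.  As above, the OPENSSL_ia32cap_P+8 test
 * checks the BMI2 (bit 8) and ADX (bit 19) feature bits and routes
 * capable CPUs to the mulx/adcx-based .Lpoint_add_affinex variant
 * further down.
 */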
2191 .globl  ecp_nistz256_point_add_affine
2192 .type   ecp_nistz256_point_add_affine,@function
2193 .align  32
2194 ecp_nistz256_point_add_affine:
2195         movl    $0x80100,%ecx
2196         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
2197         cmpl    $0x80100,%ecx
2198         je      .Lpoint_add_affinex
2199         pushq   %rbp
2200         pushq   %rbx
2201         pushq   %r12
2202         pushq   %r13
2203         pushq   %r14
2204         pushq   %r15
2205         subq    $480+8,%rsp
2206
2207         movdqu  0(%rsi),%xmm0
2208         movq    %rdx,%rbx
2209         movdqu  16(%rsi),%xmm1
2210         movdqu  32(%rsi),%xmm2
2211         movdqu  48(%rsi),%xmm3
2212         movdqu  64(%rsi),%xmm4
2213         movdqu  80(%rsi),%xmm5
2214         movq    64+0(%rsi),%rax
2215         movq    64+8(%rsi),%r14
2216         movq    64+16(%rsi),%r15
2217         movq    64+24(%rsi),%r8
2218         movdqa  %xmm0,320(%rsp)
2219         movdqa  %xmm1,320+16(%rsp)
2220         movdqa  %xmm2,352(%rsp)
2221         movdqa  %xmm3,352+16(%rsp)
2222         movdqa  %xmm4,384(%rsp)
2223         movdqa  %xmm5,384+16(%rsp)
2224         por     %xmm4,%xmm5
2225
2226         movdqu  0(%rbx),%xmm0
2227         pshufd  $0xb1,%xmm5,%xmm3
2228         movdqu  16(%rbx),%xmm1
2229         movdqu  32(%rbx),%xmm2
2230         por     %xmm3,%xmm5
2231         movdqu  48(%rbx),%xmm3
2232         movdqa  %xmm0,416(%rsp)
2233         pshufd  $0x1e,%xmm5,%xmm4
2234         movdqa  %xmm1,416+16(%rsp)
2235         por     %xmm0,%xmm1
2236 .byte   102,72,15,110,199
2237         movdqa  %xmm2,448(%rsp)
2238         movdqa  %xmm3,448+16(%rsp)
2239         por     %xmm2,%xmm3
2240         por     %xmm4,%xmm5
2241         pxor    %xmm4,%xmm4
2242         por     %xmm1,%xmm3
2243
2244         leaq    64-0(%rsi),%rsi
2245         leaq    32(%rsp),%rdi
2246         call    __ecp_nistz256_sqr_montq
2247
2248         pcmpeqd %xmm4,%xmm5
2249         pshufd  $0xb1,%xmm3,%xmm4
2250         movq    0(%rbx),%rax
2251
2252         movq    %r12,%r9
2253         por     %xmm3,%xmm4
2254         pshufd  $0,%xmm5,%xmm5
2255         pshufd  $0x1e,%xmm4,%xmm3
2256         movq    %r13,%r10
2257         por     %xmm3,%xmm4
2258         pxor    %xmm3,%xmm3
2259         movq    %r14,%r11
2260         pcmpeqd %xmm3,%xmm4
2261         pshufd  $0,%xmm4,%xmm4
2262
2263         leaq    32-0(%rsp),%rsi
2264         movq    %r15,%r12
2265         leaq    0(%rsp),%rdi
2266         call    __ecp_nistz256_mul_montq
2267
2268         leaq    320(%rsp),%rbx
2269         leaq    64(%rsp),%rdi
2270         call    __ecp_nistz256_sub_fromq
2271
2272         movq    384(%rsp),%rax
2273         leaq    384(%rsp),%rbx
2274         movq    0+32(%rsp),%r9
2275         movq    8+32(%rsp),%r10
2276         leaq    0+32(%rsp),%rsi
2277         movq    16+32(%rsp),%r11
2278         movq    24+32(%rsp),%r12
2279         leaq    32(%rsp),%rdi
2280         call    __ecp_nistz256_mul_montq
2281
2282         movq    384(%rsp),%rax
2283         leaq    384(%rsp),%rbx
2284         movq    0+64(%rsp),%r9
2285         movq    8+64(%rsp),%r10
2286         leaq    0+64(%rsp),%rsi
2287         movq    16+64(%rsp),%r11
2288         movq    24+64(%rsp),%r12
2289         leaq    288(%rsp),%rdi
2290         call    __ecp_nistz256_mul_montq
2291
2292         movq    448(%rsp),%rax
2293         leaq    448(%rsp),%rbx
2294         movq    0+32(%rsp),%r9
2295         movq    8+32(%rsp),%r10
2296         leaq    0+32(%rsp),%rsi
2297         movq    16+32(%rsp),%r11
2298         movq    24+32(%rsp),%r12
2299         leaq    32(%rsp),%rdi
2300         call    __ecp_nistz256_mul_montq
2301
2302         leaq    352(%rsp),%rbx
2303         leaq    96(%rsp),%rdi
2304         call    __ecp_nistz256_sub_fromq
2305
2306         movq    0+64(%rsp),%rax
2307         movq    8+64(%rsp),%r14
2308         leaq    0+64(%rsp),%rsi
2309         movq    16+64(%rsp),%r15
2310         movq    24+64(%rsp),%r8
2311         leaq    128(%rsp),%rdi
2312         call    __ecp_nistz256_sqr_montq
2313
2314         movq    0+96(%rsp),%rax
2315         movq    8+96(%rsp),%r14
2316         leaq    0+96(%rsp),%rsi
2317         movq    16+96(%rsp),%r15
2318         movq    24+96(%rsp),%r8
2319         leaq    192(%rsp),%rdi
2320         call    __ecp_nistz256_sqr_montq
2321
2322         movq    128(%rsp),%rax
2323         leaq    128(%rsp),%rbx
2324         movq    0+64(%rsp),%r9
2325         movq    8+64(%rsp),%r10
2326         leaq    0+64(%rsp),%rsi
2327         movq    16+64(%rsp),%r11
2328         movq    24+64(%rsp),%r12
2329         leaq    160(%rsp),%rdi
2330         call    __ecp_nistz256_mul_montq
2331
2332         movq    320(%rsp),%rax
2333         leaq    320(%rsp),%rbx
2334         movq    0+128(%rsp),%r9
2335         movq    8+128(%rsp),%r10
2336         leaq    0+128(%rsp),%rsi
2337         movq    16+128(%rsp),%r11
2338         movq    24+128(%rsp),%r12
2339         leaq    0(%rsp),%rdi
2340         call    __ecp_nistz256_mul_montq
2341
2342
2343
2344
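/*
 * Same inline doubling/reduction and res_x = R^2 - H^3 - 2*U1*H^2 step
 * as in ecp_nistz256_point_add above, on this function's smaller
 * 480+8-byte frame.
 */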
2345         xorq    %r11,%r11
2346         addq    %r12,%r12
2347         leaq    192(%rsp),%rsi
2348         adcq    %r13,%r13
2349         movq    %r12,%rax
2350         adcq    %r8,%r8
2351         adcq    %r9,%r9
2352         movq    %r13,%rbp
2353         adcq    $0,%r11
2354
2355         subq    $-1,%r12
2356         movq    %r8,%rcx
2357         sbbq    %r14,%r13
2358         sbbq    $0,%r8
2359         movq    %r9,%r10
2360         sbbq    %r15,%r9
2361         sbbq    $0,%r11
2362
2363         cmovcq  %rax,%r12
2364         movq    0(%rsi),%rax
2365         cmovcq  %rbp,%r13
2366         movq    8(%rsi),%rbp
2367         cmovcq  %rcx,%r8
2368         movq    16(%rsi),%rcx
2369         cmovcq  %r10,%r9
2370         movq    24(%rsi),%r10
2371
2372         call    __ecp_nistz256_subq
2373
2374         leaq    160(%rsp),%rbx
2375         leaq    224(%rsp),%rdi
2376         call    __ecp_nistz256_sub_fromq
2377
2378         movq    0+0(%rsp),%rax
2379         movq    0+8(%rsp),%rbp
2380         movq    0+16(%rsp),%rcx
2381         movq    0+24(%rsp),%r10
2382         leaq    64(%rsp),%rdi
2383
2384         call    __ecp_nistz256_subq
2385
2386         movq    %r12,0(%rdi)
2387         movq    %r13,8(%rdi)
2388         movq    %r8,16(%rdi)
2389         movq    %r9,24(%rdi)
2390         movq    352(%rsp),%rax
2391         leaq    352(%rsp),%rbx
2392         movq    0+160(%rsp),%r9
2393         movq    8+160(%rsp),%r10
2394         leaq    0+160(%rsp),%rsi
2395         movq    16+160(%rsp),%r11
2396         movq    24+160(%rsp),%r12
2397         leaq    32(%rsp),%rdi
2398         call    __ecp_nistz256_mul_montq
2399
2400         movq    96(%rsp),%rax
2401         leaq    96(%rsp),%rbx
2402         movq    0+64(%rsp),%r9
2403         movq    8+64(%rsp),%r10
2404         leaq    0+64(%rsp),%rsi
2405         movq    16+64(%rsp),%r11
2406         movq    24+64(%rsp),%r12
2407         leaq    64(%rsp),%rdi
2408         call    __ecp_nistz256_mul_montq
2409
2410         leaq    32(%rsp),%rbx
2411         leaq    256(%rsp),%rdi
2412         call    __ecp_nistz256_sub_fromq
2413
2414 .byte   102,72,15,126,199
2415
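/*
 * Final branchless selection, as in ecp_nistz256_point_add, except
 * that the affine second input has no stored Z coordinate: when the
 * first input was at infinity the result's X/Y are taken from the
 * affine point and its Z from .LONE_mont (the value 1 in Montgomery
 * form); when the affine point was at infinity (the all-zero
 * encoding), the result is the first input.
 */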
2416         movdqa  %xmm5,%xmm0
2417         movdqa  %xmm5,%xmm1
2418         pandn   288(%rsp),%xmm0
2419         movdqa  %xmm5,%xmm2
2420         pandn   288+16(%rsp),%xmm1
2421         movdqa  %xmm5,%xmm3
2422         pand    .LONE_mont(%rip),%xmm2
2423         pand    .LONE_mont+16(%rip),%xmm3
2424         por     %xmm0,%xmm2
2425         por     %xmm1,%xmm3
2426
2427         movdqa  %xmm4,%xmm0
2428         movdqa  %xmm4,%xmm1
2429         pandn   %xmm2,%xmm0
2430         movdqa  %xmm4,%xmm2
2431         pandn   %xmm3,%xmm1
2432         movdqa  %xmm4,%xmm3
2433         pand    384(%rsp),%xmm2
2434         pand    384+16(%rsp),%xmm3
2435         por     %xmm0,%xmm2
2436         por     %xmm1,%xmm3
2437         movdqu  %xmm2,64(%rdi)
2438         movdqu  %xmm3,80(%rdi)
2439
2440         movdqa  %xmm5,%xmm0
2441         movdqa  %xmm5,%xmm1
2442         pandn   224(%rsp),%xmm0
2443         movdqa  %xmm5,%xmm2
2444         pandn   224+16(%rsp),%xmm1
2445         movdqa  %xmm5,%xmm3
2446         pand    416(%rsp),%xmm2
2447         pand    416+16(%rsp),%xmm3
2448         por     %xmm0,%xmm2
2449         por     %xmm1,%xmm3
2450
2451         movdqa  %xmm4,%xmm0
2452         movdqa  %xmm4,%xmm1
2453         pandn   %xmm2,%xmm0
2454         movdqa  %xmm4,%xmm2
2455         pandn   %xmm3,%xmm1
2456         movdqa  %xmm4,%xmm3
2457         pand    320(%rsp),%xmm2
2458         pand    320+16(%rsp),%xmm3
2459         por     %xmm0,%xmm2
2460         por     %xmm1,%xmm3
2461         movdqu  %xmm2,0(%rdi)
2462         movdqu  %xmm3,16(%rdi)
2463
2464         movdqa  %xmm5,%xmm0
2465         movdqa  %xmm5,%xmm1
2466         pandn   256(%rsp),%xmm0
2467         movdqa  %xmm5,%xmm2
2468         pandn   256+16(%rsp),%xmm1
2469         movdqa  %xmm5,%xmm3
2470         pand    448(%rsp),%xmm2
2471         pand    448+16(%rsp),%xmm3
2472         por     %xmm0,%xmm2
2473         por     %xmm1,%xmm3
2474
2475         movdqa  %xmm4,%xmm0
2476         movdqa  %xmm4,%xmm1
2477         pandn   %xmm2,%xmm0
2478         movdqa  %xmm4,%xmm2
2479         pandn   %xmm3,%xmm1
2480         movdqa  %xmm4,%xmm3
2481         pand    352(%rsp),%xmm2
2482         pand    352+16(%rsp),%xmm3
2483         por     %xmm0,%xmm2
2484         por     %xmm1,%xmm3
2485         movdqu  %xmm2,32(%rdi)
2486         movdqu  %xmm3,48(%rdi)
2487
2488         addq    $480+8,%rsp
2489         popq    %r15
2490         popq    %r14
2491         popq    %r13
2492         popq    %r12
2493         popq    %rbx
2494         popq    %rbp
2495         .byte   0xf3,0xc3
2496 .size   ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
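/*
 * Everything below is the BMI2/ADX code path selected by the
 * OPENSSL_ia32cap_P tests above.  The __ecp_nistz256_*x helpers mirror
 * their *q counterparts (same conditional add/subtract of the modulus,
 * with %r14/%r15 caching .Lpoly+8 and .Lpoly+24); the difference is in
 * the Montgomery multiplication/squaring routines, which use
 * mulx/adcx/adox instead of mulq/adcq.  Each routine still returns
 * with .byte 0xf3,0xc3, i.e. "rep ret", a form some AMD branch
 * predictors handle better than a bare ret.
 */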
2497 .type   __ecp_nistz256_add_tox,@function
2498 .align  32
2499 __ecp_nistz256_add_tox:
2500         xorq    %r11,%r11
2501         adcq    0(%rbx),%r12
2502         adcq    8(%rbx),%r13
2503         movq    %r12,%rax
2504         adcq    16(%rbx),%r8
2505         adcq    24(%rbx),%r9
2506         movq    %r13,%rbp
2507         adcq    $0,%r11
2508
2509         xorq    %r10,%r10
2510         sbbq    $-1,%r12
2511         movq    %r8,%rcx
2512         sbbq    %r14,%r13
2513         sbbq    $0,%r8
2514         movq    %r9,%r10
2515         sbbq    %r15,%r9
2516         sbbq    $0,%r11
2517
2518         cmovcq  %rax,%r12
2519         cmovcq  %rbp,%r13
2520         movq    %r12,0(%rdi)
2521         cmovcq  %rcx,%r8
2522         movq    %r13,8(%rdi)
2523         cmovcq  %r10,%r9
2524         movq    %r8,16(%rdi)
2525         movq    %r9,24(%rdi)
2526
2527         .byte   0xf3,0xc3
2528 .size   __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox
2529
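/*
 * __ecp_nistz256_sub_fromx / __ecp_nistz256_subx: modular subtraction.
 * A borrow out of the sbbq chain is repaired by adding the modulus
 * back (the adcq $-1 / %r14 / $0 / %r15 sequence), and btq $0,%r11
 * followed by the cmov block picks between the corrected and
 * uncorrected result, again without branches.  __ecp_nistz256_add_tox
 * above and __ecp_nistz256_mul_by_2x below clear the carry flag with
 * xorq first so their adcq chains start clean.
 */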
2530 .type   __ecp_nistz256_sub_fromx,@function
2531 .align  32
2532 __ecp_nistz256_sub_fromx:
2533         xorq    %r11,%r11
2534         sbbq    0(%rbx),%r12
2535         sbbq    8(%rbx),%r13
2536         movq    %r12,%rax
2537         sbbq    16(%rbx),%r8
2538         sbbq    24(%rbx),%r9
2539         movq    %r13,%rbp
2540         sbbq    $0,%r11
2541
2542         xorq    %r10,%r10
2543         adcq    $-1,%r12
2544         movq    %r8,%rcx
2545         adcq    %r14,%r13
2546         adcq    $0,%r8
2547         movq    %r9,%r10
2548         adcq    %r15,%r9
2549
2550         btq     $0,%r11
2551         cmovncq %rax,%r12
2552         cmovncq %rbp,%r13
2553         movq    %r12,0(%rdi)
2554         cmovncq %rcx,%r8
2555         movq    %r13,8(%rdi)
2556         cmovncq %r10,%r9
2557         movq    %r8,16(%rdi)
2558         movq    %r9,24(%rdi)
2559
2560         .byte   0xf3,0xc3
2561 .size   __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx
2562
2563 .type   __ecp_nistz256_subx,@function
2564 .align  32
2565 __ecp_nistz256_subx:
2566         xorq    %r11,%r11
2567         sbbq    %r12,%rax
2568         sbbq    %r13,%rbp
2569         movq    %rax,%r12
2570         sbbq    %r8,%rcx
2571         sbbq    %r9,%r10
2572         movq    %rbp,%r13
2573         sbbq    $0,%r11
2574
2575         xorq    %r9,%r9
2576         adcq    $-1,%rax
2577         movq    %rcx,%r8
2578         adcq    %r14,%rbp
2579         adcq    $0,%rcx
2580         movq    %r10,%r9
2581         adcq    %r15,%r10
2582
2583         btq     $0,%r11
2584         cmovcq  %rax,%r12
2585         cmovcq  %rbp,%r13
2586         cmovcq  %rcx,%r8
2587         cmovcq  %r10,%r9
2588
2589         .byte   0xf3,0xc3
2590 .size   __ecp_nistz256_subx,.-__ecp_nistz256_subx
2591
2592 .type   __ecp_nistz256_mul_by_2x,@function
2593 .align  32
2594 __ecp_nistz256_mul_by_2x:
2595         xorq    %r11,%r11
2596         adcq    %r12,%r12
2597         adcq    %r13,%r13
2598         movq    %r12,%rax
2599         adcq    %r8,%r8
2600         adcq    %r9,%r9
2601         movq    %r13,%rbp
2602         adcq    $0,%r11
2603
2604         xorq    %r10,%r10
2605         sbbq    $-1,%r12
2606         movq    %r8,%rcx
2607         sbbq    %r14,%r13
2608         sbbq    $0,%r8
2609         movq    %r9,%r10
2610         sbbq    %r15,%r9
2611         sbbq    $0,%r11
2612
2613         cmovcq  %rax,%r12
2614         cmovcq  %rbp,%r13
2615         movq    %r12,0(%rdi)
2616         cmovcq  %rcx,%r8
2617         movq    %r13,8(%rdi)
2618         cmovcq  %r10,%r9
2619         movq    %r8,16(%rdi)
2620         movq    %r9,24(%rdi)
2621
2622         .byte   0xf3,0xc3
2623 .size   __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
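/*
 * ecp_nistz256_point_doublex: point doubling for the mulx/adcx path.
 * .Lpoint_double_shortcutx is entered from .Ladd_doublex when
 * ecp_nistz256_point_addx finds its two inputs equal.  The
 * "leaq -128+off(%rsp)" / "leaq 64-128(%rsi)" source pointers reflect
 * how the x-flavour Montgomery routines expect their operand: they
 * address it 128 bytes above the passed pointer (plausibly so that
 * all displacements fit in a signed byte), whereas the q flavour
 * takes unbiased pointers.
 */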
2624 .type   ecp_nistz256_point_doublex,@function
2625 .align  32
2626 ecp_nistz256_point_doublex:
2627 .Lpoint_doublex:
2628         pushq   %rbp
2629         pushq   %rbx
2630         pushq   %r12
2631         pushq   %r13
2632         pushq   %r14
2633         pushq   %r15
2634         subq    $160+8,%rsp
2635
2636 .Lpoint_double_shortcutx:
2637         movdqu  0(%rsi),%xmm0
2638         movq    %rsi,%rbx
2639         movdqu  16(%rsi),%xmm1
2640         movq    32+0(%rsi),%r12
2641         movq    32+8(%rsi),%r13
2642         movq    32+16(%rsi),%r8
2643         movq    32+24(%rsi),%r9
2644         movq    .Lpoly+8(%rip),%r14
2645         movq    .Lpoly+24(%rip),%r15
2646         movdqa  %xmm0,96(%rsp)
2647         movdqa  %xmm1,96+16(%rsp)
2648         leaq    32(%rdi),%r10
2649         leaq    64(%rdi),%r11
2650 .byte   102,72,15,110,199
2651 .byte   102,73,15,110,202
2652 .byte   102,73,15,110,211
2653
2654         leaq    0(%rsp),%rdi
2655         call    __ecp_nistz256_mul_by_2x
2656
2657         movq    64+0(%rsi),%rdx
2658         movq    64+8(%rsi),%r14
2659         movq    64+16(%rsi),%r15
2660         movq    64+24(%rsi),%r8
2661         leaq    64-128(%rsi),%rsi
2662         leaq    64(%rsp),%rdi
2663         call    __ecp_nistz256_sqr_montx
2664
2665         movq    0+0(%rsp),%rdx
2666         movq    8+0(%rsp),%r14
2667         leaq    -128+0(%rsp),%rsi
2668         movq    16+0(%rsp),%r15
2669         movq    24+0(%rsp),%r8
2670         leaq    0(%rsp),%rdi
2671         call    __ecp_nistz256_sqr_montx
2672
2673         movq    32(%rbx),%rdx
2674         movq    64+0(%rbx),%r9
2675         movq    64+8(%rbx),%r10
2676         movq    64+16(%rbx),%r11
2677         movq    64+24(%rbx),%r12
2678         leaq    64-128(%rbx),%rsi
2679         leaq    32(%rbx),%rbx
2680 .byte   102,72,15,126,215
2681         call    __ecp_nistz256_mul_montx
2682         call    __ecp_nistz256_mul_by_2x
2683
2684         movq    96+0(%rsp),%r12
2685         movq    96+8(%rsp),%r13
2686         leaq    64(%rsp),%rbx
2687         movq    96+16(%rsp),%r8
2688         movq    96+24(%rsp),%r9
2689         leaq    32(%rsp),%rdi
2690         call    __ecp_nistz256_add_tox
2691
2692         movq    96+0(%rsp),%r12
2693         movq    96+8(%rsp),%r13
2694         leaq    64(%rsp),%rbx
2695         movq    96+16(%rsp),%r8
2696         movq    96+24(%rsp),%r9
2697         leaq    64(%rsp),%rdi
2698         call    __ecp_nistz256_sub_fromx
2699
2700         movq    0+0(%rsp),%rdx
2701         movq    8+0(%rsp),%r14
2702         leaq    -128+0(%rsp),%rsi
2703         movq    16+0(%rsp),%r15
2704         movq    24+0(%rsp),%r8
2705 .byte   102,72,15,126,207
2706         call    __ecp_nistz256_sqr_montx
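/*
 * Inline modular halving of the square just computed (the div-by-2
 * step of the doubling formula): unconditionally add the modulus,
 * keep the original value instead if it was even (the cmovz block),
 * then shift the four limbs right by one bit, propagating bits across
 * words and folding the add's carry back in through %r9.
 */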
2707         xorq    %r9,%r9
2708         movq    %r12,%rax
2709         addq    $-1,%r12
2710         movq    %r13,%r10
2711         adcq    %rsi,%r13
2712         movq    %r14,%rcx
2713         adcq    $0,%r14
2714         movq    %r15,%r8
2715         adcq    %rbp,%r15
2716         adcq    $0,%r9
2717         xorq    %rsi,%rsi
2718         testq   $1,%rax
2719
2720         cmovzq  %rax,%r12
2721         cmovzq  %r10,%r13
2722         cmovzq  %rcx,%r14
2723         cmovzq  %r8,%r15
2724         cmovzq  %rsi,%r9
2725
2726         movq    %r13,%rax
2727         shrq    $1,%r12
2728         shlq    $63,%rax
2729         movq    %r14,%r10
2730         shrq    $1,%r13
2731         orq     %rax,%r12
2732         shlq    $63,%r10
2733         movq    %r15,%rcx
2734         shrq    $1,%r14
2735         orq     %r10,%r13
2736         shlq    $63,%rcx
2737         movq    %r12,0(%rdi)
2738         shrq    $1,%r15
2739         movq    %r13,8(%rdi)
2740         shlq    $63,%r9
2741         orq     %rcx,%r14
2742         orq     %r9,%r15
2743         movq    %r14,16(%rdi)
2744         movq    %r15,24(%rdi)
2745         movq    64(%rsp),%rdx
2746         leaq    64(%rsp),%rbx
2747         movq    0+32(%rsp),%r9
2748         movq    8+32(%rsp),%r10
2749         leaq    -128+32(%rsp),%rsi
2750         movq    16+32(%rsp),%r11
2751         movq    24+32(%rsp),%r12
2752         leaq    32(%rsp),%rdi
2753         call    __ecp_nistz256_mul_montx
2754
2755         leaq    128(%rsp),%rdi
2756         call    __ecp_nistz256_mul_by_2x
2757
2758         leaq    32(%rsp),%rbx
2759         leaq    32(%rsp),%rdi
2760         call    __ecp_nistz256_add_tox
2761
2762         movq    96(%rsp),%rdx
2763         leaq    96(%rsp),%rbx
2764         movq    0+0(%rsp),%r9
2765         movq    8+0(%rsp),%r10
2766         leaq    -128+0(%rsp),%rsi
2767         movq    16+0(%rsp),%r11
2768         movq    24+0(%rsp),%r12
2769         leaq    0(%rsp),%rdi
2770         call    __ecp_nistz256_mul_montx
2771
2772         leaq    128(%rsp),%rdi
2773         call    __ecp_nistz256_mul_by_2x
2774
2775         movq    0+32(%rsp),%rdx
2776         movq    8+32(%rsp),%r14
2777         leaq    -128+32(%rsp),%rsi
2778         movq    16+32(%rsp),%r15
2779         movq    24+32(%rsp),%r8
2780 .byte   102,72,15,126,199
2781         call    __ecp_nistz256_sqr_montx
2782
2783         leaq    128(%rsp),%rbx
2784         movq    %r14,%r8
2785         movq    %r15,%r9
2786         movq    %rsi,%r14
2787         movq    %rbp,%r15
2788         call    __ecp_nistz256_sub_fromx
2789
2790         movq    0+0(%rsp),%rax
2791         movq    0+8(%rsp),%rbp
2792         movq    0+16(%rsp),%rcx
2793         movq    0+24(%rsp),%r10
2794         leaq    0(%rsp),%rdi
2795         call    __ecp_nistz256_subx
2796
2797         movq    32(%rsp),%rdx
2798         leaq    32(%rsp),%rbx
2799         movq    %r12,%r14
2800         xorl    %ecx,%ecx
2801         movq    %r12,0+0(%rsp)
2802         movq    %r13,%r10
2803         movq    %r13,0+8(%rsp)
2804         cmovzq  %r8,%r11
2805         movq    %r8,0+16(%rsp)
2806         leaq    0-128(%rsp),%rsi
2807         cmovzq  %r9,%r12
2808         movq    %r9,0+24(%rsp)
2809         movq    %r14,%r9
2810         leaq    0(%rsp),%rdi
2811         call    __ecp_nistz256_mul_montx
2812
2813 .byte   102,72,15,126,203
2814 .byte   102,72,15,126,207
2815         call    __ecp_nistz256_sub_fromx
2816
2817         addq    $160+8,%rsp
2818         popq    %r15
2819         popq    %r14
2820         popq    %r13
2821         popq    %r12
2822         popq    %rbx
2823         popq    %rbp
2824         .byte   0xf3,0xc3
2825 .size   ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
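/*
 * ecp_nistz256_point_addx: full Jacobian point addition for the
 * mulx/adcx path.  The structure is identical to
 * ecp_nistz256_point_add above: compute U1/U2/S1/S2, branch to
 * .Ladd_doublex for equal inputs or emit the point at infinity for
 * P + (-P), otherwise apply the generic formulas and finish with the
 * same branchless infinity selection.
 */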
2826 .type   ecp_nistz256_point_addx,@function
2827 .align  32
2828 ecp_nistz256_point_addx:
2829 .Lpoint_addx:
2830         pushq   %rbp
2831         pushq   %rbx
2832         pushq   %r12
2833         pushq   %r13
2834         pushq   %r14
2835         pushq   %r15
2836         subq    $576+8,%rsp
2837
2838         movdqu  0(%rsi),%xmm0
2839         movdqu  16(%rsi),%xmm1
2840         movdqu  32(%rsi),%xmm2
2841         movdqu  48(%rsi),%xmm3
2842         movdqu  64(%rsi),%xmm4
2843         movdqu  80(%rsi),%xmm5
2844         movq    %rsi,%rbx
2845         movq    %rdx,%rsi
2846         movdqa  %xmm0,384(%rsp)
2847         movdqa  %xmm1,384+16(%rsp)
2848         movdqa  %xmm2,416(%rsp)
2849         movdqa  %xmm3,416+16(%rsp)
2850         movdqa  %xmm4,448(%rsp)
2851         movdqa  %xmm5,448+16(%rsp)
2852         por     %xmm4,%xmm5
2853
2854         movdqu  0(%rsi),%xmm0
2855         pshufd  $0xb1,%xmm5,%xmm3
2856         movdqu  16(%rsi),%xmm1
2857         movdqu  32(%rsi),%xmm2
2858         por     %xmm3,%xmm5
2859         movdqu  48(%rsi),%xmm3
2860         movq    64+0(%rsi),%rdx
2861         movq    64+8(%rsi),%r14
2862         movq    64+16(%rsi),%r15
2863         movq    64+24(%rsi),%r8
2864         movdqa  %xmm0,480(%rsp)
2865         pshufd  $0x1e,%xmm5,%xmm4
2866         movdqa  %xmm1,480+16(%rsp)
2867         movdqu  64(%rsi),%xmm0
2868         movdqu  80(%rsi),%xmm1
2869         movdqa  %xmm2,512(%rsp)
2870         movdqa  %xmm3,512+16(%rsp)
2871         por     %xmm4,%xmm5
2872         pxor    %xmm4,%xmm4
2873         por     %xmm0,%xmm1
2874 .byte   102,72,15,110,199
2875
2876         leaq    64-128(%rsi),%rsi
2877         movq    %rdx,544+0(%rsp)
2878         movq    %r14,544+8(%rsp)
2879         movq    %r15,544+16(%rsp)
2880         movq    %r8,544+24(%rsp)
2881         leaq    96(%rsp),%rdi
2882         call    __ecp_nistz256_sqr_montx
2883
2884         pcmpeqd %xmm4,%xmm5
2885         pshufd  $0xb1,%xmm1,%xmm4
2886         por     %xmm1,%xmm4
2887         pshufd  $0,%xmm5,%xmm5
2888         pshufd  $0x1e,%xmm4,%xmm3
2889         por     %xmm3,%xmm4
2890         pxor    %xmm3,%xmm3
2891         pcmpeqd %xmm3,%xmm4
2892         pshufd  $0,%xmm4,%xmm4
2893         movq    64+0(%rbx),%rdx
2894         movq    64+8(%rbx),%r14
2895         movq    64+16(%rbx),%r15
2896         movq    64+24(%rbx),%r8
2897 .byte   102,72,15,110,203
2898
2899         leaq    64-128(%rbx),%rsi
2900         leaq    32(%rsp),%rdi
2901         call    __ecp_nistz256_sqr_montx
2902
2903         movq    544(%rsp),%rdx
2904         leaq    544(%rsp),%rbx
2905         movq    0+96(%rsp),%r9
2906         movq    8+96(%rsp),%r10
2907         leaq    -128+96(%rsp),%rsi
2908         movq    16+96(%rsp),%r11
2909         movq    24+96(%rsp),%r12
2910         leaq    224(%rsp),%rdi
2911         call    __ecp_nistz256_mul_montx
2912
2913         movq    448(%rsp),%rdx
2914         leaq    448(%rsp),%rbx
2915         movq    0+32(%rsp),%r9
2916         movq    8+32(%rsp),%r10
2917         leaq    -128+32(%rsp),%rsi
2918         movq    16+32(%rsp),%r11
2919         movq    24+32(%rsp),%r12
2920         leaq    256(%rsp),%rdi
2921         call    __ecp_nistz256_mul_montx
2922
2923         movq    416(%rsp),%rdx
2924         leaq    416(%rsp),%rbx
2925         movq    0+224(%rsp),%r9
2926         movq    8+224(%rsp),%r10
2927         leaq    -128+224(%rsp),%rsi
2928         movq    16+224(%rsp),%r11
2929         movq    24+224(%rsp),%r12
2930         leaq    224(%rsp),%rdi
2931         call    __ecp_nistz256_mul_montx
2932
2933         movq    512(%rsp),%rdx
2934         leaq    512(%rsp),%rbx
2935         movq    0+256(%rsp),%r9
2936         movq    8+256(%rsp),%r10
2937         leaq    -128+256(%rsp),%rsi
2938         movq    16+256(%rsp),%r11
2939         movq    24+256(%rsp),%r12
2940         leaq    256(%rsp),%rdi
2941         call    __ecp_nistz256_mul_montx
2942
2943         leaq    224(%rsp),%rbx
2944         leaq    64(%rsp),%rdi
2945         call    __ecp_nistz256_sub_fromx
2946
2947         orq     %r13,%r12
2948         movdqa  %xmm4,%xmm2
2949         orq     %r8,%r12
2950         orq     %r9,%r12
2951         por     %xmm5,%xmm2
2952 .byte   102,73,15,110,220
2953
2954         movq    384(%rsp),%rdx
2955         leaq    384(%rsp),%rbx
2956         movq    0+96(%rsp),%r9
2957         movq    8+96(%rsp),%r10
2958         leaq    -128+96(%rsp),%rsi
2959         movq    16+96(%rsp),%r11
2960         movq    24+96(%rsp),%r12
2961         leaq    160(%rsp),%rdi
2962         call    __ecp_nistz256_mul_montx
2963
2964         movq    480(%rsp),%rdx
2965         leaq    480(%rsp),%rbx
2966         movq    0+32(%rsp),%r9
2967         movq    8+32(%rsp),%r10
2968         leaq    -128+32(%rsp),%rsi
2969         movq    16+32(%rsp),%r11
2970         movq    24+32(%rsp),%r12
2971         leaq    192(%rsp),%rdi
2972         call    __ecp_nistz256_mul_montx
2973
2974         leaq    160(%rsp),%rbx
2975         leaq    0(%rsp),%rdi
2976         call    __ecp_nistz256_sub_fromx
2977
2978         orq     %r13,%r12
2979         orq     %r8,%r12
2980         orq     %r9,%r12
2981
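/*
 * Same equal/opposite/infinity dispatch as in the q path above
 * (.byte 0x3e is again a "predict taken" hint, and the .byte sequences
 * encode movq %xmm2,%r8 / movq %xmm3,%r9).
 */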
2982 .byte   0x3e
2983         jnz     .Ladd_proceedx
2984 .byte   102,73,15,126,208
2985 .byte   102,73,15,126,217
2986         testq   %r8,%r8
2987         jnz     .Ladd_proceedx
2988         testq   %r9,%r9
2989         jz      .Ladd_doublex
2990
2991 .byte   102,72,15,126,199
2992         pxor    %xmm0,%xmm0
2993         movdqu  %xmm0,0(%rdi)
2994         movdqu  %xmm0,16(%rdi)
2995         movdqu  %xmm0,32(%rdi)
2996         movdqu  %xmm0,48(%rdi)
2997         movdqu  %xmm0,64(%rdi)
2998         movdqu  %xmm0,80(%rdi)
2999         jmp     .Ladd_donex
3000
3001 .align  32
3002 .Ladd_doublex:
3003 .byte   102,72,15,126,206
3004 .byte   102,72,15,126,199
3005         addq    $416,%rsp
3006         jmp     .Lpoint_double_shortcutx
3007
3008 .align  32
3009 .Ladd_proceedx:
3010         movq    0+64(%rsp),%rdx
3011         movq    8+64(%rsp),%r14
3012         leaq    -128+64(%rsp),%rsi
3013         movq    16+64(%rsp),%r15
3014         movq    24+64(%rsp),%r8
3015         leaq    96(%rsp),%rdi
3016         call    __ecp_nistz256_sqr_montx
3017
3018         movq    448(%rsp),%rdx
3019         leaq    448(%rsp),%rbx
3020         movq    0+0(%rsp),%r9
3021         movq    8+0(%rsp),%r10
3022         leaq    -128+0(%rsp),%rsi
3023         movq    16+0(%rsp),%r11
3024         movq    24+0(%rsp),%r12
3025         leaq    352(%rsp),%rdi
3026         call    __ecp_nistz256_mul_montx
3027
3028         movq    0+0(%rsp),%rdx
3029         movq    8+0(%rsp),%r14
3030         leaq    -128+0(%rsp),%rsi
3031         movq    16+0(%rsp),%r15
3032         movq    24+0(%rsp),%r8
3033         leaq    32(%rsp),%rdi
3034         call    __ecp_nistz256_sqr_montx
3035
3036         movq    544(%rsp),%rdx
3037         leaq    544(%rsp),%rbx
3038         movq    0+352(%rsp),%r9
3039         movq    8+352(%rsp),%r10
3040         leaq    -128+352(%rsp),%rsi
3041         movq    16+352(%rsp),%r11
3042         movq    24+352(%rsp),%r12
3043         leaq    352(%rsp),%rdi
3044         call    __ecp_nistz256_mul_montx
3045
3046         movq    0(%rsp),%rdx
3047         leaq    0(%rsp),%rbx
3048         movq    0+32(%rsp),%r9
3049         movq    8+32(%rsp),%r10
3050         leaq    -128+32(%rsp),%rsi
3051         movq    16+32(%rsp),%r11
3052         movq    24+32(%rsp),%r12
3053         leaq    128(%rsp),%rdi
3054         call    __ecp_nistz256_mul_montx
3055
3056         movq    160(%rsp),%rdx
3057         leaq    160(%rsp),%rbx
3058         movq    0+32(%rsp),%r9
3059         movq    8+32(%rsp),%r10
3060         leaq    -128+32(%rsp),%rsi
3061         movq    16+32(%rsp),%r11
3062         movq    24+32(%rsp),%r12
3063         leaq    192(%rsp),%rdi
3064         call    __ecp_nistz256_mul_montx
3065
3066
3067
3068
3069         xorq    %r11,%r11
3070         addq    %r12,%r12
3071         leaq    96(%rsp),%rsi
3072         adcq    %r13,%r13
3073         movq    %r12,%rax
3074         adcq    %r8,%r8
3075         adcq    %r9,%r9
3076         movq    %r13,%rbp
3077         adcq    $0,%r11
3078
3079         subq    $-1,%r12
3080         movq    %r8,%rcx
3081         sbbq    %r14,%r13
3082         sbbq    $0,%r8
3083         movq    %r9,%r10
3084         sbbq    %r15,%r9
3085         sbbq    $0,%r11
3086
3087         cmovcq  %rax,%r12
3088         movq    0(%rsi),%rax
3089         cmovcq  %rbp,%r13
3090         movq    8(%rsi),%rbp
3091         cmovcq  %rcx,%r8
3092         movq    16(%rsi),%rcx
3093         cmovcq  %r10,%r9
3094         movq    24(%rsi),%r10
3095
3096         call    __ecp_nistz256_subx
3097
3098         leaq    128(%rsp),%rbx
3099         leaq    288(%rsp),%rdi
3100         call    __ecp_nistz256_sub_fromx
3101
3102         movq    192+0(%rsp),%rax
3103         movq    192+8(%rsp),%rbp
3104         movq    192+16(%rsp),%rcx
3105         movq    192+24(%rsp),%r10
3106         leaq    320(%rsp),%rdi
3107
3108         call    __ecp_nistz256_subx
3109
3110         movq    %r12,0(%rdi)
3111         movq    %r13,8(%rdi)
3112         movq    %r8,16(%rdi)
3113         movq    %r9,24(%rdi)
3114         movq    128(%rsp),%rdx
3115         leaq    128(%rsp),%rbx
3116         movq    0+224(%rsp),%r9
3117         movq    8+224(%rsp),%r10
3118         leaq    -128+224(%rsp),%rsi
3119         movq    16+224(%rsp),%r11
3120         movq    24+224(%rsp),%r12
3121         leaq    256(%rsp),%rdi
3122         call    __ecp_nistz256_mul_montx
3123
3124         movq    320(%rsp),%rdx
3125         leaq    320(%rsp),%rbx
3126         movq    0+64(%rsp),%r9
3127         movq    8+64(%rsp),%r10
3128         leaq    -128+64(%rsp),%rsi
3129         movq    16+64(%rsp),%r11
3130         movq    24+64(%rsp),%r12
3131         leaq    320(%rsp),%rdi
3132         call    __ecp_nistz256_mul_montx
3133
3134         leaq    256(%rsp),%rbx
3135         leaq    320(%rsp),%rdi
3136         call    __ecp_nistz256_sub_fromx
3137
3138 .byte   102,72,15,126,199
3139
3140         movdqa  %xmm5,%xmm0
3141         movdqa  %xmm5,%xmm1
3142         pandn   352(%rsp),%xmm0
3143         movdqa  %xmm5,%xmm2
3144         pandn   352+16(%rsp),%xmm1
3145         movdqa  %xmm5,%xmm3
3146         pand    544(%rsp),%xmm2
3147         pand    544+16(%rsp),%xmm3
3148         por     %xmm0,%xmm2
3149         por     %xmm1,%xmm3
3150
3151         movdqa  %xmm4,%xmm0
3152         movdqa  %xmm4,%xmm1
3153         pandn   %xmm2,%xmm0
3154         movdqa  %xmm4,%xmm2
3155         pandn   %xmm3,%xmm1
3156         movdqa  %xmm4,%xmm3
3157         pand    448(%rsp),%xmm2
3158         pand    448+16(%rsp),%xmm3
3159         por     %xmm0,%xmm2
3160         por     %xmm1,%xmm3
3161         movdqu  %xmm2,64(%rdi)
3162         movdqu  %xmm3,80(%rdi)
3163
3164         movdqa  %xmm5,%xmm0
3165         movdqa  %xmm5,%xmm1
3166         pandn   288(%rsp),%xmm0
3167         movdqa  %xmm5,%xmm2
3168         pandn   288+16(%rsp),%xmm1
3169         movdqa  %xmm5,%xmm3
3170         pand    480(%rsp),%xmm2
3171         pand    480+16(%rsp),%xmm3
3172         por     %xmm0,%xmm2
3173         por     %xmm1,%xmm3
3174
3175         movdqa  %xmm4,%xmm0
3176         movdqa  %xmm4,%xmm1
3177         pandn   %xmm2,%xmm0
3178         movdqa  %xmm4,%xmm2
3179         pandn   %xmm3,%xmm1
3180         movdqa  %xmm4,%xmm3
3181         pand    384(%rsp),%xmm2
3182         pand    384+16(%rsp),%xmm3
3183         por     %xmm0,%xmm2
3184         por     %xmm1,%xmm3
3185         movdqu  %xmm2,0(%rdi)
3186         movdqu  %xmm3,16(%rdi)
3187
3188         movdqa  %xmm5,%xmm0
3189         movdqa  %xmm5,%xmm1
3190         pandn   320(%rsp),%xmm0
3191         movdqa  %xmm5,%xmm2
3192         pandn   320+16(%rsp),%xmm1
3193         movdqa  %xmm5,%xmm3
3194         pand    512(%rsp),%xmm2
3195         pand    512+16(%rsp),%xmm3
3196         por     %xmm0,%xmm2
3197         por     %xmm1,%xmm3
3198
3199         movdqa  %xmm4,%xmm0
3200         movdqa  %xmm4,%xmm1
3201         pandn   %xmm2,%xmm0
3202         movdqa  %xmm4,%xmm2
3203         pandn   %xmm3,%xmm1
3204         movdqa  %xmm4,%xmm3
3205         pand    416(%rsp),%xmm2
3206         pand    416+16(%rsp),%xmm3
3207         por     %xmm0,%xmm2
3208         por     %xmm1,%xmm3
3209         movdqu  %xmm2,32(%rdi)
3210         movdqu  %xmm3,48(%rdi)
3211
3212 .Ladd_donex:
3213         addq    $576+8,%rsp
3214         popq    %r15
3215         popq    %r14
3216         popq    %r13
3217         popq    %r12
3218         popq    %rbx
3219         popq    %rbp
3220         .byte   0xf3,0xc3
3221 .size   ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
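/*
 * ecp_nistz256_point_add_affinex: mixed Jacobian+affine addition for
 * the mulx/adcx path, mirroring ecp_nistz256_point_add_affine above,
 * including the use of .LONE_mont as the affine operand's implicit Z
 * in the final selection.
 */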
3222 .type   ecp_nistz256_point_add_affinex,@function
3223 .align  32
3224 ecp_nistz256_point_add_affinex:
3225 .Lpoint_add_affinex:
3226         pushq   %rbp
3227         pushq   %rbx
3228         pushq   %r12
3229         pushq   %r13
3230         pushq   %r14
3231         pushq   %r15
3232         subq    $480+8,%rsp
3233
3234         movdqu  0(%rsi),%xmm0
3235         movq    %rdx,%rbx
3236         movdqu  16(%rsi),%xmm1
3237         movdqu  32(%rsi),%xmm2
3238         movdqu  48(%rsi),%xmm3
3239         movdqu  64(%rsi),%xmm4
3240         movdqu  80(%rsi),%xmm5
3241         movq    64+0(%rsi),%rdx
3242         movq    64+8(%rsi),%r14
3243         movq    64+16(%rsi),%r15
3244         movq    64+24(%rsi),%r8
3245         movdqa  %xmm0,320(%rsp)
3246         movdqa  %xmm1,320+16(%rsp)
3247         movdqa  %xmm2,352(%rsp)
3248         movdqa  %xmm3,352+16(%rsp)
3249         movdqa  %xmm4,384(%rsp)
3250         movdqa  %xmm5,384+16(%rsp)
3251         por     %xmm4,%xmm5
3252
3253         movdqu  0(%rbx),%xmm0
3254         pshufd  $0xb1,%xmm5,%xmm3
3255         movdqu  16(%rbx),%xmm1
3256         movdqu  32(%rbx),%xmm2
3257         por     %xmm3,%xmm5
3258         movdqu  48(%rbx),%xmm3
3259         movdqa  %xmm0,416(%rsp)
3260         pshufd  $0x1e,%xmm5,%xmm4
3261         movdqa  %xmm1,416+16(%rsp)
3262         por     %xmm0,%xmm1
3263 .byte   102,72,15,110,199
3264         movdqa  %xmm2,448(%rsp)
3265         movdqa  %xmm3,448+16(%rsp)
3266         por     %xmm2,%xmm3
3267         por     %xmm4,%xmm5
3268         pxor    %xmm4,%xmm4
3269         por     %xmm1,%xmm3
3270
3271         leaq    64-128(%rsi),%rsi
3272         leaq    32(%rsp),%rdi
3273         call    __ecp_nistz256_sqr_montx
3274
3275         pcmpeqd %xmm4,%xmm5
3276         pshufd  $0xb1,%xmm3,%xmm4
3277         movq    0(%rbx),%rdx
3278
3279         movq    %r12,%r9
3280         por     %xmm3,%xmm4
3281         pshufd  $0,%xmm5,%xmm5
3282         pshufd  $0x1e,%xmm4,%xmm3
3283         movq    %r13,%r10
3284         por     %xmm3,%xmm4
3285         pxor    %xmm3,%xmm3
3286         movq    %r14,%r11
3287         pcmpeqd %xmm3,%xmm4
3288         pshufd  $0,%xmm4,%xmm4
3289
3290         leaq    32-128(%rsp),%rsi
3291         movq    %r15,%r12
3292         leaq    0(%rsp),%rdi
3293         call    __ecp_nistz256_mul_montx
3294
3295         leaq    320(%rsp),%rbx
3296         leaq    64(%rsp),%rdi
3297         call    __ecp_nistz256_sub_fromx
3298
3299         movq    384(%rsp),%rdx
3300         leaq    384(%rsp),%rbx
3301         movq    0+32(%rsp),%r9
3302         movq    8+32(%rsp),%r10
3303         leaq    -128+32(%rsp),%rsi
3304         movq    16+32(%rsp),%r11
3305         movq    24+32(%rsp),%r12
3306         leaq    32(%rsp),%rdi
3307         call    __ecp_nistz256_mul_montx
3308
3309         movq    384(%rsp),%rdx
3310         leaq    384(%rsp),%rbx
3311         movq    0+64(%rsp),%r9
3312         movq    8+64(%rsp),%r10
3313         leaq    -128+64(%rsp),%rsi
3314         movq    16+64(%rsp),%r11
3315         movq    24+64(%rsp),%r12
3316         leaq    288(%rsp),%rdi
3317         call    __ecp_nistz256_mul_montx
3318
3319         movq    448(%rsp),%rdx
3320         leaq    448(%rsp),%rbx
3321         movq    0+32(%rsp),%r9
3322         movq    8+32(%rsp),%r10
3323         leaq    -128+32(%rsp),%rsi
3324         movq    16+32(%rsp),%r11
3325         movq    24+32(%rsp),%r12
3326         leaq    32(%rsp),%rdi
3327         call    __ecp_nistz256_mul_montx
3328
3329         leaq    352(%rsp),%rbx
3330         leaq    96(%rsp),%rdi
3331         call    __ecp_nistz256_sub_fromx
3332
3333         movq    0+64(%rsp),%rdx
3334         movq    8+64(%rsp),%r14
3335         leaq    -128+64(%rsp),%rsi
3336         movq    16+64(%rsp),%r15
3337         movq    24+64(%rsp),%r8
3338         leaq    128(%rsp),%rdi
3339         call    __ecp_nistz256_sqr_montx
3340
3341         movq    0+96(%rsp),%rdx
3342         movq    8+96(%rsp),%r14
3343         leaq    -128+96(%rsp),%rsi
3344         movq    16+96(%rsp),%r15
3345         movq    24+96(%rsp),%r8
3346         leaq    192(%rsp),%rdi
3347         call    __ecp_nistz256_sqr_montx
3348
3349         movq    128(%rsp),%rdx
3350         leaq    128(%rsp),%rbx
3351         movq    0+64(%rsp),%r9
3352         movq    8+64(%rsp),%r10
3353         leaq    -128+64(%rsp),%rsi
3354         movq    16+64(%rsp),%r11
3355         movq    24+64(%rsp),%r12
3356         leaq    160(%rsp),%rdi
3357         call    __ecp_nistz256_mul_montx
3358
3359         movq    320(%rsp),%rdx
3360         leaq    320(%rsp),%rbx
3361         movq    0+128(%rsp),%r9
3362         movq    8+128(%rsp),%r10
3363         leaq    -128+128(%rsp),%rsi
3364         movq    16+128(%rsp),%r11
3365         movq    24+128(%rsp),%r12
3366         leaq    0(%rsp),%rdi
3367         call    __ecp_nistz256_mul_montx
3368
3369
3370
3371
3372         xorq    %r11,%r11
3373         addq    %r12,%r12
3374         leaq    192(%rsp),%rsi
3375         adcq    %r13,%r13
3376         movq    %r12,%rax
3377         adcq    %r8,%r8
3378         adcq    %r9,%r9
3379         movq    %r13,%rbp
3380         adcq    $0,%r11
3381
3382         subq    $-1,%r12
3383         movq    %r8,%rcx
3384         sbbq    %r14,%r13
3385         sbbq    $0,%r8
3386         movq    %r9,%r10
3387         sbbq    %r15,%r9
3388         sbbq    $0,%r11
3389
3390         cmovcq  %rax,%r12
3391         movq    0(%rsi),%rax
3392         cmovcq  %rbp,%r13
3393         movq    8(%rsi),%rbp
3394         cmovcq  %rcx,%r8
3395         movq    16(%rsi),%rcx
3396         cmovcq  %r10,%r9
3397         movq    24(%rsi),%r10
3398
3399         call    __ecp_nistz256_subx
3400
3401         leaq    160(%rsp),%rbx
3402         leaq    224(%rsp),%rdi
3403         call    __ecp_nistz256_sub_fromx
3404
3405         movq    0+0(%rsp),%rax
3406         movq    0+8(%rsp),%rbp
3407         movq    0+16(%rsp),%rcx
3408         movq    0+24(%rsp),%r10
3409         leaq    64(%rsp),%rdi
3410
3411         call    __ecp_nistz256_subx
3412
3413         movq    %r12,0(%rdi)
3414         movq    %r13,8(%rdi)
3415         movq    %r8,16(%rdi)
3416         movq    %r9,24(%rdi)
3417         movq    352(%rsp),%rdx
3418         leaq    352(%rsp),%rbx
3419         movq    0+160(%rsp),%r9
3420         movq    8+160(%rsp),%r10
3421         leaq    -128+160(%rsp),%rsi
3422         movq    16+160(%rsp),%r11
3423         movq    24+160(%rsp),%r12
3424         leaq    32(%rsp),%rdi
3425         call    __ecp_nistz256_mul_montx
3426
3427         movq    96(%rsp),%rdx
3428         leaq    96(%rsp),%rbx
3429         movq    0+64(%rsp),%r9
3430         movq    8+64(%rsp),%r10
3431         leaq    -128+64(%rsp),%rsi
3432         movq    16+64(%rsp),%r11
3433         movq    24+64(%rsp),%r12
3434         leaq    64(%rsp),%rdi
3435         call    __ecp_nistz256_mul_montx
3436
3437         leaq    32(%rsp),%rbx
3438         leaq    256(%rsp),%rdi
3439         call    __ecp_nistz256_sub_fromx
3440
3441 .byte   102,72,15,126,199
3442
3443         movdqa  %xmm5,%xmm0
3444         movdqa  %xmm5,%xmm1
3445         pandn   288(%rsp),%xmm0
3446         movdqa  %xmm5,%xmm2
3447         pandn   288+16(%rsp),%xmm1
3448         movdqa  %xmm5,%xmm3
3449         pand    .LONE_mont(%rip),%xmm2
3450         pand    .LONE_mont+16(%rip),%xmm3
3451         por     %xmm0,%xmm2
3452         por     %xmm1,%xmm3
3453
3454         movdqa  %xmm4,%xmm0
3455         movdqa  %xmm4,%xmm1
3456         pandn   %xmm2,%xmm0
3457         movdqa  %xmm4,%xmm2
3458         pandn   %xmm3,%xmm1
3459         movdqa  %xmm4,%xmm3
3460         pand    384(%rsp),%xmm2
3461         pand    384+16(%rsp),%xmm3
3462         por     %xmm0,%xmm2
3463         por     %xmm1,%xmm3
3464         movdqu  %xmm2,64(%rdi)
3465         movdqu  %xmm3,80(%rdi)
3466
3467         movdqa  %xmm5,%xmm0
3468         movdqa  %xmm5,%xmm1
3469         pandn   224(%rsp),%xmm0
3470         movdqa  %xmm5,%xmm2
3471         pandn   224+16(%rsp),%xmm1
3472         movdqa  %xmm5,%xmm3
3473         pand    416(%rsp),%xmm2
3474         pand    416+16(%rsp),%xmm3
3475         por     %xmm0,%xmm2
3476         por     %xmm1,%xmm3
3477
3478         movdqa  %xmm4,%xmm0
3479         movdqa  %xmm4,%xmm1
3480         pandn   %xmm2,%xmm0
3481         movdqa  %xmm4,%xmm2
3482         pandn   %xmm3,%xmm1
3483         movdqa  %xmm4,%xmm3
3484         pand    320(%rsp),%xmm2
3485         pand    320+16(%rsp),%xmm3
3486         por     %xmm0,%xmm2
3487         por     %xmm1,%xmm3
3488         movdqu  %xmm2,0(%rdi)
3489         movdqu  %xmm3,16(%rdi)
3490
3491         movdqa  %xmm5,%xmm0
3492         movdqa  %xmm5,%xmm1
3493         pandn   256(%rsp),%xmm0
3494         movdqa  %xmm5,%xmm2
3495         pandn   256+16(%rsp),%xmm1
3496         movdqa  %xmm5,%xmm3
3497         pand    448(%rsp),%xmm2
3498         pand    448+16(%rsp),%xmm3
3499         por     %xmm0,%xmm2
3500         por     %xmm1,%xmm3
3501
3502         movdqa  %xmm4,%xmm0
3503         movdqa  %xmm4,%xmm1
3504         pandn   %xmm2,%xmm0
3505         movdqa  %xmm4,%xmm2
3506         pandn   %xmm3,%xmm1
3507         movdqa  %xmm4,%xmm3
3508         pand    352(%rsp),%xmm2
3509         pand    352+16(%rsp),%xmm3
3510         por     %xmm0,%xmm2
3511         por     %xmm1,%xmm3
3512         movdqu  %xmm2,32(%rdi)
3513         movdqu  %xmm3,48(%rdi)
3514
3515         addq    $480+8,%rsp
3516         popq    %r15
3517         popq    %r14
3518         popq    %r13
3519         popq    %r12
3520         popq    %rbx
3521         popq    %rbp
3522         .byte   0xf3,0xc3
3523 .size   ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex