1 /* $FreeBSD$ */
2 /* Do not modify. This file is auto-generated from ecp_nistz256-x86_64.pl. */
3 .text   
4
5
6
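/* Constant pool for the NIST P-256 ("nistz256") field arithmetic below:
   .Lpoly is the prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1 stored as four
   little-endian 64-bit limbs, .LRR is R^2 = 2^512 mod p (used to enter
   Montgomery form), .LOne/.LTwo/.LThree are broadcast dword counters for the
   table-select routines, and .LONE_mont is 1 in Montgomery form (R mod p). */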
7 .align  64
8 .Lpoly:
9 .quad   0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
10
11
12 .LRR:
13 .quad   0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd
14
15 .LOne:
16 .long   1,1,1,1,1,1,1,1
17 .LTwo:
18 .long   2,2,2,2,2,2,2,2
19 .LThree:
20 .long   3,3,3,3,3,3,3,3
21 .LONE_mont:
22 .quad   0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
23
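/* ecp_nistz256_mul_by_2(%rdi = res, %rsi = a): res = 2*a mod p.
   The doubled value is conditionally reduced by subtracting p. */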
24 .globl  ecp_nistz256_mul_by_2
25 .type   ecp_nistz256_mul_by_2,@function
26 .align  64
27 ecp_nistz256_mul_by_2:
28         pushq   %r12
29         pushq   %r13
30
31         movq    0(%rsi),%r8
32         movq    8(%rsi),%r9
33         addq    %r8,%r8
34         movq    16(%rsi),%r10
35         adcq    %r9,%r9
36         movq    24(%rsi),%r11
37         leaq    .Lpoly(%rip),%rsi
38         movq    %r8,%rax
39         adcq    %r10,%r10
40         adcq    %r11,%r11
41         movq    %r9,%rdx
42         sbbq    %r13,%r13
43
44         subq    0(%rsi),%r8
45         movq    %r10,%rcx
46         sbbq    8(%rsi),%r9
47         sbbq    16(%rsi),%r10
48         movq    %r11,%r12
49         sbbq    24(%rsi),%r11
50         testq   %r13,%r13
51
52         cmovzq  %rax,%r8
53         cmovzq  %rdx,%r9
54         movq    %r8,0(%rdi)
55         cmovzq  %rcx,%r10
56         movq    %r9,8(%rdi)
57         cmovzq  %r12,%r11
58         movq    %r10,16(%rdi)
59         movq    %r11,24(%rdi)
60
61         popq    %r13
62         popq    %r12
63         .byte   0xf3,0xc3
64 .size   ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
65
66
67
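/* ecp_nistz256_div_by_2(%rdi = res, %rsi = a): res = a/2 mod p.
   p is added when a is odd so the value is even before the one-bit right shift. */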
68 .globl  ecp_nistz256_div_by_2
69 .type   ecp_nistz256_div_by_2,@function
70 .align  32
71 ecp_nistz256_div_by_2:
72         pushq   %r12
73         pushq   %r13
74
75         movq    0(%rsi),%r8
76         movq    8(%rsi),%r9
77         movq    16(%rsi),%r10
78         movq    %r8,%rax
79         movq    24(%rsi),%r11
80         leaq    .Lpoly(%rip),%rsi
81
82         movq    %r9,%rdx
83         xorq    %r13,%r13
84         addq    0(%rsi),%r8
85         movq    %r10,%rcx
86         adcq    8(%rsi),%r9
87         adcq    16(%rsi),%r10
88         movq    %r11,%r12
89         adcq    24(%rsi),%r11
90         adcq    $0,%r13
91         xorq    %rsi,%rsi
92         testq   $1,%rax
93
94         cmovzq  %rax,%r8
95         cmovzq  %rdx,%r9
96         cmovzq  %rcx,%r10
97         cmovzq  %r12,%r11
98         cmovzq  %rsi,%r13
99
100         movq    %r9,%rax
101         shrq    $1,%r8
102         shlq    $63,%rax
103         movq    %r10,%rdx
104         shrq    $1,%r9
105         orq     %rax,%r8
106         shlq    $63,%rdx
107         movq    %r11,%rcx
108         shrq    $1,%r10
109         orq     %rdx,%r9
110         shlq    $63,%rcx
111         shrq    $1,%r11
112         shlq    $63,%r13
113         orq     %rcx,%r10
114         orq     %r13,%r11
115
116         movq    %r8,0(%rdi)
117         movq    %r9,8(%rdi)
118         movq    %r10,16(%rdi)
119         movq    %r11,24(%rdi)
120
121         popq    %r13
122         popq    %r12
123         .byte   0xf3,0xc3
124 .size   ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2
125
126
127
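/* ecp_nistz256_mul_by_3(%rdi = res, %rsi = a): res = 3*a mod p,
   computed as 2*a + a with a conditional subtraction of p after each step. */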
128 .globl  ecp_nistz256_mul_by_3
129 .type   ecp_nistz256_mul_by_3,@function
130 .align  32
131 ecp_nistz256_mul_by_3:
132         pushq   %r12
133         pushq   %r13
134
135         movq    0(%rsi),%r8
136         xorq    %r13,%r13
137         movq    8(%rsi),%r9
138         addq    %r8,%r8
139         movq    16(%rsi),%r10
140         adcq    %r9,%r9
141         movq    24(%rsi),%r11
142         movq    %r8,%rax
143         adcq    %r10,%r10
144         adcq    %r11,%r11
145         movq    %r9,%rdx
146         adcq    $0,%r13
147
148         subq    $-1,%r8
149         movq    %r10,%rcx
150         sbbq    .Lpoly+8(%rip),%r9
151         sbbq    $0,%r10
152         movq    %r11,%r12
153         sbbq    .Lpoly+24(%rip),%r11
154         testq   %r13,%r13
155
156         cmovzq  %rax,%r8
157         cmovzq  %rdx,%r9
158         cmovzq  %rcx,%r10
159         cmovzq  %r12,%r11
160
161         xorq    %r13,%r13
162         addq    0(%rsi),%r8
163         adcq    8(%rsi),%r9
164         movq    %r8,%rax
165         adcq    16(%rsi),%r10
166         adcq    24(%rsi),%r11
167         movq    %r9,%rdx
168         adcq    $0,%r13
169
170         subq    $-1,%r8
171         movq    %r10,%rcx
172         sbbq    .Lpoly+8(%rip),%r9
173         sbbq    $0,%r10
174         movq    %r11,%r12
175         sbbq    .Lpoly+24(%rip),%r11
176         testq   %r13,%r13
177
178         cmovzq  %rax,%r8
179         cmovzq  %rdx,%r9
180         movq    %r8,0(%rdi)
181         cmovzq  %rcx,%r10
182         movq    %r9,8(%rdi)
183         cmovzq  %r12,%r11
184         movq    %r10,16(%rdi)
185         movq    %r11,24(%rdi)
186
187         popq    %r13
188         popq    %r12
189         .byte   0xf3,0xc3
190 .size   ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3
191
192
193
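/* ecp_nistz256_add(%rdi = res, %rsi = a, %rdx = b): res = a + b mod p. */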
194 .globl  ecp_nistz256_add
195 .type   ecp_nistz256_add,@function
196 .align  32
197 ecp_nistz256_add:
198         pushq   %r12
199         pushq   %r13
200
201         movq    0(%rsi),%r8
202         xorq    %r13,%r13
203         movq    8(%rsi),%r9
204         movq    16(%rsi),%r10
205         movq    24(%rsi),%r11
206         leaq    .Lpoly(%rip),%rsi
207
208         addq    0(%rdx),%r8
209         adcq    8(%rdx),%r9
210         movq    %r8,%rax
211         adcq    16(%rdx),%r10
212         adcq    24(%rdx),%r11
213         movq    %r9,%rdx
214         adcq    $0,%r13
215
216         subq    0(%rsi),%r8
217         movq    %r10,%rcx
218         sbbq    8(%rsi),%r9
219         sbbq    16(%rsi),%r10
220         movq    %r11,%r12
221         sbbq    24(%rsi),%r11
222         testq   %r13,%r13
223
224         cmovzq  %rax,%r8
225         cmovzq  %rdx,%r9
226         movq    %r8,0(%rdi)
227         cmovzq  %rcx,%r10
228         movq    %r9,8(%rdi)
229         cmovzq  %r12,%r11
230         movq    %r10,16(%rdi)
231         movq    %r11,24(%rdi)
232
233         popq    %r13
234         popq    %r12
235         .byte   0xf3,0xc3
236 .size   ecp_nistz256_add,.-ecp_nistz256_add
237
238
239
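/* ecp_nistz256_sub(%rdi = res, %rsi = a, %rdx = b): res = a - b mod p
   (p is added back when the subtraction borrows). */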
240 .globl  ecp_nistz256_sub
241 .type   ecp_nistz256_sub,@function
242 .align  32
243 ecp_nistz256_sub:
244         pushq   %r12
245         pushq   %r13
246
247         movq    0(%rsi),%r8
248         xorq    %r13,%r13
249         movq    8(%rsi),%r9
250         movq    16(%rsi),%r10
251         movq    24(%rsi),%r11
252         leaq    .Lpoly(%rip),%rsi
253
254         subq    0(%rdx),%r8
255         sbbq    8(%rdx),%r9
256         movq    %r8,%rax
257         sbbq    16(%rdx),%r10
258         sbbq    24(%rdx),%r11
259         movq    %r9,%rdx
260         sbbq    $0,%r13
261
262         addq    0(%rsi),%r8
263         movq    %r10,%rcx
264         adcq    8(%rsi),%r9
265         adcq    16(%rsi),%r10
266         movq    %r11,%r12
267         adcq    24(%rsi),%r11
268         testq   %r13,%r13
269
270         cmovzq  %rax,%r8
271         cmovzq  %rdx,%r9
272         movq    %r8,0(%rdi)
273         cmovzq  %rcx,%r10
274         movq    %r9,8(%rdi)
275         cmovzq  %r12,%r11
276         movq    %r10,16(%rdi)
277         movq    %r11,24(%rdi)
278
279         popq    %r13
280         popq    %r12
281         .byte   0xf3,0xc3
282 .size   ecp_nistz256_sub,.-ecp_nistz256_sub
283
284
285
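/* ecp_nistz256_neg(%rdi = res, %rsi = a): res = -a mod p, computed as 0 - a
   with p added back on borrow. */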
286 .globl  ecp_nistz256_neg
287 .type   ecp_nistz256_neg,@function
288 .align  32
289 ecp_nistz256_neg:
290         pushq   %r12
291         pushq   %r13
292
293         xorq    %r8,%r8
294         xorq    %r9,%r9
295         xorq    %r10,%r10
296         xorq    %r11,%r11
297         xorq    %r13,%r13
298
299         subq    0(%rsi),%r8
300         sbbq    8(%rsi),%r9
301         sbbq    16(%rsi),%r10
302         movq    %r8,%rax
303         sbbq    24(%rsi),%r11
304         leaq    .Lpoly(%rip),%rsi
305         movq    %r9,%rdx
306         sbbq    $0,%r13
307
308         addq    0(%rsi),%r8
309         movq    %r10,%rcx
310         adcq    8(%rsi),%r9
311         adcq    16(%rsi),%r10
312         movq    %r11,%r12
313         adcq    24(%rsi),%r11
314         testq   %r13,%r13
315
316         cmovzq  %rax,%r8
317         cmovzq  %rdx,%r9
318         movq    %r8,0(%rdi)
319         cmovzq  %rcx,%r10
320         movq    %r9,8(%rdi)
321         cmovzq  %r12,%r11
322         movq    %r10,16(%rdi)
323         movq    %r11,24(%rdi)
324
325         popq    %r13
326         popq    %r12
327         .byte   0xf3,0xc3
328 .size   ecp_nistz256_neg,.-ecp_nistz256_neg
329
330
331
332
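/* ecp_nistz256_to_mont(%rdi = res, %rsi = a): convert a into Montgomery form,
   res = a*R mod p, by Montgomery-multiplying a with .LRR = R^2 mod p. */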
333 .globl  ecp_nistz256_to_mont
334 .type   ecp_nistz256_to_mont,@function
335 .align  32
336 ecp_nistz256_to_mont:
337         movl    $0x80100,%ecx
338         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
339         leaq    .LRR(%rip),%rdx
340         jmp     .Lmul_mont
341 .size   ecp_nistz256_to_mont,.-ecp_nistz256_to_mont
342
343
344
345
346
347
348
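/* ecp_nistz256_mul_mont(%rdi = res, %rsi = a, %rdx = b): Montgomery product
   res = a*b/R mod p with R = 2^256.  The MULX/ADX path (.Lmul_montx) is taken
   when the BMI2 and ADX feature bits are both set in OPENSSL_ia32cap_P
   (mask 0x80100). */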
349 .globl  ecp_nistz256_mul_mont
350 .type   ecp_nistz256_mul_mont,@function
351 .align  32
352 ecp_nistz256_mul_mont:
353         movl    $0x80100,%ecx
354         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
355 .Lmul_mont:
356         pushq   %rbp
357         pushq   %rbx
358         pushq   %r12
359         pushq   %r13
360         pushq   %r14
361         pushq   %r15
362         cmpl    $0x80100,%ecx
363         je      .Lmul_montx
364         movq    %rdx,%rbx
365         movq    0(%rdx),%rax
366         movq    0(%rsi),%r9
367         movq    8(%rsi),%r10
368         movq    16(%rsi),%r11
369         movq    24(%rsi),%r12
370
371         call    __ecp_nistz256_mul_montq
372         jmp     .Lmul_mont_done
373
374 .align  32
375 .Lmul_montx:
376         movq    %rdx,%rbx
377         movq    0(%rdx),%rdx
378         movq    0(%rsi),%r9
379         movq    8(%rsi),%r10
380         movq    16(%rsi),%r11
381         movq    24(%rsi),%r12
382         leaq    -128(%rsi),%rsi
383
384         call    __ecp_nistz256_mul_montx
385 .Lmul_mont_done:
386         popq    %r15
387         popq    %r14
388         popq    %r13
389         popq    %r12
390         popq    %rbx
391         popq    %rbp
392         .byte   0xf3,0xc3
393 .size   ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
394
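/* Baseline multiply-and-reduce path: schoolbook multiplication of the limbs of
   a (in %r9-%r12) by b (pointed to by %rbx), interleaved with Montgomery
   reduction.  Thanks to the special form of p, each reduction step needs only
   a pair of 32-bit shifts plus one multiplication by the top limb of p (%r15). */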
395 .type   __ecp_nistz256_mul_montq,@function
396 .align  32
397 __ecp_nistz256_mul_montq:
398
399
400         movq    %rax,%rbp
401         mulq    %r9
402         movq    .Lpoly+8(%rip),%r14
403         movq    %rax,%r8
404         movq    %rbp,%rax
405         movq    %rdx,%r9
406
407         mulq    %r10
408         movq    .Lpoly+24(%rip),%r15
409         addq    %rax,%r9
410         movq    %rbp,%rax
411         adcq    $0,%rdx
412         movq    %rdx,%r10
413
414         mulq    %r11
415         addq    %rax,%r10
416         movq    %rbp,%rax
417         adcq    $0,%rdx
418         movq    %rdx,%r11
419
420         mulq    %r12
421         addq    %rax,%r11
422         movq    %r8,%rax
423         adcq    $0,%rdx
424         xorq    %r13,%r13
425         movq    %rdx,%r12
426
427
428
429
430
431
432
433
434
435
436         movq    %r8,%rbp
437         shlq    $32,%r8
438         mulq    %r15
439         shrq    $32,%rbp
440         addq    %r8,%r9
441         adcq    %rbp,%r10
442         adcq    %rax,%r11
443         movq    8(%rbx),%rax
444         adcq    %rdx,%r12
445         adcq    $0,%r13
446         xorq    %r8,%r8
447
448
449
450         movq    %rax,%rbp
451         mulq    0(%rsi)
452         addq    %rax,%r9
453         movq    %rbp,%rax
454         adcq    $0,%rdx
455         movq    %rdx,%rcx
456
457         mulq    8(%rsi)
458         addq    %rcx,%r10
459         adcq    $0,%rdx
460         addq    %rax,%r10
461         movq    %rbp,%rax
462         adcq    $0,%rdx
463         movq    %rdx,%rcx
464
465         mulq    16(%rsi)
466         addq    %rcx,%r11
467         adcq    $0,%rdx
468         addq    %rax,%r11
469         movq    %rbp,%rax
470         adcq    $0,%rdx
471         movq    %rdx,%rcx
472
473         mulq    24(%rsi)
474         addq    %rcx,%r12
475         adcq    $0,%rdx
476         addq    %rax,%r12
477         movq    %r9,%rax
478         adcq    %rdx,%r13
479         adcq    $0,%r8
480
481
482
483         movq    %r9,%rbp
484         shlq    $32,%r9
485         mulq    %r15
486         shrq    $32,%rbp
487         addq    %r9,%r10
488         adcq    %rbp,%r11
489         adcq    %rax,%r12
490         movq    16(%rbx),%rax
491         adcq    %rdx,%r13
492         adcq    $0,%r8
493         xorq    %r9,%r9
494
495
496
497         movq    %rax,%rbp
498         mulq    0(%rsi)
499         addq    %rax,%r10
500         movq    %rbp,%rax
501         adcq    $0,%rdx
502         movq    %rdx,%rcx
503
504         mulq    8(%rsi)
505         addq    %rcx,%r11
506         adcq    $0,%rdx
507         addq    %rax,%r11
508         movq    %rbp,%rax
509         adcq    $0,%rdx
510         movq    %rdx,%rcx
511
512         mulq    16(%rsi)
513         addq    %rcx,%r12
514         adcq    $0,%rdx
515         addq    %rax,%r12
516         movq    %rbp,%rax
517         adcq    $0,%rdx
518         movq    %rdx,%rcx
519
520         mulq    24(%rsi)
521         addq    %rcx,%r13
522         adcq    $0,%rdx
523         addq    %rax,%r13
524         movq    %r10,%rax
525         adcq    %rdx,%r8
526         adcq    $0,%r9
527
528
529
530         movq    %r10,%rbp
531         shlq    $32,%r10
532         mulq    %r15
533         shrq    $32,%rbp
534         addq    %r10,%r11
535         adcq    %rbp,%r12
536         adcq    %rax,%r13
537         movq    24(%rbx),%rax
538         adcq    %rdx,%r8
539         adcq    $0,%r9
540         xorq    %r10,%r10
541
542
543
544         movq    %rax,%rbp
545         mulq    0(%rsi)
546         addq    %rax,%r11
547         movq    %rbp,%rax
548         adcq    $0,%rdx
549         movq    %rdx,%rcx
550
551         mulq    8(%rsi)
552         addq    %rcx,%r12
553         adcq    $0,%rdx
554         addq    %rax,%r12
555         movq    %rbp,%rax
556         adcq    $0,%rdx
557         movq    %rdx,%rcx
558
559         mulq    16(%rsi)
560         addq    %rcx,%r13
561         adcq    $0,%rdx
562         addq    %rax,%r13
563         movq    %rbp,%rax
564         adcq    $0,%rdx
565         movq    %rdx,%rcx
566
567         mulq    24(%rsi)
568         addq    %rcx,%r8
569         adcq    $0,%rdx
570         addq    %rax,%r8
571         movq    %r11,%rax
572         adcq    %rdx,%r9
573         adcq    $0,%r10
574
575
576
577         movq    %r11,%rbp
578         shlq    $32,%r11
579         mulq    %r15
580         shrq    $32,%rbp
581         addq    %r11,%r12
582         adcq    %rbp,%r13
583         movq    %r12,%rcx
584         adcq    %rax,%r8
585         adcq    %rdx,%r9
586         movq    %r13,%rbp
587         adcq    $0,%r10
588
589
590
591         subq    $-1,%r12
592         movq    %r8,%rbx
593         sbbq    %r14,%r13
594         sbbq    $0,%r8
595         movq    %r9,%rdx
596         sbbq    %r15,%r9
597         sbbq    $0,%r10
598
599         cmovcq  %rcx,%r12
600         cmovcq  %rbp,%r13
601         movq    %r12,0(%rdi)
602         cmovcq  %rbx,%r8
603         movq    %r13,8(%rdi)
604         cmovcq  %rdx,%r9
605         movq    %r8,16(%rdi)
606         movq    %r9,24(%rdi)
607
608         .byte   0xf3,0xc3
609 .size   __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
610
611
612
613
614
615
616
617
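/* ecp_nistz256_sqr_mont(%rdi = res, %rsi = a): Montgomery squaring,
   res = a*a/R mod p.  Off-diagonal products are computed once and doubled,
   the limb squares are added in, then four reduction steps fold the result
   back to 256 bits.  Dispatches to the MULX/ADX variant like mul_mont. */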
618 .globl  ecp_nistz256_sqr_mont
619 .type   ecp_nistz256_sqr_mont,@function
620 .align  32
621 ecp_nistz256_sqr_mont:
622         movl    $0x80100,%ecx
623         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
624         pushq   %rbp
625         pushq   %rbx
626         pushq   %r12
627         pushq   %r13
628         pushq   %r14
629         pushq   %r15
630         cmpl    $0x80100,%ecx
631         je      .Lsqr_montx
632         movq    0(%rsi),%rax
633         movq    8(%rsi),%r14
634         movq    16(%rsi),%r15
635         movq    24(%rsi),%r8
636
637         call    __ecp_nistz256_sqr_montq
638         jmp     .Lsqr_mont_done
639
640 .align  32
641 .Lsqr_montx:
642         movq    0(%rsi),%rdx
643         movq    8(%rsi),%r14
644         movq    16(%rsi),%r15
645         movq    24(%rsi),%r8
646         leaq    -128(%rsi),%rsi
647
648         call    __ecp_nistz256_sqr_montx
649 .Lsqr_mont_done:
650         popq    %r15
651         popq    %r14
652         popq    %r13
653         popq    %r12
654         popq    %rbx
655         popq    %rbp
656         .byte   0xf3,0xc3
657 .size   ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
658
659 .type   __ecp_nistz256_sqr_montq,@function
660 .align  32
661 __ecp_nistz256_sqr_montq:
662         movq    %rax,%r13
663         mulq    %r14
664         movq    %rax,%r9
665         movq    %r15,%rax
666         movq    %rdx,%r10
667
668         mulq    %r13
669         addq    %rax,%r10
670         movq    %r8,%rax
671         adcq    $0,%rdx
672         movq    %rdx,%r11
673
674         mulq    %r13
675         addq    %rax,%r11
676         movq    %r15,%rax
677         adcq    $0,%rdx
678         movq    %rdx,%r12
679
680
681         mulq    %r14
682         addq    %rax,%r11
683         movq    %r8,%rax
684         adcq    $0,%rdx
685         movq    %rdx,%rbp
686
687         mulq    %r14
688         addq    %rax,%r12
689         movq    %r8,%rax
690         adcq    $0,%rdx
691         addq    %rbp,%r12
692         movq    %rdx,%r13
693         adcq    $0,%r13
694
695
696         mulq    %r15
697         xorq    %r15,%r15
698         addq    %rax,%r13
699         movq    0(%rsi),%rax
700         movq    %rdx,%r14
701         adcq    $0,%r14
702
703         addq    %r9,%r9
704         adcq    %r10,%r10
705         adcq    %r11,%r11
706         adcq    %r12,%r12
707         adcq    %r13,%r13
708         adcq    %r14,%r14
709         adcq    $0,%r15
710
711         mulq    %rax
712         movq    %rax,%r8
713         movq    8(%rsi),%rax
714         movq    %rdx,%rcx
715
716         mulq    %rax
717         addq    %rcx,%r9
718         adcq    %rax,%r10
719         movq    16(%rsi),%rax
720         adcq    $0,%rdx
721         movq    %rdx,%rcx
722
723         mulq    %rax
724         addq    %rcx,%r11
725         adcq    %rax,%r12
726         movq    24(%rsi),%rax
727         adcq    $0,%rdx
728         movq    %rdx,%rcx
729
730         mulq    %rax
731         addq    %rcx,%r13
732         adcq    %rax,%r14
733         movq    %r8,%rax
734         adcq    %rdx,%r15
735
736         movq    .Lpoly+8(%rip),%rsi
737         movq    .Lpoly+24(%rip),%rbp
738
739
740
741
742         movq    %r8,%rcx
743         shlq    $32,%r8
744         mulq    %rbp
745         shrq    $32,%rcx
746         addq    %r8,%r9
747         adcq    %rcx,%r10
748         adcq    %rax,%r11
749         movq    %r9,%rax
750         adcq    $0,%rdx
751
752
753
754         movq    %r9,%rcx
755         shlq    $32,%r9
756         movq    %rdx,%r8
757         mulq    %rbp
758         shrq    $32,%rcx
759         addq    %r9,%r10
760         adcq    %rcx,%r11
761         adcq    %rax,%r8
762         movq    %r10,%rax
763         adcq    $0,%rdx
764
765
766
767         movq    %r10,%rcx
768         shlq    $32,%r10
769         movq    %rdx,%r9
770         mulq    %rbp
771         shrq    $32,%rcx
772         addq    %r10,%r11
773         adcq    %rcx,%r8
774         adcq    %rax,%r9
775         movq    %r11,%rax
776         adcq    $0,%rdx
777
778
779
780         movq    %r11,%rcx
781         shlq    $32,%r11
782         movq    %rdx,%r10
783         mulq    %rbp
784         shrq    $32,%rcx
785         addq    %r11,%r8
786         adcq    %rcx,%r9
787         adcq    %rax,%r10
788         adcq    $0,%rdx
789         xorq    %r11,%r11
790
791
792
793         addq    %r8,%r12
794         adcq    %r9,%r13
795         movq    %r12,%r8
796         adcq    %r10,%r14
797         adcq    %rdx,%r15
798         movq    %r13,%r9
799         adcq    $0,%r11
800
801         subq    $-1,%r12
802         movq    %r14,%r10
803         sbbq    %rsi,%r13
804         sbbq    $0,%r14
805         movq    %r15,%rcx
806         sbbq    %rbp,%r15
807         sbbq    $0,%r11
808
809         cmovcq  %r8,%r12
810         cmovcq  %r9,%r13
811         movq    %r12,0(%rdi)
812         cmovcq  %r10,%r14
813         movq    %r13,8(%rdi)
814         cmovcq  %rcx,%r15
815         movq    %r14,16(%rdi)
816         movq    %r15,24(%rdi)
817
818         .byte   0xf3,0xc3
819 .size   __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
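/* MULX/ADX variant of the Montgomery multiplication, using two independent
   carry chains (adcx/adox).  The caller passes %rsi biased by -128, which is
   why the source limbs are addressed as 0+128(%rsi) ... 24+128(%rsi). */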
820 .type   __ecp_nistz256_mul_montx,@function
821 .align  32
822 __ecp_nistz256_mul_montx:
823
824
825         mulxq   %r9,%r8,%r9
826         mulxq   %r10,%rcx,%r10
827         movq    $32,%r14
828         xorq    %r13,%r13
829         mulxq   %r11,%rbp,%r11
830         movq    .Lpoly+24(%rip),%r15
831         adcq    %rcx,%r9
832         mulxq   %r12,%rcx,%r12
833         movq    %r8,%rdx
834         adcq    %rbp,%r10
835         shlxq   %r14,%r8,%rbp
836         adcq    %rcx,%r11
837         shrxq   %r14,%r8,%rcx
838         adcq    $0,%r12
839
840
841
842         addq    %rbp,%r9
843         adcq    %rcx,%r10
844
845         mulxq   %r15,%rcx,%rbp
846         movq    8(%rbx),%rdx
847         adcq    %rcx,%r11
848         adcq    %rbp,%r12
849         adcq    $0,%r13
850         xorq    %r8,%r8
851
852
853
854         mulxq   0+128(%rsi),%rcx,%rbp
855         adcxq   %rcx,%r9
856         adoxq   %rbp,%r10
857
858         mulxq   8+128(%rsi),%rcx,%rbp
859         adcxq   %rcx,%r10
860         adoxq   %rbp,%r11
861
862         mulxq   16+128(%rsi),%rcx,%rbp
863         adcxq   %rcx,%r11
864         adoxq   %rbp,%r12
865
866         mulxq   24+128(%rsi),%rcx,%rbp
867         movq    %r9,%rdx
868         adcxq   %rcx,%r12
869         shlxq   %r14,%r9,%rcx
870         adoxq   %rbp,%r13
871         shrxq   %r14,%r9,%rbp
872
873         adcxq   %r8,%r13
874         adoxq   %r8,%r8
875         adcq    $0,%r8
876
877
878
879         addq    %rcx,%r10
880         adcq    %rbp,%r11
881
882         mulxq   %r15,%rcx,%rbp
883         movq    16(%rbx),%rdx
884         adcq    %rcx,%r12
885         adcq    %rbp,%r13
886         adcq    $0,%r8
887         xorq    %r9,%r9
888
889
890
891         mulxq   0+128(%rsi),%rcx,%rbp
892         adcxq   %rcx,%r10
893         adoxq   %rbp,%r11
894
895         mulxq   8+128(%rsi),%rcx,%rbp
896         adcxq   %rcx,%r11
897         adoxq   %rbp,%r12
898
899         mulxq   16+128(%rsi),%rcx,%rbp
900         adcxq   %rcx,%r12
901         adoxq   %rbp,%r13
902
903         mulxq   24+128(%rsi),%rcx,%rbp
904         movq    %r10,%rdx
905         adcxq   %rcx,%r13
906         shlxq   %r14,%r10,%rcx
907         adoxq   %rbp,%r8
908         shrxq   %r14,%r10,%rbp
909
910         adcxq   %r9,%r8
911         adoxq   %r9,%r9
912         adcq    $0,%r9
913
914
915
916         addq    %rcx,%r11
917         adcq    %rbp,%r12
918
919         mulxq   %r15,%rcx,%rbp
920         movq    24(%rbx),%rdx
921         adcq    %rcx,%r13
922         adcq    %rbp,%r8
923         adcq    $0,%r9
924         xorq    %r10,%r10
925
926
927
928         mulxq   0+128(%rsi),%rcx,%rbp
929         adcxq   %rcx,%r11
930         adoxq   %rbp,%r12
931
932         mulxq   8+128(%rsi),%rcx,%rbp
933         adcxq   %rcx,%r12
934         adoxq   %rbp,%r13
935
936         mulxq   16+128(%rsi),%rcx,%rbp
937         adcxq   %rcx,%r13
938         adoxq   %rbp,%r8
939
940         mulxq   24+128(%rsi),%rcx,%rbp
941         movq    %r11,%rdx
942         adcxq   %rcx,%r8
943         shlxq   %r14,%r11,%rcx
944         adoxq   %rbp,%r9
945         shrxq   %r14,%r11,%rbp
946
947         adcxq   %r10,%r9
948         adoxq   %r10,%r10
949         adcq    $0,%r10
950
951
952
953         addq    %rcx,%r12
954         adcq    %rbp,%r13
955
956         mulxq   %r15,%rcx,%rbp
957         movq    %r12,%rbx
958         movq    .Lpoly+8(%rip),%r14
959         adcq    %rcx,%r8
960         movq    %r13,%rdx
961         adcq    %rbp,%r9
962         adcq    $0,%r10
963
964
965
966         xorl    %eax,%eax
967         movq    %r8,%rcx
968         sbbq    $-1,%r12
969         sbbq    %r14,%r13
970         sbbq    $0,%r8
971         movq    %r9,%rbp
972         sbbq    %r15,%r9
973         sbbq    $0,%r10
974
975         cmovcq  %rbx,%r12
976         cmovcq  %rdx,%r13
977         movq    %r12,0(%rdi)
978         cmovcq  %rcx,%r8
979         movq    %r13,8(%rdi)
980         cmovcq  %rbp,%r9
981         movq    %r8,16(%rdi)
982         movq    %r9,24(%rdi)
983
984         .byte   0xf3,0xc3
985 .size   __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx
986
987 .type   __ecp_nistz256_sqr_montx,@function
988 .align  32
989 __ecp_nistz256_sqr_montx:
990         mulxq   %r14,%r9,%r10
991         mulxq   %r15,%rcx,%r11
992         xorl    %eax,%eax
993         adcq    %rcx,%r10
994         mulxq   %r8,%rbp,%r12
995         movq    %r14,%rdx
996         adcq    %rbp,%r11
997         adcq    $0,%r12
998         xorq    %r13,%r13
999
1000
1001         mulxq   %r15,%rcx,%rbp
1002         adcxq   %rcx,%r11
1003         adoxq   %rbp,%r12
1004
1005         mulxq   %r8,%rcx,%rbp
1006         movq    %r15,%rdx
1007         adcxq   %rcx,%r12
1008         adoxq   %rbp,%r13
1009         adcq    $0,%r13
1010
1011
1012         mulxq   %r8,%rcx,%r14
1013         movq    0+128(%rsi),%rdx
1014         xorq    %r15,%r15
1015         adcxq   %r9,%r9
1016         adoxq   %rcx,%r13
1017         adcxq   %r10,%r10
1018         adoxq   %r15,%r14
1019
1020         mulxq   %rdx,%r8,%rbp
1021         movq    8+128(%rsi),%rdx
1022         adcxq   %r11,%r11
1023         adoxq   %rbp,%r9
1024         adcxq   %r12,%r12
1025         mulxq   %rdx,%rcx,%rax
1026         movq    16+128(%rsi),%rdx
1027         adcxq   %r13,%r13
1028         adoxq   %rcx,%r10
1029         adcxq   %r14,%r14
1030 .byte   0x67
1031         mulxq   %rdx,%rcx,%rbp
1032         movq    24+128(%rsi),%rdx
1033         adoxq   %rax,%r11
1034         adcxq   %r15,%r15
1035         adoxq   %rcx,%r12
1036         movq    $32,%rsi
1037         adoxq   %rbp,%r13
1038 .byte   0x67,0x67
1039         mulxq   %rdx,%rcx,%rax
1040         movq    %r8,%rdx
1041         adoxq   %rcx,%r14
1042         shlxq   %rsi,%r8,%rcx
1043         adoxq   %rax,%r15
1044         shrxq   %rsi,%r8,%rax
1045         movq    .Lpoly+24(%rip),%rbp
1046
1047
1048         addq    %rcx,%r9
1049         adcq    %rax,%r10
1050
1051         mulxq   %rbp,%rcx,%r8
1052         movq    %r9,%rdx
1053         adcq    %rcx,%r11
1054         shlxq   %rsi,%r9,%rcx
1055         adcq    $0,%r8
1056         shrxq   %rsi,%r9,%rax
1057
1058
1059         addq    %rcx,%r10
1060         adcq    %rax,%r11
1061
1062         mulxq   %rbp,%rcx,%r9
1063         movq    %r10,%rdx
1064         adcq    %rcx,%r8
1065         shlxq   %rsi,%r10,%rcx
1066         adcq    $0,%r9
1067         shrxq   %rsi,%r10,%rax
1068
1069
1070         addq    %rcx,%r11
1071         adcq    %rax,%r8
1072
1073         mulxq   %rbp,%rcx,%r10
1074         movq    %r11,%rdx
1075         adcq    %rcx,%r9
1076         shlxq   %rsi,%r11,%rcx
1077         adcq    $0,%r10
1078         shrxq   %rsi,%r11,%rax
1079
1080
1081         addq    %rcx,%r8
1082         adcq    %rax,%r9
1083
1084         mulxq   %rbp,%rcx,%r11
1085         adcq    %rcx,%r10
1086         adcq    $0,%r11
1087
1088         xorq    %rdx,%rdx
1089         adcq    %r8,%r12
1090         movq    .Lpoly+8(%rip),%rsi
1091         adcq    %r9,%r13
1092         movq    %r12,%r8
1093         adcq    %r10,%r14
1094         adcq    %r11,%r15
1095         movq    %r13,%r9
1096         adcq    $0,%rdx
1097
1098         xorl    %eax,%eax
1099         sbbq    $-1,%r12
1100         movq    %r14,%r10
1101         sbbq    %rsi,%r13
1102         sbbq    $0,%r14
1103         movq    %r15,%r11
1104         sbbq    %rbp,%r15
1105         sbbq    $0,%rdx
1106
1107         cmovcq  %r8,%r12
1108         cmovcq  %r9,%r13
1109         movq    %r12,0(%rdi)
1110         cmovcq  %r10,%r14
1111         movq    %r13,8(%rdi)
1112         cmovcq  %r11,%r15
1113         movq    %r14,16(%rdi)
1114         movq    %r15,24(%rdi)
1115
1116         .byte   0xf3,0xc3
1117 .size   __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
1118
1119
1120
1121
1122
1123
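/* ecp_nistz256_from_mont(%rdi = res, %rsi = a): convert out of Montgomery
   form, res = a/R mod p, by running the four reduction steps on a alone
   (equivalent to a Montgomery multiplication by 1). */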
1124 .globl  ecp_nistz256_from_mont
1125 .type   ecp_nistz256_from_mont,@function
1126 .align  32
1127 ecp_nistz256_from_mont:
1128         pushq   %r12
1129         pushq   %r13
1130
1131         movq    0(%rsi),%rax
1132         movq    .Lpoly+24(%rip),%r13
1133         movq    8(%rsi),%r9
1134         movq    16(%rsi),%r10
1135         movq    24(%rsi),%r11
1136         movq    %rax,%r8
1137         movq    .Lpoly+8(%rip),%r12
1138
1139
1140
1141         movq    %rax,%rcx
1142         shlq    $32,%r8
1143         mulq    %r13
1144         shrq    $32,%rcx
1145         addq    %r8,%r9
1146         adcq    %rcx,%r10
1147         adcq    %rax,%r11
1148         movq    %r9,%rax
1149         adcq    $0,%rdx
1150
1151
1152
1153         movq    %r9,%rcx
1154         shlq    $32,%r9
1155         movq    %rdx,%r8
1156         mulq    %r13
1157         shrq    $32,%rcx
1158         addq    %r9,%r10
1159         adcq    %rcx,%r11
1160         adcq    %rax,%r8
1161         movq    %r10,%rax
1162         adcq    $0,%rdx
1163
1164
1165
1166         movq    %r10,%rcx
1167         shlq    $32,%r10
1168         movq    %rdx,%r9
1169         mulq    %r13
1170         shrq    $32,%rcx
1171         addq    %r10,%r11
1172         adcq    %rcx,%r8
1173         adcq    %rax,%r9
1174         movq    %r11,%rax
1175         adcq    $0,%rdx
1176
1177
1178
1179         movq    %r11,%rcx
1180         shlq    $32,%r11
1181         movq    %rdx,%r10
1182         mulq    %r13
1183         shrq    $32,%rcx
1184         addq    %r11,%r8
1185         adcq    %rcx,%r9
1186         movq    %r8,%rcx
1187         adcq    %rax,%r10
1188         movq    %r9,%rsi
1189         adcq    $0,%rdx
1190
1191
1192
1193         subq    $-1,%r8
1194         movq    %r10,%rax
1195         sbbq    %r12,%r9
1196         sbbq    $0,%r10
1197         movq    %rdx,%r11
1198         sbbq    %r13,%rdx
1199         sbbq    %r13,%r13
1200
1201         cmovnzq %rcx,%r8
1202         cmovnzq %rsi,%r9
1203         movq    %r8,0(%rdi)
1204         cmovnzq %rax,%r10
1205         movq    %r9,8(%rdi)
1206         cmovzq  %rdx,%r11
1207         movq    %r10,16(%rdi)
1208         movq    %r11,24(%rdi)
1209
1210         popq    %r13
1211         popq    %r12
1212         .byte   0xf3,0xc3
1213 .size   ecp_nistz256_from_mont,.-ecp_nistz256_from_mont
1214
1215
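/* ecp_nistz256_select_w5(%rdi = val, %rsi = table, %edx = index):
   constant-time selection of one projective point from a table of 16 entries
   (96 bytes each).  Every entry is read and masked so the memory access
   pattern does not depend on the secret index; index 0 matches nothing and
   yields all-zero output.  Uses the AVX2 variant when available. */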
1216 .globl  ecp_nistz256_select_w5
1217 .type   ecp_nistz256_select_w5,@function
1218 .align  32
1219 ecp_nistz256_select_w5:
1220         movl    OPENSSL_ia32cap_P+8(%rip),%eax
1221         testl   $32,%eax
1222         jnz     .Lavx2_select_w5
1223         movdqa  .LOne(%rip),%xmm0
1224         movd    %edx,%xmm1
1225
1226         pxor    %xmm2,%xmm2
1227         pxor    %xmm3,%xmm3
1228         pxor    %xmm4,%xmm4
1229         pxor    %xmm5,%xmm5
1230         pxor    %xmm6,%xmm6
1231         pxor    %xmm7,%xmm7
1232
1233         movdqa  %xmm0,%xmm8
1234         pshufd  $0,%xmm1,%xmm1
1235
1236         movq    $16,%rax
1237 .Lselect_loop_sse_w5:
1238
1239         movdqa  %xmm8,%xmm15
1240         paddd   %xmm0,%xmm8
1241         pcmpeqd %xmm1,%xmm15
1242
1243         movdqa  0(%rsi),%xmm9
1244         movdqa  16(%rsi),%xmm10
1245         movdqa  32(%rsi),%xmm11
1246         movdqa  48(%rsi),%xmm12
1247         movdqa  64(%rsi),%xmm13
1248         movdqa  80(%rsi),%xmm14
1249         leaq    96(%rsi),%rsi
1250
1251         pand    %xmm15,%xmm9
1252         pand    %xmm15,%xmm10
1253         por     %xmm9,%xmm2
1254         pand    %xmm15,%xmm11
1255         por     %xmm10,%xmm3
1256         pand    %xmm15,%xmm12
1257         por     %xmm11,%xmm4
1258         pand    %xmm15,%xmm13
1259         por     %xmm12,%xmm5
1260         pand    %xmm15,%xmm14
1261         por     %xmm13,%xmm6
1262         por     %xmm14,%xmm7
1263
1264         decq    %rax
1265         jnz     .Lselect_loop_sse_w5
1266
1267         movdqu  %xmm2,0(%rdi)
1268         movdqu  %xmm3,16(%rdi)
1269         movdqu  %xmm4,32(%rdi)
1270         movdqu  %xmm5,48(%rdi)
1271         movdqu  %xmm6,64(%rdi)
1272         movdqu  %xmm7,80(%rdi)
1273         .byte   0xf3,0xc3
1274 .size   ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
1275
1276
1277
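/* ecp_nistz256_select_w7(%rdi = val, %rsi = table, %edx = index): the same
   constant-time scan over a table of 64 affine points (64 bytes each). */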
1278 .globl  ecp_nistz256_select_w7
1279 .type   ecp_nistz256_select_w7,@function
1280 .align  32
1281 ecp_nistz256_select_w7:
1282         movl    OPENSSL_ia32cap_P+8(%rip),%eax
1283         testl   $32,%eax
1284         jnz     .Lavx2_select_w7
1285         movdqa  .LOne(%rip),%xmm8
1286         movd    %edx,%xmm1
1287
1288         pxor    %xmm2,%xmm2
1289         pxor    %xmm3,%xmm3
1290         pxor    %xmm4,%xmm4
1291         pxor    %xmm5,%xmm5
1292
1293         movdqa  %xmm8,%xmm0
1294         pshufd  $0,%xmm1,%xmm1
1295         movq    $64,%rax
1296
1297 .Lselect_loop_sse_w7:
1298         movdqa  %xmm8,%xmm15
1299         paddd   %xmm0,%xmm8
1300         movdqa  0(%rsi),%xmm9
1301         movdqa  16(%rsi),%xmm10
1302         pcmpeqd %xmm1,%xmm15
1303         movdqa  32(%rsi),%xmm11
1304         movdqa  48(%rsi),%xmm12
1305         leaq    64(%rsi),%rsi
1306
1307         pand    %xmm15,%xmm9
1308         pand    %xmm15,%xmm10
1309         por     %xmm9,%xmm2
1310         pand    %xmm15,%xmm11
1311         por     %xmm10,%xmm3
1312         pand    %xmm15,%xmm12
1313         por     %xmm11,%xmm4
1314         prefetcht0      255(%rsi)
1315         por     %xmm12,%xmm5
1316
1317         decq    %rax
1318         jnz     .Lselect_loop_sse_w7
1319
1320         movdqu  %xmm2,0(%rdi)
1321         movdqu  %xmm3,16(%rdi)
1322         movdqu  %xmm4,32(%rdi)
1323         movdqu  %xmm5,48(%rdi)
1324         .byte   0xf3,0xc3
1325 .size   ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
1326
1327
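/* AVX2 versions of the two table selects above: the same constant-time scan
   performed with 256-bit vectors, handling two (w5) or three (w7) table
   entries per loop iteration. */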
1328 .type   ecp_nistz256_avx2_select_w5,@function
1329 .align  32
1330 ecp_nistz256_avx2_select_w5:
1331 .Lavx2_select_w5:
1332         vzeroupper
1333         vmovdqa .LTwo(%rip),%ymm0
1334
1335         vpxor   %ymm2,%ymm2,%ymm2
1336         vpxor   %ymm3,%ymm3,%ymm3
1337         vpxor   %ymm4,%ymm4,%ymm4
1338
1339         vmovdqa .LOne(%rip),%ymm5
1340         vmovdqa .LTwo(%rip),%ymm10
1341
1342         vmovd   %edx,%xmm1
1343         vpermd  %ymm1,%ymm2,%ymm1
1344
1345         movq    $8,%rax
1346 .Lselect_loop_avx2_w5:
1347
1348         vmovdqa 0(%rsi),%ymm6
1349         vmovdqa 32(%rsi),%ymm7
1350         vmovdqa 64(%rsi),%ymm8
1351
1352         vmovdqa 96(%rsi),%ymm11
1353         vmovdqa 128(%rsi),%ymm12
1354         vmovdqa 160(%rsi),%ymm13
1355
1356         vpcmpeqd        %ymm1,%ymm5,%ymm9
1357         vpcmpeqd        %ymm1,%ymm10,%ymm14
1358
1359         vpaddd  %ymm0,%ymm5,%ymm5
1360         vpaddd  %ymm0,%ymm10,%ymm10
1361         leaq    192(%rsi),%rsi
1362
1363         vpand   %ymm9,%ymm6,%ymm6
1364         vpand   %ymm9,%ymm7,%ymm7
1365         vpand   %ymm9,%ymm8,%ymm8
1366         vpand   %ymm14,%ymm11,%ymm11
1367         vpand   %ymm14,%ymm12,%ymm12
1368         vpand   %ymm14,%ymm13,%ymm13
1369
1370         vpxor   %ymm6,%ymm2,%ymm2
1371         vpxor   %ymm7,%ymm3,%ymm3
1372         vpxor   %ymm8,%ymm4,%ymm4
1373         vpxor   %ymm11,%ymm2,%ymm2
1374         vpxor   %ymm12,%ymm3,%ymm3
1375         vpxor   %ymm13,%ymm4,%ymm4
1376
1377         decq    %rax
1378         jnz     .Lselect_loop_avx2_w5
1379
1380         vmovdqu %ymm2,0(%rdi)
1381         vmovdqu %ymm3,32(%rdi)
1382         vmovdqu %ymm4,64(%rdi)
1383         vzeroupper
1384         .byte   0xf3,0xc3
1385 .size   ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5
1386
1387
1388
1389 .globl  ecp_nistz256_avx2_select_w7
1390 .type   ecp_nistz256_avx2_select_w7,@function
1391 .align  32
1392 ecp_nistz256_avx2_select_w7:
1393 .Lavx2_select_w7:
1394         vzeroupper
1395         vmovdqa .LThree(%rip),%ymm0
1396
1397         vpxor   %ymm2,%ymm2,%ymm2
1398         vpxor   %ymm3,%ymm3,%ymm3
1399
1400         vmovdqa .LOne(%rip),%ymm4
1401         vmovdqa .LTwo(%rip),%ymm8
1402         vmovdqa .LThree(%rip),%ymm12
1403
1404         vmovd   %edx,%xmm1
1405         vpermd  %ymm1,%ymm2,%ymm1
1406
1407
1408         movq    $21,%rax
1409 .Lselect_loop_avx2_w7:
1410
1411         vmovdqa 0(%rsi),%ymm5
1412         vmovdqa 32(%rsi),%ymm6
1413
1414         vmovdqa 64(%rsi),%ymm9
1415         vmovdqa 96(%rsi),%ymm10
1416
1417         vmovdqa 128(%rsi),%ymm13
1418         vmovdqa 160(%rsi),%ymm14
1419
1420         vpcmpeqd        %ymm1,%ymm4,%ymm7
1421         vpcmpeqd        %ymm1,%ymm8,%ymm11
1422         vpcmpeqd        %ymm1,%ymm12,%ymm15
1423
1424         vpaddd  %ymm0,%ymm4,%ymm4
1425         vpaddd  %ymm0,%ymm8,%ymm8
1426         vpaddd  %ymm0,%ymm12,%ymm12
1427         leaq    192(%rsi),%rsi
1428
1429         vpand   %ymm7,%ymm5,%ymm5
1430         vpand   %ymm7,%ymm6,%ymm6
1431         vpand   %ymm11,%ymm9,%ymm9
1432         vpand   %ymm11,%ymm10,%ymm10
1433         vpand   %ymm15,%ymm13,%ymm13
1434         vpand   %ymm15,%ymm14,%ymm14
1435
1436         vpxor   %ymm5,%ymm2,%ymm2
1437         vpxor   %ymm6,%ymm3,%ymm3
1438         vpxor   %ymm9,%ymm2,%ymm2
1439         vpxor   %ymm10,%ymm3,%ymm3
1440         vpxor   %ymm13,%ymm2,%ymm2
1441         vpxor   %ymm14,%ymm3,%ymm3
1442
1443         decq    %rax
1444         jnz     .Lselect_loop_avx2_w7
1445
1446
1447         vmovdqa 0(%rsi),%ymm5
1448         vmovdqa 32(%rsi),%ymm6
1449
1450         vpcmpeqd        %ymm1,%ymm4,%ymm7
1451
1452         vpand   %ymm7,%ymm5,%ymm5
1453         vpand   %ymm7,%ymm6,%ymm6
1454
1455         vpxor   %ymm5,%ymm2,%ymm2
1456         vpxor   %ymm6,%ymm3,%ymm3
1457
1458         vmovdqu %ymm2,0(%rdi)
1459         vmovdqu %ymm3,32(%rdi)
1460         vzeroupper
1461         .byte   0xf3,0xc3
1462 .size   ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
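/* Register-resident helpers for the point routines below.  They operate on a
   four-limb value held in %r12,%r13,%r8,%r9 (second operand at (%rbx), or in
   %rax,%rbp,%rcx,%r10 for __ecp_nistz256_subq), with p[1] and p[3] kept in
   %r14/%r15, and reduce the result modulo p. */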
1463 .type   __ecp_nistz256_add_toq,@function
1464 .align  32
1465 __ecp_nistz256_add_toq:
1466         addq    0(%rbx),%r12
1467         adcq    8(%rbx),%r13
1468         movq    %r12,%rax
1469         adcq    16(%rbx),%r8
1470         adcq    24(%rbx),%r9
1471         movq    %r13,%rbp
1472         sbbq    %r11,%r11
1473
1474         subq    $-1,%r12
1475         movq    %r8,%rcx
1476         sbbq    %r14,%r13
1477         sbbq    $0,%r8
1478         movq    %r9,%r10
1479         sbbq    %r15,%r9
1480         testq   %r11,%r11
1481
1482         cmovzq  %rax,%r12
1483         cmovzq  %rbp,%r13
1484         movq    %r12,0(%rdi)
1485         cmovzq  %rcx,%r8
1486         movq    %r13,8(%rdi)
1487         cmovzq  %r10,%r9
1488         movq    %r8,16(%rdi)
1489         movq    %r9,24(%rdi)
1490
1491         .byte   0xf3,0xc3
1492 .size   __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
1493
1494 .type   __ecp_nistz256_sub_fromq,@function
1495 .align  32
1496 __ecp_nistz256_sub_fromq:
1497         subq    0(%rbx),%r12
1498         sbbq    8(%rbx),%r13
1499         movq    %r12,%rax
1500         sbbq    16(%rbx),%r8
1501         sbbq    24(%rbx),%r9
1502         movq    %r13,%rbp
1503         sbbq    %r11,%r11
1504
1505         addq    $-1,%r12
1506         movq    %r8,%rcx
1507         adcq    %r14,%r13
1508         adcq    $0,%r8
1509         movq    %r9,%r10
1510         adcq    %r15,%r9
1511         testq   %r11,%r11
1512
1513         cmovzq  %rax,%r12
1514         cmovzq  %rbp,%r13
1515         movq    %r12,0(%rdi)
1516         cmovzq  %rcx,%r8
1517         movq    %r13,8(%rdi)
1518         cmovzq  %r10,%r9
1519         movq    %r8,16(%rdi)
1520         movq    %r9,24(%rdi)
1521
1522         .byte   0xf3,0xc3
1523 .size   __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
1524
1525 .type   __ecp_nistz256_subq,@function
1526 .align  32
1527 __ecp_nistz256_subq:
1528         subq    %r12,%rax
1529         sbbq    %r13,%rbp
1530         movq    %rax,%r12
1531         sbbq    %r8,%rcx
1532         sbbq    %r9,%r10
1533         movq    %rbp,%r13
1534         sbbq    %r11,%r11
1535
1536         addq    $-1,%rax
1537         movq    %rcx,%r8
1538         adcq    %r14,%rbp
1539         adcq    $0,%rcx
1540         movq    %r10,%r9
1541         adcq    %r15,%r10
1542         testq   %r11,%r11
1543
1544         cmovnzq %rax,%r12
1545         cmovnzq %rbp,%r13
1546         cmovnzq %rcx,%r8
1547         cmovnzq %r10,%r9
1548
1549         .byte   0xf3,0xc3
1550 .size   __ecp_nistz256_subq,.-__ecp_nistz256_subq
1551
1552 .type   __ecp_nistz256_mul_by_2q,@function
1553 .align  32
1554 __ecp_nistz256_mul_by_2q:
1555         addq    %r12,%r12
1556         adcq    %r13,%r13
1557         movq    %r12,%rax
1558         adcq    %r8,%r8
1559         adcq    %r9,%r9
1560         movq    %r13,%rbp
1561         sbbq    %r11,%r11
1562
1563         subq    $-1,%r12
1564         movq    %r8,%rcx
1565         sbbq    %r14,%r13
1566         sbbq    $0,%r8
1567         movq    %r9,%r10
1568         sbbq    %r15,%r9
1569         testq   %r11,%r11
1570
1571         cmovzq  %rax,%r12
1572         cmovzq  %rbp,%r13
1573         movq    %r12,0(%rdi)
1574         cmovzq  %rcx,%r8
1575         movq    %r13,8(%rdi)
1576         cmovzq  %r10,%r9
1577         movq    %r8,16(%rdi)
1578         movq    %r9,24(%rdi)
1579
1580         .byte   0xf3,0xc3
1581 .size   __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
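/* ecp_nistz256_point_double(%rdi = r, %rsi = a): double a point given in
   Jacobian (X,Y,Z) coordinates, all values in Montgomery form.  Branches to
   the MULX/ADX code path (.Lpoint_doublex) when available. */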
1582 .globl  ecp_nistz256_point_double
1583 .type   ecp_nistz256_point_double,@function
1584 .align  32
1585 ecp_nistz256_point_double:
1586         movl    $0x80100,%ecx
1587         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
1588         cmpl    $0x80100,%ecx
1589         je      .Lpoint_doublex
1590         pushq   %rbp
1591         pushq   %rbx
1592         pushq   %r12
1593         pushq   %r13
1594         pushq   %r14
1595         pushq   %r15
1596         subq    $160+8,%rsp
1597
1598 .Lpoint_double_shortcutq:
1599         movdqu  0(%rsi),%xmm0
1600         movq    %rsi,%rbx
1601         movdqu  16(%rsi),%xmm1
1602         movq    32+0(%rsi),%r12
1603         movq    32+8(%rsi),%r13
1604         movq    32+16(%rsi),%r8
1605         movq    32+24(%rsi),%r9
1606         movq    .Lpoly+8(%rip),%r14
1607         movq    .Lpoly+24(%rip),%r15
1608         movdqa  %xmm0,96(%rsp)
1609         movdqa  %xmm1,96+16(%rsp)
1610         leaq    32(%rdi),%r10
1611         leaq    64(%rdi),%r11
1612 .byte   102,72,15,110,199
1613 .byte   102,73,15,110,202
1614 .byte   102,73,15,110,211
1615
1616         leaq    0(%rsp),%rdi
1617         call    __ecp_nistz256_mul_by_2q
1618
1619         movq    64+0(%rsi),%rax
1620         movq    64+8(%rsi),%r14
1621         movq    64+16(%rsi),%r15
1622         movq    64+24(%rsi),%r8
1623         leaq    64-0(%rsi),%rsi
1624         leaq    64(%rsp),%rdi
1625         call    __ecp_nistz256_sqr_montq
1626
1627         movq    0+0(%rsp),%rax
1628         movq    8+0(%rsp),%r14
1629         leaq    0+0(%rsp),%rsi
1630         movq    16+0(%rsp),%r15
1631         movq    24+0(%rsp),%r8
1632         leaq    0(%rsp),%rdi
1633         call    __ecp_nistz256_sqr_montq
1634
1635         movq    32(%rbx),%rax
1636         movq    64+0(%rbx),%r9
1637         movq    64+8(%rbx),%r10
1638         movq    64+16(%rbx),%r11
1639         movq    64+24(%rbx),%r12
1640         leaq    64-0(%rbx),%rsi
1641         leaq    32(%rbx),%rbx
1642 .byte   102,72,15,126,215
1643         call    __ecp_nistz256_mul_montq
1644         call    __ecp_nistz256_mul_by_2q
1645
1646         movq    96+0(%rsp),%r12
1647         movq    96+8(%rsp),%r13
1648         leaq    64(%rsp),%rbx
1649         movq    96+16(%rsp),%r8
1650         movq    96+24(%rsp),%r9
1651         leaq    32(%rsp),%rdi
1652         call    __ecp_nistz256_add_toq
1653
1654         movq    96+0(%rsp),%r12
1655         movq    96+8(%rsp),%r13
1656         leaq    64(%rsp),%rbx
1657         movq    96+16(%rsp),%r8
1658         movq    96+24(%rsp),%r9
1659         leaq    64(%rsp),%rdi
1660         call    __ecp_nistz256_sub_fromq
1661
1662         movq    0+0(%rsp),%rax
1663         movq    8+0(%rsp),%r14
1664         leaq    0+0(%rsp),%rsi
1665         movq    16+0(%rsp),%r15
1666         movq    24+0(%rsp),%r8
1667 .byte   102,72,15,126,207
1668         call    __ecp_nistz256_sqr_montq
1669         xorq    %r9,%r9
1670         movq    %r12,%rax
1671         addq    $-1,%r12
1672         movq    %r13,%r10
1673         adcq    %rsi,%r13
1674         movq    %r14,%rcx
1675         adcq    $0,%r14
1676         movq    %r15,%r8
1677         adcq    %rbp,%r15
1678         adcq    $0,%r9
1679         xorq    %rsi,%rsi
1680         testq   $1,%rax
1681
1682         cmovzq  %rax,%r12
1683         cmovzq  %r10,%r13
1684         cmovzq  %rcx,%r14
1685         cmovzq  %r8,%r15
1686         cmovzq  %rsi,%r9
1687
1688         movq    %r13,%rax
1689         shrq    $1,%r12
1690         shlq    $63,%rax
1691         movq    %r14,%r10
1692         shrq    $1,%r13
1693         orq     %rax,%r12
1694         shlq    $63,%r10
1695         movq    %r15,%rcx
1696         shrq    $1,%r14
1697         orq     %r10,%r13
1698         shlq    $63,%rcx
1699         movq    %r12,0(%rdi)
1700         shrq    $1,%r15
1701         movq    %r13,8(%rdi)
1702         shlq    $63,%r9
1703         orq     %rcx,%r14
1704         orq     %r9,%r15
1705         movq    %r14,16(%rdi)
1706         movq    %r15,24(%rdi)
1707         movq    64(%rsp),%rax
1708         leaq    64(%rsp),%rbx
1709         movq    0+32(%rsp),%r9
1710         movq    8+32(%rsp),%r10
1711         leaq    0+32(%rsp),%rsi
1712         movq    16+32(%rsp),%r11
1713         movq    24+32(%rsp),%r12
1714         leaq    32(%rsp),%rdi
1715         call    __ecp_nistz256_mul_montq
1716
1717         leaq    128(%rsp),%rdi
1718         call    __ecp_nistz256_mul_by_2q
1719
1720         leaq    32(%rsp),%rbx
1721         leaq    32(%rsp),%rdi
1722         call    __ecp_nistz256_add_toq
1723
1724         movq    96(%rsp),%rax
1725         leaq    96(%rsp),%rbx
1726         movq    0+0(%rsp),%r9
1727         movq    8+0(%rsp),%r10
1728         leaq    0+0(%rsp),%rsi
1729         movq    16+0(%rsp),%r11
1730         movq    24+0(%rsp),%r12
1731         leaq    0(%rsp),%rdi
1732         call    __ecp_nistz256_mul_montq
1733
1734         leaq    128(%rsp),%rdi
1735         call    __ecp_nistz256_mul_by_2q
1736
1737         movq    0+32(%rsp),%rax
1738         movq    8+32(%rsp),%r14
1739         leaq    0+32(%rsp),%rsi
1740         movq    16+32(%rsp),%r15
1741         movq    24+32(%rsp),%r8
1742 .byte   102,72,15,126,199
1743         call    __ecp_nistz256_sqr_montq
1744
1745         leaq    128(%rsp),%rbx
1746         movq    %r14,%r8
1747         movq    %r15,%r9
1748         movq    %rsi,%r14
1749         movq    %rbp,%r15
1750         call    __ecp_nistz256_sub_fromq
1751
1752         movq    0+0(%rsp),%rax
1753         movq    0+8(%rsp),%rbp
1754         movq    0+16(%rsp),%rcx
1755         movq    0+24(%rsp),%r10
1756         leaq    0(%rsp),%rdi
1757         call    __ecp_nistz256_subq
1758
1759         movq    32(%rsp),%rax
1760         leaq    32(%rsp),%rbx
1761         movq    %r12,%r14
1762         xorl    %ecx,%ecx
1763         movq    %r12,0+0(%rsp)
1764         movq    %r13,%r10
1765         movq    %r13,0+8(%rsp)
1766         cmovzq  %r8,%r11
1767         movq    %r8,0+16(%rsp)
1768         leaq    0-0(%rsp),%rsi
1769         cmovzq  %r9,%r12
1770         movq    %r9,0+24(%rsp)
1771         movq    %r14,%r9
1772         leaq    0(%rsp),%rdi
1773         call    __ecp_nistz256_mul_montq
1774
1775 .byte   102,72,15,126,203
1776 .byte   102,72,15,126,207
1777         call    __ecp_nistz256_sub_fromq
1778
1779         addq    $160+8,%rsp
1780         popq    %r15
1781         popq    %r14
1782         popq    %r13
1783         popq    %r12
1784         popq    %rbx
1785         popq    %rbp
1786         .byte   0xf3,0xc3
1787 .size   ecp_nistz256_point_double,.-ecp_nistz256_point_double
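/* ecp_nistz256_point_add(%rdi = r, %rsi = a, %rdx = b): add two Jacobian
   points.  The %xmm4/%xmm5 masks track whether either input is the point at
   infinity so the other input can be copied through at the end, and equal
   inputs are detected and redirected to the doubling code (.Ladd_doubleq). */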
1788 .globl  ecp_nistz256_point_add
1789 .type   ecp_nistz256_point_add,@function
1790 .align  32
1791 ecp_nistz256_point_add:
1792         movl    $0x80100,%ecx
1793         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
1794         cmpl    $0x80100,%ecx
1795         je      .Lpoint_addx
1796         pushq   %rbp
1797         pushq   %rbx
1798         pushq   %r12
1799         pushq   %r13
1800         pushq   %r14
1801         pushq   %r15
1802         subq    $576+8,%rsp
1803
1804         movdqu  0(%rsi),%xmm0
1805         movdqu  16(%rsi),%xmm1
1806         movdqu  32(%rsi),%xmm2
1807         movdqu  48(%rsi),%xmm3
1808         movdqu  64(%rsi),%xmm4
1809         movdqu  80(%rsi),%xmm5
1810         movq    %rsi,%rbx
1811         movq    %rdx,%rsi
1812         movdqa  %xmm0,384(%rsp)
1813         movdqa  %xmm1,384+16(%rsp)
1814         por     %xmm0,%xmm1
1815         movdqa  %xmm2,416(%rsp)
1816         movdqa  %xmm3,416+16(%rsp)
1817         por     %xmm2,%xmm3
1818         movdqa  %xmm4,448(%rsp)
1819         movdqa  %xmm5,448+16(%rsp)
1820         por     %xmm1,%xmm3
1821
1822         movdqu  0(%rsi),%xmm0
1823         pshufd  $0xb1,%xmm3,%xmm5
1824         movdqu  16(%rsi),%xmm1
1825         movdqu  32(%rsi),%xmm2
1826         por     %xmm3,%xmm5
1827         movdqu  48(%rsi),%xmm3
1828         movq    64+0(%rsi),%rax
1829         movq    64+8(%rsi),%r14
1830         movq    64+16(%rsi),%r15
1831         movq    64+24(%rsi),%r8
1832         movdqa  %xmm0,480(%rsp)
1833         pshufd  $0x1e,%xmm5,%xmm4
1834         movdqa  %xmm1,480+16(%rsp)
1835         por     %xmm0,%xmm1
1836 .byte   102,72,15,110,199
1837         movdqa  %xmm2,512(%rsp)
1838         movdqa  %xmm3,512+16(%rsp)
1839         por     %xmm2,%xmm3
1840         por     %xmm4,%xmm5
1841         pxor    %xmm4,%xmm4
1842         por     %xmm1,%xmm3
1843
1844         leaq    64-0(%rsi),%rsi
1845         movq    %rax,544+0(%rsp)
1846         movq    %r14,544+8(%rsp)
1847         movq    %r15,544+16(%rsp)
1848         movq    %r8,544+24(%rsp)
1849         leaq    96(%rsp),%rdi
1850         call    __ecp_nistz256_sqr_montq
1851
1852         pcmpeqd %xmm4,%xmm5
1853         pshufd  $0xb1,%xmm3,%xmm4
1854         por     %xmm3,%xmm4
1855         pshufd  $0,%xmm5,%xmm5
1856         pshufd  $0x1e,%xmm4,%xmm3
1857         por     %xmm3,%xmm4
1858         pxor    %xmm3,%xmm3
1859         pcmpeqd %xmm3,%xmm4
1860         pshufd  $0,%xmm4,%xmm4
1861         movq    64+0(%rbx),%rax
1862         movq    64+8(%rbx),%r14
1863         movq    64+16(%rbx),%r15
1864         movq    64+24(%rbx),%r8
1865 .byte   102,72,15,110,203
1866
1867         leaq    64-0(%rbx),%rsi
1868         leaq    32(%rsp),%rdi
1869         call    __ecp_nistz256_sqr_montq
1870
1871         movq    544(%rsp),%rax
1872         leaq    544(%rsp),%rbx
1873         movq    0+96(%rsp),%r9
1874         movq    8+96(%rsp),%r10
1875         leaq    0+96(%rsp),%rsi
1876         movq    16+96(%rsp),%r11
1877         movq    24+96(%rsp),%r12
1878         leaq    224(%rsp),%rdi
1879         call    __ecp_nistz256_mul_montq
1880
1881         movq    448(%rsp),%rax
1882         leaq    448(%rsp),%rbx
1883         movq    0+32(%rsp),%r9
1884         movq    8+32(%rsp),%r10
1885         leaq    0+32(%rsp),%rsi
1886         movq    16+32(%rsp),%r11
1887         movq    24+32(%rsp),%r12
1888         leaq    256(%rsp),%rdi
1889         call    __ecp_nistz256_mul_montq
1890
1891         movq    416(%rsp),%rax
1892         leaq    416(%rsp),%rbx
1893         movq    0+224(%rsp),%r9
1894         movq    8+224(%rsp),%r10
1895         leaq    0+224(%rsp),%rsi
1896         movq    16+224(%rsp),%r11
1897         movq    24+224(%rsp),%r12
1898         leaq    224(%rsp),%rdi
1899         call    __ecp_nistz256_mul_montq
1900
1901         movq    512(%rsp),%rax
1902         leaq    512(%rsp),%rbx
1903         movq    0+256(%rsp),%r9
1904         movq    8+256(%rsp),%r10
1905         leaq    0+256(%rsp),%rsi
1906         movq    16+256(%rsp),%r11
1907         movq    24+256(%rsp),%r12
1908         leaq    256(%rsp),%rdi
1909         call    __ecp_nistz256_mul_montq
1910
1911         leaq    224(%rsp),%rbx
1912         leaq    64(%rsp),%rdi
1913         call    __ecp_nistz256_sub_fromq
1914
1915         orq     %r13,%r12
1916         movdqa  %xmm4,%xmm2
1917         orq     %r8,%r12
1918         orq     %r9,%r12
1919         por     %xmm5,%xmm2
1920 .byte   102,73,15,110,220
1921
1922         movq    384(%rsp),%rax
1923         leaq    384(%rsp),%rbx
1924         movq    0+96(%rsp),%r9
1925         movq    8+96(%rsp),%r10
1926         leaq    0+96(%rsp),%rsi
1927         movq    16+96(%rsp),%r11
1928         movq    24+96(%rsp),%r12
1929         leaq    160(%rsp),%rdi
1930         call    __ecp_nistz256_mul_montq
1931
1932         movq    480(%rsp),%rax
1933         leaq    480(%rsp),%rbx
1934         movq    0+32(%rsp),%r9
1935         movq    8+32(%rsp),%r10
1936         leaq    0+32(%rsp),%rsi
1937         movq    16+32(%rsp),%r11
1938         movq    24+32(%rsp),%r12
1939         leaq    192(%rsp),%rdi
1940         call    __ecp_nistz256_mul_montq
1941
1942         leaq    160(%rsp),%rbx
1943         leaq    0(%rsp),%rdi
1944         call    __ecp_nistz256_sub_fromq
1945
1946         orq     %r13,%r12
1947         orq     %r8,%r12
1948         orq     %r9,%r12
1949
1950 .byte   0x3e
1951         jnz     .Ladd_proceedq
1952 .byte   102,73,15,126,208
1953 .byte   102,73,15,126,217
1954         testq   %r8,%r8
1955         jnz     .Ladd_proceedq
1956         testq   %r9,%r9
1957         jz      .Ladd_doubleq
1958
1959 .byte   102,72,15,126,199
1960         pxor    %xmm0,%xmm0
1961         movdqu  %xmm0,0(%rdi)
1962         movdqu  %xmm0,16(%rdi)
1963         movdqu  %xmm0,32(%rdi)
1964         movdqu  %xmm0,48(%rdi)
1965         movdqu  %xmm0,64(%rdi)
1966         movdqu  %xmm0,80(%rdi)
1967         jmp     .Ladd_doneq
1968
1969 .align  32
1970 .Ladd_doubleq:
1971 .byte   102,72,15,126,206
1972 .byte   102,72,15,126,199
1973         addq    $416,%rsp
1974         jmp     .Lpoint_double_shortcutq
1975
1976 .align  32
1977 .Ladd_proceedq:
1978         movq    0+64(%rsp),%rax
1979         movq    8+64(%rsp),%r14
1980         leaq    0+64(%rsp),%rsi
1981         movq    16+64(%rsp),%r15
1982         movq    24+64(%rsp),%r8
1983         leaq    96(%rsp),%rdi
1984         call    __ecp_nistz256_sqr_montq
1985
1986         movq    448(%rsp),%rax
1987         leaq    448(%rsp),%rbx
1988         movq    0+0(%rsp),%r9
1989         movq    8+0(%rsp),%r10
1990         leaq    0+0(%rsp),%rsi
1991         movq    16+0(%rsp),%r11
1992         movq    24+0(%rsp),%r12
1993         leaq    352(%rsp),%rdi
1994         call    __ecp_nistz256_mul_montq
1995
1996         movq    0+0(%rsp),%rax
1997         movq    8+0(%rsp),%r14
1998         leaq    0+0(%rsp),%rsi
1999         movq    16+0(%rsp),%r15
2000         movq    24+0(%rsp),%r8
2001         leaq    32(%rsp),%rdi
2002         call    __ecp_nistz256_sqr_montq
2003
2004         movq    544(%rsp),%rax
2005         leaq    544(%rsp),%rbx
2006         movq    0+352(%rsp),%r9
2007         movq    8+352(%rsp),%r10
2008         leaq    0+352(%rsp),%rsi
2009         movq    16+352(%rsp),%r11
2010         movq    24+352(%rsp),%r12
2011         leaq    352(%rsp),%rdi
2012         call    __ecp_nistz256_mul_montq
2013
2014         movq    0(%rsp),%rax
2015         leaq    0(%rsp),%rbx
2016         movq    0+32(%rsp),%r9
2017         movq    8+32(%rsp),%r10
2018         leaq    0+32(%rsp),%rsi
2019         movq    16+32(%rsp),%r11
2020         movq    24+32(%rsp),%r12
2021         leaq    128(%rsp),%rdi
2022         call    __ecp_nistz256_mul_montq
2023
2024         movq    160(%rsp),%rax
2025         leaq    160(%rsp),%rbx
2026         movq    0+32(%rsp),%r9
2027         movq    8+32(%rsp),%r10
2028         leaq    0+32(%rsp),%rsi
2029         movq    16+32(%rsp),%r11
2030         movq    24+32(%rsp),%r12
2031         leaq    192(%rsp),%rdi
2032         call    __ecp_nistz256_mul_montq
2033
2034
2035
2036
2037         addq    %r12,%r12
2038         leaq    96(%rsp),%rsi
2039         adcq    %r13,%r13
2040         movq    %r12,%rax
2041         adcq    %r8,%r8
2042         adcq    %r9,%r9
2043         movq    %r13,%rbp
2044         sbbq    %r11,%r11
2045
2046         subq    $-1,%r12
2047         movq    %r8,%rcx
2048         sbbq    %r14,%r13
2049         sbbq    $0,%r8
2050         movq    %r9,%r10
2051         sbbq    %r15,%r9
2052         testq   %r11,%r11
2053
2054         cmovzq  %rax,%r12
2055         movq    0(%rsi),%rax
2056         cmovzq  %rbp,%r13
2057         movq    8(%rsi),%rbp
2058         cmovzq  %rcx,%r8
2059         movq    16(%rsi),%rcx
2060         cmovzq  %r10,%r9
2061         movq    24(%rsi),%r10
2062
2063         call    __ecp_nistz256_subq
2064
2065         leaq    128(%rsp),%rbx
2066         leaq    288(%rsp),%rdi
2067         call    __ecp_nistz256_sub_fromq
2068
2069         movq    192+0(%rsp),%rax
2070         movq    192+8(%rsp),%rbp
2071         movq    192+16(%rsp),%rcx
2072         movq    192+24(%rsp),%r10
2073         leaq    320(%rsp),%rdi
2074
2075         call    __ecp_nistz256_subq
2076
2077         movq    %r12,0(%rdi)
2078         movq    %r13,8(%rdi)
2079         movq    %r8,16(%rdi)
2080         movq    %r9,24(%rdi)
2081         movq    128(%rsp),%rax
2082         leaq    128(%rsp),%rbx
2083         movq    0+224(%rsp),%r9
2084         movq    8+224(%rsp),%r10
2085         leaq    0+224(%rsp),%rsi
2086         movq    16+224(%rsp),%r11
2087         movq    24+224(%rsp),%r12
2088         leaq    256(%rsp),%rdi
2089         call    __ecp_nistz256_mul_montq
2090
2091         movq    320(%rsp),%rax
2092         leaq    320(%rsp),%rbx
2093         movq    0+64(%rsp),%r9
2094         movq    8+64(%rsp),%r10
2095         leaq    0+64(%rsp),%rsi
2096         movq    16+64(%rsp),%r11
2097         movq    24+64(%rsp),%r12
2098         leaq    320(%rsp),%rdi
2099         call    __ecp_nistz256_mul_montq
2100
2101         leaq    256(%rsp),%rbx
2102         leaq    320(%rsp),%rdi
2103         call    __ecp_nistz256_sub_fromq
2104
2105 .byte   102,72,15,126,199
2106
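/*
 * Constant-time fix-up of the infinity cases: %xmm5 appears to carry the
 * "in1 is infinity" mask and %xmm4 the "in2 is infinity" mask, so each
 * output coordinate is selected from the computed result, the second
 * input or the first input without branching.
 */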
2107         movdqa  %xmm5,%xmm0
2108         movdqa  %xmm5,%xmm1
2109         pandn   352(%rsp),%xmm0
2110         movdqa  %xmm5,%xmm2
2111         pandn   352+16(%rsp),%xmm1
2112         movdqa  %xmm5,%xmm3
2113         pand    544(%rsp),%xmm2
2114         pand    544+16(%rsp),%xmm3
2115         por     %xmm0,%xmm2
2116         por     %xmm1,%xmm3
2117
2118         movdqa  %xmm4,%xmm0
2119         movdqa  %xmm4,%xmm1
2120         pandn   %xmm2,%xmm0
2121         movdqa  %xmm4,%xmm2
2122         pandn   %xmm3,%xmm1
2123         movdqa  %xmm4,%xmm3
2124         pand    448(%rsp),%xmm2
2125         pand    448+16(%rsp),%xmm3
2126         por     %xmm0,%xmm2
2127         por     %xmm1,%xmm3
2128         movdqu  %xmm2,64(%rdi)
2129         movdqu  %xmm3,80(%rdi)
2130
2131         movdqa  %xmm5,%xmm0
2132         movdqa  %xmm5,%xmm1
2133         pandn   288(%rsp),%xmm0
2134         movdqa  %xmm5,%xmm2
2135         pandn   288+16(%rsp),%xmm1
2136         movdqa  %xmm5,%xmm3
2137         pand    480(%rsp),%xmm2
2138         pand    480+16(%rsp),%xmm3
2139         por     %xmm0,%xmm2
2140         por     %xmm1,%xmm3
2141
2142         movdqa  %xmm4,%xmm0
2143         movdqa  %xmm4,%xmm1
2144         pandn   %xmm2,%xmm0
2145         movdqa  %xmm4,%xmm2
2146         pandn   %xmm3,%xmm1
2147         movdqa  %xmm4,%xmm3
2148         pand    384(%rsp),%xmm2
2149         pand    384+16(%rsp),%xmm3
2150         por     %xmm0,%xmm2
2151         por     %xmm1,%xmm3
2152         movdqu  %xmm2,0(%rdi)
2153         movdqu  %xmm3,16(%rdi)
2154
2155         movdqa  %xmm5,%xmm0
2156         movdqa  %xmm5,%xmm1
2157         pandn   320(%rsp),%xmm0
2158         movdqa  %xmm5,%xmm2
2159         pandn   320+16(%rsp),%xmm1
2160         movdqa  %xmm5,%xmm3
2161         pand    512(%rsp),%xmm2
2162         pand    512+16(%rsp),%xmm3
2163         por     %xmm0,%xmm2
2164         por     %xmm1,%xmm3
2165
2166         movdqa  %xmm4,%xmm0
2167         movdqa  %xmm4,%xmm1
2168         pandn   %xmm2,%xmm0
2169         movdqa  %xmm4,%xmm2
2170         pandn   %xmm3,%xmm1
2171         movdqa  %xmm4,%xmm3
2172         pand    416(%rsp),%xmm2
2173         pand    416+16(%rsp),%xmm3
2174         por     %xmm0,%xmm2
2175         por     %xmm1,%xmm3
2176         movdqu  %xmm2,32(%rdi)
2177         movdqu  %xmm3,48(%rdi)
2178
2179 .Ladd_doneq:
2180         addq    $576+8,%rsp
2181         popq    %r15
2182         popq    %r14
2183         popq    %r13
2184         popq    %r12
2185         popq    %rbx
2186         popq    %rbp
2187         .byte   0xf3,0xc3
2188 .size   ecp_nistz256_point_add,.-ecp_nistz256_point_add
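/*
 * Mixed addition: the second input is an affine point (implicit Z = 1).
 * Like the other entry points, this dispatches to the BMI2/ADX (*x)
 * implementation when OPENSSL_ia32cap_P advertises both extensions
 * (0x80100 = bit 8, BMI2, plus bit 19, ADX, in the third capability word).
 */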
2189 .globl  ecp_nistz256_point_add_affine
2190 .type   ecp_nistz256_point_add_affine,@function
2191 .align  32
2192 ecp_nistz256_point_add_affine:
2193         movl    $0x80100,%ecx
2194         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
2195         cmpl    $0x80100,%ecx
2196         je      .Lpoint_add_affinex
2197         pushq   %rbp
2198         pushq   %rbx
2199         pushq   %r12
2200         pushq   %r13
2201         pushq   %r14
2202         pushq   %r15
2203         subq    $480+8,%rsp
2204
2205         movdqu  0(%rsi),%xmm0
2206         movq    %rdx,%rbx
2207         movdqu  16(%rsi),%xmm1
2208         movdqu  32(%rsi),%xmm2
2209         movdqu  48(%rsi),%xmm3
2210         movdqu  64(%rsi),%xmm4
2211         movdqu  80(%rsi),%xmm5
2212         movq    64+0(%rsi),%rax
2213         movq    64+8(%rsi),%r14
2214         movq    64+16(%rsi),%r15
2215         movq    64+24(%rsi),%r8
2216         movdqa  %xmm0,320(%rsp)
2217         movdqa  %xmm1,320+16(%rsp)
2218         por     %xmm0,%xmm1
2219         movdqa  %xmm2,352(%rsp)
2220         movdqa  %xmm3,352+16(%rsp)
2221         por     %xmm2,%xmm3
2222         movdqa  %xmm4,384(%rsp)
2223         movdqa  %xmm5,384+16(%rsp)
2224         por     %xmm1,%xmm3
2225
2226         movdqu  0(%rbx),%xmm0
2227         pshufd  $0xb1,%xmm3,%xmm5
2228         movdqu  16(%rbx),%xmm1
2229         movdqu  32(%rbx),%xmm2
2230         por     %xmm3,%xmm5
2231         movdqu  48(%rbx),%xmm3
2232         movdqa  %xmm0,416(%rsp)
2233         pshufd  $0x1e,%xmm5,%xmm4
2234         movdqa  %xmm1,416+16(%rsp)
2235         por     %xmm0,%xmm1
2236 .byte   102,72,15,110,199
2237         movdqa  %xmm2,448(%rsp)
2238         movdqa  %xmm3,448+16(%rsp)
2239         por     %xmm2,%xmm3
2240         por     %xmm4,%xmm5
2241         pxor    %xmm4,%xmm4
2242         por     %xmm1,%xmm3
2243
2244         leaq    64-0(%rsi),%rsi
2245         leaq    32(%rsp),%rdi
2246         call    __ecp_nistz256_sqr_montq
2247
2248         pcmpeqd %xmm4,%xmm5
2249         pshufd  $0xb1,%xmm3,%xmm4
2250         movq    0(%rbx),%rax
2251
2252         movq    %r12,%r9
2253         por     %xmm3,%xmm4
2254         pshufd  $0,%xmm5,%xmm5
2255         pshufd  $0x1e,%xmm4,%xmm3
2256         movq    %r13,%r10
2257         por     %xmm3,%xmm4
2258         pxor    %xmm3,%xmm3
2259         movq    %r14,%r11
2260         pcmpeqd %xmm3,%xmm4
2261         pshufd  $0,%xmm4,%xmm4
2262
2263         leaq    32-0(%rsp),%rsi
2264         movq    %r15,%r12
2265         leaq    0(%rsp),%rdi
2266         call    __ecp_nistz256_mul_montq
2267
2268         leaq    320(%rsp),%rbx
2269         leaq    64(%rsp),%rdi
2270         call    __ecp_nistz256_sub_fromq
2271
2272         movq    384(%rsp),%rax
2273         leaq    384(%rsp),%rbx
2274         movq    0+32(%rsp),%r9
2275         movq    8+32(%rsp),%r10
2276         leaq    0+32(%rsp),%rsi
2277         movq    16+32(%rsp),%r11
2278         movq    24+32(%rsp),%r12
2279         leaq    32(%rsp),%rdi
2280         call    __ecp_nistz256_mul_montq
2281
2282         movq    384(%rsp),%rax
2283         leaq    384(%rsp),%rbx
2284         movq    0+64(%rsp),%r9
2285         movq    8+64(%rsp),%r10
2286         leaq    0+64(%rsp),%rsi
2287         movq    16+64(%rsp),%r11
2288         movq    24+64(%rsp),%r12
2289         leaq    288(%rsp),%rdi
2290         call    __ecp_nistz256_mul_montq
2291
2292         movq    448(%rsp),%rax
2293         leaq    448(%rsp),%rbx
2294         movq    0+32(%rsp),%r9
2295         movq    8+32(%rsp),%r10
2296         leaq    0+32(%rsp),%rsi
2297         movq    16+32(%rsp),%r11
2298         movq    24+32(%rsp),%r12
2299         leaq    32(%rsp),%rdi
2300         call    __ecp_nistz256_mul_montq
2301
2302         leaq    352(%rsp),%rbx
2303         leaq    96(%rsp),%rdi
2304         call    __ecp_nistz256_sub_fromq
2305
2306         movq    0+64(%rsp),%rax
2307         movq    8+64(%rsp),%r14
2308         leaq    0+64(%rsp),%rsi
2309         movq    16+64(%rsp),%r15
2310         movq    24+64(%rsp),%r8
2311         leaq    128(%rsp),%rdi
2312         call    __ecp_nistz256_sqr_montq
2313
2314         movq    0+96(%rsp),%rax
2315         movq    8+96(%rsp),%r14
2316         leaq    0+96(%rsp),%rsi
2317         movq    16+96(%rsp),%r15
2318         movq    24+96(%rsp),%r8
2319         leaq    192(%rsp),%rdi
2320         call    __ecp_nistz256_sqr_montq
2321
2322         movq    128(%rsp),%rax
2323         leaq    128(%rsp),%rbx
2324         movq    0+64(%rsp),%r9
2325         movq    8+64(%rsp),%r10
2326         leaq    0+64(%rsp),%rsi
2327         movq    16+64(%rsp),%r11
2328         movq    24+64(%rsp),%r12
2329         leaq    160(%rsp),%rdi
2330         call    __ecp_nistz256_mul_montq
2331
2332         movq    320(%rsp),%rax
2333         leaq    320(%rsp),%rbx
2334         movq    0+128(%rsp),%r9
2335         movq    8+128(%rsp),%r10
2336         leaq    0+128(%rsp),%rsi
2337         movq    16+128(%rsp),%r11
2338         movq    24+128(%rsp),%r12
2339         leaq    0(%rsp),%rdi
2340         call    __ecp_nistz256_mul_montq
2341
2342
2343
2344
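/* Same inlined double-and-subtract pattern as in point_add:
   res_x = R^2 - H^3 - 2*U1*H^2. */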
2345         addq    %r12,%r12
2346         leaq    192(%rsp),%rsi
2347         adcq    %r13,%r13
2348         movq    %r12,%rax
2349         adcq    %r8,%r8
2350         adcq    %r9,%r9
2351         movq    %r13,%rbp
2352         sbbq    %r11,%r11
2353
2354         subq    $-1,%r12
2355         movq    %r8,%rcx
2356         sbbq    %r14,%r13
2357         sbbq    $0,%r8
2358         movq    %r9,%r10
2359         sbbq    %r15,%r9
2360         testq   %r11,%r11
2361
2362         cmovzq  %rax,%r12
2363         movq    0(%rsi),%rax
2364         cmovzq  %rbp,%r13
2365         movq    8(%rsi),%rbp
2366         cmovzq  %rcx,%r8
2367         movq    16(%rsi),%rcx
2368         cmovzq  %r10,%r9
2369         movq    24(%rsi),%r10
2370
2371         call    __ecp_nistz256_subq
2372
2373         leaq    160(%rsp),%rbx
2374         leaq    224(%rsp),%rdi
2375         call    __ecp_nistz256_sub_fromq
2376
2377         movq    0+0(%rsp),%rax
2378         movq    0+8(%rsp),%rbp
2379         movq    0+16(%rsp),%rcx
2380         movq    0+24(%rsp),%r10
2381         leaq    64(%rsp),%rdi
2382
2383         call    __ecp_nistz256_subq
2384
2385         movq    %r12,0(%rdi)
2386         movq    %r13,8(%rdi)
2387         movq    %r8,16(%rdi)
2388         movq    %r9,24(%rdi)
2389         movq    352(%rsp),%rax
2390         leaq    352(%rsp),%rbx
2391         movq    0+160(%rsp),%r9
2392         movq    8+160(%rsp),%r10
2393         leaq    0+160(%rsp),%rsi
2394         movq    16+160(%rsp),%r11
2395         movq    24+160(%rsp),%r12
2396         leaq    32(%rsp),%rdi
2397         call    __ecp_nistz256_mul_montq
2398
2399         movq    96(%rsp),%rax
2400         leaq    96(%rsp),%rbx
2401         movq    0+64(%rsp),%r9
2402         movq    8+64(%rsp),%r10
2403         leaq    0+64(%rsp),%rsi
2404         movq    16+64(%rsp),%r11
2405         movq    24+64(%rsp),%r12
2406         leaq    64(%rsp),%rdi
2407         call    __ecp_nistz256_mul_montq
2408
2409         leaq    32(%rsp),%rbx
2410         leaq    256(%rsp),%rdi
2411         call    __ecp_nistz256_sub_fromq
2412
2413 .byte   102,72,15,126,199
2414
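/* Result selection for the affine case: if in1 was the point at
   infinity, the output Z is forced to .LONE_mont (1 in Montgomery form),
   since the affine second input has an implicit Z of 1. */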
2415         movdqa  %xmm5,%xmm0
2416         movdqa  %xmm5,%xmm1
2417         pandn   288(%rsp),%xmm0
2418         movdqa  %xmm5,%xmm2
2419         pandn   288+16(%rsp),%xmm1
2420         movdqa  %xmm5,%xmm3
2421         pand    .LONE_mont(%rip),%xmm2
2422         pand    .LONE_mont+16(%rip),%xmm3
2423         por     %xmm0,%xmm2
2424         por     %xmm1,%xmm3
2425
2426         movdqa  %xmm4,%xmm0
2427         movdqa  %xmm4,%xmm1
2428         pandn   %xmm2,%xmm0
2429         movdqa  %xmm4,%xmm2
2430         pandn   %xmm3,%xmm1
2431         movdqa  %xmm4,%xmm3
2432         pand    384(%rsp),%xmm2
2433         pand    384+16(%rsp),%xmm3
2434         por     %xmm0,%xmm2
2435         por     %xmm1,%xmm3
2436         movdqu  %xmm2,64(%rdi)
2437         movdqu  %xmm3,80(%rdi)
2438
2439         movdqa  %xmm5,%xmm0
2440         movdqa  %xmm5,%xmm1
2441         pandn   224(%rsp),%xmm0
2442         movdqa  %xmm5,%xmm2
2443         pandn   224+16(%rsp),%xmm1
2444         movdqa  %xmm5,%xmm3
2445         pand    416(%rsp),%xmm2
2446         pand    416+16(%rsp),%xmm3
2447         por     %xmm0,%xmm2
2448         por     %xmm1,%xmm3
2449
2450         movdqa  %xmm4,%xmm0
2451         movdqa  %xmm4,%xmm1
2452         pandn   %xmm2,%xmm0
2453         movdqa  %xmm4,%xmm2
2454         pandn   %xmm3,%xmm1
2455         movdqa  %xmm4,%xmm3
2456         pand    320(%rsp),%xmm2
2457         pand    320+16(%rsp),%xmm3
2458         por     %xmm0,%xmm2
2459         por     %xmm1,%xmm3
2460         movdqu  %xmm2,0(%rdi)
2461         movdqu  %xmm3,16(%rdi)
2462
2463         movdqa  %xmm5,%xmm0
2464         movdqa  %xmm5,%xmm1
2465         pandn   256(%rsp),%xmm0
2466         movdqa  %xmm5,%xmm2
2467         pandn   256+16(%rsp),%xmm1
2468         movdqa  %xmm5,%xmm3
2469         pand    448(%rsp),%xmm2
2470         pand    448+16(%rsp),%xmm3
2471         por     %xmm0,%xmm2
2472         por     %xmm1,%xmm3
2473
2474         movdqa  %xmm4,%xmm0
2475         movdqa  %xmm4,%xmm1
2476         pandn   %xmm2,%xmm0
2477         movdqa  %xmm4,%xmm2
2478         pandn   %xmm3,%xmm1
2479         movdqa  %xmm4,%xmm3
2480         pand    352(%rsp),%xmm2
2481         pand    352+16(%rsp),%xmm3
2482         por     %xmm0,%xmm2
2483         por     %xmm1,%xmm3
2484         movdqu  %xmm2,32(%rdi)
2485         movdqu  %xmm3,48(%rdi)
2486
2487         addq    $480+8,%rsp
2488         popq    %r15
2489         popq    %r14
2490         popq    %r13
2491         popq    %r12
2492         popq    %rbx
2493         popq    %rbp
2494         .byte   0xf3,0xc3
2495 .size   ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
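/*
 * The *x helpers below back the BMI2/ADX code path.  They keep the
 * 256-bit accumulator in %r12,%r13,%r8,%r9 and expect the second and
 * fourth words of .Lpoly in %r14 and %r15.  Note the xorq/adcq and
 * xorq/sbbq idiom: the leading xor clears CF so the first adcq or sbbq
 * acts as a plain add or sub.
 */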
2496 .type   __ecp_nistz256_add_tox,@function
2497 .align  32
2498 __ecp_nistz256_add_tox:
2499         xorq    %r11,%r11
2500         adcq    0(%rbx),%r12
2501         adcq    8(%rbx),%r13
2502         movq    %r12,%rax
2503         adcq    16(%rbx),%r8
2504         adcq    24(%rbx),%r9
2505         movq    %r13,%rbp
2506         adcq    $0,%r11
2507
2508         xorq    %r10,%r10
2509         sbbq    $-1,%r12
2510         movq    %r8,%rcx
2511         sbbq    %r14,%r13
2512         sbbq    $0,%r8
2513         movq    %r9,%r10
2514         sbbq    %r15,%r9
2515
2516         btq     $0,%r11
2517         cmovncq %rax,%r12
2518         cmovncq %rbp,%r13
2519         movq    %r12,0(%rdi)
2520         cmovncq %rcx,%r8
2521         movq    %r13,8(%rdi)
2522         cmovncq %r10,%r9
2523         movq    %r8,16(%rdi)
2524         movq    %r9,24(%rdi)
2525
2526         .byte   0xf3,0xc3
2527 .size   __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox
2528
2529 .type   __ecp_nistz256_sub_fromx,@function
2530 .align  32
2531 __ecp_nistz256_sub_fromx:
2532         xorq    %r11,%r11
2533         sbbq    0(%rbx),%r12
2534         sbbq    8(%rbx),%r13
2535         movq    %r12,%rax
2536         sbbq    16(%rbx),%r8
2537         sbbq    24(%rbx),%r9
2538         movq    %r13,%rbp
2539         sbbq    $0,%r11
2540
2541         xorq    %r10,%r10
2542         adcq    $-1,%r12
2543         movq    %r8,%rcx
2544         adcq    %r14,%r13
2545         adcq    $0,%r8
2546         movq    %r9,%r10
2547         adcq    %r15,%r9
2548
2549         btq     $0,%r11
2550         cmovncq %rax,%r12
2551         cmovncq %rbp,%r13
2552         movq    %r12,0(%rdi)
2553         cmovncq %rcx,%r8
2554         movq    %r13,8(%rdi)
2555         cmovncq %r10,%r9
2556         movq    %r8,16(%rdi)
2557         movq    %r9,24(%rdi)
2558
2559         .byte   0xf3,0xc3
2560 .size   __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx
2561
2562 .type   __ecp_nistz256_subx,@function
2563 .align  32
2564 __ecp_nistz256_subx:
2565         xorq    %r11,%r11
2566         sbbq    %r12,%rax
2567         sbbq    %r13,%rbp
2568         movq    %rax,%r12
2569         sbbq    %r8,%rcx
2570         sbbq    %r9,%r10
2571         movq    %rbp,%r13
2572         sbbq    $0,%r11
2573
2574         xorq    %r9,%r9
2575         adcq    $-1,%rax
2576         movq    %rcx,%r8
2577         adcq    %r14,%rbp
2578         adcq    $0,%rcx
2579         movq    %r10,%r9
2580         adcq    %r15,%r10
2581
2582         btq     $0,%r11
2583         cmovcq  %rax,%r12
2584         cmovcq  %rbp,%r13
2585         cmovcq  %rcx,%r8
2586         cmovcq  %r10,%r9
2587
2588         .byte   0xf3,0xc3
2589 .size   __ecp_nistz256_subx,.-__ecp_nistz256_subx
2590
2591 .type   __ecp_nistz256_mul_by_2x,@function
2592 .align  32
2593 __ecp_nistz256_mul_by_2x:
2594         xorq    %r11,%r11
2595         adcq    %r12,%r12
2596         adcq    %r13,%r13
2597         movq    %r12,%rax
2598         adcq    %r8,%r8
2599         adcq    %r9,%r9
2600         movq    %r13,%rbp
2601         adcq    $0,%r11
2602
2603         xorq    %r10,%r10
2604         sbbq    $-1,%r12
2605         movq    %r8,%rcx
2606         sbbq    %r14,%r13
2607         sbbq    $0,%r8
2608         movq    %r9,%r10
2609         sbbq    %r15,%r9
2610
2611         btq     $0,%r11
2612         cmovncq %rax,%r12
2613         cmovncq %rbp,%r13
2614         movq    %r12,0(%rdi)
2615         cmovncq %rcx,%r8
2616         movq    %r13,8(%rdi)
2617         cmovncq %r10,%r9
2618         movq    %r8,16(%rdi)
2619         movq    %r9,24(%rdi)
2620
2621         .byte   0xf3,0xc3
2622 .size   __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
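/* BMI2/ADX flavour of the point doubling.  .Lpoint_double_shortcutx is
   also entered from ecp_nistz256_point_addx when both addends are equal. */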
2623 .type   ecp_nistz256_point_doublex,@function
2624 .align  32
2625 ecp_nistz256_point_doublex:
2626 .Lpoint_doublex:
2627         pushq   %rbp
2628         pushq   %rbx
2629         pushq   %r12
2630         pushq   %r13
2631         pushq   %r14
2632         pushq   %r15
2633         subq    $160+8,%rsp
2634
2635 .Lpoint_double_shortcutx:
2636         movdqu  0(%rsi),%xmm0
2637         movq    %rsi,%rbx
2638         movdqu  16(%rsi),%xmm1
2639         movq    32+0(%rsi),%r12
2640         movq    32+8(%rsi),%r13
2641         movq    32+16(%rsi),%r8
2642         movq    32+24(%rsi),%r9
2643         movq    .Lpoly+8(%rip),%r14
2644         movq    .Lpoly+24(%rip),%r15
2645         movdqa  %xmm0,96(%rsp)
2646         movdqa  %xmm1,96+16(%rsp)
2647         leaq    32(%rdi),%r10
2648         leaq    64(%rdi),%r11
2649 .byte   102,72,15,110,199
2650 .byte   102,73,15,110,202
2651 .byte   102,73,15,110,211
2652
2653         leaq    0(%rsp),%rdi
2654         call    __ecp_nistz256_mul_by_2x
2655
2656         movq    64+0(%rsi),%rdx
2657         movq    64+8(%rsi),%r14
2658         movq    64+16(%rsi),%r15
2659         movq    64+24(%rsi),%r8
2660         leaq    64-128(%rsi),%rsi
2661         leaq    64(%rsp),%rdi
2662         call    __ecp_nistz256_sqr_montx
2663
2664         movq    0+0(%rsp),%rdx
2665         movq    8+0(%rsp),%r14
2666         leaq    -128+0(%rsp),%rsi
2667         movq    16+0(%rsp),%r15
2668         movq    24+0(%rsp),%r8
2669         leaq    0(%rsp),%rdi
2670         call    __ecp_nistz256_sqr_montx
2671
2672         movq    32(%rbx),%rdx
2673         movq    64+0(%rbx),%r9
2674         movq    64+8(%rbx),%r10
2675         movq    64+16(%rbx),%r11
2676         movq    64+24(%rbx),%r12
2677         leaq    64-128(%rbx),%rsi
2678         leaq    32(%rbx),%rbx
2679 .byte   102,72,15,126,215
2680         call    __ecp_nistz256_mul_montx
2681         call    __ecp_nistz256_mul_by_2x
2682
2683         movq    96+0(%rsp),%r12
2684         movq    96+8(%rsp),%r13
2685         leaq    64(%rsp),%rbx
2686         movq    96+16(%rsp),%r8
2687         movq    96+24(%rsp),%r9
2688         leaq    32(%rsp),%rdi
2689         call    __ecp_nistz256_add_tox
2690
2691         movq    96+0(%rsp),%r12
2692         movq    96+8(%rsp),%r13
2693         leaq    64(%rsp),%rbx
2694         movq    96+16(%rsp),%r8
2695         movq    96+24(%rsp),%r9
2696         leaq    64(%rsp),%rdi
2697         call    __ecp_nistz256_sub_fromx
2698
2699         movq    0+0(%rsp),%rdx
2700         movq    8+0(%rsp),%r14
2701         leaq    -128+0(%rsp),%rsi
2702         movq    16+0(%rsp),%r15
2703         movq    24+0(%rsp),%r8
2704 .byte   102,72,15,126,207
2705         call    __ecp_nistz256_sqr_montx
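/* Inlined halving modulo the prime (cf. ecp_nistz256_div_by_2): add p
   when the value is odd, then shift the 256-bit value right by one bit.
   %rsi and %rbp still hold .Lpoly+8 and .Lpoly+24 from the squaring. */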
2706         xorq    %r9,%r9
2707         movq    %r12,%rax
2708         addq    $-1,%r12
2709         movq    %r13,%r10
2710         adcq    %rsi,%r13
2711         movq    %r14,%rcx
2712         adcq    $0,%r14
2713         movq    %r15,%r8
2714         adcq    %rbp,%r15
2715         adcq    $0,%r9
2716         xorq    %rsi,%rsi
2717         testq   $1,%rax
2718
2719         cmovzq  %rax,%r12
2720         cmovzq  %r10,%r13
2721         cmovzq  %rcx,%r14
2722         cmovzq  %r8,%r15
2723         cmovzq  %rsi,%r9
2724
2725         movq    %r13,%rax
2726         shrq    $1,%r12
2727         shlq    $63,%rax
2728         movq    %r14,%r10
2729         shrq    $1,%r13
2730         orq     %rax,%r12
2731         shlq    $63,%r10
2732         movq    %r15,%rcx
2733         shrq    $1,%r14
2734         orq     %r10,%r13
2735         shlq    $63,%rcx
2736         movq    %r12,0(%rdi)
2737         shrq    $1,%r15
2738         movq    %r13,8(%rdi)
2739         shlq    $63,%r9
2740         orq     %rcx,%r14
2741         orq     %r9,%r15
2742         movq    %r14,16(%rdi)
2743         movq    %r15,24(%rdi)
2744         movq    64(%rsp),%rdx
2745         leaq    64(%rsp),%rbx
2746         movq    0+32(%rsp),%r9
2747         movq    8+32(%rsp),%r10
2748         leaq    -128+32(%rsp),%rsi
2749         movq    16+32(%rsp),%r11
2750         movq    24+32(%rsp),%r12
2751         leaq    32(%rsp),%rdi
2752         call    __ecp_nistz256_mul_montx
2753
2754         leaq    128(%rsp),%rdi
2755         call    __ecp_nistz256_mul_by_2x
2756
2757         leaq    32(%rsp),%rbx
2758         leaq    32(%rsp),%rdi
2759         call    __ecp_nistz256_add_tox
2760
2761         movq    96(%rsp),%rdx
2762         leaq    96(%rsp),%rbx
2763         movq    0+0(%rsp),%r9
2764         movq    8+0(%rsp),%r10
2765         leaq    -128+0(%rsp),%rsi
2766         movq    16+0(%rsp),%r11
2767         movq    24+0(%rsp),%r12
2768         leaq    0(%rsp),%rdi
2769         call    __ecp_nistz256_mul_montx
2770
2771         leaq    128(%rsp),%rdi
2772         call    __ecp_nistz256_mul_by_2x
2773
2774         movq    0+32(%rsp),%rdx
2775         movq    8+32(%rsp),%r14
2776         leaq    -128+32(%rsp),%rsi
2777         movq    16+32(%rsp),%r15
2778         movq    24+32(%rsp),%r8
2779 .byte   102,72,15,126,199
2780         call    __ecp_nistz256_sqr_montx
2781
2782         leaq    128(%rsp),%rbx
2783         movq    %r14,%r8
2784         movq    %r15,%r9
2785         movq    %rsi,%r14
2786         movq    %rbp,%r15
2787         call    __ecp_nistz256_sub_fromx
2788
2789         movq    0+0(%rsp),%rax
2790         movq    0+8(%rsp),%rbp
2791         movq    0+16(%rsp),%rcx
2792         movq    0+24(%rsp),%r10
2793         leaq    0(%rsp),%rdi
2794         call    __ecp_nistz256_subx
2795
2796         movq    32(%rsp),%rdx
2797         leaq    32(%rsp),%rbx
2798         movq    %r12,%r14
2799         xorl    %ecx,%ecx
2800         movq    %r12,0+0(%rsp)
2801         movq    %r13,%r10
2802         movq    %r13,0+8(%rsp)
2803         cmovzq  %r8,%r11
2804         movq    %r8,0+16(%rsp)
2805         leaq    0-128(%rsp),%rsi
2806         cmovzq  %r9,%r12
2807         movq    %r9,0+24(%rsp)
2808         movq    %r14,%r9
2809         leaq    0(%rsp),%rdi
2810         call    __ecp_nistz256_mul_montx
2811
2812 .byte   102,72,15,126,203
2813 .byte   102,72,15,126,207
2814         call    __ecp_nistz256_sub_fromx
2815
2816         addq    $160+8,%rsp
2817         popq    %r15
2818         popq    %r14
2819         popq    %r13
2820         popq    %r12
2821         popq    %rbx
2822         popq    %rbp
2823         .byte   0xf3,0xc3
2824 .size   ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
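/*
 * BMI2/ADX flavour of the full Jacobian addition; it mirrors the q path
 * above, with mulx taking its implicit multiplicand in %rdx (hence the
 * movq ...,%rdx loads before each mul/sqr call).  Source pointers appear
 * to be passed biased by -128, which the *x helpers compensate for.
 */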
2825 .type   ecp_nistz256_point_addx,@function
2826 .align  32
2827 ecp_nistz256_point_addx:
2828 .Lpoint_addx:
2829         pushq   %rbp
2830         pushq   %rbx
2831         pushq   %r12
2832         pushq   %r13
2833         pushq   %r14
2834         pushq   %r15
2835         subq    $576+8,%rsp
2836
2837         movdqu  0(%rsi),%xmm0
2838         movdqu  16(%rsi),%xmm1
2839         movdqu  32(%rsi),%xmm2
2840         movdqu  48(%rsi),%xmm3
2841         movdqu  64(%rsi),%xmm4
2842         movdqu  80(%rsi),%xmm5
2843         movq    %rsi,%rbx
2844         movq    %rdx,%rsi
2845         movdqa  %xmm0,384(%rsp)
2846         movdqa  %xmm1,384+16(%rsp)
2847         por     %xmm0,%xmm1
2848         movdqa  %xmm2,416(%rsp)
2849         movdqa  %xmm3,416+16(%rsp)
2850         por     %xmm2,%xmm3
2851         movdqa  %xmm4,448(%rsp)
2852         movdqa  %xmm5,448+16(%rsp)
2853         por     %xmm1,%xmm3
2854
2855         movdqu  0(%rsi),%xmm0
2856         pshufd  $0xb1,%xmm3,%xmm5
2857         movdqu  16(%rsi),%xmm1
2858         movdqu  32(%rsi),%xmm2
2859         por     %xmm3,%xmm5
2860         movdqu  48(%rsi),%xmm3
2861         movq    64+0(%rsi),%rdx
2862         movq    64+8(%rsi),%r14
2863         movq    64+16(%rsi),%r15
2864         movq    64+24(%rsi),%r8
2865         movdqa  %xmm0,480(%rsp)
2866         pshufd  $0x1e,%xmm5,%xmm4
2867         movdqa  %xmm1,480+16(%rsp)
2868         por     %xmm0,%xmm1
2869 .byte   102,72,15,110,199
2870         movdqa  %xmm2,512(%rsp)
2871         movdqa  %xmm3,512+16(%rsp)
2872         por     %xmm2,%xmm3
2873         por     %xmm4,%xmm5
2874         pxor    %xmm4,%xmm4
2875         por     %xmm1,%xmm3
2876
2877         leaq    64-128(%rsi),%rsi
2878         movq    %rdx,544+0(%rsp)
2879         movq    %r14,544+8(%rsp)
2880         movq    %r15,544+16(%rsp)
2881         movq    %r8,544+24(%rsp)
2882         leaq    96(%rsp),%rdi
2883         call    __ecp_nistz256_sqr_montx
2884
2885         pcmpeqd %xmm4,%xmm5
2886         pshufd  $0xb1,%xmm3,%xmm4
2887         por     %xmm3,%xmm4
2888         pshufd  $0,%xmm5,%xmm5
2889         pshufd  $0x1e,%xmm4,%xmm3
2890         por     %xmm3,%xmm4
2891         pxor    %xmm3,%xmm3
2892         pcmpeqd %xmm3,%xmm4
2893         pshufd  $0,%xmm4,%xmm4
2894         movq    64+0(%rbx),%rdx
2895         movq    64+8(%rbx),%r14
2896         movq    64+16(%rbx),%r15
2897         movq    64+24(%rbx),%r8
2898 .byte   102,72,15,110,203
2899
2900         leaq    64-128(%rbx),%rsi
2901         leaq    32(%rsp),%rdi
2902         call    __ecp_nistz256_sqr_montx
2903
2904         movq    544(%rsp),%rdx
2905         leaq    544(%rsp),%rbx
2906         movq    0+96(%rsp),%r9
2907         movq    8+96(%rsp),%r10
2908         leaq    -128+96(%rsp),%rsi
2909         movq    16+96(%rsp),%r11
2910         movq    24+96(%rsp),%r12
2911         leaq    224(%rsp),%rdi
2912         call    __ecp_nistz256_mul_montx
2913
2914         movq    448(%rsp),%rdx
2915         leaq    448(%rsp),%rbx
2916         movq    0+32(%rsp),%r9
2917         movq    8+32(%rsp),%r10
2918         leaq    -128+32(%rsp),%rsi
2919         movq    16+32(%rsp),%r11
2920         movq    24+32(%rsp),%r12
2921         leaq    256(%rsp),%rdi
2922         call    __ecp_nistz256_mul_montx
2923
2924         movq    416(%rsp),%rdx
2925         leaq    416(%rsp),%rbx
2926         movq    0+224(%rsp),%r9
2927         movq    8+224(%rsp),%r10
2928         leaq    -128+224(%rsp),%rsi
2929         movq    16+224(%rsp),%r11
2930         movq    24+224(%rsp),%r12
2931         leaq    224(%rsp),%rdi
2932         call    __ecp_nistz256_mul_montx
2933
2934         movq    512(%rsp),%rdx
2935         leaq    512(%rsp),%rbx
2936         movq    0+256(%rsp),%r9
2937         movq    8+256(%rsp),%r10
2938         leaq    -128+256(%rsp),%rsi
2939         movq    16+256(%rsp),%r11
2940         movq    24+256(%rsp),%r12
2941         leaq    256(%rsp),%rdi
2942         call    __ecp_nistz256_mul_montx
2943
2944         leaq    224(%rsp),%rbx
2945         leaq    64(%rsp),%rdi
2946         call    __ecp_nistz256_sub_fromx
2947
2948         orq     %r13,%r12
2949         movdqa  %xmm4,%xmm2
2950         orq     %r8,%r12
2951         orq     %r9,%r12
2952         por     %xmm5,%xmm2
2953 .byte   102,73,15,110,220
2954
2955         movq    384(%rsp),%rdx
2956         leaq    384(%rsp),%rbx
2957         movq    0+96(%rsp),%r9
2958         movq    8+96(%rsp),%r10
2959         leaq    -128+96(%rsp),%rsi
2960         movq    16+96(%rsp),%r11
2961         movq    24+96(%rsp),%r12
2962         leaq    160(%rsp),%rdi
2963         call    __ecp_nistz256_mul_montx
2964
2965         movq    480(%rsp),%rdx
2966         leaq    480(%rsp),%rbx
2967         movq    0+32(%rsp),%r9
2968         movq    8+32(%rsp),%r10
2969         leaq    -128+32(%rsp),%rsi
2970         movq    16+32(%rsp),%r11
2971         movq    24+32(%rsp),%r12
2972         leaq    192(%rsp),%rdi
2973         call    __ecp_nistz256_mul_montx
2974
2975         leaq    160(%rsp),%rbx
2976         leaq    0(%rsp),%rdi
2977         call    __ecp_nistz256_sub_fromx
2978
2979         orq     %r13,%r12
2980         orq     %r8,%r12
2981         orq     %r9,%r12
2982
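/* Same equal/infinity dispatch as in the q path: proceed if U1 != U2 or
   either input is infinity, double if the points are identical,
   otherwise the sum is the point at infinity. */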
2983 .byte   0x3e
2984         jnz     .Ladd_proceedx
2985 .byte   102,73,15,126,208
2986 .byte   102,73,15,126,217
2987         testq   %r8,%r8
2988         jnz     .Ladd_proceedx
2989         testq   %r9,%r9
2990         jz      .Ladd_doublex
2991
2992 .byte   102,72,15,126,199
2993         pxor    %xmm0,%xmm0
2994         movdqu  %xmm0,0(%rdi)
2995         movdqu  %xmm0,16(%rdi)
2996         movdqu  %xmm0,32(%rdi)
2997         movdqu  %xmm0,48(%rdi)
2998         movdqu  %xmm0,64(%rdi)
2999         movdqu  %xmm0,80(%rdi)
3000         jmp     .Ladd_donex
3001
3002 .align  32
3003 .Ladd_doublex:
3004 .byte   102,72,15,126,206
3005 .byte   102,72,15,126,199
3006         addq    $416,%rsp
3007         jmp     .Lpoint_double_shortcutx
3008
3009 .align  32
3010 .Ladd_proceedx:
3011         movq    0+64(%rsp),%rdx
3012         movq    8+64(%rsp),%r14
3013         leaq    -128+64(%rsp),%rsi
3014         movq    16+64(%rsp),%r15
3015         movq    24+64(%rsp),%r8
3016         leaq    96(%rsp),%rdi
3017         call    __ecp_nistz256_sqr_montx
3018
3019         movq    448(%rsp),%rdx
3020         leaq    448(%rsp),%rbx
3021         movq    0+0(%rsp),%r9
3022         movq    8+0(%rsp),%r10
3023         leaq    -128+0(%rsp),%rsi
3024         movq    16+0(%rsp),%r11
3025         movq    24+0(%rsp),%r12
3026         leaq    352(%rsp),%rdi
3027         call    __ecp_nistz256_mul_montx
3028
3029         movq    0+0(%rsp),%rdx
3030         movq    8+0(%rsp),%r14
3031         leaq    -128+0(%rsp),%rsi
3032         movq    16+0(%rsp),%r15
3033         movq    24+0(%rsp),%r8
3034         leaq    32(%rsp),%rdi
3035         call    __ecp_nistz256_sqr_montx
3036
3037         movq    544(%rsp),%rdx
3038         leaq    544(%rsp),%rbx
3039         movq    0+352(%rsp),%r9
3040         movq    8+352(%rsp),%r10
3041         leaq    -128+352(%rsp),%rsi
3042         movq    16+352(%rsp),%r11
3043         movq    24+352(%rsp),%r12
3044         leaq    352(%rsp),%rdi
3045         call    __ecp_nistz256_mul_montx
3046
3047         movq    0(%rsp),%rdx
3048         leaq    0(%rsp),%rbx
3049         movq    0+32(%rsp),%r9
3050         movq    8+32(%rsp),%r10
3051         leaq    -128+32(%rsp),%rsi
3052         movq    16+32(%rsp),%r11
3053         movq    24+32(%rsp),%r12
3054         leaq    128(%rsp),%rdi
3055         call    __ecp_nistz256_mul_montx
3056
3057         movq    160(%rsp),%rdx
3058         leaq    160(%rsp),%rbx
3059         movq    0+32(%rsp),%r9
3060         movq    8+32(%rsp),%r10
3061         leaq    -128+32(%rsp),%rsi
3062         movq    16+32(%rsp),%r11
3063         movq    24+32(%rsp),%r12
3064         leaq    192(%rsp),%rdi
3065         call    __ecp_nistz256_mul_montx
3066
3067
3068
3069
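/* res_x = R^2 - H^3 - 2*U1*H^2, as in the q path above. */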
3070         addq    %r12,%r12
3071         leaq    96(%rsp),%rsi
3072         adcq    %r13,%r13
3073         movq    %r12,%rax
3074         adcq    %r8,%r8
3075         adcq    %r9,%r9
3076         movq    %r13,%rbp
3077         sbbq    %r11,%r11
3078
3079         subq    $-1,%r12
3080         movq    %r8,%rcx
3081         sbbq    %r14,%r13
3082         sbbq    $0,%r8
3083         movq    %r9,%r10
3084         sbbq    %r15,%r9
3085         testq   %r11,%r11
3086
3087         cmovzq  %rax,%r12
3088         movq    0(%rsi),%rax
3089         cmovzq  %rbp,%r13
3090         movq    8(%rsi),%rbp
3091         cmovzq  %rcx,%r8
3092         movq    16(%rsi),%rcx
3093         cmovzq  %r10,%r9
3094         movq    24(%rsi),%r10
3095
3096         call    __ecp_nistz256_subx
3097
3098         leaq    128(%rsp),%rbx
3099         leaq    288(%rsp),%rdi
3100         call    __ecp_nistz256_sub_fromx
3101
3102         movq    192+0(%rsp),%rax
3103         movq    192+8(%rsp),%rbp
3104         movq    192+16(%rsp),%rcx
3105         movq    192+24(%rsp),%r10
3106         leaq    320(%rsp),%rdi
3107
3108         call    __ecp_nistz256_subx
3109
3110         movq    %r12,0(%rdi)
3111         movq    %r13,8(%rdi)
3112         movq    %r8,16(%rdi)
3113         movq    %r9,24(%rdi)
3114         movq    128(%rsp),%rdx
3115         leaq    128(%rsp),%rbx
3116         movq    0+224(%rsp),%r9
3117         movq    8+224(%rsp),%r10
3118         leaq    -128+224(%rsp),%rsi
3119         movq    16+224(%rsp),%r11
3120         movq    24+224(%rsp),%r12
3121         leaq    256(%rsp),%rdi
3122         call    __ecp_nistz256_mul_montx
3123
3124         movq    320(%rsp),%rdx
3125         leaq    320(%rsp),%rbx
3126         movq    0+64(%rsp),%r9
3127         movq    8+64(%rsp),%r10
3128         leaq    -128+64(%rsp),%rsi
3129         movq    16+64(%rsp),%r11
3130         movq    24+64(%rsp),%r12
3131         leaq    320(%rsp),%rdi
3132         call    __ecp_nistz256_mul_montx
3133
3134         leaq    256(%rsp),%rbx
3135         leaq    320(%rsp),%rdi
3136         call    __ecp_nistz256_sub_fromx
3137
3138 .byte   102,72,15,126,199
3139
3140         movdqa  %xmm5,%xmm0
3141         movdqa  %xmm5,%xmm1
3142         pandn   352(%rsp),%xmm0
3143         movdqa  %xmm5,%xmm2
3144         pandn   352+16(%rsp),%xmm1
3145         movdqa  %xmm5,%xmm3
3146         pand    544(%rsp),%xmm2
3147         pand    544+16(%rsp),%xmm3
3148         por     %xmm0,%xmm2
3149         por     %xmm1,%xmm3
3150
3151         movdqa  %xmm4,%xmm0
3152         movdqa  %xmm4,%xmm1
3153         pandn   %xmm2,%xmm0
3154         movdqa  %xmm4,%xmm2
3155         pandn   %xmm3,%xmm1
3156         movdqa  %xmm4,%xmm3
3157         pand    448(%rsp),%xmm2
3158         pand    448+16(%rsp),%xmm3
3159         por     %xmm0,%xmm2
3160         por     %xmm1,%xmm3
3161         movdqu  %xmm2,64(%rdi)
3162         movdqu  %xmm3,80(%rdi)
3163
3164         movdqa  %xmm5,%xmm0
3165         movdqa  %xmm5,%xmm1
3166         pandn   288(%rsp),%xmm0
3167         movdqa  %xmm5,%xmm2
3168         pandn   288+16(%rsp),%xmm1
3169         movdqa  %xmm5,%xmm3
3170         pand    480(%rsp),%xmm2
3171         pand    480+16(%rsp),%xmm3
3172         por     %xmm0,%xmm2
3173         por     %xmm1,%xmm3
3174
3175         movdqa  %xmm4,%xmm0
3176         movdqa  %xmm4,%xmm1
3177         pandn   %xmm2,%xmm0
3178         movdqa  %xmm4,%xmm2
3179         pandn   %xmm3,%xmm1
3180         movdqa  %xmm4,%xmm3
3181         pand    384(%rsp),%xmm2
3182         pand    384+16(%rsp),%xmm3
3183         por     %xmm0,%xmm2
3184         por     %xmm1,%xmm3
3185         movdqu  %xmm2,0(%rdi)
3186         movdqu  %xmm3,16(%rdi)
3187
3188         movdqa  %xmm5,%xmm0
3189         movdqa  %xmm5,%xmm1
3190         pandn   320(%rsp),%xmm0
3191         movdqa  %xmm5,%xmm2
3192         pandn   320+16(%rsp),%xmm1
3193         movdqa  %xmm5,%xmm3
3194         pand    512(%rsp),%xmm2
3195         pand    512+16(%rsp),%xmm3
3196         por     %xmm0,%xmm2
3197         por     %xmm1,%xmm3
3198
3199         movdqa  %xmm4,%xmm0
3200         movdqa  %xmm4,%xmm1
3201         pandn   %xmm2,%xmm0
3202         movdqa  %xmm4,%xmm2
3203         pandn   %xmm3,%xmm1
3204         movdqa  %xmm4,%xmm3
3205         pand    416(%rsp),%xmm2
3206         pand    416+16(%rsp),%xmm3
3207         por     %xmm0,%xmm2
3208         por     %xmm1,%xmm3
3209         movdqu  %xmm2,32(%rdi)
3210         movdqu  %xmm3,48(%rdi)
3211
3212 .Ladd_donex:
3213         addq    $576+8,%rsp
3214         popq    %r15
3215         popq    %r14
3216         popq    %r13
3217         popq    %r12
3218         popq    %rbx
3219         popq    %rbp
3220         .byte   0xf3,0xc3
3221 .size   ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
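/* BMI2/ADX flavour of the mixed (Jacobian + affine) addition. */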
3222 .type   ecp_nistz256_point_add_affinex,@function
3223 .align  32
3224 ecp_nistz256_point_add_affinex:
3225 .Lpoint_add_affinex:
3226         pushq   %rbp
3227         pushq   %rbx
3228         pushq   %r12
3229         pushq   %r13
3230         pushq   %r14
3231         pushq   %r15
3232         subq    $480+8,%rsp
3233
3234         movdqu  0(%rsi),%xmm0
3235         movq    %rdx,%rbx
3236         movdqu  16(%rsi),%xmm1
3237         movdqu  32(%rsi),%xmm2
3238         movdqu  48(%rsi),%xmm3
3239         movdqu  64(%rsi),%xmm4
3240         movdqu  80(%rsi),%xmm5
3241         movq    64+0(%rsi),%rdx
3242         movq    64+8(%rsi),%r14
3243         movq    64+16(%rsi),%r15
3244         movq    64+24(%rsi),%r8
3245         movdqa  %xmm0,320(%rsp)
3246         movdqa  %xmm1,320+16(%rsp)
3247         por     %xmm0,%xmm1
3248         movdqa  %xmm2,352(%rsp)
3249         movdqa  %xmm3,352+16(%rsp)
3250         por     %xmm2,%xmm3
3251         movdqa  %xmm4,384(%rsp)
3252         movdqa  %xmm5,384+16(%rsp)
3253         por     %xmm1,%xmm3
3254
3255         movdqu  0(%rbx),%xmm0
3256         pshufd  $0xb1,%xmm3,%xmm5
3257         movdqu  16(%rbx),%xmm1
3258         movdqu  32(%rbx),%xmm2
3259         por     %xmm3,%xmm5
3260         movdqu  48(%rbx),%xmm3
3261         movdqa  %xmm0,416(%rsp)
3262         pshufd  $0x1e,%xmm5,%xmm4
3263         movdqa  %xmm1,416+16(%rsp)
3264         por     %xmm0,%xmm1
3265 .byte   102,72,15,110,199
3266         movdqa  %xmm2,448(%rsp)
3267         movdqa  %xmm3,448+16(%rsp)
3268         por     %xmm2,%xmm3
3269         por     %xmm4,%xmm5
3270         pxor    %xmm4,%xmm4
3271         por     %xmm1,%xmm3
3272
3273         leaq    64-128(%rsi),%rsi
3274         leaq    32(%rsp),%rdi
3275         call    __ecp_nistz256_sqr_montx
3276
3277         pcmpeqd %xmm4,%xmm5
3278         pshufd  $0xb1,%xmm3,%xmm4
3279         movq    0(%rbx),%rdx
3280
3281         movq    %r12,%r9
3282         por     %xmm3,%xmm4
3283         pshufd  $0,%xmm5,%xmm5
3284         pshufd  $0x1e,%xmm4,%xmm3
3285         movq    %r13,%r10
3286         por     %xmm3,%xmm4
3287         pxor    %xmm3,%xmm3
3288         movq    %r14,%r11
3289         pcmpeqd %xmm3,%xmm4
3290         pshufd  $0,%xmm4,%xmm4
3291
3292         leaq    32-128(%rsp),%rsi
3293         movq    %r15,%r12
3294         leaq    0(%rsp),%rdi
3295         call    __ecp_nistz256_mul_montx
3296
3297         leaq    320(%rsp),%rbx
3298         leaq    64(%rsp),%rdi
3299         call    __ecp_nistz256_sub_fromx
3300
3301         movq    384(%rsp),%rdx
3302         leaq    384(%rsp),%rbx
3303         movq    0+32(%rsp),%r9
3304         movq    8+32(%rsp),%r10
3305         leaq    -128+32(%rsp),%rsi
3306         movq    16+32(%rsp),%r11
3307         movq    24+32(%rsp),%r12
3308         leaq    32(%rsp),%rdi
3309         call    __ecp_nistz256_mul_montx
3310
3311         movq    384(%rsp),%rdx
3312         leaq    384(%rsp),%rbx
3313         movq    0+64(%rsp),%r9
3314         movq    8+64(%rsp),%r10
3315         leaq    -128+64(%rsp),%rsi
3316         movq    16+64(%rsp),%r11
3317         movq    24+64(%rsp),%r12
3318         leaq    288(%rsp),%rdi
3319         call    __ecp_nistz256_mul_montx
3320
3321         movq    448(%rsp),%rdx
3322         leaq    448(%rsp),%rbx
3323         movq    0+32(%rsp),%r9
3324         movq    8+32(%rsp),%r10
3325         leaq    -128+32(%rsp),%rsi
3326         movq    16+32(%rsp),%r11
3327         movq    24+32(%rsp),%r12
3328         leaq    32(%rsp),%rdi
3329         call    __ecp_nistz256_mul_montx
3330
3331         leaq    352(%rsp),%rbx
3332         leaq    96(%rsp),%rdi
3333         call    __ecp_nistz256_sub_fromx
3334
3335         movq    0+64(%rsp),%rdx
3336         movq    8+64(%rsp),%r14
3337         leaq    -128+64(%rsp),%rsi
3338         movq    16+64(%rsp),%r15
3339         movq    24+64(%rsp),%r8
3340         leaq    128(%rsp),%rdi
3341         call    __ecp_nistz256_sqr_montx
3342
3343         movq    0+96(%rsp),%rdx
3344         movq    8+96(%rsp),%r14
3345         leaq    -128+96(%rsp),%rsi
3346         movq    16+96(%rsp),%r15
3347         movq    24+96(%rsp),%r8
3348         leaq    192(%rsp),%rdi
3349         call    __ecp_nistz256_sqr_montx
3350
3351         movq    128(%rsp),%rdx
3352         leaq    128(%rsp),%rbx
3353         movq    0+64(%rsp),%r9
3354         movq    8+64(%rsp),%r10
3355         leaq    -128+64(%rsp),%rsi
3356         movq    16+64(%rsp),%r11
3357         movq    24+64(%rsp),%r12
3358         leaq    160(%rsp),%rdi
3359         call    __ecp_nistz256_mul_montx
3360
3361         movq    320(%rsp),%rdx
3362         leaq    320(%rsp),%rbx
3363         movq    0+128(%rsp),%r9
3364         movq    8+128(%rsp),%r10
3365         leaq    -128+128(%rsp),%rsi
3366         movq    16+128(%rsp),%r11
3367         movq    24+128(%rsp),%r12
3368         leaq    0(%rsp),%rdi
3369         call    __ecp_nistz256_mul_montx
3370
3371
3372
3373
3374         addq    %r12,%r12
3375         leaq    192(%rsp),%rsi
3376         adcq    %r13,%r13
3377         movq    %r12,%rax
3378         adcq    %r8,%r8
3379         adcq    %r9,%r9
3380         movq    %r13,%rbp
3381         sbbq    %r11,%r11
3382
3383         subq    $-1,%r12
3384         movq    %r8,%rcx
3385         sbbq    %r14,%r13
3386         sbbq    $0,%r8
3387         movq    %r9,%r10
3388         sbbq    %r15,%r9
3389         testq   %r11,%r11
3390
3391         cmovzq  %rax,%r12
3392         movq    0(%rsi),%rax
3393         cmovzq  %rbp,%r13
3394         movq    8(%rsi),%rbp
3395         cmovzq  %rcx,%r8
3396         movq    16(%rsi),%rcx
3397         cmovzq  %r10,%r9
3398         movq    24(%rsi),%r10
3399
3400         call    __ecp_nistz256_subx
3401
3402         leaq    160(%rsp),%rbx
3403         leaq    224(%rsp),%rdi
3404         call    __ecp_nistz256_sub_fromx
3405
3406         movq    0+0(%rsp),%rax
3407         movq    0+8(%rsp),%rbp
3408         movq    0+16(%rsp),%rcx
3409         movq    0+24(%rsp),%r10
3410         leaq    64(%rsp),%rdi
3411
3412         call    __ecp_nistz256_subx
3413
3414         movq    %r12,0(%rdi)
3415         movq    %r13,8(%rdi)
3416         movq    %r8,16(%rdi)
3417         movq    %r9,24(%rdi)
3418         movq    352(%rsp),%rdx
3419         leaq    352(%rsp),%rbx
3420         movq    0+160(%rsp),%r9
3421         movq    8+160(%rsp),%r10
3422         leaq    -128+160(%rsp),%rsi
3423         movq    16+160(%rsp),%r11
3424         movq    24+160(%rsp),%r12
3425         leaq    32(%rsp),%rdi
3426         call    __ecp_nistz256_mul_montx
3427
3428         movq    96(%rsp),%rdx
3429         leaq    96(%rsp),%rbx
3430         movq    0+64(%rsp),%r9
3431         movq    8+64(%rsp),%r10
3432         leaq    -128+64(%rsp),%rsi
3433         movq    16+64(%rsp),%r11
3434         movq    24+64(%rsp),%r12
3435         leaq    64(%rsp),%rdi
3436         call    __ecp_nistz256_mul_montx
3437
3438         leaq    32(%rsp),%rbx
3439         leaq    256(%rsp),%rdi
3440         call    __ecp_nistz256_sub_fromx
3441
3442 .byte   102,72,15,126,199
3443
3444         movdqa  %xmm5,%xmm0
3445         movdqa  %xmm5,%xmm1
3446         pandn   288(%rsp),%xmm0
3447         movdqa  %xmm5,%xmm2
3448         pandn   288+16(%rsp),%xmm1
3449         movdqa  %xmm5,%xmm3
3450         pand    .LONE_mont(%rip),%xmm2
3451         pand    .LONE_mont+16(%rip),%xmm3
3452         por     %xmm0,%xmm2
3453         por     %xmm1,%xmm3
3454
3455         movdqa  %xmm4,%xmm0
3456         movdqa  %xmm4,%xmm1
3457         pandn   %xmm2,%xmm0
3458         movdqa  %xmm4,%xmm2
3459         pandn   %xmm3,%xmm1
3460         movdqa  %xmm4,%xmm3
3461         pand    384(%rsp),%xmm2
3462         pand    384+16(%rsp),%xmm3
3463         por     %xmm0,%xmm2
3464         por     %xmm1,%xmm3
3465         movdqu  %xmm2,64(%rdi)
3466         movdqu  %xmm3,80(%rdi)
3467
3468         movdqa  %xmm5,%xmm0
3469         movdqa  %xmm5,%xmm1
3470         pandn   224(%rsp),%xmm0
3471         movdqa  %xmm5,%xmm2
3472         pandn   224+16(%rsp),%xmm1
3473         movdqa  %xmm5,%xmm3
3474         pand    416(%rsp),%xmm2
3475         pand    416+16(%rsp),%xmm3
3476         por     %xmm0,%xmm2
3477         por     %xmm1,%xmm3
3478
3479         movdqa  %xmm4,%xmm0
3480         movdqa  %xmm4,%xmm1
3481         pandn   %xmm2,%xmm0
3482         movdqa  %xmm4,%xmm2
3483         pandn   %xmm3,%xmm1
3484         movdqa  %xmm4,%xmm3
3485         pand    320(%rsp),%xmm2
3486         pand    320+16(%rsp),%xmm3
3487         por     %xmm0,%xmm2
3488         por     %xmm1,%xmm3
3489         movdqu  %xmm2,0(%rdi)
3490         movdqu  %xmm3,16(%rdi)
3491
3492         movdqa  %xmm5,%xmm0
3493         movdqa  %xmm5,%xmm1
3494         pandn   256(%rsp),%xmm0
3495         movdqa  %xmm5,%xmm2
3496         pandn   256+16(%rsp),%xmm1
3497         movdqa  %xmm5,%xmm3
3498         pand    448(%rsp),%xmm2
3499         pand    448+16(%rsp),%xmm3
3500         por     %xmm0,%xmm2
3501         por     %xmm1,%xmm3
3502
3503         movdqa  %xmm4,%xmm0
3504         movdqa  %xmm4,%xmm1
3505         pandn   %xmm2,%xmm0
3506         movdqa  %xmm4,%xmm2
3507         pandn   %xmm3,%xmm1
3508         movdqa  %xmm4,%xmm3
3509         pand    352(%rsp),%xmm2
3510         pand    352+16(%rsp),%xmm3
3511         por     %xmm0,%xmm2
3512         por     %xmm1,%xmm3
3513         movdqu  %xmm2,32(%rdi)
3514         movdqu  %xmm3,48(%rdi)
3515
3516         addq    $480+8,%rsp
3517         popq    %r15
3518         popq    %r14
3519         popq    %r13
3520         popq    %r12
3521         popq    %rbx
3522         popq    %rbp
3523         .byte   0xf3,0xc3
3524 .size   ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex