1 /* crypto/bn/bn_asm.c */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
60 # undef NDEBUG /* avoid conflicting definitions */
69 #if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
/*
 * bn_mul_add_words -- variant selected when BN_LLONG or BN_UMULT_HIGH is
 * defined.
 *
 * Every visible step is mul_add(rp[i], ap[i], w, c1): word ap[i] and the
 * multiplier w are combined with the running carry c1 and rp[i] is updated.
 * mul_add is defined outside this section -- presumably it performs
 * rp[i] += ap[i] * w + c1 and leaves the new carry in c1; confirm against
 * its definition.
 *
 * NOTE(review): the remainder of the parameter list, the loop control,
 * the pointer/num updates and the return statement are not visible in
 * this excerpt.
 */
71 BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
/* Unrolled step: words 0..3 of the current window, chained through c1. */
81 mul_add(rp[0], ap[0], w, c1);
82 mul_add(rp[1], ap[1], w, c1);
83 mul_add(rp[2], ap[2], w, c1);
84 mul_add(rp[3], ap[3], w, c1);
/*
 * Second group: at most three further words.  Whatever separates these
 * calls (presumably a remaining-count check) is not shown.
 */
90 mul_add(rp[0], ap[0], w, c1);
93 mul_add(rp[1], ap[1], w, c1);
96 mul_add(rp[2], ap[2], w, c1);
/*
 * bn_mul_words -- variant selected when BN_LLONG or BN_UMULT_HIGH is
 * defined.
 *
 * Every visible step is mul(rp[i], ap[i], w, c1).  mul is defined outside
 * this section -- presumably it stores the low word of ap[i] * w + c1 in
 * rp[i] (overwriting, not accumulating) and leaves the high word in c1;
 * confirm against its definition.
 *
 * NOTE(review): loop control, pointer/num updates and the return statement
 * are not visible in this excerpt.
 */
103 BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
/* Unrolled step: four consecutive words sharing the carry c1. */
112 mul(rp[0], ap[0], w, c1);
113 mul(rp[1], ap[1], w, c1);
114 mul(rp[2], ap[2], w, c1);
115 mul(rp[3], ap[3], w, c1);
/* Second group: at most three further words (intervening checks not shown). */
121 mul(rp[0], ap[0], w, c1);
124 mul(rp[1], ap[1], w, c1);
127 mul(rp[2], ap[2], w, c1);
/*
 * bn_sqr_words -- variant selected when BN_LLONG or BN_UMULT_HIGH is
 * defined.
 *
 * Each input word a[i] is passed to sqr() together with the output pair
 * r[2*i], r[2*i + 1]; there is no carry chained between steps.  sqr is
 * defined outside this section -- presumably the pair receives the low and
 * high words of a[i]^2; confirm against its definition.
 *
 * NOTE(review): loop control and pointer/n updates are not visible in this
 * excerpt.
 */
132 void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
/* Unrolled step: four input words produce eight output words. */
138 sqr(r[0], r[1], a[0]);
139 sqr(r[2], r[3], a[1]);
140 sqr(r[4], r[5], a[2]);
141 sqr(r[6], r[7], a[3]);
/* Second group: at most three further input words (checks not shown). */
147 sqr(r[0], r[1], a[0]);
150 sqr(r[2], r[3], a[1]);
153 sqr(r[4], r[5], a[2]);
157 #else /* !(defined(BN_LLONG) ||
158 * defined(BN_UMULT_HIGH)) */
/*
 * bn_mul_add_words -- variant used when neither BN_LLONG nor
 * BN_UMULT_HIGH is defined.
 *
 * Here mul_add takes five arguments: the destination word, the source
 * word, the pair bl/bh and the running carry c.  bl and bh are presumably
 * the low and high halves of the multiplier, split once up front -- their
 * assignments are not visible in this excerpt; confirm.
 */
160 BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
/* Early exit returning a zero carry; its condition is on a line not shown. */
168 return ((BN_ULONG)0);
/* One visible step per word, four words in sequence, chained through c. */
174 mul_add(rp[0], ap[0], bl, bh, c);
177 mul_add(rp[1], ap[1], bl, bh, c);
180 mul_add(rp[2], ap[2], bl, bh, c);
183 mul_add(rp[3], ap[3], bl, bh, c);
/*
 * bn_mul_words -- variant used when neither BN_LLONG nor BN_UMULT_HIGH is
 * defined.
 *
 * Each visible step is mul(rp[i], ap[i], bl, bh, carry).  bl and bh are
 * presumably the low and high halves of w; their assignments are not
 * visible in this excerpt -- confirm.
 */
192 BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
/* Early exit returning a zero carry; its condition is on a line not shown. */
199 return ((BN_ULONG)0);
/* One visible step per word, four words in sequence, chained through carry. */
205 mul(rp[0], ap[0], bl, bh, carry);
208 mul(rp[1], ap[1], bl, bh, carry);
211 mul(rp[2], ap[2], bl, bh, carry);
214 mul(rp[3], ap[3], bl, bh, carry);
/*
 * bn_sqr_words -- variant used when neither BN_LLONG nor BN_UMULT_HIGH is
 * defined.
 *
 * Each input word a[i] is handed to sqr64() with the output pair
 * r[2*i], r[2*i + 1].  sqr64 is defined outside this section -- presumably
 * it writes the low and high words of a[i]^2 into the pair; confirm.
 *
 * NOTE(review): loop control and the checks between the steps are not
 * visible in this excerpt.
 */
223 void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
229 sqr64(r[0], r[1], a[0]);
233 sqr64(r[2], r[3], a[1]);
237 sqr64(r[4], r[5], a[2]);
241 sqr64(r[6], r[7], a[3]);
250 #endif /* !(defined(BN_LLONG) ||
251 * defined(BN_UMULT_HIGH)) */
253 #if defined(BN_LLONG) && defined(BN_DIV2W)
/*
 * bn_div_words -- variant built when both BN_LLONG and BN_DIV2W are
 * defined.
 *
 * Forms the double-width value (h << BN_BITS2) | l in BN_ULLONG, divides
 * it by d and returns the quotient narrowed to BN_ULONG.
 *
 * NOTE(review): no check of d is visible here, so d == 0 would divide by
 * zero; the cast also discards any quotient bits that do not fit in one
 * word.  Callers presumably guarantee d != 0 and h < d -- confirm.
 */
255 BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
257 return ((BN_ULONG)(((((BN_ULLONG) h) << BN_BITS2) | l) / (BN_ULLONG) d));
262 /* Divide h,l by d and return the result. */
263 /* I need to test this some more :-( */
/*
 * Variant used when BN_LLONG and BN_DIV2W are not both defined: the
 * visible statements work on half-words (BN_BITS4 bits, selected with the
 * BN_MASK2h / BN_MASK2l masks).
 *
 * NOTE(review): many statements of this function (loop structure, the
 * computation of q, th, tl and ret, the return) are missing from this
 * excerpt; the comments below describe only what each visible line does.
 */
264 BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
266 BN_ULONG dh, dl, q, ret = 0, th, tl, t;
/* i starts as BN_num_bits_word(d) -- presumably the bit length of d. */
272 i = BN_num_bits_word(d);
/* Debug-only sanity check on h; nothing is asserted when i == BN_BITS2. */
273 assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i));
/*
 * Shift the (h,l) pair left by i bits, moving the top i bits of l into h.
 * NOTE(review): a shift by BN_BITS2 - i is undefined when i == 0; lines
 * not shown here presumably adjust i and guard this statement -- confirm.
 */
281 h = (h << i) | (l >> (BN_BITS2 - i));
/* Split the divisor into its upper (dh) and lower (dl) half-words. */
284 dh = (d & BN_MASK2h) >> BN_BITS4;
285 dl = (d & BN_MASK2l);
/* Compare the upper half of h with dh; both branches are on lines not shown. */
287 if ((h >> BN_BITS4) == dh)
/*
 * Test on t and tl: true when t has bits in its upper half, or when tl
 * does not exceed t shifted up by a half-word combined with the upper
 * half of l.  The statement it controls is not shown.
 */
296 if ((t & BN_MASK2h) ||
297 ((tl) <= ((t << BN_BITS4) | ((l & BN_MASK2h) >> BN_BITS4))))
/* Split tl: its upper half goes to t, its lower half moves up in place. */
303 t = (tl >> BN_BITS4);
304 tl = (tl << BN_BITS4) & BN_MASK2h;
/* Advance by one half-word: shift the (h,l) pair left by BN_BITS4 bits. */
320 h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2;
321 l = (l & BN_MASK2l) << BN_BITS4;
326 #endif /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */
/*
 * bn_add_words -- double-width variant (the branch that precedes the
 * "!BN_LLONG" #else).
 *
 * For each word the sum a[i] + b[i] is added into the BN_ULLONG
 * accumulator ll and the low word of ll is stored in r[i].  The value
 * finally returned is ll narrowed to BN_ULONG.
 *
 * NOTE(review): the statements between the visible pairs (presumably
 * shifting ll right by one word so only the carry remains, plus loop
 * control) are not shown in this excerpt -- confirm.
 */
329 BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
/* Early exit returning a zero carry; its condition is on a line not shown. */
336 return ((BN_ULONG)0);
/* The cast makes the addition happen in BN_ULLONG, so it cannot wrap. */
339 ll += (BN_ULLONG) a[0] + b[0];
340 r[0] = (BN_ULONG)ll & BN_MASK2;
345 ll += (BN_ULLONG) a[1] + b[1];
346 r[1] = (BN_ULONG)ll & BN_MASK2;
351 ll += (BN_ULLONG) a[2] + b[2];
352 r[2] = (BN_ULONG)ll & BN_MASK2;
357 ll += (BN_ULLONG) a[3] + b[3];
358 r[3] = (BN_ULONG)ll & BN_MASK2;
/* Whatever is left in ll is returned to the caller (presumably the carry). */
367 return ((BN_ULONG)ll);
369 #else /* !BN_LLONG */
/*
 * bn_add_words -- single-word variant (the "!BN_LLONG" branch).
 *
 * Each word is handled in two masked additions: first the incoming carry
 * c is added to t, then b[i] is added to give l.  The function returns c.
 *
 * NOTE(review): the loads into t, the carry updates after each addition
 * (presumably comparisons that detect wrap-around), the stores into r[]
 * and the loop control are not visible in this excerpt -- confirm.
 */
370 BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
/* Early exit returning a zero carry; its condition is on a line not shown. */
377 return ((BN_ULONG)0);
/* Word 0: fold in the carry, then add b[0]. */
382 t = (t + c) & BN_MASK2;
384 l = (t + b[0]) & BN_MASK2;
/* Word 1. */
391 t = (t + c) & BN_MASK2;
393 l = (t + b[1]) & BN_MASK2;
/* Word 2. */
400 t = (t + c) & BN_MASK2;
402 l = (t + b[2]) & BN_MASK2;
/* Word 3. */
409 t = (t + c) & BN_MASK2;
411 l = (t + b[3]) & BN_MASK2;
421 return ((BN_ULONG)c);
423 #endif /* !BN_LLONG */
/*
 * bn_sub_words -- word-wise subtraction with borrow.
 *
 * Each visible step stores (t1 - t2 - c), masked to one word, in r[i];
 * c is the borrow carried in from the previous word.
 *
 * NOTE(review): the loads of t1/t2 (presumably a[i] and b[i]), the borrow
 * update after each step, the loop control and the final return are not
 * visible in this excerpt -- confirm.
 */
425 BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
/* Early exit returning a zero borrow; its condition is on a line not shown. */
433 return ((BN_ULONG)0);
438 r[0] = (t1 - t2 - c) & BN_MASK2;
446 r[1] = (t1 - t2 - c) & BN_MASK2;
454 r[2] = (t1 - t2 - c) & BN_MASK2;
462 r[3] = (t1 - t2 - c) & BN_MASK2;
477 # undef bn_mul_comba8
478 # undef bn_mul_comba4
479 # undef bn_sqr_comba8
480 # undef bn_sqr_comba4
482 /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
483 /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
484 /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
486 * sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number
491 * Keep in mind that carrying into high part of multiplication result
492 * can not overflow, because it cannot be all-ones.
/*
 * Macro family built on the double-width type BN_ULLONG.
 *
 * t1 and t2 receive the low and high words (Lw/Hw) of the product, which
 * is then added into the three-word accumulator (c2,c1,c0).  Carries are
 * detected by hand: after x = (x + y) & BN_MASK2, "x < y" means the
 * addition wrapped.
 *
 * These macros expand to bare statements (no enclosing braces) and assign
 * to t, tt, t1 and t2 without declaring them, so the expanding function
 * must provide those variables.
 */
495 # define mul_add_c(a,b,c0,c1,c2) \
497 t1=(BN_ULONG)Lw(t); \
498 t2=(BN_ULONG)Hw(t); \
499 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
500 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
/*
 * Doubled-product form; tt holds the value whose words are added (its
 * computation is on lines not shown).  If adding t1 wraps c0, the carry is
 * pushed into t2; if that increment wraps t2 to zero, the carry goes
 * straight into c2.
 */
502 # define mul_add_c2(a,b,c0,c1,c2) \
506 t1=(BN_ULONG)Lw(tt); \
507 t2=(BN_ULONG)Hw(tt); \
508 c0=(c0+t1)&BN_MASK2; \
509 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
510 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
/*
 * Adds a[i]^2 to the accumulator.  Note that the argument a is used
 * without parentheses here, unlike in sqr_add_c2 below.
 */
512 # define sqr_add_c(a,i,c0,c1,c2) \
513 t=(BN_ULLONG)a[i]*a[i]; \
514 t1=(BN_ULONG)Lw(t); \
515 t2=(BN_ULONG)Hw(t); \
516 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
517 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
/* Cross term: forwards a[i] and a[j] to mul_add_c2. */
519 # define sqr_add_c2(a,i,j,c0,c1,c2) \
520 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
522 # elif defined(BN_UMULT_LOHI)
/*
 * Macro family built on BN_UMULT_LOHI(lo, hi, x, y), which is defined
 * outside this section -- presumably it yields both words of x * y;
 * confirm against its definition.
 *
 * Each macro opens a brace-enclosed block and copies its arguments into
 * locals (ta, tb) so every argument is evaluated once.  t1 and t2 are
 * assigned without being declared here, so the expanding function must
 * provide them.  Carries are detected with "sum < addend" after each
 * addition.
 *
 * mul_add_c2 does not double the product; it adds it twice.  The first
 * pass uses t2 (a copy of the high word plus the carry out of c0), the
 * second pass adds t0 again and t1 plus its own carry.
 *
 * sqr_add_c squares a[i]; sqr_add_c2 forwards a[i] and a[j] to mul_add_c2.
 */
524 # define mul_add_c(a,b,c0,c1,c2) { \
525 BN_ULONG ta=(a),tb=(b); \
526 BN_UMULT_LOHI(t1,t2,ta,tb); \
527 c0 += t1; t2 += (c0<t1)?1:0; \
528 c1 += t2; c2 += (c1<t2)?1:0; \
531 # define mul_add_c2(a,b,c0,c1,c2) { \
532 BN_ULONG ta=(a),tb=(b),t0; \
533 BN_UMULT_LOHI(t0,t1,ta,tb); \
534 c0 += t0; t2 = t1+((c0<t0)?1:0);\
535 c1 += t2; c2 += (c1<t2)?1:0; \
536 c0 += t0; t1 += (c0<t0)?1:0; \
537 c1 += t1; c2 += (c1<t1)?1:0; \
540 # define sqr_add_c(a,i,c0,c1,c2) { \
541 BN_ULONG ta=(a)[i]; \
542 BN_UMULT_LOHI(t1,t2,ta,ta); \
543 c0 += t1; t2 += (c0<t1)?1:0; \
544 c1 += t2; c2 += (c1<t2)?1:0; \
547 # define sqr_add_c2(a,i,j,c0,c1,c2) \
548 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
550 # elif defined(BN_UMULT_HIGH)
/*
 * Macro family built on BN_UMULT_HIGH(x, y), which is defined outside this
 * section -- presumably it returns the high word of x * y; confirm.
 *
 * The high word comes from BN_UMULT_HIGH; the assignment of the low word
 * (t1 in mul_add_c and sqr_add_c, t0 in mul_add_c2) is on lines not shown
 * in this excerpt.  The accumulation into (c2,c1,c0) is then identical in
 * shape to the BN_UMULT_LOHI family: arguments are copied into locals,
 * carries are detected with "sum < addend", and mul_add_c2 adds the
 * product twice instead of doubling it.
 *
 * t1 and t2 are assigned without being declared here, so the expanding
 * function must provide them.
 */
552 # define mul_add_c(a,b,c0,c1,c2) { \
553 BN_ULONG ta=(a),tb=(b); \
555 t2 = BN_UMULT_HIGH(ta,tb); \
556 c0 += t1; t2 += (c0<t1)?1:0; \
557 c1 += t2; c2 += (c1<t2)?1:0; \
560 # define mul_add_c2(a,b,c0,c1,c2) { \
561 BN_ULONG ta=(a),tb=(b),t0; \
562 t1 = BN_UMULT_HIGH(ta,tb); \
564 c0 += t0; t2 = t1+((c0<t0)?1:0);\
565 c1 += t2; c2 += (c1<t2)?1:0; \
566 c0 += t0; t1 += (c0<t0)?1:0; \
567 c1 += t1; c2 += (c1<t1)?1:0; \
570 # define sqr_add_c(a,i,c0,c1,c2) { \
571 BN_ULONG ta=(a)[i]; \
573 t2 = BN_UMULT_HIGH(ta,ta); \
574 c0 += t1; t2 += (c0<t1)?1:0; \
575 c1 += t2; c2 += (c1<t2)?1:0; \
578 # define sqr_add_c2(a,i,j,c0,c1,c2) \
579 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
581 # else /* !BN_LLONG */
/*
 * Fallback macro family, used when none of the preceding branches
 * (including BN_UMULT_LOHI and BN_UMULT_HIGH) applies.
 *
 * Operands are split with LBITS/HBITS and multiplied with mul64(), all
 * defined outside this section -- presumably mul64(t1,t2,bl,bh) leaves the
 * low and high words of the product in t1 and t2; confirm.
 *
 * The macros expand to bare statements and assign to t1, t2, bl and bh
 * without declaring them, so the expanding function must provide them.
 */
582 # define mul_add_c(a,b,c0,c1,c2) \
583 t1=LBITS(a); t2=HBITS(a); \
584 bl=LBITS(b); bh=HBITS(b); \
585 mul64(t1,t2,bl,bh); \
586 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
587 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
/*
 * Doubled-product form.  The two-word product is doubled by hand before
 * being added: a set BN_TBIT in t2 is counted into c2, t2 is doubled, a
 * set BN_TBIT in t1 is carried into t2, and t1 is doubled.  (BN_TBIT is
 * presumably the top bit of a word -- confirm.)  The final addition
 * handles the case where the carry out of c0 wraps t2 to zero by
 * incrementing c2 directly.
 */
589 # define mul_add_c2(a,b,c0,c1,c2) \
590 t1=LBITS(a); t2=HBITS(a); \
591 bl=LBITS(b); bh=HBITS(b); \
592 mul64(t1,t2,bl,bh); \
593 if (t2 & BN_TBIT) c2++; \
594 t2=(t2+t2)&BN_MASK2; \
595 if (t1 & BN_TBIT) t2++; \
596 t1=(t1+t1)&BN_MASK2; \
597 c0=(c0+t1)&BN_MASK2; \
598 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
599 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
/* Adds the square of a[i], obtained from sqr64(), to the accumulator. */
601 # define sqr_add_c(a,i,c0,c1,c2) \
602 sqr64(t1,t2,(a)[i]); \
603 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
604 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
/* Cross term: forwards a[i] and a[j] to mul_add_c2. */
606 # define sqr_add_c2(a,i,j,c0,c1,c2) \
607 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
608 # endif /* !BN_LLONG */
/*
 * bn_mul_comba8 -- column-wise (Comba) product of two 8-word numbers.
 *
 * The mul_add_c calls are grouped by column: group k contains every
 * product a[i] * b[j] with i + j == k (k = 0..14), each added into a
 * three-word accumulator.  The accumulator names rotate from one group to
 * the next, (c1,c2,c3) -> (c2,c3,c1) -> (c3,c1,c2), so the middle word of
 * one column becomes the low word of the next.
 *
 * NOTE(review): the local declarations, the stores into r[] after each
 * column and the reset of the retired accumulator word are on lines not
 * visible in this excerpt.
 */
610 void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
/* column 0: i + j == 0 */
623 mul_add_c(a[0], b[0], c1, c2, c3);
/* column 1: i + j == 1 */
626 mul_add_c(a[0], b[1], c2, c3, c1);
627 mul_add_c(a[1], b[0], c2, c3, c1);
/* column 2: i + j == 2 */
630 mul_add_c(a[2], b[0], c3, c1, c2);
631 mul_add_c(a[1], b[1], c3, c1, c2);
632 mul_add_c(a[0], b[2], c3, c1, c2);
/* column 3: i + j == 3 */
635 mul_add_c(a[0], b[3], c1, c2, c3);
636 mul_add_c(a[1], b[2], c1, c2, c3);
637 mul_add_c(a[2], b[1], c1, c2, c3);
638 mul_add_c(a[3], b[0], c1, c2, c3);
/* column 4: i + j == 4 */
641 mul_add_c(a[4], b[0], c2, c3, c1);
642 mul_add_c(a[3], b[1], c2, c3, c1);
643 mul_add_c(a[2], b[2], c2, c3, c1);
644 mul_add_c(a[1], b[3], c2, c3, c1);
645 mul_add_c(a[0], b[4], c2, c3, c1);
/* column 5: i + j == 5 */
648 mul_add_c(a[0], b[5], c3, c1, c2);
649 mul_add_c(a[1], b[4], c3, c1, c2);
650 mul_add_c(a[2], b[3], c3, c1, c2);
651 mul_add_c(a[3], b[2], c3, c1, c2);
652 mul_add_c(a[4], b[1], c3, c1, c2);
653 mul_add_c(a[5], b[0], c3, c1, c2);
/* column 6: i + j == 6 */
656 mul_add_c(a[6], b[0], c1, c2, c3);
657 mul_add_c(a[5], b[1], c1, c2, c3);
658 mul_add_c(a[4], b[2], c1, c2, c3);
659 mul_add_c(a[3], b[3], c1, c2, c3);
660 mul_add_c(a[2], b[4], c1, c2, c3);
661 mul_add_c(a[1], b[5], c1, c2, c3);
662 mul_add_c(a[0], b[6], c1, c2, c3);
/* column 7: i + j == 7 (the widest column, eight products) */
665 mul_add_c(a[0], b[7], c2, c3, c1);
666 mul_add_c(a[1], b[6], c2, c3, c1);
667 mul_add_c(a[2], b[5], c2, c3, c1);
668 mul_add_c(a[3], b[4], c2, c3, c1);
669 mul_add_c(a[4], b[3], c2, c3, c1);
670 mul_add_c(a[5], b[2], c2, c3, c1);
671 mul_add_c(a[6], b[1], c2, c3, c1);
672 mul_add_c(a[7], b[0], c2, c3, c1);
/* column 8: i + j == 8 */
675 mul_add_c(a[7], b[1], c3, c1, c2);
676 mul_add_c(a[6], b[2], c3, c1, c2);
677 mul_add_c(a[5], b[3], c3, c1, c2);
678 mul_add_c(a[4], b[4], c3, c1, c2);
679 mul_add_c(a[3], b[5], c3, c1, c2);
680 mul_add_c(a[2], b[6], c3, c1, c2);
681 mul_add_c(a[1], b[7], c3, c1, c2);
/* column 9: i + j == 9 */
684 mul_add_c(a[2], b[7], c1, c2, c3);
685 mul_add_c(a[3], b[6], c1, c2, c3);
686 mul_add_c(a[4], b[5], c1, c2, c3);
687 mul_add_c(a[5], b[4], c1, c2, c3);
688 mul_add_c(a[6], b[3], c1, c2, c3);
689 mul_add_c(a[7], b[2], c1, c2, c3);
/* column 10: i + j == 10 */
692 mul_add_c(a[7], b[3], c2, c3, c1);
693 mul_add_c(a[6], b[4], c2, c3, c1);
694 mul_add_c(a[5], b[5], c2, c3, c1);
695 mul_add_c(a[4], b[6], c2, c3, c1);
696 mul_add_c(a[3], b[7], c2, c3, c1);
/* column 11: i + j == 11 */
699 mul_add_c(a[4], b[7], c3, c1, c2);
700 mul_add_c(a[5], b[6], c3, c1, c2);
701 mul_add_c(a[6], b[5], c3, c1, c2);
702 mul_add_c(a[7], b[4], c3, c1, c2);
/* column 12: i + j == 12 */
705 mul_add_c(a[7], b[5], c1, c2, c3);
706 mul_add_c(a[6], b[6], c1, c2, c3);
707 mul_add_c(a[5], b[7], c1, c2, c3);
/* column 13: i + j == 13 */
710 mul_add_c(a[6], b[7], c2, c3, c1);
711 mul_add_c(a[7], b[6], c2, c3, c1);
/* column 14: i + j == 14 */
714 mul_add_c(a[7], b[7], c3, c1, c2);
/*
 * bn_mul_comba4 -- column-wise (Comba) product of two 4-word numbers.
 *
 * Group k contains every product a[i] * b[j] with i + j == k (k = 0..6),
 * added into a three-word accumulator whose names rotate from group to
 * group: (c1,c2,c3) -> (c2,c3,c1) -> (c3,c1,c2).
 *
 * NOTE(review): the local declarations, the stores into r[] and the reset
 * of the retired accumulator word are on lines not visible in this
 * excerpt.
 */
719 void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
/* column 0: i + j == 0 */
732 mul_add_c(a[0], b[0], c1, c2, c3);
/* column 1: i + j == 1 */
735 mul_add_c(a[0], b[1], c2, c3, c1);
736 mul_add_c(a[1], b[0], c2, c3, c1);
/* column 2: i + j == 2 */
739 mul_add_c(a[2], b[0], c3, c1, c2);
740 mul_add_c(a[1], b[1], c3, c1, c2);
741 mul_add_c(a[0], b[2], c3, c1, c2);
/* column 3: i + j == 3 (the widest column, four products) */
744 mul_add_c(a[0], b[3], c1, c2, c3);
745 mul_add_c(a[1], b[2], c1, c2, c3);
746 mul_add_c(a[2], b[1], c1, c2, c3);
747 mul_add_c(a[3], b[0], c1, c2, c3);
/* column 4: i + j == 4 */
750 mul_add_c(a[3], b[1], c2, c3, c1);
751 mul_add_c(a[2], b[2], c2, c3, c1);
752 mul_add_c(a[1], b[3], c2, c3, c1);
/* column 5: i + j == 5 */
755 mul_add_c(a[2], b[3], c3, c1, c2);
756 mul_add_c(a[3], b[2], c3, c1, c2);
/* column 6: i + j == 6 */
759 mul_add_c(a[3], b[3], c1, c2, c3);
/*
 * bn_sqr_comba8 -- column-wise (Comba) square of an 8-word number.
 *
 * Group k collects the terms whose indices sum to k (k = 0..14):
 * sqr_add_c(a, i, ...) contributes the diagonal term a[i]^2 when
 * k == 2*i, and sqr_add_c2(a, i, j, ...) contributes the doubled cross
 * term 2 * a[i] * a[j] for each pair i > j with i + j == k.  The
 * accumulator names rotate from group to group:
 * (c1,c2,c3) -> (c2,c3,c1) -> (c3,c1,c2).
 *
 * NOTE(review): the local declarations, the stores into r[] and the reset
 * of the retired accumulator word are on lines not visible in this
 * excerpt.
 */
764 void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
/* column 0 */
777 sqr_add_c(a, 0, c1, c2, c3);
/* column 1 */
780 sqr_add_c2(a, 1, 0, c2, c3, c1);
/* column 2 */
783 sqr_add_c(a, 1, c3, c1, c2);
784 sqr_add_c2(a, 2, 0, c3, c1, c2);
/* column 3 */
787 sqr_add_c2(a, 3, 0, c1, c2, c3);
788 sqr_add_c2(a, 2, 1, c1, c2, c3);
/* column 4 */
791 sqr_add_c(a, 2, c2, c3, c1);
792 sqr_add_c2(a, 3, 1, c2, c3, c1);
793 sqr_add_c2(a, 4, 0, c2, c3, c1);
/* column 5 */
796 sqr_add_c2(a, 5, 0, c3, c1, c2);
797 sqr_add_c2(a, 4, 1, c3, c1, c2);
798 sqr_add_c2(a, 3, 2, c3, c1, c2);
/* column 6 */
801 sqr_add_c(a, 3, c1, c2, c3);
802 sqr_add_c2(a, 4, 2, c1, c2, c3);
803 sqr_add_c2(a, 5, 1, c1, c2, c3);
804 sqr_add_c2(a, 6, 0, c1, c2, c3);
/* column 7 */
807 sqr_add_c2(a, 7, 0, c2, c3, c1);
808 sqr_add_c2(a, 6, 1, c2, c3, c1);
809 sqr_add_c2(a, 5, 2, c2, c3, c1);
810 sqr_add_c2(a, 4, 3, c2, c3, c1);
/* column 8 */
813 sqr_add_c(a, 4, c3, c1, c2);
814 sqr_add_c2(a, 5, 3, c3, c1, c2);
815 sqr_add_c2(a, 6, 2, c3, c1, c2);
816 sqr_add_c2(a, 7, 1, c3, c1, c2);
/* column 9 */
819 sqr_add_c2(a, 7, 2, c1, c2, c3);
820 sqr_add_c2(a, 6, 3, c1, c2, c3);
821 sqr_add_c2(a, 5, 4, c1, c2, c3);
/* column 10 */
824 sqr_add_c(a, 5, c2, c3, c1);
825 sqr_add_c2(a, 6, 4, c2, c3, c1);
826 sqr_add_c2(a, 7, 3, c2, c3, c1);
/* column 11 */
829 sqr_add_c2(a, 7, 4, c3, c1, c2);
830 sqr_add_c2(a, 6, 5, c3, c1, c2);
/* column 12 */
833 sqr_add_c(a, 6, c1, c2, c3);
834 sqr_add_c2(a, 7, 5, c1, c2, c3);
/* column 13 */
837 sqr_add_c2(a, 7, 6, c2, c3, c1);
/* column 14 */
840 sqr_add_c(a, 7, c3, c1, c2);
/*
 * bn_sqr_comba4 -- column-wise (Comba) square of a 4-word number.
 *
 * Group k collects the terms whose indices sum to k (k = 0..6):
 * sqr_add_c(a, i, ...) contributes a[i]^2 when k == 2*i, and
 * sqr_add_c2(a, i, j, ...) contributes 2 * a[i] * a[j] for each pair
 * i > j with i + j == k.  The accumulator names rotate from group to
 * group: (c1,c2,c3) -> (c2,c3,c1) -> (c3,c1,c2).
 *
 * NOTE(review): the local declarations, the stores into r[] and the reset
 * of the retired accumulator word are on lines not visible in this
 * excerpt.
 */
845 void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
/* column 0 */
858 sqr_add_c(a, 0, c1, c2, c3);
/* column 1 */
861 sqr_add_c2(a, 1, 0, c2, c3, c1);
/* column 2 */
864 sqr_add_c(a, 1, c3, c1, c2);
865 sqr_add_c2(a, 2, 0, c3, c1, c2);
/* column 3 */
868 sqr_add_c2(a, 3, 0, c1, c2, c3);
869 sqr_add_c2(a, 2, 1, c1, c2, c3);
/* column 4 */
872 sqr_add_c(a, 2, c2, c3, c1);
873 sqr_add_c2(a, 3, 1, c2, c3, c1);
/* column 5 */
876 sqr_add_c2(a, 3, 2, c3, c1, c2);
/* column 6 */
879 sqr_add_c(a, 3, c1, c2, c3);
883 #else /* !BN_MUL_COMBA */
885 /* hmm... is it faster just to do a multiply? */
886 # undef bn_sqr_comba4
/*
 * bn_sqr_comba4 -- fallback used when BN_MUL_COMBA is not defined.
 *
 * Squares the 4-word number a into r by delegating to bn_sqr_normal with
 * a word count of 4 and the scratch buffer t (declared on a line not
 * shown in this excerpt).
 *
 * The input is const-qualified so that this definition has the same
 * signature as the BN_MUL_COMBA variant of bn_sqr_comba4; the function
 * only reads from a.
 */
887 void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
890 bn_sqr_normal(r, a, 4, t);
893 # undef bn_sqr_comba8
/*
 * bn_sqr_comba8 -- fallback used when BN_MUL_COMBA is not defined.
 *
 * Squares the 8-word number a into r by delegating to bn_sqr_normal with
 * a word count of 8 and the scratch buffer t (declared on a line not
 * shown in this excerpt).
 *
 * The input is const-qualified so that this definition has the same
 * signature as the BN_MUL_COMBA variant of bn_sqr_comba8; the function
 * only reads from a.
 */
894 void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
897 bn_sqr_normal(r, a, 8, t);
/*
 * bn_mul_comba4 -- fallback used when BN_MUL_COMBA is not defined.
 *
 * Row-by-row product of two 4-word numbers into the 8-word result r.
 * Row 0 calls bn_mul_words to write a * b[0] into r[0..3]; each later
 * row k calls bn_mul_add_words to add a * b[k] into r[k..k+3].  The word
 * returned by each call (presumably the carry out of the row) is stored
 * in the next higher result word r[k + 4].
 */
900 void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
902 r[4] = bn_mul_words(&(r[0]), a, 4, b[0]);
903 r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]);
904 r[6] = bn_mul_add_words(&(r[2]), a, 4, b[2]);
905 r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]);
/*
 * bn_mul_comba8 -- fallback used when BN_MUL_COMBA is not defined.
 *
 * Row-by-row product of two 8-word numbers into the 16-word result r.
 * Row 0 calls bn_mul_words to write a * b[0] into r[0..7]; each later
 * row k calls bn_mul_add_words to add a * b[k] into r[k..k+7].  The word
 * returned by each call (presumably the carry out of the row) is stored
 * in the next higher result word r[k + 8].
 */
908 void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
910 r[8] = bn_mul_words(&(r[0]), a, 8, b[0]);
911 r[9] = bn_mul_add_words(&(r[1]), a, 8, b[1]);
912 r[10] = bn_mul_add_words(&(r[2]), a, 8, b[2]);
913 r[11] = bn_mul_add_words(&(r[3]), a, 8, b[3]);
914 r[12] = bn_mul_add_words(&(r[4]), a, 8, b[4]);
915 r[13] = bn_mul_add_words(&(r[5]), a, 8, b[5]);
916 r[14] = bn_mul_add_words(&(r[6]), a, 8, b[6]);
917 r[15] = bn_mul_add_words(&(r[7]), a, 8, b[7]);
920 #endif /* !BN_MUL_COMBA */