crypto/openssl/crypto/bn/bn_exp.c

   1 /* crypto/bn/bn_exp.c */
   2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
   3  * All rights reserved.
   4  *
   5  * This package is an SSL implementation written
   6  * by Eric Young (eay@cryptsoft.com).
   7  * The implementation was written so as to conform with Netscapes SSL.
   8  *
   9  * This library is free for commercial and non-commercial use as long as
  10  * the following conditions are aheared to.  The following conditions
  11  * apply to all code found in this distribution, be it the RC4, RSA,
  12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
  13  * included with this distribution is covered by the same copyright terms
  14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
  15  *
  16  * Copyright remains Eric Young's, and as such any Copyright notices in
  17  * the code are not to be removed.
  18  * If this package is used in a product, Eric Young should be given attribution
  19  * as the author of the parts of the library used.
  20  * This can be in the form of a textual message at program startup or
  21  * in documentation (online or textual) provided with the package.
  22  *
  23  * Redistribution and use in source and binary forms, with or without
  24  * modification, are permitted provided that the following conditions
  25  * are met:
  26  * 1. Redistributions of source code must retain the copyright
  27  *    notice, this list of conditions and the following disclaimer.
  28  * 2. Redistributions in binary form must reproduce the above copyright
  29  *    notice, this list of conditions and the following disclaimer in the
  30  *    documentation and/or other materials provided with the distribution.
  31  * 3. All advertising materials mentioning features or use of this software
  32  *    must display the following acknowledgement:
  33  *    "This product includes cryptographic software written by
  34  *     Eric Young (eay@cryptsoft.com)"
  35  *    The word 'cryptographic' can be left out if the rouines from the library
  36  *    being used are not cryptographic related :-).
  37  * 4. If you include any Windows specific code (or a derivative thereof) from
  38  *    the apps directory (application code) you must include an acknowledgement:
  39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
  40  *
  41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  51  * SUCH DAMAGE.
  52  *
  53  * The licence and distribution terms for any publically available version or
  54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
  55  * copied and put under another distribution licence
  56  * [including the GNU Public Licence.]
  57  */
  58 /* ====================================================================
  59  * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
  60  *
  61  * Redistribution and use in source and binary forms, with or without
  62  * modification, are permitted provided that the following conditions
  63  * are met:
  64  *
  65  * 1. Redistributions of source code must retain the above copyright
  66  *    notice, this list of conditions and the following disclaimer.
  67  *
  68  * 2. Redistributions in binary form must reproduce the above copyright
  69  *    notice, this list of conditions and the following disclaimer in
  70  *    the documentation and/or other materials provided with the
  71  *    distribution.
  72  *
  73  * 3. All advertising materials mentioning features or use of this
  74  *    software must display the following acknowledgment:
  75  *    "This product includes software developed by the OpenSSL Project
  76  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
  77  *
  78  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  79  *    endorse or promote products derived from this software without
  80  *    prior written permission. For written permission, please contact
  81  *    openssl-core@openssl.org.
  82  *
  83  * 5. Products derived from this software may not be called "OpenSSL"
  84  *    nor may "OpenSSL" appear in their names without prior written
  85  *    permission of the OpenSSL Project.
  86  *
  87  * 6. Redistributions of any form whatsoever must retain the following
  88  *    acknowledgment:
  89  *    "This product includes software developed by the OpenSSL Project
  90  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
  91  *
  92  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  93  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  95  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
  96  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  97  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  98  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  99  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 100  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 101  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 102  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 103  * OF THE POSSIBILITY OF SUCH DAMAGE.
 104  * ====================================================================
 105  *
 106  * This product includes cryptographic software written by Eric Young
 107  * (eay@cryptsoft.com).  This product includes software written by Tim
 108  * Hudson (tjh@cryptsoft.com).
 109  *
 110  */
 111
 112 #include "cryptlib.h"
 113 #include "constant_time_locl.h"
 114 #include "bn_lcl.h"
 115
 116 /* maximum precomputation table size for *variable* sliding windows */
 117 #define TABLE_SIZE      32
 118
 119 /* this one works - simple but works */
 120 int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
 121 {
 122     int i, bits, ret = 0;
 123     BIGNUM *v, *rr;
 124
 125     if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
 126         /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
 127         BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
 128         return -1;
 129     }
 130
 131     BN_CTX_start(ctx);
 132     if ((r == a) || (r == p))
 133         rr = BN_CTX_get(ctx);
 134     else
 135         rr = r;
 136     v = BN_CTX_get(ctx);
 137     if (rr == NULL || v == NULL)
 138         goto err;
 139
 140     if (BN_copy(v, a) == NULL)
 141         goto err;
 142     bits = BN_num_bits(p);
 143
 144     if (BN_is_odd(p)) {
 145         if (BN_copy(rr, a) == NULL)
 146             goto err;
 147     } else {
 148         if (!BN_one(rr))
 149             goto err;
 150     }
 151
 152     for (i = 1; i < bits; i++) {
 153         if (!BN_sqr(v, v, ctx))
 154             goto err;
 155         if (BN_is_bit_set(p, i)) {
 156             if (!BN_mul(rr, rr, v, ctx))
 157                 goto err;
 158         }
 159     }
 160     ret = 1;
 161  err:
 162     if (r != rr)
 163         BN_copy(r, rr);
 164     BN_CTX_end(ctx);
 165     bn_check_top(r);
 166     return (ret);
 167 }
 168
 169 int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
 170                BN_CTX *ctx)
 171 {
 172     int ret;
 173
 174     bn_check_top(a);
 175     bn_check_top(p);
 176     bn_check_top(m);
 177
 178     /*-
 179      * For even modulus  m = 2^k*m_odd,  it might make sense to compute
 180      * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
 181      * exponentiation for the odd part), using appropriate exponent
 182      * reductions, and combine the results using the CRT.
 183      *
 184      * For now, we use Montgomery only if the modulus is odd; otherwise,
 185      * exponentiation using the reciprocal-based quick remaindering
 186      * algorithm is used.
 187      *
 188      * (Timing obtained with expspeed.c [computations  a^p mod m
 189      * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
 190      * 4096, 8192 bits], compared to the running time of the
 191      * standard algorithm:
 192      *
 193      *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
 194      *                     55 .. 77 %  [UltraSparc processor, but
 195      *                                  debug-solaris-sparcv8-gcc conf.]
 196      *
 197      *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
 198      *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
 199      *
 200      * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
 201      * at 2048 and more bits, but at 512 and 1024 bits, it was
 202      * slower even than the standard algorithm!
 203      *
 204      * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
 205      * should be obtained when the new Montgomery reduction code
 206      * has been integrated into OpenSSL.)
 207      */
 208
 209 #define MONT_MUL_MOD
 210 #define MONT_EXP_WORD
 211 #define RECP_MUL_MOD
 212
 213 #ifdef MONT_MUL_MOD
 214     /*
 215      * I have finally been able to take out this pre-condition of the top bit
 216      * being set.  It was caused by an error in BN_div with negatives.  There
 217      * was also another problem when for a^b%m a >= m.  eay 07-May-97
 218      */
 219     /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
 220
 221     if (BN_is_odd(m)) {
 222 # ifdef MONT_EXP_WORD
 223         if (a->top == 1 && !a->neg
 224             && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) {
 225             BN_ULONG A = a->d[0];
 226             ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
 227         } else
 228 # endif
 229             ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL);
 230     } else
 231 #endif
 232 #ifdef RECP_MUL_MOD
 233     {
 234         ret = BN_mod_exp_recp(r, a, p, m, ctx);
 235     }
 236 #else
 237     {
 238         ret = BN_mod_exp_simple(r, a, p, m, ctx);
 239     }
 240 #endif
 241
 242     bn_check_top(r);
 243     return (ret);
 244 }
 245
 246 int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
 247                     const BIGNUM *m, BN_CTX *ctx)
 248 {
 249     int i, j, bits, ret = 0, wstart, wend, window, wvalue;
 250     int start = 1;
 251     BIGNUM *aa;
 252     /* Table of variables obtained from 'ctx' */
 253     BIGNUM *val[TABLE_SIZE];
 254     BN_RECP_CTX recp;
 255
 256     if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
 257         /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
 258         BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
 259         return -1;
 260     }
 261
 262     bits = BN_num_bits(p);
 263
 264     if (bits == 0) {
 265         ret = BN_one(r);
 266         return ret;
 267     }
 268
 269     BN_CTX_start(ctx);
 270     aa = BN_CTX_get(ctx);
 271     val[0] = BN_CTX_get(ctx);
 272     if (!aa || !val[0])
 273         goto err;
 274
 275     BN_RECP_CTX_init(&recp);
 276     if (m->neg) {
 277         /* ignore sign of 'm' */
 278         if (!BN_copy(aa, m))
 279             goto err;
 280         aa->neg = 0;
 281         if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
 282             goto err;
 283     } else {
 284         if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
 285             goto err;
 286     }
 287
 288     if (!BN_nnmod(val[0], a, m, ctx))
 289         goto err;               /* 1 */
 290     if (BN_is_zero(val[0])) {
 291         BN_zero(r);
 292         ret = 1;
 293         goto err;
 294     }
 295
 296     window = BN_window_bits_for_exponent_size(bits);
 297     if (window > 1) {
 298         if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
 299             goto err;           /* 2 */
 300         j = 1 << (window - 1);
 301         for (i = 1; i < j; i++) {
 302             if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
 303                 !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx))
 304                 goto err;
 305         }
 306     }
 307
 308     start = 1;                  /* This is used to avoid multiplication etc
 309                                  * when there is only the value '1' in the
 310                                  * buffer. */
 311     wvalue = 0;                 /* The 'value' of the window */
 312     wstart = bits - 1;          /* The top bit of the window */
 313     wend = 0;                   /* The bottom bit of the window */
 314
 315     if (!BN_one(r))
 316         goto err;
 317
 318     for (;;) {
 319         if (BN_is_bit_set(p, wstart) == 0) {
 320             if (!start)
 321                 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
 322                     goto err;
 323             if (wstart == 0)
 324                 break;
 325             wstart--;
 326             continue;
 327         }
 328         /*
 329          * We now have wstart on a 'set' bit, we now need to work out how bit
 330          * a window to do.  To do this we need to scan forward until the last
 331          * set bit before the end of the window
 332          */
 333         j = wstart;
 334         wvalue = 1;
 335         wend = 0;
 336         for (i = 1; i < window; i++) {
 337             if (wstart - i < 0)
 338                 break;
 339             if (BN_is_bit_set(p, wstart - i)) {
 340                 wvalue <<= (i - wend);
 341                 wvalue |= 1;
 342                 wend = i;
 343             }
 344         }
 345
 346         /* wend is the size of the current window */
 347         j = wend + 1;
 348         /* add the 'bytes above' */
 349         if (!start)
 350             for (i = 0; i < j; i++) {
 351                 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
 352                     goto err;
 353             }
 354
 355         /* wvalue will be an odd number < 2^window */
 356         if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx))
 357             goto err;
 358
 359         /* move the 'window' down further */
 360         wstart -= wend + 1;
 361         wvalue = 0;
 362         start = 0;
 363         if (wstart < 0)
 364             break;
 365     }
 366     ret = 1;
 367  err:
 368     BN_CTX_end(ctx);
 369     BN_RECP_CTX_free(&recp);
 370     bn_check_top(r);
 371     return (ret);
 372 }
 373
 374 int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
 375                     const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
 376 {
 377     int i, j, bits, ret = 0, wstart, wend, window, wvalue;
 378     int start = 1;
 379     BIGNUM *d, *r;
 380     const BIGNUM *aa;
 381     /* Table of variables obtained from 'ctx' */
 382     BIGNUM *val[TABLE_SIZE];
 383     BN_MONT_CTX *mont = NULL;
 384
 385     if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
 386         return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
 387     }
 388
 389     bn_check_top(a);
 390     bn_check_top(p);
 391     bn_check_top(m);
 392
 393     if (!BN_is_odd(m)) {
 394         BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
 395         return (0);
 396     }
 397     bits = BN_num_bits(p);
 398     if (bits == 0) {
 399         ret = BN_one(rr);
 400         return ret;
 401     }
 402
 403     BN_CTX_start(ctx);
 404     d = BN_CTX_get(ctx);
 405     r = BN_CTX_get(ctx);
 406     val[0] = BN_CTX_get(ctx);
 407     if (!d || !r || !val[0])
 408         goto err;
 409
 410     /*
 411      * If this is not done, things will break in the montgomery part
 412      */
 413
 414     if (in_mont != NULL)
 415         mont = in_mont;
 416     else {
 417         if ((mont = BN_MONT_CTX_new()) == NULL)
 418             goto err;
 419         if (!BN_MONT_CTX_set(mont, m, ctx))
 420             goto err;
 421     }
 422
 423     if (a->neg || BN_ucmp(a, m) >= 0) {
 424         if (!BN_nnmod(val[0], a, m, ctx))
 425             goto err;
 426         aa = val[0];
 427     } else
 428         aa = a;
 429     if (BN_is_zero(aa)) {
 430         BN_zero(rr);
 431         ret = 1;
 432         goto err;
 433     }
 434     if (!BN_to_montgomery(val[0], aa, mont, ctx))
 435         goto err;               /* 1 */
 436
 437     window = BN_window_bits_for_exponent_size(bits);
 438     if (window > 1) {
 439         if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
 440             goto err;           /* 2 */
 441         j = 1 << (window - 1);
 442         for (i = 1; i < j; i++) {
 443             if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
 444                 !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx))
 445                 goto err;
 446         }
 447     }
 448
 449     start = 1;                  /* This is used to avoid multiplication etc
 450                                  * when there is only the value '1' in the
 451                                  * buffer. */
 452     wvalue = 0;                 /* The 'value' of the window */
 453     wstart = bits - 1;          /* The top bit of the window */
 454     wend = 0;                   /* The bottom bit of the window */
 455
 456     if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
 457         goto err;
 458     for (;;) {
 459         if (BN_is_bit_set(p, wstart) == 0) {
 460             if (!start) {
 461                 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
 462                     goto err;
 463             }
 464             if (wstart == 0)
 465                 break;
 466             wstart--;
 467             continue;
 468         }
 469         /*
 470          * We now have wstart on a 'set' bit, we now need to work out how bit
 471          * a window to do.  To do this we need to scan forward until the last
 472          * set bit before the end of the window
 473          */
 474         j = wstart;
 475         wvalue = 1;
 476         wend = 0;
 477         for (i = 1; i < window; i++) {
 478             if (wstart - i < 0)
 479                 break;
 480             if (BN_is_bit_set(p, wstart - i)) {
 481                 wvalue <<= (i - wend);
 482                 wvalue |= 1;
 483                 wend = i;
 484             }
 485         }
 486
 487         /* wend is the size of the current window */
 488         j = wend + 1;
 489         /* add the 'bytes above' */
 490         if (!start)
 491             for (i = 0; i < j; i++) {
 492                 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
 493                     goto err;
 494             }
 495
 496         /* wvalue will be an odd number < 2^window */
 497         if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
 498             goto err;
 499
 500         /* move the 'window' down further */
 501         wstart -= wend + 1;
 502         wvalue = 0;
 503         start = 0;
 504         if (wstart < 0)
 505             break;
 506     }
 507     if (!BN_from_montgomery(rr, r, mont, ctx))
 508         goto err;
 509     ret = 1;
 510  err:
 511     if ((in_mont == NULL) && (mont != NULL))
 512         BN_MONT_CTX_free(mont);
 513     BN_CTX_end(ctx);
 514     bn_check_top(rr);
 515     return (ret);
 516 }
 517
 518 /*
 519  * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
 520  * layout so that accessing any of these table values shows the same access
 521  * pattern as far as cache lines are concerned.  The following functions are
 522  * used to transfer a BIGNUM from/to that table.
 523  */
 524
 525 static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top,
 526                                         unsigned char *buf, int idx,
 527                                         int window)
 528 {
 529     int i, j;
 530     int width = 1 << window;
 531     BN_ULONG *table = (BN_ULONG *)buf;
 532
 533     if (bn_wexpand(b, top) == NULL)
 534         return 0;
 535     while (b->top < top) {
 536         b->d[b->top++] = 0;
 537     }
 538
 539     for (i = 0, j = idx; i < top; i++, j += width) {
 540         table[j] = b->d[i];
 541     }
 542
 543     bn_correct_top(b);
 544     return 1;
 545 }
 546
 547 static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top,
 548                                           unsigned char *buf, int idx,
 549                                           int window)
 550 {
 551     int i, j;
 552     int width = 1 << window;
 553     volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
 554
 555     if (bn_wexpand(b, top) == NULL)
 556         return 0;
 557
 558     if (window <= 3) {
 559         for (i = 0; i < top; i++, table += width) {
 560             BN_ULONG acc = 0;
 561
 562             for (j = 0; j < width; j++) {
 563                 acc |= table[j] &
 564                        ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
 565             }
 566
 567             b->d[i] = acc;
 568         }
 569     } else {
 570         int xstride = 1 << (window - 2);
 571         BN_ULONG y0, y1, y2, y3;
 572
 573         i = idx >> (window - 2);        /* equivalent of idx / xstride */
 574         idx &= xstride - 1;             /* equivalent of idx % xstride */
 575
 576         y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1);
 577         y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1);
 578         y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1);
 579         y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1);
 580
 581         for (i = 0; i < top; i++, table += width) {
 582             BN_ULONG acc = 0;
 583
 584             for (j = 0; j < xstride; j++) {
 585                 acc |= ( (table[j + 0 * xstride] & y0) |
 586                          (table[j + 1 * xstride] & y1) |
 587                          (table[j + 2 * xstride] & y2) |
 588                          (table[j + 3 * xstride] & y3) )
 589                        & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
 590             }
 591
 592             b->d[i] = acc;
 593         }
 594     }
 595
 596     b->top = top;
 597     bn_correct_top(b);
 598     return 1;
 599 }
 600
 601 /*
 602  * Given a pointer value, compute the next address that is a cache line
 603  * multiple.
 604  */
 605 #define MOD_EXP_CTIME_ALIGN(x_) \
 606         ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
 607
 608 /*
 609  * This variant of BN_mod_exp_mont() uses fixed windows and the special
 610  * precomputation memory layout to limit data-dependency to a minimum to
 611  * protect secret exponents (cf. the hyper-threading timing attacks pointed
 612  * out by Colin Percival,
 613  * http://www.daemong-consideredperthreading-considered-harmful/)
 614  */
 615 int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
 616                               const BIGNUM *m, BN_CTX *ctx,
 617                               BN_MONT_CTX *in_mont)
 618 {
 619     int i, bits, ret = 0, idx, window, wvalue;
 620     int top;
 621     BIGNUM *r;
 622     const BIGNUM *aa;
 623     BN_MONT_CTX *mont = NULL;
 624
 625     int numPowers;
 626     unsigned char *powerbufFree = NULL;
 627     int powerbufLen = 0;
 628     unsigned char *powerbuf = NULL;
 629     BIGNUM *computeTemp = NULL, *am = NULL;
 630
 631     bn_check_top(a);
 632     bn_check_top(p);
 633     bn_check_top(m);
 634
 635     top = m->top;
 636
 637     if (!(m->d[0] & 1)) {
 638         BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS);
 639         return (0);
 640     }
 641     bits = BN_num_bits(p);
 642     if (bits == 0) {
 643         ret = BN_one(rr);
 644         return ret;
 645     }
 646
 647     /* Initialize BIGNUM context and allocate intermediate result */
 648     BN_CTX_start(ctx);
 649     r = BN_CTX_get(ctx);
 650     if (r == NULL)
 651         goto err;
 652
 653     /*
 654      * Allocate a montgomery context if it was not supplied by the caller. If
 655      * this is not done, things will break in the montgomery part.
 656      */
 657     if (in_mont != NULL)
 658         mont = in_mont;
 659     else {
 660         if ((mont = BN_MONT_CTX_new()) == NULL)
 661             goto err;
 662         if (!BN_MONT_CTX_set(mont, m, ctx))
 663             goto err;
 664     }
 665
 666     /* Get the window size to use with size of p. */
 667     window = BN_window_bits_for_ctime_exponent_size(bits);
 668
 669     /*
 670      * Allocate a buffer large enough to hold all of the pre-computed powers
 671      * of a.
 672      */
 673     numPowers = 1 << window;
 674     powerbufLen = sizeof(m->d[0]) * top * numPowers;
 675     if ((powerbufFree =
 676          (unsigned char *)OPENSSL_malloc(powerbufLen +
 677                                          MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH))
 678         == NULL)
 679         goto err;
 680
 681     powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
 682     memset(powerbuf, 0, powerbufLen);
 683
 684     /*
 685      * Initialize the intermediate result. Do this early to save double
 686      * conversion, once each for a^0 and intermediate result.
 687      */
 688     if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
 689         goto err;
 690     if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, window))
 691         goto err;
 692
 693     /* Initialize computeTemp as a^1 with montgomery precalcs */
 694     computeTemp = BN_CTX_get(ctx);
 695     am = BN_CTX_get(ctx);
 696     if (computeTemp == NULL || am == NULL)
 697         goto err;
 698
 699     if (a->neg || BN_ucmp(a, m) >= 0) {
 700         if (!BN_mod(am, a, m, ctx))
 701             goto err;
 702         aa = am;
 703     } else
 704         aa = a;
 705     if (!BN_to_montgomery(am, aa, mont, ctx))
 706         goto err;
 707     if (!BN_copy(computeTemp, am))
 708         goto err;
 709     if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, window))
 710         goto err;
 711
 712     /*
 713      * If the window size is greater than 1, then calculate
 714      * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even powers
 715      * could instead be computed as (a^(i/2))^2 to use the slight performance
 716      * advantage of sqr over mul).
 717      */
 718     if (window > 1) {
 719         for (i = 2; i < numPowers; i++) {
 720             /* Calculate a^i = a^(i-1) * a */
 721             if (!BN_mod_mul_montgomery
 722                 (computeTemp, am, computeTemp, mont, ctx))
 723                 goto err;
 724             if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i,
 725                                               window))
 726                 goto err;
 727         }
 728     }
 729
 730     /*
 731      * Adjust the number of bits up to a multiple of the window size. If the
 732      * exponent length is not a multiple of the window size, then this pads
 733      * the most significant bits with zeros to normalize the scanning loop to
 734      * there's no special cases. * NOTE: Making the window size a power of
 735      * two less than the native * word size ensures that the padded bits
 736      * won't go past the last * word in the internal BIGNUM structure. Going
 737      * past the end will * still produce the correct result, but causes a
 738      * different branch * to be taken in the BN_is_bit_set function.
 739      */
 740     bits = ((bits + window - 1) / window) * window;
 741     idx = bits - 1;             /* The top bit of the window */
 742
 743     /*
 744      * Scan the exponent one window at a time starting from the most
 745      * significant bits.
 746      */
 747     while (idx >= 0) {
 748         wvalue = 0;             /* The 'value' of the window */
 749
 750         /* Scan the window, squaring the result as we go */
 751         for (i = 0; i < window; i++, idx--) {
 752             if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
 753                 goto err;
 754             wvalue = (wvalue << 1) + BN_is_bit_set(p, idx);
 755         }
 756
 757         /*
 758          * Fetch the appropriate pre-computed value from the pre-buf
 759          */
 760         if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
 761             (computeTemp, top, powerbuf, wvalue, window))
 762             goto err;
 763
 764         /* Multiply the result into the intermediate result */
 765         if (!BN_mod_mul_montgomery(r, r, computeTemp, mont, ctx))
 766             goto err;
 767     }
 768
 769     /* Convert the final result from montgomery to standard format */
 770     if (!BN_from_montgomery(rr, r, mont, ctx))
 771         goto err;
 772     ret = 1;
 773  err:
 774     if ((in_mont == NULL) && (mont != NULL))
 775         BN_MONT_CTX_free(mont);
 776     if (powerbuf != NULL) {
 777         OPENSSL_cleanse(powerbuf, powerbufLen);
 778         OPENSSL_free(powerbufFree);
 779     }
 780     if (am != NULL)
 781         BN_clear(am);
 782     if (computeTemp != NULL)
 783         BN_clear(computeTemp);
 784     BN_CTX_end(ctx);
 785     return (ret);
 786 }
 787
 788 int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
 789                          const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
 790 {
 791     BN_MONT_CTX *mont = NULL;
 792     int b, bits, ret = 0;
 793     int r_is_one;
 794     BN_ULONG w, next_w;
 795     BIGNUM *d, *r, *t;
 796     BIGNUM *swap_tmp;
 797 #define BN_MOD_MUL_WORD(r, w, m) \
 798                 (BN_mul_word(r, (w)) && \
 799                 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
 800                         (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
 801     /*
 802      * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
 803      * probably more overhead than always using BN_mod (which uses BN_copy if
 804      * a similar test returns true).
 805      */
 806     /*
 807      * We can use BN_mod and do not need BN_nnmod because our accumulator is
 808      * never negative (the result of BN_mod does not depend on the sign of
 809      * the modulus).
 810      */
 811 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
 812                 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
 813
 814     if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
 815         /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
 816         BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
 817         return -1;
 818     }
 819
 820     bn_check_top(p);
 821     bn_check_top(m);
 822
 823     if (!BN_is_odd(m)) {
 824         BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS);
 825         return (0);
 826     }
 827     if (m->top == 1)
 828         a %= m->d[0];           /* make sure that 'a' is reduced */
 829
 830     bits = BN_num_bits(p);
 831     if (bits == 0) {
 832         /* x**0 mod 1 is still zero. */
 833         if (BN_is_one(m)) {
 834             ret = 1;
 835             BN_zero(rr);
 836         } else
 837             ret = BN_one(rr);
 838         return ret;
 839     }
 840     if (a == 0) {
 841         BN_zero(rr);
 842         ret = 1;
 843         return ret;
 844     }
 845
 846     BN_CTX_start(ctx);
 847     d = BN_CTX_get(ctx);
 848     r = BN_CTX_get(ctx);
 849     t = BN_CTX_get(ctx);
 850     if (d == NULL || r == NULL || t == NULL)
 851         goto err;
 852
 853     if (in_mont != NULL)
 854         mont = in_mont;
 855     else {
 856         if ((mont = BN_MONT_CTX_new()) == NULL)
 857             goto err;
 858         if (!BN_MONT_CTX_set(mont, m, ctx))
 859             goto err;
 860     }
 861
 862     r_is_one = 1;               /* except for Montgomery factor */
 863
 864     /* bits-1 >= 0 */
 865
 866     /* The result is accumulated in the product r*w. */
 867     w = a;                      /* bit 'bits-1' of 'p' is always set */
 868     for (b = bits - 2; b >= 0; b--) {
 869         /* First, square r*w. */
 870         next_w = w * w;
 871         if ((next_w / w) != w) { /* overflow */
 872             if (r_is_one) {
 873                 if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
 874                     goto err;
 875                 r_is_one = 0;
 876             } else {
 877                 if (!BN_MOD_MUL_WORD(r, w, m))
 878                     goto err;
 879             }
 880             next_w = 1;
 881         }
 882         w = next_w;
 883         if (!r_is_one) {
 884             if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
 885                 goto err;
 886         }
 887
 888         /* Second, multiply r*w by 'a' if exponent bit is set. */
 889         if (BN_is_bit_set(p, b)) {
 890             next_w = w * a;
 891             if ((next_w / a) != w) { /* overflow */
 892                 if (r_is_one) {
 893                     if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
 894                         goto err;
 895                     r_is_one = 0;
 896                 } else {
 897                     if (!BN_MOD_MUL_WORD(r, w, m))
 898                         goto err;
 899                 }
 900                 next_w = a;
 901             }
 902             w = next_w;
 903         }
 904     }
 905
 906     /* Finally, set r:=r*w. */
 907     if (w != 1) {
 908         if (r_is_one) {
 909             if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
 910                 goto err;
 911             r_is_one = 0;
 912         } else {
 913             if (!BN_MOD_MUL_WORD(r, w, m))
 914                 goto err;
 915         }
 916     }
 917
 918     if (r_is_one) {             /* can happen only if a == 1 */
 919         if (!BN_one(rr))
 920             goto err;
 921     } else {
 922         if (!BN_from_montgomery(rr, r, mont, ctx))
 923             goto err;
 924     }
 925     ret = 1;
 926  err:
 927     if ((in_mont == NULL) && (mont != NULL))
 928         BN_MONT_CTX_free(mont);
 929     BN_CTX_end(ctx);
 930     bn_check_top(rr);
 931     return (ret);
 932 }
 933
 934 /* The old fallback, simple version :-) */
 935 int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
 936                       const BIGNUM *m, BN_CTX *ctx)
 937 {
 938     int i, j, bits, ret = 0, wstart, wend, window, wvalue;
 939     int start = 1;
 940     BIGNUM *d;
 941     /* Table of variables obtained from 'ctx' */
 942     BIGNUM *val[TABLE_SIZE];
 943
 944     if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
 945         /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
 946         BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
 947         return -1;
 948     }
 949
 950     bits = BN_num_bits(p);
 951
 952     if (bits == 0) {
 953         ret = BN_one(r);
 954         return ret;
 955     }
 956
 957     BN_CTX_start(ctx);
 958     d = BN_CTX_get(ctx);
 959     val[0] = BN_CTX_get(ctx);
 960     if (!d || !val[0])
 961         goto err;
 962
 963     if (!BN_nnmod(val[0], a, m, ctx))
 964         goto err;               /* 1 */
 965     if (BN_is_zero(val[0])) {
 966         BN_zero(r);
 967         ret = 1;
 968         goto err;
 969     }
 970
 971     window = BN_window_bits_for_exponent_size(bits);
 972     if (window > 1) {
 973         if (!BN_mod_mul(d, val[0], val[0], m, ctx))
 974             goto err;           /* 2 */
 975         j = 1 << (window - 1);
 976         for (i = 1; i < j; i++) {
 977             if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
 978                 !BN_mod_mul(val[i], val[i - 1], d, m, ctx))
 979                 goto err;
 980         }
 981     }
 982
 983     start = 1;                  /* This is used to avoid multiplication etc
 984                                  * when there is only the value '1' in the
 985                                  * buffer. */
 986     wvalue = 0;                 /* The 'value' of the window */
 987     wstart = bits - 1;          /* The top bit of the window */
 988     wend = 0;                   /* The bottom bit of the window */
 989
 990     if (!BN_one(r))
 991         goto err;
 992
 993     for (;;) {
 994         if (BN_is_bit_set(p, wstart) == 0) {
 995             if (!start)
 996                 if (!BN_mod_mul(r, r, r, m, ctx))
 997                     goto err;
 998             if (wstart == 0)
 999                 break;
1000             wstart--;
1001             continue;
1002         }
1003         /*
1004          * We now have wstart on a 'set' bit, we now need to work out how bit
1005          * a window to do.  To do this we need to scan forward until the last
1006          * set bit before the end of the window
1007          */
1008         j = wstart;
1009         wvalue = 1;
1010         wend = 0;
1011         for (i = 1; i < window; i++) {
1012             if (wstart - i < 0)
1013                 break;
1014             if (BN_is_bit_set(p, wstart - i)) {
1015                 wvalue <<= (i - wend);
1016                 wvalue |= 1;
1017                 wend = i;
1018             }
1019         }
1020
1021         /* wend is the size of the current window */
1022         j = wend + 1;
1023         /* add the 'bytes above' */
1024         if (!start)
1025             for (i = 0; i < j; i++) {
1026                 if (!BN_mod_mul(r, r, r, m, ctx))
1027                     goto err;
1028             }
1029
1030         /* wvalue will be an odd number < 2^window */
1031         if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
1032             goto err;
1033
1034         /* move the 'window' down further */
1035         wstart -= wend + 1;
1036         wvalue = 0;
1037         start = 0;
1038         if (wstart < 0)
1039             break;
1040     }
1041     ret = 1;
1042  err:
1043     BN_CTX_end(ctx);
1044     bn_check_top(r);
1045     return (ret);
1046 }