contrib/bearssl/src/rsa/rsa_i31_privexp.c

   1 /*
   2  * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining
   5  * a copy of this software and associated documentation files (the
   6  * "Software"), to deal in the Software without restriction, including
   7  * without limitation the rights to use, copy, modify, merge, publish,
   8  * distribute, sublicense, and/or sell copies of the Software, and to
   9  * permit persons to whom the Software is furnished to do so, subject to
  10  * the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be
  13  * included in all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  */
  24
  25 #include "inner.h"
  26
  27 /* see bearssl_rsa.h */
  28 size_t
  29 br_rsa_i31_compute_privexp(void *d,
  30         const br_rsa_private_key *sk, uint32_t e)
  31 {
  32         /*
  33          * We want to invert e modulo phi = (p-1)(q-1). This first
  34          * requires computing phi, which is easy since we have the factors
  35          * p and q in the private key structure.
  36          *
  37          * Since p = 3 mod 4 and q = 3 mod 4, phi/4 is an odd integer.
  38          * We could invert e modulo phi/4 then patch the result to
  39          * modulo phi, but this would involve assembling three modulus-wide
  40          * values (phi/4, 1 and e) and calling moddiv, that requires
  41          * three more temporaries, for a total of six big integers, or
  42          * slightly more than 3 kB of stack space for RSA-4096. This
  43          * exceeds our stack requirements.
  44          *
  45          * Instead, we first use one step of the extended GCD:
  46          *
  47          *   - We compute phi = k*e + r  (Euclidean division of phi by e).
  48          *     If public exponent e is correct, then r != 0 (e must be
  49          *     invertible modulo phi). We also have k != 0 since we
  50          *     enforce non-ridiculously-small factors.
  51          *
  52          *   - We find small u, v such that u*e - v*r = 1  (using a
  53          *     binary GCD; we can arrange for u < r and v < e, i.e. all
  54          *     values fit on 32 bits).
  55          *
  56          *   - Solution is: d = u + v*k
  57          *     This last computation is exact: since u < r and v < e,
  58          *     the above implies d < r + e*((phi-r)/e) = phi
  59          */
  60
  61         uint32_t tmp[4 * ((BR_MAX_RSA_FACTOR + 30) / 31) + 12];
  62         uint32_t *p, *q, *k, *m, *z, *phi;
  63         const unsigned char *pbuf, *qbuf;
  64         size_t plen, qlen, u, len, dlen;
  65         uint32_t r, a, b, u0, v0, u1, v1, he, hr;
  66         int i;
  67
  68         /*
  69          * Check that e is correct.
  70          */
  71         if (e < 3 || (e & 1) == 0) {
  72                 return 0;
  73         }
  74
  75         /*
  76          * Check lengths of p and q, and that they are both odd.
  77          */
  78         pbuf = sk->p;
  79         plen = sk->plen;
  80         while (plen > 0 && *pbuf == 0) {
  81                 pbuf ++;
  82                 plen --;
  83         }
  84         if (plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8)
  85                 || (pbuf[plen - 1] & 1) != 1)
  86         {
  87                 return 0;
  88         }
  89         qbuf = sk->q;
  90         qlen = sk->qlen;
  91         while (qlen > 0 && *qbuf == 0) {
  92                 qbuf ++;
  93                 qlen --;
  94         }
  95         if (qlen < 5 || qlen > (BR_MAX_RSA_FACTOR / 8)
  96                 || (qbuf[qlen - 1] & 1) != 1)
  97         {
  98                 return 0;
  99         }
 100
 101         /*
 102          * Output length is that of the modulus.
 103          */
 104         dlen = (sk->n_bitlen + 7) >> 3;
 105         if (d == NULL) {
 106                 return dlen;
 107         }
 108
 109         p = tmp;
 110         br_i31_decode(p, pbuf, plen);
 111         plen = (p[0] + 31) >> 5;
 112         q = p + 1 + plen;
 113         br_i31_decode(q, qbuf, qlen);
 114         qlen = (q[0] + 31) >> 5;
 115
 116         /*
 117          * Compute phi = (p-1)*(q-1), then move it over p-1 and q-1 (that
 118          * we do not need anymore). The mulacc function sets the announced
 119          * bit length of t to be the sum of the announced bit lengths of
 120          * p-1 and q-1, which is usually exact but may overshoot by one 1
 121          * bit in some cases; we readjust it to its true length.
 122          */
 123         p[1] --;
 124         q[1] --;
 125         phi = q + 1 + qlen;
 126         br_i31_zero(phi, p[0]);
 127         br_i31_mulacc(phi, p, q);
 128         len = (phi[0] + 31) >> 5;
 129         memmove(tmp, phi, (1 + len) * sizeof *phi);
 130         phi = tmp;
 131         phi[0] = br_i31_bit_length(phi + 1, len);
 132         len = (phi[0] + 31) >> 5;
 133
 134         /*
 135          * Divide phi by public exponent e. The final remainder r must be
 136          * non-zero (otherwise, the key is invalid). The quotient is k,
 137          * which we write over phi, since we don't need phi after that.
 138          */
 139         r = 0;
 140         for (u = len; u >= 1; u --) {
 141                 /*
 142                  * Upon entry, r < e, and phi[u] < 2^31; hence,
 143                  * hi:lo < e*2^31. Thus, the produced word k[u]
 144                  * must be lower than 2^31, and the new remainder r
 145                  * is lower than e.
 146                  */
 147                 uint32_t hi, lo;
 148
 149                 hi = r >> 1;
 150                 lo = (r << 31) + phi[u];
 151                 phi[u] = br_divrem(hi, lo, e, &r);
 152         }
 153         if (r == 0) {
 154                 return 0;
 155         }
 156         k = phi;
 157
 158         /*
 159          * Compute u and v such that u*e - v*r = GCD(e,r). We use
 160          * a binary GCD algorithm, with 6 extra integers a, b,
 161          * u0, u1, v0 and v1. Initial values are:
 162          *   a = e    u0 = 1   v0 = 0
 163          *   b = r    u1 = r   v1 = e-1
 164          * The following invariants are maintained:
 165          *   a = u0*e - v0*r
 166          *   b = u1*e - v1*r
 167          *   0 < a <= e
 168          *   0 < b <= r
 169          *   0 <= u0 <= r
 170          *   0 <= v0 <= e
 171          *   0 <= u1 <= r
 172          *   0 <= v1 <= e
 173          *
 174          * At each iteration, we reduce either a or b by one bit, and
 175          * adjust u0, u1, v0 and v1 to maintain the invariants:
 176          *  - if a is even, then a <- a/2
 177          *  - otherwise, if b is even, then b <- b/2
 178          *  - otherwise, if a > b, then a <- (a-b)/2
 179          *  - otherwise, if b > a, then b <- (b-a)/2
 180          * Algorithm stops when a = b. At that point, the common value
 181          * is the GCD of e and r; it must be 1 (otherwise, the private
 182          * key or public exponent is not valid). The (u0,v0) or (u1,v1)
 183          * pairs are the solution we are looking for.
 184          *
 185          * Since either a or b is reduced by at least 1 bit at each
 186          * iteration, 62 iterations are enough to reach the end
 187          * condition.
 188          *
 189          * To maintain the invariants, we must compute the same operations
 190          * on the u* and v* values that we do on a and b:
 191          *  - When a is divided by 2, u0 and v0 must be divided by 2.
 192          *  - When b is divided by 2, u1 and v1 must be divided by 2.
 193          *  - When b is subtracted from a, u1 and v1 are subtracted from
 194          *    u0 and v0, respectively.
 195          *  - When a is subtracted from b, u0 and v0 are subtracted from
 196          *    u1 and v1, respectively.
 197          *
 198          * However, we want to keep the u* and v* values in their proper
 199          * ranges. The following remarks apply:
 200          *
 201          *  - When a is divided by 2, then a is even. Therefore:
 202          *
 203          *     * If r is odd, then u0 and v0 must have the same parity;
 204          *       if they are both odd, then adding r to u0 and e to v0
 205          *       makes them both even, and the division by 2 brings them
 206          *       back to the proper range.
 207          *
 208          *     * If r is even, then u0 must be even; if v0 is odd, then
 209          *       adding r to u0 and e to v0 makes them both even, and the
 210          *       division by 2 brings them back to the proper range.
 211          *
 212          *    Thus, all we need to do is to look at the parity of v0,
 213          *    and add (r,e) to (u0,v0) when v0 is odd. In order to avoid
 214          *    a 32-bit overflow, we can add ((r+1)/2,(e/2)+1) after the
 215          *    division (r+1 does not overflow since r < e; and (e/2)+1
 216          *    is equal to (e+1)/2 since e is odd).
 217          *
 218          *  - When we subtract b from a, three cases may occur:
 219          *
 220          *     * u1 <= u0 and v1 <= v0: just do the subtractions
 221          *
 222          *     * u1 > u0 and v1 > v0: compute:
 223          *         (u0, v0) <- (u0 + r - u1, v0 + e - v1)
 224          *
 225          *     * u1 <= u0 and v1 > v0: compute:
 226          *         (u0, v0) <- (u0 + r - u1, v0 + e - v1)
 227          *
 228          *    The fourth case (u1 > u0 and v1 <= v0) is not possible
 229          *    because it would contradict "b < a" (which is the reason
 230          *    why we subtract b from a).
 231          *
 232          *    The tricky case is the third one: from the equations, it
 233          *    seems that u0 may go out of range. However, the invariants
 234          *    and ranges of other values imply that, in that case, the
 235          *    new u0 does not actually exceed the range.
 236          *
 237          *    We can thus handle the subtraction by adding (r,e) based
 238          *    solely on the comparison between v0 and v1.
 239          */
 240         a = e;
 241         b = r;
 242         u0 = 1;
 243         v0 = 0;
 244         u1 = r;
 245         v1 = e - 1;
 246         hr = (r + 1) >> 1;
 247         he = (e >> 1) + 1;
 248         for (i = 0; i < 62; i ++) {
 249                 uint32_t oa, ob, agtb, bgta;
 250                 uint32_t sab, sba, da, db;
 251                 uint32_t ctl;
 252
 253                 oa = a & 1;                  /* 1 if a is odd */
 254                 ob = b & 1;                  /* 1 if b is odd */
 255                 agtb = GT(a, b);             /* 1 if a > b */
 256                 bgta = GT(b, a);             /* 1 if b > a */
 257
 258                 sab = oa & ob & agtb;        /* 1 if a <- a-b */
 259                 sba = oa & ob & bgta;        /* 1 if b <- b-a */
 260
 261                 /* a <- a-b, u0 <- u0-u1, v0 <- v0-v1 */
 262                 ctl = GT(v1, v0);
 263                 a -= b & -sab;
 264                 u0 -= (u1 - (r & -ctl)) & -sab;
 265                 v0 -= (v1 - (e & -ctl)) & -sab;
 266
 267                 /* b <- b-a, u1 <- u1-u0 mod r, v1 <- v1-v0 mod e */
 268                 ctl = GT(v0, v1);
 269                 b -= a & -sba;
 270                 u1 -= (u0 - (r & -ctl)) & -sba;
 271                 v1 -= (v0 - (e & -ctl)) & -sba;
 272
 273                 da = NOT(oa) | sab;          /* 1 if a <- a/2 */
 274                 db = (oa & NOT(ob)) | sba;   /* 1 if b <- b/2 */
 275
 276                 /* a <- a/2, u0 <- u0/2, v0 <- v0/2 */
 277                 ctl = v0 & 1;
 278                 a ^= (a ^ (a >> 1)) & -da;
 279                 u0 ^= (u0 ^ ((u0 >> 1) + (hr & -ctl))) & -da;
 280                 v0 ^= (v0 ^ ((v0 >> 1) + (he & -ctl))) & -da;
 281
 282                 /* b <- b/2, u1 <- u1/2 mod r, v1 <- v1/2 mod e */
 283                 ctl = v1 & 1;
 284                 b ^= (b ^ (b >> 1)) & -db;
 285                 u1 ^= (u1 ^ ((u1 >> 1) + (hr & -ctl))) & -db;
 286                 v1 ^= (v1 ^ ((v1 >> 1) + (he & -ctl))) & -db;
 287         }
 288
 289         /*
 290          * Check that the GCD is indeed 1. If not, then the key is invalid
 291          * (and there's no harm in leaking that piece of information).
 292          */
 293         if (a != 1) {
 294                 return 0;
 295         }
 296
 297         /*
 298          * Now we have u0*e - v0*r = 1. Let's compute the result as:
 299          *   d = u0 + v0*k
 300          * We still have k in the tmp[] array, and its announced bit
 301          * length is that of phi.
 302          */
 303         m = k + 1 + len;
 304         m[0] = (1 << 5) + 1;  /* bit length is 32 bits, encoded */
 305         m[1] = v0 & 0x7FFFFFFF;
 306         m[2] = v0 >> 31;
 307         z = m + 3;
 308         br_i31_zero(z, k[0]);
 309         z[1] = u0 & 0x7FFFFFFF;
 310         z[2] = u0 >> 31;
 311         br_i31_mulacc(z, k, m);
 312
 313         /*
 314          * Encode the result.
 315          */
 316         br_i31_encode(d, dlen, z);
 317         return dlen;
 318 }