1 /* crypto/ec/ecp_nistp224.c */
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
28 #include <openssl/opensslconf.h>
29 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
31 # ifndef OPENSSL_SYS_VMS
34 # include <inttypes.h>
38 # include <openssl/err.h>
41 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
42 /* even with gcc, the typedef won't work for 32-bit platforms */
43 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
46 # error "Need GCC 3.1 or later to define type uint128_t"
52 /******************************************************************************/
54 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
56 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
57 * using 64-bit coefficients called 'limbs',
58 * and sometimes (for multiplication results) as
59 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
60 * using 128-bit coefficients called 'widelimbs'.
61 * A 4-limb representation is an 'felem';
62 * a 7-widelimb representation is a 'widefelem'.
63 * Even within felems, bits of adjacent limbs overlap, and we don't always
64 * reduce the representations: we ensure that inputs to each felem
65 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
66 * and fit into a 128-bit word without overflow. The coefficients are then
67 * again partially reduced to obtain an felem satisfying a_i < 2^57.
68 * We only reduce to the unique minimal representation at the end of the
72 typedef uint64_t limb;
73 typedef uint128_t widelimb;
75 typedef limb felem[4];
76 typedef widelimb widefelem[7];
79 * Field element represented as a byte arrary. 28*8 = 224 bits is also the
80 * group order size for the elliptic curve, and we also use this type for
81 * scalars for point multiplication.
83 typedef u8 felem_bytearray[28];
85 static const felem_bytearray nistp224_curve_params[5] = {
86 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
87 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
88 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
89 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
90 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
91 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
92 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
93 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
94 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
95 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
96 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
97 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
98 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
99 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
100 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
104 * Precomputed multiples of the standard generator
105 * Points are given in coordinates (X, Y, Z) where Z normally is 1
106 * (0 for the point at infinity).
107 * For each field element, slice a_0 is word 0, etc.
109 * The table has 2 * 16 elements, starting with the following:
110 * index | bits | point
111 * ------+---------+------------------------------
114 * 2 | 0 0 1 0 | 2^56G
115 * 3 | 0 0 1 1 | (2^56 + 1)G
116 * 4 | 0 1 0 0 | 2^112G
117 * 5 | 0 1 0 1 | (2^112 + 1)G
118 * 6 | 0 1 1 0 | (2^112 + 2^56)G
119 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
120 * 8 | 1 0 0 0 | 2^168G
121 * 9 | 1 0 0 1 | (2^168 + 1)G
122 * 10 | 1 0 1 0 | (2^168 + 2^56)G
123 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
124 * 12 | 1 1 0 0 | (2^168 + 2^112)G
125 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
126 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
127 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
128 * followed by a copy of this with each element multiplied by 2^28.
130 * The reason for this is so that we can clock bits into four different
131 * locations when doing simple scalar multiplies against the base point,
132 * and then another four locations using the second 16 elements.
134 static const felem gmul[2][16][3] = { {{{0, 0, 0, 0},
137 {{0x3280d6115c1d21, 0xc1d356c2112234,
138 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
139 {0xd5819985007e34, 0x75a05a07476444,
140 0xfb4c22dfe6cd43, 0xbd376388b5f723},
142 {{0xfd9675666ebbe9, 0xbca7664d40ce5e,
143 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
144 {0x29e0b892dc9c43, 0xece8608436e662,
145 0xdc858f185310d0, 0x9812dd4eb8d321},
147 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1,
148 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
149 {0xf19f90ed50266d, 0xabf2b4bf65f9df,
150 0x313865468fafec, 0x5cb379ba910a17},
152 {{0x0641966cab26e3, 0x91fb2991fab0a0,
153 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
154 {0x7510407766af5d, 0x84d929610d5450,
155 0x81d77aae82f706, 0x6916f6d4338c5b},
157 {{0xea95ac3b1f15c6, 0x086000905e82d4,
158 0xdd323ae4d1c8b1, 0x932b56be7685a3},
159 {0x9ef93dea25dbbf, 0x41665960f390f0,
160 0xfdec76dbe2a8a7, 0x523e80f019062a},
162 {{0x822fdd26732c73, 0xa01c83531b5d0f,
163 0x363f37347c1ba4, 0xc391b45c84725c},
164 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec,
165 0xc393da7e222a7f, 0x1efb7890ede244},
167 {{0x4c9e90ca217da1, 0xd11beca79159bb,
168 0xff8d33c2c98b7c, 0x2610b39409f849},
169 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb,
170 0x966c079b753c89, 0xfe67e4e820b112},
172 {{0xe28cae2df5312d, 0xc71b61d16f5c6e,
173 0x79b7619a3e7c4c, 0x05c73240899b47},
174 {0x9f7f6382c73e3a, 0x18615165c56bda,
175 0x641fab2116fd56, 0x72855882b08394},
177 {{0x0469182f161c09, 0x74a98ca8d00fb5,
178 0xb89da93489a3e0, 0x41c98768fb0c1d},
179 {0xe5ea05fb32da81, 0x3dce9ffbca6855,
180 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
182 {{0xdab22b2333e87f, 0x4430137a5dd2f6,
183 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
184 {0x764a7df0c8fda5, 0x185ba5c3fa2044,
185 0x9281d688bcbe50, 0xc40331df893881},
187 {{0xb89530796f0f60, 0xade92bd26909a3,
188 0x1a0c83fb4884da, 0x1765bf22a5a984},
189 {0x772a9ee75db09e, 0x23bc6c67cec16f,
190 0x4c1edba8b14e2f, 0xe2a215d9611369},
192 {{0x571e509fb5efb3, 0xade88696410552,
193 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
194 {0xff9f51160f4652, 0xb47ce2495a6539,
195 0xa2946c53b582f4, 0x286d2db3ee9a60},
197 {{0x40bbd5081a44af, 0x0995183b13926c,
198 0xbcefba6f47f6d0, 0x215619e9cc0057},
199 {0x8bc94d3b0df45e, 0xf11c54a3694f6f,
200 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
202 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8,
203 0x1c29819435d2c6, 0xc813132f4c07e9},
204 {0x2891425503b11f, 0x08781030579fea,
205 0xf5426ba5cc9674, 0x1e28ebf18562bc},
207 {{0x9f31997cc864eb, 0x06cd91d28b5e4c,
208 0xff17036691a973, 0xf1aef351497c58},
209 {0xdd1f2d600564ff, 0xdead073b1402db,
210 0x74a684435bd693, 0xeea7471f962558},
215 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
216 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
218 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
219 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
221 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
222 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
224 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
225 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
227 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
228 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
230 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
231 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
233 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
234 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
236 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
237 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
239 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
240 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
242 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
243 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
245 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
246 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
248 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
249 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
251 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
252 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
254 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
255 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
257 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
258 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
262 /* Precomputation for the group generator. */
264 felem g_pre_comp[2][16][3];
268 const EC_METHOD *EC_GFp_nistp224_method(void)
270 static const EC_METHOD ret = {
271 EC_FLAGS_DEFAULT_OCT,
272 NID_X9_62_prime_field,
273 ec_GFp_nistp224_group_init,
274 ec_GFp_simple_group_finish,
275 ec_GFp_simple_group_clear_finish,
276 ec_GFp_nist_group_copy,
277 ec_GFp_nistp224_group_set_curve,
278 ec_GFp_simple_group_get_curve,
279 ec_GFp_simple_group_get_degree,
280 ec_GFp_simple_group_check_discriminant,
281 ec_GFp_simple_point_init,
282 ec_GFp_simple_point_finish,
283 ec_GFp_simple_point_clear_finish,
284 ec_GFp_simple_point_copy,
285 ec_GFp_simple_point_set_to_infinity,
286 ec_GFp_simple_set_Jprojective_coordinates_GFp,
287 ec_GFp_simple_get_Jprojective_coordinates_GFp,
288 ec_GFp_simple_point_set_affine_coordinates,
289 ec_GFp_nistp224_point_get_affine_coordinates,
290 0 /* point_set_compressed_coordinates */ ,
295 ec_GFp_simple_invert,
296 ec_GFp_simple_is_at_infinity,
297 ec_GFp_simple_is_on_curve,
299 ec_GFp_simple_make_affine,
300 ec_GFp_simple_points_make_affine,
301 ec_GFp_nistp224_points_mul,
302 ec_GFp_nistp224_precompute_mult,
303 ec_GFp_nistp224_have_precompute_mult,
304 ec_GFp_nist_field_mul,
305 ec_GFp_nist_field_sqr,
307 0 /* field_encode */ ,
308 0 /* field_decode */ ,
309 0 /* field_set_to_one */
316 * Helper functions to convert field elements to/from internal representation
318 static void bin28_to_felem(felem out, const u8 in[28])
320 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
321 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
322 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
323 out[3] = (*((const uint64_t *)(in+20))) >> 8;
326 static void felem_to_bin28(u8 out[28], const felem in)
329 for (i = 0; i < 7; ++i) {
330 out[i] = in[0] >> (8 * i);
331 out[i + 7] = in[1] >> (8 * i);
332 out[i + 14] = in[2] >> (8 * i);
333 out[i + 21] = in[3] >> (8 * i);
337 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
338 static void flip_endian(u8 *out, const u8 *in, unsigned len)
341 for (i = 0; i < len; ++i)
342 out[i] = in[len - 1 - i];
345 /* From OpenSSL BIGNUM to internal representation */
346 static int BN_to_felem(felem out, const BIGNUM *bn)
348 felem_bytearray b_in;
349 felem_bytearray b_out;
352 /* BN_bn2bin eats leading zeroes */
353 memset(b_out, 0, sizeof(b_out));
354 num_bytes = BN_num_bytes(bn);
355 if (num_bytes > sizeof(b_out)) {
356 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
359 if (BN_is_negative(bn)) {
360 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
363 num_bytes = BN_bn2bin(bn, b_in);
364 flip_endian(b_out, b_in, num_bytes);
365 bin28_to_felem(out, b_out);
369 /* From internal representation to OpenSSL BIGNUM */
370 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
372 felem_bytearray b_in, b_out;
373 felem_to_bin28(b_in, in);
374 flip_endian(b_out, b_in, sizeof(b_out));
375 return BN_bin2bn(b_out, sizeof(b_out), out);
378 /******************************************************************************/
382 * Field operations, using the internal representation of field elements.
383 * NB! These operations are specific to our point multiplication and cannot be
384 * expected to be correct in general - e.g., multiplication with a large scalar
385 * will cause an overflow.
389 static void felem_one(felem out)
397 static void felem_assign(felem out, const felem in)
405 /* Sum two field elements: out += in */
406 static void felem_sum(felem out, const felem in)
414 /* Get negative value: out = -in */
415 /* Assumes in[i] < 2^57 */
416 static void felem_neg(felem out, const felem in)
418 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
419 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
420 static const limb two58m42m2 = (((limb) 1) << 58) -
421 (((limb) 1) << 42) - (((limb) 1) << 2);
423 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
424 out[0] = two58p2 - in[0];
425 out[1] = two58m42m2 - in[1];
426 out[2] = two58m2 - in[2];
427 out[3] = two58m2 - in[3];
430 /* Subtract field elements: out -= in */
431 /* Assumes in[i] < 2^57 */
432 static void felem_diff(felem out, const felem in)
434 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
435 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
436 static const limb two58m42m2 = (((limb) 1) << 58) -
437 (((limb) 1) << 42) - (((limb) 1) << 2);
439 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
441 out[1] += two58m42m2;
451 /* Subtract in unreduced 128-bit mode: out -= in */
452 /* Assumes in[i] < 2^119 */
453 static void widefelem_diff(widefelem out, const widefelem in)
455 static const widelimb two120 = ((widelimb) 1) << 120;
456 static const widelimb two120m64 = (((widelimb) 1) << 120) -
457 (((widelimb) 1) << 64);
458 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
459 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
461 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
466 out[4] += two120m104m64;
479 /* Subtract in mixed mode: out128 -= in64 */
481 static void felem_diff_128_64(widefelem out, const felem in)
483 static const widelimb two64p8 = (((widelimb) 1) << 64) +
484 (((widelimb) 1) << 8);
485 static const widelimb two64m8 = (((widelimb) 1) << 64) -
486 (((widelimb) 1) << 8);
487 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
488 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
490 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
492 out[1] += two64m48m8;
503 * Multiply a field element by a scalar: out = out * scalar The scalars we
504 * actually use are small, so results fit without overflow
506 static void felem_scalar(felem out, const limb scalar)
515 * Multiply an unreduced field element by a scalar: out = out * scalar The
516 * scalars we actually use are small, so results fit without overflow
518 static void widefelem_scalar(widefelem out, const widelimb scalar)
529 /* Square a field element: out = in^2 */
530 static void felem_square(widefelem out, const felem in)
532 limb tmp0, tmp1, tmp2;
536 out[0] = ((widelimb) in[0]) * in[0];
537 out[1] = ((widelimb) in[0]) * tmp1;
538 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
539 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
540 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
541 out[5] = ((widelimb) in[3]) * tmp2;
542 out[6] = ((widelimb) in[3]) * in[3];
545 /* Multiply two field elements: out = in1 * in2 */
546 static void felem_mul(widefelem out, const felem in1, const felem in2)
548 out[0] = ((widelimb) in1[0]) * in2[0];
549 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
550 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
551 ((widelimb) in1[2]) * in2[0];
552 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
553 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
554 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
555 ((widelimb) in1[3]) * in2[1];
556 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
557 out[6] = ((widelimb) in1[3]) * in2[3];
561 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
562 * Requires in[i] < 2^126,
563 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
564 static void felem_reduce(felem out, const widefelem in)
566 static const widelimb two127p15 = (((widelimb) 1) << 127) +
567 (((widelimb) 1) << 15);
568 static const widelimb two127m71 = (((widelimb) 1) << 127) -
569 (((widelimb) 1) << 71);
570 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
571 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
574 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
575 output[0] = in[0] + two127p15;
576 output[1] = in[1] + two127m71m55;
577 output[2] = in[2] + two127m71;
581 /* Eliminate in[4], in[5], in[6] */
582 output[4] += in[6] >> 16;
583 output[3] += (in[6] & 0xffff) << 40;
586 output[3] += in[5] >> 16;
587 output[2] += (in[5] & 0xffff) << 40;
590 output[2] += output[4] >> 16;
591 output[1] += (output[4] & 0xffff) << 40;
592 output[0] -= output[4];
594 /* Carry 2 -> 3 -> 4 */
595 output[3] += output[2] >> 56;
596 output[2] &= 0x00ffffffffffffff;
598 output[4] = output[3] >> 56;
599 output[3] &= 0x00ffffffffffffff;
601 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
603 /* Eliminate output[4] */
604 output[2] += output[4] >> 16;
605 /* output[2] < 2^56 + 2^56 = 2^57 */
606 output[1] += (output[4] & 0xffff) << 40;
607 output[0] -= output[4];
609 /* Carry 0 -> 1 -> 2 -> 3 */
610 output[1] += output[0] >> 56;
611 out[0] = output[0] & 0x00ffffffffffffff;
613 output[2] += output[1] >> 56;
614 /* output[2] < 2^57 + 2^72 */
615 out[1] = output[1] & 0x00ffffffffffffff;
616 output[3] += output[2] >> 56;
617 /* output[3] <= 2^56 + 2^16 */
618 out[2] = output[2] & 0x00ffffffffffffff;
621 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
622 * out[3] <= 2^56 + 2^16 (due to final carry),
628 static void felem_square_reduce(felem out, const felem in)
631 felem_square(tmp, in);
632 felem_reduce(out, tmp);
635 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
638 felem_mul(tmp, in1, in2);
639 felem_reduce(out, tmp);
643 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
644 * call felem_reduce first)
646 static void felem_contract(felem out, const felem in)
648 static const int64_t two56 = ((limb) 1) << 56;
649 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
650 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
656 /* Case 1: a = 1 iff in >= 2^224 */
660 tmp[3] &= 0x00ffffffffffffff;
662 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
663 * and the lower part is non-zero
665 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
666 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
667 a &= 0x00ffffffffffffff;
668 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
670 /* subtract 2^224 - 2^96 + 1 if a is all-one */
671 tmp[3] &= a ^ 0xffffffffffffffff;
672 tmp[2] &= a ^ 0xffffffffffffffff;
673 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
677 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
678 * non-zero, so we only need one step
684 /* carry 1 -> 2 -> 3 */
685 tmp[2] += tmp[1] >> 56;
686 tmp[1] &= 0x00ffffffffffffff;
688 tmp[3] += tmp[2] >> 56;
689 tmp[2] &= 0x00ffffffffffffff;
691 /* Now 0 <= out < p */
699 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
700 * elements are reduced to in < 2^225, so we only need to check three cases:
701 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
703 static limb felem_is_zero(const felem in)
705 limb zero, two224m96p1, two225m97p2;
707 zero = in[0] | in[1] | in[2] | in[3];
708 zero = (((int64_t) (zero) - 1) >> 63) & 1;
709 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
710 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
711 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
712 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
713 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
714 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
715 return (zero | two224m96p1 | two225m97p2);
718 static int felem_is_zero_int(const void *in)
720 return (int)(felem_is_zero(in) & ((limb) 1));
723 /* Invert a field element */
724 /* Computation chain copied from djb's code */
725 static void felem_inv(felem out, const felem in)
727 felem ftmp, ftmp2, ftmp3, ftmp4;
731 felem_square(tmp, in);
732 felem_reduce(ftmp, tmp); /* 2 */
733 felem_mul(tmp, in, ftmp);
734 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
735 felem_square(tmp, ftmp);
736 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
737 felem_mul(tmp, in, ftmp);
738 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
739 felem_square(tmp, ftmp);
740 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
741 felem_square(tmp, ftmp2);
742 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
743 felem_square(tmp, ftmp2);
744 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
745 felem_mul(tmp, ftmp2, ftmp);
746 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
747 felem_square(tmp, ftmp);
748 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
749 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
750 felem_square(tmp, ftmp2);
751 felem_reduce(ftmp2, tmp);
753 felem_mul(tmp, ftmp2, ftmp);
754 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
755 felem_square(tmp, ftmp2);
756 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
757 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
758 felem_square(tmp, ftmp3);
759 felem_reduce(ftmp3, tmp);
761 felem_mul(tmp, ftmp3, ftmp2);
762 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
763 felem_square(tmp, ftmp2);
764 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
765 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
766 felem_square(tmp, ftmp3);
767 felem_reduce(ftmp3, tmp);
769 felem_mul(tmp, ftmp3, ftmp2);
770 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
771 felem_square(tmp, ftmp3);
772 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
773 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
774 felem_square(tmp, ftmp4);
775 felem_reduce(ftmp4, tmp);
777 felem_mul(tmp, ftmp3, ftmp4);
778 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
779 felem_square(tmp, ftmp3);
780 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
781 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
782 felem_square(tmp, ftmp4);
783 felem_reduce(ftmp4, tmp);
785 felem_mul(tmp, ftmp2, ftmp4);
786 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
787 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
788 felem_square(tmp, ftmp2);
789 felem_reduce(ftmp2, tmp);
791 felem_mul(tmp, ftmp2, ftmp);
792 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
793 felem_square(tmp, ftmp);
794 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
795 felem_mul(tmp, ftmp, in);
796 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
797 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
798 felem_square(tmp, ftmp);
799 felem_reduce(ftmp, tmp);
801 felem_mul(tmp, ftmp, ftmp3);
802 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
806 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
809 static void copy_conditional(felem out, const felem in, limb icopy)
813 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
815 const limb copy = -icopy;
816 for (i = 0; i < 4; ++i) {
817 const limb tmp = copy & (in[i] ^ out[i]);
822 /******************************************************************************/
824 * ELLIPTIC CURVE POINT OPERATIONS
826 * Points are represented in Jacobian projective coordinates:
827 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
828 * or to the point at infinity if Z == 0.
833 * Double an elliptic curve point:
834 * (X', Y', Z') = 2 * (X, Y, Z), where
835 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
836 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
837 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
838 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
839 * while x_out == y_in is not (maybe this works, but it's not tested).
842 point_double(felem x_out, felem y_out, felem z_out,
843 const felem x_in, const felem y_in, const felem z_in)
846 felem delta, gamma, beta, alpha, ftmp, ftmp2;
848 felem_assign(ftmp, x_in);
849 felem_assign(ftmp2, x_in);
852 felem_square(tmp, z_in);
853 felem_reduce(delta, tmp);
856 felem_square(tmp, y_in);
857 felem_reduce(gamma, tmp);
860 felem_mul(tmp, x_in, gamma);
861 felem_reduce(beta, tmp);
863 /* alpha = 3*(x-delta)*(x+delta) */
864 felem_diff(ftmp, delta);
865 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
866 felem_sum(ftmp2, delta);
867 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
868 felem_scalar(ftmp2, 3);
869 /* ftmp2[i] < 3 * 2^58 < 2^60 */
870 felem_mul(tmp, ftmp, ftmp2);
871 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
872 felem_reduce(alpha, tmp);
874 /* x' = alpha^2 - 8*beta */
875 felem_square(tmp, alpha);
876 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
877 felem_assign(ftmp, beta);
878 felem_scalar(ftmp, 8);
879 /* ftmp[i] < 8 * 2^57 = 2^60 */
880 felem_diff_128_64(tmp, ftmp);
881 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
882 felem_reduce(x_out, tmp);
884 /* z' = (y + z)^2 - gamma - delta */
885 felem_sum(delta, gamma);
886 /* delta[i] < 2^57 + 2^57 = 2^58 */
887 felem_assign(ftmp, y_in);
888 felem_sum(ftmp, z_in);
889 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
890 felem_square(tmp, ftmp);
891 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
892 felem_diff_128_64(tmp, delta);
893 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
894 felem_reduce(z_out, tmp);
896 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
897 felem_scalar(beta, 4);
898 /* beta[i] < 4 * 2^57 = 2^59 */
899 felem_diff(beta, x_out);
900 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
901 felem_mul(tmp, alpha, beta);
902 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
903 felem_square(tmp2, gamma);
904 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
905 widefelem_scalar(tmp2, 8);
906 /* tmp2[i] < 8 * 2^116 = 2^119 */
907 widefelem_diff(tmp, tmp2);
908 /* tmp[i] < 2^119 + 2^120 < 2^121 */
909 felem_reduce(y_out, tmp);
913 * Add two elliptic curve points:
914 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
915 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
916 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
917 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
918 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
919 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
921 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
925 * This function is not entirely constant-time: it includes a branch for
926 * checking whether the two input points are equal, (while not equal to the
927 * point at infinity). This case never happens during single point
928 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
930 static void point_add(felem x3, felem y3, felem z3,
931 const felem x1, const felem y1, const felem z1,
932 const int mixed, const felem x2, const felem y2,
935 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
937 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
941 felem_square(tmp, z2);
942 felem_reduce(ftmp2, tmp);
945 felem_mul(tmp, ftmp2, z2);
946 felem_reduce(ftmp4, tmp);
948 /* ftmp4 = z2^3*y1 */
949 felem_mul(tmp2, ftmp4, y1);
950 felem_reduce(ftmp4, tmp2);
952 /* ftmp2 = z2^2*x1 */
953 felem_mul(tmp2, ftmp2, x1);
954 felem_reduce(ftmp2, tmp2);
957 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
960 /* ftmp4 = z2^3*y1 */
961 felem_assign(ftmp4, y1);
963 /* ftmp2 = z2^2*x1 */
964 felem_assign(ftmp2, x1);
968 felem_square(tmp, z1);
969 felem_reduce(ftmp, tmp);
972 felem_mul(tmp, ftmp, z1);
973 felem_reduce(ftmp3, tmp);
976 felem_mul(tmp, ftmp3, y2);
977 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
979 /* ftmp3 = z1^3*y2 - z2^3*y1 */
980 felem_diff_128_64(tmp, ftmp4);
981 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
982 felem_reduce(ftmp3, tmp);
985 felem_mul(tmp, ftmp, x2);
986 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
988 /* ftmp = z1^2*x2 - z2^2*x1 */
989 felem_diff_128_64(tmp, ftmp2);
990 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
991 felem_reduce(ftmp, tmp);
994 * the formulae are incorrect if the points are equal so we check for
995 * this and do doubling if this happens
997 x_equal = felem_is_zero(ftmp);
998 y_equal = felem_is_zero(ftmp3);
999 z1_is_zero = felem_is_zero(z1);
1000 z2_is_zero = felem_is_zero(z2);
1001 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
1002 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1003 point_double(x3, y3, z3, x1, y1, z1);
1009 felem_mul(tmp, z1, z2);
1010 felem_reduce(ftmp5, tmp);
1012 /* special case z2 = 0 is handled later */
1013 felem_assign(ftmp5, z1);
1016 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1017 felem_mul(tmp, ftmp, ftmp5);
1018 felem_reduce(z_out, tmp);
1020 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1021 felem_assign(ftmp5, ftmp);
1022 felem_square(tmp, ftmp);
1023 felem_reduce(ftmp, tmp);
1025 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1026 felem_mul(tmp, ftmp, ftmp5);
1027 felem_reduce(ftmp5, tmp);
1029 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1030 felem_mul(tmp, ftmp2, ftmp);
1031 felem_reduce(ftmp2, tmp);
1033 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1034 felem_mul(tmp, ftmp4, ftmp5);
1035 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1037 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1038 felem_square(tmp2, ftmp3);
1039 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1041 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1042 felem_diff_128_64(tmp2, ftmp5);
1043 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1045 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1046 felem_assign(ftmp5, ftmp2);
1047 felem_scalar(ftmp5, 2);
1048 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1051 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1052 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1054 felem_diff_128_64(tmp2, ftmp5);
1055 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1056 felem_reduce(x_out, tmp2);
1058 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1059 felem_diff(ftmp2, x_out);
1060 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1063 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1065 felem_mul(tmp2, ftmp3, ftmp2);
1066 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1069 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1070 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1072 widefelem_diff(tmp2, tmp);
1073 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1074 felem_reduce(y_out, tmp2);
1077 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1078 * the point at infinity, so we need to check for this separately
1082 * if point 1 is at infinity, copy point 2 to output, and vice versa
1084 copy_conditional(x_out, x2, z1_is_zero);
1085 copy_conditional(x_out, x1, z2_is_zero);
1086 copy_conditional(y_out, y2, z1_is_zero);
1087 copy_conditional(y_out, y1, z2_is_zero);
1088 copy_conditional(z_out, z2, z1_is_zero);
1089 copy_conditional(z_out, z1, z2_is_zero);
1090 felem_assign(x3, x_out);
1091 felem_assign(y3, y_out);
1092 felem_assign(z3, z_out);
1096 * select_point selects the |idx|th point from a precomputation table and
1098 * The pre_comp array argument should be size of |size| argument
/*
 * Constant-time table lookup: copies entry |idx| of |pre_comp| (a table of
 * |size| Jacobian points, 3 felems each) into |out|.  Every table entry is
 * read so that neither branches nor the memory-access pattern depend on
 * the secret index.
 */
1100 static void select_point(const u64 idx, unsigned int size,
1101 const felem pre_comp[][3], felem out[3])
1104 limb *outlimbs = &out[0][0];
/* start from all-zero limbs; the selected entry is OR-ed in below */
1105 memset(outlimbs, 0, 3 * sizeof(felem));
/* scan the entire table regardless of idx (constant time) */
1107 for (i = 0; i < size; i++) {
1108 const limb *inlimbs = &pre_comp[i][0][0];
/* mask is all-ones only when i == idx; 3 felems of 4 limbs each */
1115 for (j = 0; j < 4 * 3; j++)
1116 outlimbs[j] |= inlimbs[j] & mask;
1120 /* get_bit returns the |i|th bit in |in| */
1121 static char get_bit(const felem_bytearray in, unsigned i)
/* little-endian bit numbering: byte i/8, bit i%8 within that byte */
1125 return (in[i >> 3] >> (i & 7)) & 1;
1129 * Interleaved point multiplication using precomputed point multiples: The
1130 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1131 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1132 * generator, using certain (large) precomputed multiples in g_pre_comp.
1133 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1135 static void batch_mul(felem x_out, felem y_out, felem z_out,
1136 const felem_bytearray scalars[],
1137 const unsigned num_points, const u8 *g_scalar,
1138 const int mixed, const felem pre_comp[][17][3],
1139 const felem g_pre_comp[2][16][3])
/* gen_mul is nonzero iff a generator multiple must be folded in */
1143 unsigned gen_mul = (g_scalar != NULL);
/* nq is the running result; tmp holds the point selected each round */
1144 felem nq[3], tmp[4];
1148 /* set nq to the point at infinity */
1149 memset(nq, 0, 3 * sizeof(felem));
1152 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1153 * of the generator (two in each of the last 28 rounds) and additions of
1154 * other points multiples (every 5th round).
1156 skip = 1; /* save two point operations in the first
/*
 * 220 is the topmost window position needed for 224-bit scalars; with
 * no arbitrary points, only the generator comb (positions 27..0) runs.
 */
1158 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1161 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1163 /* add multiples of the generator */
1164 if (gen_mul && (i <= 27)) {
1165 /* first, look 28 bits upwards */
/* assemble a 4-bit comb index from bits i+196, i+140, i+84, i+28 */
1166 bits = get_bit(g_scalar, i + 196) << 3;
1167 bits |= get_bit(g_scalar, i + 140) << 2;
1168 bits |= get_bit(g_scalar, i + 84) << 1;
1169 bits |= get_bit(g_scalar, i + 28);
1170 /* select the point to add, in constant time */
1171 select_point(bits, 16, g_pre_comp[1], tmp);
1174 /* value 1 below is argument for "mixed" */
1175 point_add(nq[0], nq[1], nq[2],
1176 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
/* NOTE(review): presumably the 'skip' branch — nq still at infinity,
 * so copy instead of adding; confirm against the full file */
1178 memcpy(nq, tmp, 3 * sizeof(felem));
1182 /* second, look at the current position */
1183 bits = get_bit(g_scalar, i + 168) << 3;
1184 bits |= get_bit(g_scalar, i + 112) << 2;
1185 bits |= get_bit(g_scalar, i + 56) << 1;
1186 bits |= get_bit(g_scalar, i);
1187 /* select the point to add, in constant time */
1188 select_point(bits, 16, g_pre_comp[0], tmp);
1189 point_add(nq[0], nq[1], nq[2],
1190 nq[0], nq[1], nq[2],
1191 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1194 /* do other additions every 5 doublings */
1195 if (num_points && (i % 5 == 0)) {
1196 /* loop over all scalars */
1197 for (num = 0; num < num_points; ++num) {
/* gather a window around bit position i; recoded to signed form below */
1198 bits = get_bit(scalars[num], i + 4) << 5;
1199 bits |= get_bit(scalars[num], i + 3) << 4;
1200 bits |= get_bit(scalars[num], i + 2) << 3;
1201 bits |= get_bit(scalars[num], i + 1) << 2;
1202 bits |= get_bit(scalars[num], i) << 1;
1203 bits |= get_bit(scalars[num], i - 1);
1204 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1206 /* select the point to add or subtract */
1207 select_point(digit, 17, pre_comp[num], tmp);
/* constant-time negate of Y when the recoded digit is negative */
1208 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1210 copy_conditional(tmp[1], tmp[3], sign);
1213 point_add(nq[0], nq[1], nq[2],
1214 nq[0], nq[1], nq[2],
1215 mixed, tmp[0], tmp[1], tmp[2]);
1217 memcpy(nq, tmp, 3 * sizeof(felem));
/* hand back the accumulated result in Jacobian coordinates */
1223 felem_assign(x_out, nq[0]);
1224 felem_assign(y_out, nq[1]);
1225 felem_assign(z_out, nq[2]);
1228 /******************************************************************************/
1230 * FUNCTIONS TO MANAGE PRECOMPUTATION
/*
 * Allocate a fresh NISTP224_PRE_COMP with a zeroed generator table and a
 * reference count of 1; raises ERR_R_MALLOC_FAILURE if allocation fails.
 */
1233 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1235 NISTP224_PRE_COMP *ret = NULL;
1236 ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof(*ret));
1238 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1241 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1242 ret->references = 1;
/*
 * "Duplicate" hook for EC extra data: precomputations are immutable once
 * published, so duplication is a thread-safe reference-count bump.
 */
1246 static void *nistp224_pre_comp_dup(void *src_)
1248 NISTP224_PRE_COMP *src = src_;
1250 /* no need to actually copy, these objects never change! */
1251 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
/* Release one reference to the precomputation (freed when it hits 0). */
1256 static void nistp224_pre_comp_free(void *pre_)
1259 NISTP224_PRE_COMP *pre = pre_;
/* atomic decrement under CRYPTO_LOCK_EC_PRE_COMP */
1264 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
/*
 * Like nistp224_pre_comp_free, but scrubs the memory with OPENSSL_cleanse
 * before release so freed heap does not retain table contents.
 */
1271 static void nistp224_pre_comp_clear_free(void *pre_)
1274 NISTP224_PRE_COMP *pre = pre_;
1279 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1283 OPENSSL_cleanse(pre, sizeof(*pre));
1287 /******************************************************************************/
1289 * OPENSSL EC_METHOD FUNCTIONS
/* Standard GFp group init, then record that P-224 has a = -3 (mod p). */
1292 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1295 ret = ec_GFp_simple_group_init(group);
/* a = -3 enables the faster doubling formulas used elsewhere */
1296 group->a_is_minus3 = 1;
/*
 * Accept only the genuine NIST P-224 parameters: compare the supplied
 * (p, a, b) against the built-in constants, install the fast NIST modular
 * reduction, then defer to the generic set_curve implementation.
 */
1300 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1301 const BIGNUM *a, const BIGNUM *b,
1305 BN_CTX *new_ctx = NULL;
1306 BIGNUM *curve_p, *curve_a, *curve_b;
/* create a scratch BN_CTX (freed below through new_ctx) */
1309 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1312 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1313 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1314 ((curve_b = BN_CTX_get(ctx)) == NULL))
/* load the canonical P-224 p, a, b constants */
1316 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1317 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1318 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
/* any mismatch means the caller's curve is not P-224 */
1319 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1320 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1321 EC_R_WRONG_CURVE_PARAMETERS);
/* dedicated reduction routine for the NIST P-224 prime */
1324 group->field_mod_func = BN_nist_mod_224;
1325 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1328 if (new_ctx != NULL)
1329 BN_CTX_free(new_ctx);
1334 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1337 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1338 const EC_POINT *point,
1339 BIGNUM *x, BIGNUM *y,
1342 felem z1, z2, x_in, y_in, x_out, y_out;
/* the point at infinity has no affine representation */
1345 if (EC_POINT_is_at_infinity(group, point)) {
1346 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1347 EC_R_POINT_AT_INFINITY);
1350 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1351 (!BN_to_felem(z1, &point->Z)))
/* NOTE(review): z2 is presumably 1/Z here (felem_inverse call not
 * visible in this view) -- confirm against the full file */
/* x' = X / Z^2 */
1354 felem_square(tmp, z2);
1355 felem_reduce(z1, tmp);
1356 felem_mul(tmp, x_in, z1);
1357 felem_reduce(x_in, tmp);
/* contract to the unique minimal representation before exporting */
1358 felem_contract(x_out, x_in);
1360 if (!felem_to_BN(x, x_out)) {
1361 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
/* y' = Y / Z^3 */
1366 felem_mul(tmp, z1, z2);
1367 felem_reduce(z1, tmp);
1368 felem_mul(tmp, y_in, z1);
1369 felem_reduce(y_in, tmp);
1370 felem_contract(y_out, y_in);
1372 if (!felem_to_BN(y, y_out)) {
1373 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
/*
 * Batch-convert |num| Jacobian points to affine form via the shared nistp
 * helper (cost of field inversion amortized across all points).
 * tmp_felems must provide num+1 scratch felems.
 */
1381 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1382 felem tmp_felems[ /* num+1 */ ])
1385 * Runs in constant time, unless an input is the point at infinity (which
1386 * normally shouldn't happen).
/* felem callbacks are adapted to the generic void* interface by casts */
1388 ec_GFp_nistp_points_make_affine_internal(num,
1392 (void (*)(void *))felem_one,
1394 (void (*)(void *, const void *))
1396 (void (*)(void *, const void *))
1397 felem_square_reduce, (void (*)
1404 (void (*)(void *, const void *))
1406 (void (*)(void *, const void *))
1411 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1412 * values. Result is stored in r (r can equal one of the inputs).
1414 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1415 const BIGNUM *scalar, size_t num,
1416 const EC_POINT *points[],
1417 const BIGNUM *scalars[], BN_CTX *ctx)
1423 BN_CTX *new_ctx = NULL;
1424 BIGNUM *x, *y, *z, *tmp_scalar;
/* g_secret: byte form of the generator scalar; secrets: the others */
1425 felem_bytearray g_secret;
1426 felem_bytearray *secrets = NULL;
/* per-point tables of small multiples 0*P .. 16*P */
1427 felem(*pre_comp)[17][3] = NULL;
1428 felem *tmp_felems = NULL;
1429 felem_bytearray tmp;
1431 int have_pre_comp = 0;
1432 size_t num_points = num;
1433 felem x_in, y_in, z_in, x_out, y_out, z_out;
1434 NISTP224_PRE_COMP *pre = NULL;
1435 const felem(*g_pre_comp)[16][3] = NULL;
1436 EC_POINT *generator = NULL;
1437 const EC_POINT *p = NULL;
1438 const BIGNUM *p_scalar = NULL;
1441 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1444 if (((x = BN_CTX_get(ctx)) == NULL) ||
1445 ((y = BN_CTX_get(ctx)) == NULL) ||
1446 ((z = BN_CTX_get(ctx)) == NULL) ||
1447 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
/* look for usable generator precomputation (cached or built-in) */
1450 if (scalar != NULL) {
1451 pre = EC_EX_DATA_get_data(group->extra_data,
1452 nistp224_pre_comp_dup,
1453 nistp224_pre_comp_free,
1454 nistp224_pre_comp_clear_free);
1456 /* we have precomputation, try to use it */
1457 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1459 /* try to use the standard precomputation */
1460 g_pre_comp = &gmul[0];
1461 generator = EC_POINT_new(group);
1462 if (generator == NULL)
1464 /* get the generator from precomputation */
1465 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1466 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1467 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1468 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1471 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1475 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1476 /* precomputation matches generator */
1480 * we don't have valid precomputation: treat the generator as a
/* generator becomes just another input point for batch_mul */
1483 num_points = num_points + 1;
1486 if (num_points > 0) {
1487 if (num_points >= 3) {
1489 * unless we precompute multiples for just one or two points,
1490 * converting those into affine form is time well spent
1494 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1495 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1498 OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1499 if ((secrets == NULL) || (pre_comp == NULL)
1500 || (mixed && (tmp_felems == NULL))) {
1501 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1506 * we treat NULL scalars as 0, and NULL points as points at infinity,
1507 * i.e., they contribute nothing to the linear combination
1509 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1510 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1511 for (i = 0; i < num_points; ++i) {
1515 p = EC_GROUP_get0_generator(group);
1518 /* the i^th point */
1521 p_scalar = scalars[i];
1523 if ((p_scalar != NULL) && (p != NULL)) {
1524 /* reduce scalar to 0 <= scalar < 2^224 */
1525 if ((BN_num_bits(p_scalar) > 224)
1526 || (BN_is_negative(p_scalar))) {
1528 * this is an unusual input, and we don't guarantee
1531 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1532 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1535 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1537 num_bytes = BN_bn2bin(p_scalar, tmp);
/* scalars are consumed little-endian by get_bit */
1538 flip_endian(secrets[i], tmp, num_bytes);
1539 /* precompute multiples */
1540 if ((!BN_to_felem(x_out, &p->X)) ||
1541 (!BN_to_felem(y_out, &p->Y)) ||
1542 (!BN_to_felem(z_out, &p->Z)))
1544 felem_assign(pre_comp[i][1][0], x_out);
1545 felem_assign(pre_comp[i][1][1], y_out);
1546 felem_assign(pre_comp[i][1][2], z_out);
/* odd j built as (j-1)*P + P; even j as double of (j/2)*P */
1547 for (j = 2; j <= 16; ++j) {
1549 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1550 pre_comp[i][j][2], pre_comp[i][1][0],
1551 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1552 pre_comp[i][j - 1][0],
1553 pre_comp[i][j - 1][1],
1554 pre_comp[i][j - 1][2]);
1556 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1557 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1558 pre_comp[i][j / 2][1],
1559 pre_comp[i][j / 2][2]);
1565 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1568 /* the scalar for the generator */
1569 if ((scalar != NULL) && (have_pre_comp)) {
1570 memset(g_secret, 0, sizeof(g_secret));
1571 /* reduce scalar to 0 <= scalar < 2^224 */
1572 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1574 * this is an unusual input, and we don't guarantee
1577 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1578 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1581 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1583 num_bytes = BN_bn2bin(scalar, tmp);
1584 flip_endian(g_secret, tmp, num_bytes);
1585 /* do the multiplication with generator precomputation */
1586 batch_mul(x_out, y_out, z_out,
1587 (const felem_bytearray(*))secrets, num_points,
1589 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1591 /* do the multiplication without generator precomputation */
1592 batch_mul(x_out, y_out, z_out,
1593 (const felem_bytearray(*))secrets, num_points,
1594 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1595 /* reduce the output to its unique minimal representation */
1596 felem_contract(x_in, x_out);
1597 felem_contract(y_in, y_out);
1598 felem_contract(z_in, z_out);
1599 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1600 (!felem_to_BN(z, z_in))) {
1601 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1604 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
/* cleanup: release everything created locally */
1608 if (generator != NULL)
1609 EC_POINT_free(generator);
1610 if (new_ctx != NULL)
1611 BN_CTX_free(new_ctx);
1612 if (secrets != NULL)
1613 OPENSSL_free(secrets);
1614 if (pre_comp != NULL)
1615 OPENSSL_free(pre_comp);
1616 if (tmp_felems != NULL)
1617 OPENSSL_free(tmp_felems);
/*
 * Build and cache generator precomputation for repeated G-multiplies:
 * two 16-entry comb tables over the bit positions batch_mul consumes.
 */
1621 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1624 NISTP224_PRE_COMP *pre = NULL;
1626 BN_CTX *new_ctx = NULL;
1628 EC_POINT *generator = NULL;
/* scratch space for the batch affine conversion at the end */
1629 felem tmp_felems[32];
1631 /* throw away old precomputation */
1632 EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1633 nistp224_pre_comp_free,
1634 nistp224_pre_comp_clear_free);
1636 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1639 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1641 /* get the generator */
1642 if (group->generator == NULL)
1644 generator = EC_POINT_new(group);
1645 if (generator == NULL)
/* construct the standard P-224 generator from the built-in constants */
1647 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1648 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1649 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1651 if ((pre = nistp224_pre_comp_new()) == NULL)
1654 * if the generator is the standard one, use built-in precomputation
1656 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1657 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
/* otherwise seed table slot [0][1] with the group's own generator */
1660 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1661 (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1662 (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1665 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1666 * 2^140*G, 2^196*G for the second one
1668 for (i = 1; i <= 8; i <<= 1) {
/* table 1 entry i = 2^28 * (table 0 entry i): 1 + 27 doublings */
1669 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1670 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1671 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1672 for (j = 0; j < 27; ++j) {
1673 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1674 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1675 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
/* table 0 entry 2i = 2^28 * (table 1 entry i): 28 more doublings */
1679 point_double(pre->g_pre_comp[0][2 * i][0],
1680 pre->g_pre_comp[0][2 * i][1],
1681 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1682 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1683 for (j = 0; j < 27; ++j) {
1684 point_double(pre->g_pre_comp[0][2 * i][0],
1685 pre->g_pre_comp[0][2 * i][1],
1686 pre->g_pre_comp[0][2 * i][2],
1687 pre->g_pre_comp[0][2 * i][0],
1688 pre->g_pre_comp[0][2 * i][1],
1689 pre->g_pre_comp[0][2 * i][2]);
/* fill in every remaining table index as a sum of the power entries */
1692 for (i = 0; i < 2; i++) {
1693 /* g_pre_comp[i][0] is the point at infinity */
1694 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1695 /* the remaining multiples */
1696 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1697 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1698 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1699 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1700 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1701 pre->g_pre_comp[i][2][2]);
1702 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1703 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1704 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1705 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1706 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1707 pre->g_pre_comp[i][2][2]);
1708 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1709 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1710 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1711 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1712 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1713 pre->g_pre_comp[i][4][2]);
1715 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1717 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1718 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1719 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1720 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1721 pre->g_pre_comp[i][2][2]);
1722 for (j = 1; j < 8; ++j) {
1723 /* odd multiples: add G resp. 2^28*G */
1724 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1725 pre->g_pre_comp[i][2 * j + 1][1],
1726 pre->g_pre_comp[i][2 * j + 1][2],
1727 pre->g_pre_comp[i][2 * j][0],
1728 pre->g_pre_comp[i][2 * j][1],
1729 pre->g_pre_comp[i][2 * j][2], 0,
1730 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1731 pre->g_pre_comp[i][1][2]);
/* normalize all 31 non-infinity entries to affine in one batch */
1734 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
/* publish: on success, ownership passes to the group's extra data */
1737 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1738 nistp224_pre_comp_free,
1739 nistp224_pre_comp_clear_free))
1745 if (generator != NULL)
1746 EC_POINT_free(generator);
1747 if (new_ctx != NULL)
1748 BN_CTX_free(new_ctx);
/* drops the local reference if the table was never published */
1750 nistp224_pre_comp_free(pre);
/*
 * Reports whether generator precomputation has been cached for |group|
 * (looked up via the nistp224 extra-data handler triple).
 */
1754 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1756 if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1757 nistp224_pre_comp_free,
1758 nistp224_pre_comp_clear_free)
/*
 * NOTE(review): self-referential placeholder symbol -- presumably keeps
 * the translation unit non-empty when the nistp224 implementation is
 * compiled out (the enclosing #else branch is not visible here; confirm
 * against the full file).
 */
1766 static void *dummy = &dummy;