2 * Written by Emilia Kasper (Google) for the OpenSSL project.
4 /* Copyright 2011 Google Inc.
6 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
21 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
24 * and Adam Langley's public domain 64-bit C implementation of curve25519
27 #include <openssl/opensslconf.h>
28 #ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
29 NON_EMPTY_TRANSLATION_UNIT
34 # include <openssl/err.h>
37 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
38 /* even with gcc, the typedef won't work for 32-bit platforms */
39 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
42 # error "Need GCC 3.1 or later to define type uint128_t"
49 /******************************************************************************/
51 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
53 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
54 * using 64-bit coefficients called 'limbs',
55 * and sometimes (for multiplication results) as
56 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
57 * using 128-bit coefficients called 'widelimbs'.
58 * A 4-limb representation is an 'felem';
59 * a 7-widelimb representation is a 'widefelem'.
60 * Even within felems, bits of adjacent limbs overlap, and we don't always
61 * reduce the representations: we ensure that inputs to each felem
62 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
63 * and fit into a 128-bit word without overflow. The coefficients are then
64 * again partially reduced to obtain an felem satisfying a_i < 2^57.
65 * We only reduce to the unique minimal representation at the end of the
69 typedef uint64_t limb;
70 typedef uint128_t widelimb;
/* A field element: four limbs, least-significant first; see layout comment above. */
72 typedef limb felem[4];
/* An unreduced product: seven 128-bit coefficients (see 'widelimbs' above). */
73 typedef widelimb widefelem[7];
76 * Field element represented as a byte array. 28*8 = 224 bits is also the
77 * group order size for the elliptic curve, and we also use this type for
78 * scalars for point multiplication.
80 typedef u8 felem_bytearray[28];
82 static const felem_bytearray nistp224_curve_params[5] = {
83 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
84 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
85 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
86 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
87 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
88 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
89 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
90 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
91 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
92 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
93 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
94 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
95 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
96 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
97 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
101 * Precomputed multiples of the standard generator
102 * Points are given in coordinates (X, Y, Z) where Z normally is 1
103 * (0 for the point at infinity).
104 * For each field element, slice a_0 is word 0, etc.
106 * The table has 2 * 16 elements, starting with the following:
107 * index | bits    | point
108 * ------+---------+------------------------------
    *     0 | 0 0 0 0 | the point at infinity
    *     1 | 0 0 0 1 | G (the generator)
111 * 2 | 0 0 1 0 | 2^56G
112 * 3 | 0 0 1 1 | (2^56 + 1)G
113 * 4 | 0 1 0 0 | 2^112G
114 * 5 | 0 1 0 1 | (2^112 + 1)G
115 * 6 | 0 1 1 0 | (2^112 + 2^56)G
116 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
117 * 8 | 1 0 0 0 | 2^168G
118 * 9 | 1 0 0 1 | (2^168 + 1)G
119 * 10 | 1 0 1 0 | (2^168 + 2^56)G
120 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
121 * 12 | 1 1 0 0 | (2^168 + 2^112)G
122 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
123 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
124 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
125 * followed by a copy of this with each element multiplied by 2^28.
127 * The reason for this is so that we can clock bits into four different
128 * locations when doing simple scalar multiplies against the base point,
129 * and then another four locations using the second 16 elements.
/*
 * NOTE(review): several rows of this table (the Z coordinates, the point at
 * infinity entry, and the second half's opening rows) are missing from this
 * excerpt — verify the full 2*16*3 table against upstream before use.
 */
131 static const felem gmul[2][16][3] = {
135 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
136 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
138 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
139 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
141 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
142 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
144 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
145 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
147 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
148 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
150 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
151 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
153 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
154 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
156 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
157 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
159 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
160 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
162 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
163 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
165 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
166 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
168 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
169 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
171 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
172 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
174 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
175 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
177 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
178 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
183 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
184 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
186 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
187 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
189 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
190 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
192 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
193 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
195 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
196 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
198 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
199 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
201 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
202 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
204 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
205 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
207 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
208 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
210 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
211 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
213 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
214 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
216 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
217 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
219 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
220 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
222 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
223 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
225 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
226 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
230 /* Precomputation for the group generator. */
231 struct nistp224_pre_comp_st {
    /*
     * 2 tables of 16 precomputed generator multiples, each point as
     * (X, Y, Z) felems; same layout as the gmul table above.
     * NOTE(review): bookkeeping fields (flags/refcount/lock) appear to be
     * missing from this excerpt — confirm against upstream.
     */
232     felem g_pre_comp[2][16][3];
/*
 * Returns the EC_METHOD function table for this constant-time P-224
 * implementation; unimplemented slots fall back to the generic
 * ec_GFp_simple_* / ec_GFp_nist_* routines.
 * NOTE(review): several vtable entries (e.g. point2oct/oct2point group,
 * dbl/add/invert group, and the trailing slots) are missing from this
 * excerpt — verify ordering against the EC_METHOD definition.
 */
236 const EC_METHOD *EC_GFp_nistp224_method(void)
238 static const EC_METHOD ret = {
239 EC_FLAGS_DEFAULT_OCT,
240 NID_X9_62_prime_field,
241 ec_GFp_nistp224_group_init,
242 ec_GFp_simple_group_finish,
243 ec_GFp_simple_group_clear_finish,
244 ec_GFp_nist_group_copy,
245 ec_GFp_nistp224_group_set_curve,
246 ec_GFp_simple_group_get_curve,
247 ec_GFp_simple_group_get_degree,
248 ec_group_simple_order_bits,
249 ec_GFp_simple_group_check_discriminant,
250 ec_GFp_simple_point_init,
251 ec_GFp_simple_point_finish,
252 ec_GFp_simple_point_clear_finish,
253 ec_GFp_simple_point_copy,
254 ec_GFp_simple_point_set_to_infinity,
255 ec_GFp_simple_set_Jprojective_coordinates_GFp,
256 ec_GFp_simple_get_Jprojective_coordinates_GFp,
257 ec_GFp_simple_point_set_affine_coordinates,
258 ec_GFp_nistp224_point_get_affine_coordinates,
259 0 /* point_set_compressed_coordinates */ ,
264 ec_GFp_simple_invert,
265 ec_GFp_simple_is_at_infinity,
266 ec_GFp_simple_is_on_curve,
268 ec_GFp_simple_make_affine,
269 ec_GFp_simple_points_make_affine,
270 ec_GFp_nistp224_points_mul,
271 ec_GFp_nistp224_precompute_mult,
272 ec_GFp_nistp224_have_precompute_mult,
273 ec_GFp_nist_field_mul,
274 ec_GFp_nist_field_sqr,
276 0 /* field_encode */ ,
277 0 /* field_decode */ ,
278 0, /* field_set_to_one */
279 ec_key_simple_priv2oct,
280 ec_key_simple_oct2priv,
282 ec_key_simple_generate_key,
283 ec_key_simple_check_key,
284 ec_key_simple_generate_public_key,
287 ecdh_simple_compute_key
294 * Helper functions to convert field elements to/from internal representation
296 static void bin28_to_felem(felem out, const u8 in[28])
298 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
299 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
300 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
301 out[3] = (*((const uint64_t *)(in+20))) >> 8;
304 static void felem_to_bin28(u8 out[28], const felem in)
307 for (i = 0; i < 7; ++i) {
308 out[i] = in[0] >> (8 * i);
309 out[i + 7] = in[1] >> (8 * i);
310 out[i + 14] = in[2] >> (8 * i);
311 out[i + 21] = in[3] >> (8 * i);
315 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
316 static void flip_endian(u8 *out, const u8 *in, unsigned len)
319 for (i = 0; i < len; ++i)
320 out[i] = in[len - 1 - i];
323 /* From OpenSSL BIGNUM to internal representation */
324 static int BN_to_felem(felem out, const BIGNUM *bn)
326 felem_bytearray b_in;
327 felem_bytearray b_out;
330 /* BN_bn2bin eats leading zeroes */
331 memset(b_out, 0, sizeof(b_out));
332 num_bytes = BN_num_bytes(bn);
333 if (num_bytes > sizeof b_out) {
334 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
337 if (BN_is_negative(bn)) {
338 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
341 num_bytes = BN_bn2bin(bn, b_in);
342 flip_endian(b_out, b_in, num_bytes);
343 bin28_to_felem(out, b_out);
347 /* From internal representation to OpenSSL BIGNUM */
348 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
350 felem_bytearray b_in, b_out;
351 felem_to_bin28(b_in, in);
352 flip_endian(b_out, b_in, sizeof b_out);
353 return BN_bin2bn(b_out, sizeof b_out, out);
356 /******************************************************************************/
360 * Field operations, using the internal representation of field elements.
361 * NB! These operations are specific to our point multiplication and cannot be
362 * expected to be correct in general - e.g., multiplication with a large scalar
363 * will cause an overflow.
367 static void felem_one(felem out)
375 static void felem_assign(felem out, const felem in)
383 /* Sum two field elements: out += in */
384 static void felem_sum(felem out, const felem in)
392 /* Get negative value: out = -in */
393 /* Assumes in[i] < 2^57 */
394 static void felem_neg(felem out, const felem in)
396 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
397 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
398 static const limb two58m42m2 = (((limb) 1) << 58) -
399 (((limb) 1) << 42) - (((limb) 1) << 2);
401 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
402 out[0] = two58p2 - in[0];
403 out[1] = two58m42m2 - in[1];
404 out[2] = two58m2 - in[2];
405 out[3] = two58m2 - in[3];
408 /* Subtract field elements: out -= in */
409 /* Assumes in[i] < 2^57 */
410 static void felem_diff(felem out, const felem in)
412 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
413 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
414 static const limb two58m42m2 = (((limb) 1) << 58) -
415 (((limb) 1) << 42) - (((limb) 1) << 2);
417 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
419 out[1] += two58m42m2;
429 /* Subtract in unreduced 128-bit mode: out -= in */
430 /* Assumes in[i] < 2^119 */
431 static void widefelem_diff(widefelem out, const widefelem in)
433 static const widelimb two120 = ((widelimb) 1) << 120;
434 static const widelimb two120m64 = (((widelimb) 1) << 120) -
435 (((widelimb) 1) << 64);
436 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
437 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
439 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
444 out[4] += two120m104m64;
457 /* Subtract in mixed mode: out128 -= in64 */
459 static void felem_diff_128_64(widefelem out, const felem in)
461 static const widelimb two64p8 = (((widelimb) 1) << 64) +
462 (((widelimb) 1) << 8);
463 static const widelimb two64m8 = (((widelimb) 1) << 64) -
464 (((widelimb) 1) << 8);
465 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
466 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
468 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
470 out[1] += two64m48m8;
481 * Multiply a field element by a scalar: out = out * scalar The scalars we
482 * actually use are small, so results fit without overflow
484 static void felem_scalar(felem out, const limb scalar)
493 * Multiply an unreduced field element by a scalar: out = out * scalar The
494 * scalars we actually use are small, so results fit without overflow
496 static void widefelem_scalar(widefelem out, const widelimb scalar)
507 /* Square a field element: out = in^2 */
508 static void felem_square(widefelem out, const felem in)
510 limb tmp0, tmp1, tmp2;
514 out[0] = ((widelimb) in[0]) * in[0];
515 out[1] = ((widelimb) in[0]) * tmp1;
516 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
517 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
518 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
519 out[5] = ((widelimb) in[3]) * tmp2;
520 out[6] = ((widelimb) in[3]) * in[3];
523 /* Multiply two field elements: out = in1 * in2 */
524 static void felem_mul(widefelem out, const felem in1, const felem in2)
526 out[0] = ((widelimb) in1[0]) * in2[0];
527 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
528 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
529 ((widelimb) in1[2]) * in2[0];
530 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
531 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
532 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
533 ((widelimb) in1[3]) * in2[1];
534 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
535 out[6] = ((widelimb) in1[3]) * in2[3];
539 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
540 * Requires in[i] < 2^126,
541 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
542 static void felem_reduce(felem out, const widefelem in)
544 static const widelimb two127p15 = (((widelimb) 1) << 127) +
545 (((widelimb) 1) << 15);
546 static const widelimb two127m71 = (((widelimb) 1) << 127) -
547 (((widelimb) 1) << 71);
548 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
549 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
552 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
553 output[0] = in[0] + two127p15;
554 output[1] = in[1] + two127m71m55;
555 output[2] = in[2] + two127m71;
559 /* Eliminate in[4], in[5], in[6] */
560 output[4] += in[6] >> 16;
561 output[3] += (in[6] & 0xffff) << 40;
564 output[3] += in[5] >> 16;
565 output[2] += (in[5] & 0xffff) << 40;
568 output[2] += output[4] >> 16;
569 output[1] += (output[4] & 0xffff) << 40;
570 output[0] -= output[4];
572 /* Carry 2 -> 3 -> 4 */
573 output[3] += output[2] >> 56;
574 output[2] &= 0x00ffffffffffffff;
576 output[4] = output[3] >> 56;
577 output[3] &= 0x00ffffffffffffff;
579 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
581 /* Eliminate output[4] */
582 output[2] += output[4] >> 16;
583 /* output[2] < 2^56 + 2^56 = 2^57 */
584 output[1] += (output[4] & 0xffff) << 40;
585 output[0] -= output[4];
587 /* Carry 0 -> 1 -> 2 -> 3 */
588 output[1] += output[0] >> 56;
589 out[0] = output[0] & 0x00ffffffffffffff;
591 output[2] += output[1] >> 56;
592 /* output[2] < 2^57 + 2^72 */
593 out[1] = output[1] & 0x00ffffffffffffff;
594 output[3] += output[2] >> 56;
595 /* output[3] <= 2^56 + 2^16 */
596 out[2] = output[2] & 0x00ffffffffffffff;
599 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
600 * out[3] <= 2^56 + 2^16 (due to final carry),
606 static void felem_square_reduce(felem out, const felem in)
609 felem_square(tmp, in);
610 felem_reduce(out, tmp);
613 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
616 felem_mul(tmp, in1, in2);
617 felem_reduce(out, tmp);
621 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
622 * call felem_reduce first)
624 static void felem_contract(felem out, const felem in)
626 static const int64_t two56 = ((limb) 1) << 56;
627 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
628 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
634 /* Case 1: a = 1 iff in >= 2^224 */
638 tmp[3] &= 0x00ffffffffffffff;
640 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
641 * and the lower part is non-zero
643 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
644 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
645 a &= 0x00ffffffffffffff;
646 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
648 /* subtract 2^224 - 2^96 + 1 if a is all-one */
649 tmp[3] &= a ^ 0xffffffffffffffff;
650 tmp[2] &= a ^ 0xffffffffffffffff;
651 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
655 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
656 * non-zero, so we only need one step
662 /* carry 1 -> 2 -> 3 */
663 tmp[2] += tmp[1] >> 56;
664 tmp[1] &= 0x00ffffffffffffff;
666 tmp[3] += tmp[2] >> 56;
667 tmp[2] &= 0x00ffffffffffffff;
669 /* Now 0 <= out < p */
677 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
678 * elements are reduced to in < 2^225, so we only need to check three cases:
679 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
681 static limb felem_is_zero(const felem in)
683 limb zero, two224m96p1, two225m97p2;
685 zero = in[0] | in[1] | in[2] | in[3];
686 zero = (((int64_t) (zero) - 1) >> 63) & 1;
687 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
688 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
689 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
690 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
691 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
692 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
693 return (zero | two224m96p1 | two225m97p2);
696 static limb felem_is_zero_int(const felem in)
698 return (int)(felem_is_zero(in) & ((limb) 1));
701 /* Invert a field element */
702 /* Computation chain copied from djb's code */
703 static void felem_inv(felem out, const felem in)
705 felem ftmp, ftmp2, ftmp3, ftmp4;
709 felem_square(tmp, in);
710 felem_reduce(ftmp, tmp); /* 2 */
711 felem_mul(tmp, in, ftmp);
712 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
713 felem_square(tmp, ftmp);
714 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
715 felem_mul(tmp, in, ftmp);
716 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
717 felem_square(tmp, ftmp);
718 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
719 felem_square(tmp, ftmp2);
720 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
721 felem_square(tmp, ftmp2);
722 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
723 felem_mul(tmp, ftmp2, ftmp);
724 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
725 felem_square(tmp, ftmp);
726 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
727 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
728 felem_square(tmp, ftmp2);
729 felem_reduce(ftmp2, tmp);
731 felem_mul(tmp, ftmp2, ftmp);
732 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
733 felem_square(tmp, ftmp2);
734 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
735 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
736 felem_square(tmp, ftmp3);
737 felem_reduce(ftmp3, tmp);
739 felem_mul(tmp, ftmp3, ftmp2);
740 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
741 felem_square(tmp, ftmp2);
742 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
743 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
744 felem_square(tmp, ftmp3);
745 felem_reduce(ftmp3, tmp);
747 felem_mul(tmp, ftmp3, ftmp2);
748 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
749 felem_square(tmp, ftmp3);
750 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
751 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
752 felem_square(tmp, ftmp4);
753 felem_reduce(ftmp4, tmp);
755 felem_mul(tmp, ftmp3, ftmp4);
756 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
757 felem_square(tmp, ftmp3);
758 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
759 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
760 felem_square(tmp, ftmp4);
761 felem_reduce(ftmp4, tmp);
763 felem_mul(tmp, ftmp2, ftmp4);
764 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
765 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
766 felem_square(tmp, ftmp2);
767 felem_reduce(ftmp2, tmp);
769 felem_mul(tmp, ftmp2, ftmp);
770 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
771 felem_square(tmp, ftmp);
772 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
773 felem_mul(tmp, ftmp, in);
774 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
775 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
776 felem_square(tmp, ftmp);
777 felem_reduce(ftmp, tmp);
779 felem_mul(tmp, ftmp, ftmp3);
780 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
784 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
787 static void copy_conditional(felem out, const felem in, limb icopy)
791 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
793 const limb copy = -icopy;
794 for (i = 0; i < 4; ++i) {
795 const limb tmp = copy & (in[i] ^ out[i]);
800 /******************************************************************************/
802 * ELLIPTIC CURVE POINT OPERATIONS
804 * Points are represented in Jacobian projective coordinates:
805 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
806 * or to the point at infinity if Z == 0.
811 * Double an elliptic curve point:
812 * (X', Y', Z') = 2 * (X, Y, Z), where
813 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
814 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
815 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
816 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
817 * while x_out == y_in is not (maybe this works, but it's not tested).
820 point_double(felem x_out, felem y_out, felem z_out,
821 const felem x_in, const felem y_in, const felem z_in)
824 felem delta, gamma, beta, alpha, ftmp, ftmp2;
826 felem_assign(ftmp, x_in);
827 felem_assign(ftmp2, x_in);
830 felem_square(tmp, z_in);
831 felem_reduce(delta, tmp);
834 felem_square(tmp, y_in);
835 felem_reduce(gamma, tmp);
838 felem_mul(tmp, x_in, gamma);
839 felem_reduce(beta, tmp);
841 /* alpha = 3*(x-delta)*(x+delta) */
842 felem_diff(ftmp, delta);
843 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
844 felem_sum(ftmp2, delta);
845 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
846 felem_scalar(ftmp2, 3);
847 /* ftmp2[i] < 3 * 2^58 < 2^60 */
848 felem_mul(tmp, ftmp, ftmp2);
849 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
850 felem_reduce(alpha, tmp);
852 /* x' = alpha^2 - 8*beta */
853 felem_square(tmp, alpha);
854 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
855 felem_assign(ftmp, beta);
856 felem_scalar(ftmp, 8);
857 /* ftmp[i] < 8 * 2^57 = 2^60 */
858 felem_diff_128_64(tmp, ftmp);
859 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
860 felem_reduce(x_out, tmp);
862 /* z' = (y + z)^2 - gamma - delta */
863 felem_sum(delta, gamma);
864 /* delta[i] < 2^57 + 2^57 = 2^58 */
865 felem_assign(ftmp, y_in);
866 felem_sum(ftmp, z_in);
867 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
868 felem_square(tmp, ftmp);
869 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
870 felem_diff_128_64(tmp, delta);
871 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
872 felem_reduce(z_out, tmp);
874 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
875 felem_scalar(beta, 4);
876 /* beta[i] < 4 * 2^57 = 2^59 */
877 felem_diff(beta, x_out);
878 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
879 felem_mul(tmp, alpha, beta);
880 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
881 felem_square(tmp2, gamma);
882 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
883 widefelem_scalar(tmp2, 8);
884 /* tmp2[i] < 8 * 2^116 = 2^119 */
885 widefelem_diff(tmp, tmp2);
886 /* tmp[i] < 2^119 + 2^120 < 2^121 */
887 felem_reduce(y_out, tmp);
891 * Add two elliptic curve points:
892 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
893 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
894 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
895 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
896 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
897 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
899 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
903 * This function is not entirely constant-time: it includes a branch for
904 * checking whether the two input points are equal, (while not equal to the
905 * point at infinity). This case never happens during single point
906 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
908 static void point_add(felem x3, felem y3, felem z3,
909 const felem x1, const felem y1, const felem z1,
910 const int mixed, const felem x2, const felem y2,
913 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
915 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
919 felem_square(tmp, z2);
920 felem_reduce(ftmp2, tmp);
923 felem_mul(tmp, ftmp2, z2);
924 felem_reduce(ftmp4, tmp);
926 /* ftmp4 = z2^3*y1 */
927 felem_mul(tmp2, ftmp4, y1);
928 felem_reduce(ftmp4, tmp2);
930 /* ftmp2 = z2^2*x1 */
931 felem_mul(tmp2, ftmp2, x1);
932 felem_reduce(ftmp2, tmp2);
935 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
938 /* ftmp4 = z2^3*y1 */
939 felem_assign(ftmp4, y1);
941 /* ftmp2 = z2^2*x1 */
942 felem_assign(ftmp2, x1);
946 felem_square(tmp, z1);
947 felem_reduce(ftmp, tmp);
950 felem_mul(tmp, ftmp, z1);
951 felem_reduce(ftmp3, tmp);
954 felem_mul(tmp, ftmp3, y2);
955 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
957 /* ftmp3 = z1^3*y2 - z2^3*y1 */
958 felem_diff_128_64(tmp, ftmp4);
959 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
960 felem_reduce(ftmp3, tmp);
963 felem_mul(tmp, ftmp, x2);
964 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
966 /* ftmp = z1^2*x2 - z2^2*x1 */
967 felem_diff_128_64(tmp, ftmp2);
968 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
969 felem_reduce(ftmp, tmp);
972 * the formulae are incorrect if the points are equal so we check for
973 * this and do doubling if this happens
975 x_equal = felem_is_zero(ftmp);
976 y_equal = felem_is_zero(ftmp3);
977 z1_is_zero = felem_is_zero(z1);
978 z2_is_zero = felem_is_zero(z2);
979 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
980 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
981 point_double(x3, y3, z3, x1, y1, z1);
987 felem_mul(tmp, z1, z2);
988 felem_reduce(ftmp5, tmp);
990 /* special case z2 = 0 is handled later */
991 felem_assign(ftmp5, z1);
994 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
995 felem_mul(tmp, ftmp, ftmp5);
996 felem_reduce(z_out, tmp);
998 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
999 felem_assign(ftmp5, ftmp);
1000 felem_square(tmp, ftmp);
1001 felem_reduce(ftmp, tmp);
1003 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1004 felem_mul(tmp, ftmp, ftmp5);
1005 felem_reduce(ftmp5, tmp);
1007 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1008 felem_mul(tmp, ftmp2, ftmp);
1009 felem_reduce(ftmp2, tmp);
1011 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1012 felem_mul(tmp, ftmp4, ftmp5);
1013 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1015 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1016 felem_square(tmp2, ftmp3);
1017 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1019 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1020 felem_diff_128_64(tmp2, ftmp5);
1021 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1023 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1024 felem_assign(ftmp5, ftmp2);
1025 felem_scalar(ftmp5, 2);
1026 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1029 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1030 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1032 felem_diff_128_64(tmp2, ftmp5);
1033 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1034 felem_reduce(x_out, tmp2);
1036 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1037 felem_diff(ftmp2, x_out);
1038 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1041 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1043 felem_mul(tmp2, ftmp3, ftmp2);
1044 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1047 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1048 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1050 widefelem_diff(tmp2, tmp);
1051 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1052 felem_reduce(y_out, tmp2);
1055 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1056 * the point at infinity, so we need to check for this separately
1060 * if point 1 is at infinity, copy point 2 to output, and vice versa
1062 copy_conditional(x_out, x2, z1_is_zero);
1063 copy_conditional(x_out, x1, z2_is_zero);
1064 copy_conditional(y_out, y2, z1_is_zero);
1065 copy_conditional(y_out, y1, z2_is_zero);
1066 copy_conditional(z_out, z2, z1_is_zero);
1067 copy_conditional(z_out, z1, z2_is_zero);
1068 felem_assign(x3, x_out);
1069 felem_assign(y3, y_out);
1070 felem_assign(z3, z_out);
1074 * select_point selects the |idx|th point from a precomputation table and
1076 * The pre_comp array argument should be size of |size| argument
/*
 * NOTE(review): runs in constant time — every table entry is scanned and
 * combined with a mask so the memory access pattern is independent of |idx|.
 * The listing is elided here: the per-iteration computation of |mask|
 * (all-ones when i == idx, zero otherwise) is not visible — confirm against
 * the full source.
 */
1078 static void select_point(const u64 idx, unsigned int size,
1079                          const felem pre_comp[][3], felem out[3])
1082     limb *outlimbs = &out[0][0];
     /* start from the all-zero point; selected limbs are OR-ed in below */
1084     memset(out, 0, sizeof(*out) * 3);
1085     for (i = 0; i < size; i++) {
1086         const limb *inlimbs = &pre_comp[i][0][0];
         /* 3 coordinates (X, Y, Z) of 4 limbs each = 12 limbs per point */
1093         for (j = 0; j < 4 * 3; j++)
1094             outlimbs[j] |= inlimbs[j] & mask;
1098 /* get_bit returns the |i|th bit in |in| */
/*
 * |in| is a little-endian byte array (felem_bytearray): byte i>>3 holds
 * bit i, at position i&7 within the byte. Returns 0 or 1.
 */
1099 static char get_bit(const felem_bytearray in, unsigned i)
1103     return (in[i >> 3] >> (i & 7)) & 1;
1107  * Interleaved point multiplication using precomputed point multiples: The
1108  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1109  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1110  * generator, using certain (large) precomputed multiples in g_pre_comp.
1111  * Output point (X, Y, Z) is stored in x_out, y_out, z_out
/*
 * NOTE(review): this listing is elided — several declarations, braces and
 * the 'skip' bookkeeping are not visible. Comments below describe only what
 * the visible lines establish.
 */
1113 static void batch_mul(felem x_out, felem y_out, felem z_out,
1114                       const felem_bytearray scalars[],
1115                       const unsigned num_points, const u8 *g_scalar,
1116                       const int mixed, const felem pre_comp[][17][3],
1117                       const felem g_pre_comp[2][16][3])
1121     unsigned gen_mul = (g_scalar != NULL);
1122     felem nq[3], tmp[4];
1126     /* set nq to the point at infinity */
1127     memset(nq, 0, sizeof(nq));
1130      * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1131      * of the generator (two in each of the last 28 rounds) and additions of
1132      * other points multiples (every 5th round).
1134     skip = 1;                   /* save two point operations in the first
     /*
      * 220 rounds when arbitrary points are present (224-bit scalars consumed
      * 1 bit per round with 5-bit windows every 5th round); only 27 rounds
      * when solely the generator scalar is processed (28-bit comb columns).
      */
1136     for (i = (num_points ? 220 : 27); i >= 0; --i) {
1139             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1141         /* add multiples of the generator */
1142         if (gen_mul && (i <= 27)) {
1143             /* first, look 28 bits upwards */
             /* comb recoding: one bit from each 56-bit stride of g_scalar */
1144             bits = get_bit(g_scalar, i + 196) << 3;
1145             bits |= get_bit(g_scalar, i + 140) << 2;
1146             bits |= get_bit(g_scalar, i + 84) << 1;
1147             bits |= get_bit(g_scalar, i + 28);
1148             /* select the point to add, in constant time */
1149             select_point(bits, 16, g_pre_comp[1], tmp);
1152                 /* value 1 below is argument for "mixed" */
1153                 point_add(nq[0], nq[1], nq[2],
1154                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
             /* first real addition: just copy (adding to infinity) */
1156                 memcpy(nq, tmp, 3 * sizeof(felem));
1160             /* second, look at the current position */
1161             bits = get_bit(g_scalar, i + 168) << 3;
1162             bits |= get_bit(g_scalar, i + 112) << 2;
1163             bits |= get_bit(g_scalar, i + 56) << 1;
1164             bits |= get_bit(g_scalar, i);
1165             /* select the point to add, in constant time */
1166             select_point(bits, 16, g_pre_comp[0], tmp);
1167             point_add(nq[0], nq[1], nq[2],
1168                       nq[0], nq[1], nq[2],
1169                       1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1172         /* do other additions every 5 doublings */
1173         if (num_points && (i % 5 == 0)) {
1174             /* loop over all scalars */
1175             for (num = 0; num < num_points; ++num) {
                 /* take 6 consecutive bits (signed-window recoding input) */
1176                 bits = get_bit(scalars[num], i + 4) << 5;
1177                 bits |= get_bit(scalars[num], i + 3) << 4;
1178                 bits |= get_bit(scalars[num], i + 2) << 3;
1179                 bits |= get_bit(scalars[num], i + 1) << 2;
1180                 bits |= get_bit(scalars[num], i) << 1;
1181                 bits |= get_bit(scalars[num], i - 1);
1182                 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1184                 /* select the point to add or subtract */
1185                 select_point(digit, 17, pre_comp[num], tmp);
1186                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
                 /* conditionally negate, in constant time, if sign is set */
1188                 copy_conditional(tmp[1], tmp[3], sign);
1191                     point_add(nq[0], nq[1], nq[2],
1192                               nq[0], nq[1], nq[2],
1193                               mixed, tmp[0], tmp[1], tmp[2]);
1195                     memcpy(nq, tmp, 3 * sizeof(felem));
1201     felem_assign(x_out, nq[0]);
1202     felem_assign(y_out, nq[1]);
1203     felem_assign(z_out, nq[2]);
1206 /******************************************************************************/
1208  * FUNCTIONS TO MANAGE PRECOMPUTATION
/*
 * Allocate a zero-initialized precomputation structure with a reference
 * count of 1. Returns NULL (after raising ERR_R_MALLOC_FAILURE) on
 * allocation failure — the elided lines presumably contain the NULL check
 * and return; confirm against the full source.
 */
1211 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1213     NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1216         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1219     ret->references = 1;
/*
 * "Duplicate" by incrementing the shared reference count; the same pointer
 * is returned (elided). Thread-safe via CRYPTO_add on the EC_PRE_COMP lock.
 */
1223 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1226     CRYPTO_add(&p->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
/*
 * Drop one reference; frees the structure only when the count reaches zero
 * (the actual OPENSSL_free call is in elided lines). Safe on NULL per the
 * visible short-circuit condition.
 */
1230 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1233         || CRYPTO_add(&p->references, -1, CRYPTO_LOCK_EC_PRE_COMP) > 0)
1238 /******************************************************************************/
1240  * OPENSSL EC_METHOD FUNCTIONS
/*
 * Group init: delegate to the generic GFp implementation, then record that
 * a = -3 (mod p), which enables the faster point-doubling formulas used by
 * this NIST-curve code. Returns the generic init's result.
 */
1243 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1246     ret = ec_GFp_simple_group_init(group);
1247     group->a_is_minus3 = 1;
/*
 * Accept the curve parameters only if they are exactly the NIST P-224
 * constants; anything else raises EC_R_WRONG_CURVE_PARAMETERS, since this
 * specialized implementation hard-codes the P-224 field and curve.
 * A temporary BN_CTX is created when the caller passes ctx == NULL.
 */
1251 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1252                                     const BIGNUM *a, const BIGNUM *b,
1256     BN_CTX *new_ctx = NULL;
1257     BIGNUM *curve_p, *curve_a, *curve_b;
1260         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1263     if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1264         ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1265         ((curve_b = BN_CTX_get(ctx)) == NULL))
     /* load the canonical P-224 p, a, b from the built-in byte arrays */
1267     BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1268     BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1269     BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1270     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1271         ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1272               EC_R_WRONG_CURVE_PARAMETERS);
     /* use the fast NIST reduction for this field */
1275     group->field_mod_func = BN_nist_mod_224;
1276     ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
     /* free only the context we created ourselves (no-op when NULL) */
1279     BN_CTX_free(new_ctx);
1284  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
/*
 * (X/Z^2, Y/Z^3). The point at infinity has no affine representation and is
 * rejected. NOTE(review): the listing is elided — the felem_inv computation
 * of z2 = 1/Z is not visible but is implied by the uses of z2 below.
 */
1287 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1288                                                  const EC_POINT *point,
1289                                                  BIGNUM *x, BIGNUM *y,
1292     felem z1, z2, x_in, y_in, x_out, y_out;
1295     if (EC_POINT_is_at_infinity(group, point)) {
1296         ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1297               EC_R_POINT_AT_INFINITY);
1300     if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1301         (!BN_to_felem(z1, point->Z)))
     /* x' = X * (1/Z)^2 */
1304     felem_square(tmp, z2);
1305     felem_reduce(z1, tmp);
1306     felem_mul(tmp, x_in, z1);
1307     felem_reduce(x_in, tmp);
1308     felem_contract(x_out, x_in);
1310         if (!felem_to_BN(x, x_out)) {
1311             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
     /* y' = Y * (1/Z)^3: reuse z1 = (1/Z)^2, multiply by 1/Z once more */
1316     felem_mul(tmp, z1, z2);
1317     felem_reduce(z1, tmp);
1318     felem_mul(tmp, y_in, z1);
1319     felem_reduce(y_in, tmp);
1320     felem_contract(y_out, y_in);
1322         if (!felem_to_BN(y, y_out)) {
1323             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
/*
 * Batch-convert |num| Jacobian points to affine form (Z = 1) using the
 * shared Montgomery-trick inversion helper; tmp_felems provides num+1
 * scratch field elements. The function-pointer casts adapt the felem
 * primitives to the generic void* interface of the internal helper.
 */
1331 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1332                                felem tmp_felems[ /* num+1 */ ])
1335      * Runs in constant time, unless an input is the point at infinity (which
1336      * normally shouldn't happen).
1338     ec_GFp_nistp_points_make_affine_internal(num,
1342                                              (void (*)(void *))felem_one,
1343                                              (int (*)(const void *))
1345                                              (void (*)(void *, const void *))
1347                                              (void (*)(void *, const void *))
1348                                              felem_square_reduce, (void (*)
1355                                              (void (*)(void *, const void *))
1357                                              (void (*)(void *, const void *))
1362  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1363  * values Result is stored in r (r can equal one of the inputs).
/*
 * NOTE(review): heavily elided listing — error labels, several 'goto err'
 * jumps, the 'mixed' flag assignment and some closing braces are missing.
 * Comments describe only what the visible lines show.
 */
1365 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1366                                const BIGNUM *scalar, size_t num,
1367                                const EC_POINT *points[],
1368                                const BIGNUM *scalars[], BN_CTX *ctx)
1374     BN_CTX *new_ctx = NULL;
1375     BIGNUM *x, *y, *z, *tmp_scalar;
1376     felem_bytearray g_secret;
1377     felem_bytearray *secrets = NULL;
1378     felem(*pre_comp)[17][3] = NULL;
1379     felem *tmp_felems = NULL;
1380     felem_bytearray tmp;
1382     int have_pre_comp = 0;
1383     size_t num_points = num;
1384     felem x_in, y_in, z_in, x_out, y_out, z_out;
1385     NISTP224_PRE_COMP *pre = NULL;
1386     const felem(*g_pre_comp)[16][3] = NULL;
1387     EC_POINT *generator = NULL;
1388     const EC_POINT *p = NULL;
1389     const BIGNUM *p_scalar = NULL;
     /* create a scratch BN_CTX only if the caller did not supply one */
1392         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1395     if (((x = BN_CTX_get(ctx)) == NULL) ||
1396         ((y = BN_CTX_get(ctx)) == NULL) ||
1397         ((z = BN_CTX_get(ctx)) == NULL) ||
1398         ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1401     if (scalar != NULL) {
1402         pre = group->pre_comp.nistp224;
1404             /* we have precomputation, try to use it */
1405             g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1407             /* try to use the standard precomputation */
1408             g_pre_comp = &gmul[0];
1409         generator = EC_POINT_new(group);
1410         if (generator == NULL)
1412         /* get the generator from precomputation */
1413         if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1414             !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1415             !felem_to_BN(z, g_pre_comp[0][1][2])) {
1416             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1419         if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
         /* only trust the table if it reproduces this group's generator */
1423         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1424             /* precomputation matches generator */
1428              * we don't have valid precomputation: treat the generator as a
         /* ... an arbitrary point, appended to the points[] processing */
1431             num_points = num_points + 1;
1434     if (num_points > 0) {
1435         if (num_points >= 3) {
1437              * unless we precompute multiples for just one or two points,
1438              * converting those into affine form is time well spent
1442         secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1443         pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
         /* num_points*17 table entries plus 1 scratch element (see
          * make_points_affine's tmp_felems[num+1] contract) */
1446                 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1447         if ((secrets == NULL) || (pre_comp == NULL)
1448             || (mixed && (tmp_felems == NULL))) {
1449             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1454          * we treat NULL scalars as 0, and NULL points as points at infinity,
1455          * i.e., they contribute nothing to the linear combination
1457         for (i = 0; i < num_points; ++i) {
             /* i == num means the appended generator-as-arbitrary-point */
1461                 p = EC_GROUP_get0_generator(group);
1464                 /* the i^th point */
1467                     p_scalar = scalars[i];
1469             if ((p_scalar != NULL) && (p != NULL)) {
1470                 /* reduce scalar to 0 <= scalar < 2^224 */
1471                 if ((BN_num_bits(p_scalar) > 224)
1472                     || (BN_is_negative(p_scalar))) {
1474                      * this is an unusual input, and we don't guarantee
                 /* ... constant-timeness in this case */
1477                     if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1478                         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1481                     num_bytes = BN_bn2bin(tmp_scalar, tmp);
1483                     num_bytes = BN_bn2bin(p_scalar, tmp);
                 /* batch_mul/get_bit want little-endian scalar bytes */
1484                 flip_endian(secrets[i], tmp, num_bytes);
1485                 /* precompute multiples */
1486                 if ((!BN_to_felem(x_out, p->X)) ||
1487                     (!BN_to_felem(y_out, p->Y)) ||
1488                     (!BN_to_felem(z_out, p->Z)))
1490                 felem_assign(pre_comp[i][1][0], x_out);
1491                 felem_assign(pre_comp[i][1][1], y_out);
1492                 felem_assign(pre_comp[i][1][2], z_out);
             /* build 2*P .. 16*P: odd j via add of (j-1)*P + P, even j via
              * doubling of (j/2)*P */
1493                 for (j = 2; j <= 16; ++j) {
1495                         point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1496                                   pre_comp[i][j][2], pre_comp[i][1][0],
1497                                   pre_comp[i][1][1], pre_comp[i][1][2], 0,
1498                                   pre_comp[i][j - 1][0],
1499                                   pre_comp[i][j - 1][1],
1500                                   pre_comp[i][j - 1][2]);
1502                         point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1503                                      pre_comp[i][j][2], pre_comp[i][j / 2][0],
1504                                      pre_comp[i][j / 2][1],
1505                                      pre_comp[i][j / 2][2]);
1511             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1514     /* the scalar for the generator */
1515     if ((scalar != NULL) && (have_pre_comp)) {
1516         memset(g_secret, 0, sizeof(g_secret));
1517         /* reduce scalar to 0 <= scalar < 2^224 */
1518         if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1520              * this is an unusual input, and we don't guarantee
1523             if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1524                 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1527             num_bytes = BN_bn2bin(tmp_scalar, tmp);
1529             num_bytes = BN_bn2bin(scalar, tmp);
1530         flip_endian(g_secret, tmp, num_bytes);
1531         /* do the multiplication with generator precomputation */
1532         batch_mul(x_out, y_out, z_out,
1533                   (const felem_bytearray(*))secrets, num_points,
1535                   mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1537         /* do the multiplication without generator precomputation */
1538         batch_mul(x_out, y_out, z_out,
1539                   (const felem_bytearray(*))secrets, num_points,
1540                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1541     /* reduce the output to its unique minimal representation */
1542     felem_contract(x_in, x_out);
1543     felem_contract(y_in, y_out);
1544     felem_contract(z_in, z_out);
1545     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1546         (!felem_to_BN(z, z_in))) {
1547         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1550     ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
     /* common cleanup path (error label elided above) */
1554     EC_POINT_free(generator);
1555     BN_CTX_free(new_ctx);
1556     OPENSSL_free(secrets);
1557     OPENSSL_free(pre_comp);
1558     OPENSSL_free(tmp_felems);
/*
 * Build (or reuse) the two 16-entry comb tables of generator multiples used
 * by batch_mul: g_pre_comp[0] holds combinations of G, 2^56*G, 2^112*G,
 * 2^168*G; g_pre_comp[1] the same shifted by 28 doublings. NOTE(review):
 * the listing is elided (error labels, 'goto done', some braces missing).
 */
1562 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1565     NISTP224_PRE_COMP *pre = NULL;
1567     BN_CTX *new_ctx = NULL;
1569     EC_POINT *generator = NULL;
1570     felem tmp_felems[32];
1572     /* throw away old precomputation */
1573     EC_pre_comp_free(group);
1575         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1578     if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1580     /* get the generator */
1581     if (group->generator == NULL)
1583     generator = EC_POINT_new(group);
1584     if (generator == NULL)
     /* canonical P-224 generator coordinates from the built-in constants */
1586     BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1587     BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1588     if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1590     if ((pre = nistp224_pre_comp_new()) == NULL)
1593      * if the generator is the standard one, use built-in precomputation
1595     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1596         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
     /* non-standard generator: compute the tables from scratch */
1599     if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1600         (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1601         (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1604      * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1605      * 2^140*G, 2^196*G for the second one
     /* i walks 1,2,4,8: each base entry is 28 doublings past the previous */
1607     for (i = 1; i <= 8; i <<= 1) {
1608         point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1609                      pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1610                      pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1611         for (j = 0; j < 27; ++j) {
1612             point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1613                          pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1614                          pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
         /* 28 more doublings take table-1 entry i to table-0 entry 2*i */
1618         point_double(pre->g_pre_comp[0][2 * i][0],
1619                      pre->g_pre_comp[0][2 * i][1],
1620                      pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1621                      pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1622         for (j = 0; j < 27; ++j) {
1623             point_double(pre->g_pre_comp[0][2 * i][0],
1624                          pre->g_pre_comp[0][2 * i][1],
1625                          pre->g_pre_comp[0][2 * i][2],
1626                          pre->g_pre_comp[0][2 * i][0],
1627                          pre->g_pre_comp[0][2 * i][1],
1628                          pre->g_pre_comp[0][2 * i][2]);
1631     for (i = 0; i < 2; i++) {
1632         /* g_pre_comp[i][0] is the point at infinity */
1633         memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1634         /* the remaining multiples */
1635         /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1636         point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1637                   pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1638                   pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1639                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1640                   pre->g_pre_comp[i][2][2]);
1641         /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1642         point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1643                   pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1644                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1645                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1646                   pre->g_pre_comp[i][2][2]);
1647         /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1648         point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1649                   pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1650                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1651                   0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1652                   pre->g_pre_comp[i][4][2]);
1654          * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1656         point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1657                   pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1658                   pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1659                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1660                   pre->g_pre_comp[i][2][2]);
1661         for (j = 1; j < 8; ++j) {
1662             /* odd multiples: add G resp. 2^28*G */
1663             point_add(pre->g_pre_comp[i][2 * j + 1][0],
1664                       pre->g_pre_comp[i][2 * j + 1][1],
1665                       pre->g_pre_comp[i][2 * j + 1][2],
1666                       pre->g_pre_comp[i][2 * j][0],
1667                       pre->g_pre_comp[i][2 * j][1],
1668                       pre->g_pre_comp[i][2 * j][2], 0,
1669                       pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1670                       pre->g_pre_comp[i][1][2]);
     /* normalize all 31 non-infinity entries to affine form in one batch */
1673     make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
     /* install the table on the group (takes ownership of pre on success) */
1676     SETPRECOMP(group, nistp224, pre);
1681     EC_POINT_free(generator);
1682     BN_CTX_free(new_ctx);
     /* no-op when ownership was transferred and pre was set to NULL above —
      * TODO confirm against the elided lines */
1683     EC_nistp224_pre_comp_free(pre);
/* Report whether a P-224 precomputation table is attached to |group|. */
1687 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1689     return HAVEPRECOMP(group, nistp224);