2 * Written by Emilia Kasper (Google) for the OpenSSL project.
4 /* Copyright 2011 Google Inc.
6 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
21 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
24 * and Adam Langley's public domain 64-bit C implementation of curve25519
27 #include <openssl/opensslconf.h>
28 #ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
29 NON_EMPTY_TRANSLATION_UNIT
34 # include <openssl/err.h>
37 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
38 /* even with gcc, the typedef won't work for 32-bit platforms */
39 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
42 # error "Need GCC 3.1 or later to define type uint128_t"
49 /******************************************************************************/
51 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
53 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
54 * using 64-bit coefficients called 'limbs',
55 * and sometimes (for multiplication results) as
56 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
57 * using 128-bit coefficients called 'widelimbs'.
58 * A 4-limb representation is an 'felem';
59 * a 7-widelimb representation is a 'widefelem'.
60 * Even within felems, bits of adjacent limbs overlap, and we don't always
61 * reduce the representations: we ensure that inputs to each felem
62 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
63 * and fit into a 128-bit word without overflow. The coefficients are then
64 * again partially reduced to obtain an felem satisfying a_i < 2^57.
65 * We only reduce to the unique minimal representation at the end of the
69 typedef uint64_t limb;
70 typedef uint128_t widelimb;
72 typedef limb felem[4];
73 typedef widelimb widefelem[7];
76 * Field element represented as a byte arrary. 28*8 = 224 bits is also the
77 * group order size for the elliptic curve, and we also use this type for
78 * scalars for point multiplication.
80 typedef u8 felem_bytearray[28];
82 static const felem_bytearray nistp224_curve_params[5] = {
83 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
84 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
85 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
86 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
87 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
88 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
89 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
90 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
91 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
92 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
93 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
94 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
95 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
96 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
97 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
101 * Precomputed multiples of the standard generator
102 * Points are given in coordinates (X, Y, Z) where Z normally is 1
103 * (0 for the point at infinity).
104 * For each field element, slice a_0 is word 0, etc.
106 * The table has 2 * 16 elements, starting with the following:
107 * index | bits | point
108 * ------+---------+------------------------------
111 * 2 | 0 0 1 0 | 2^56G
112 * 3 | 0 0 1 1 | (2^56 + 1)G
113 * 4 | 0 1 0 0 | 2^112G
114 * 5 | 0 1 0 1 | (2^112 + 1)G
115 * 6 | 0 1 1 0 | (2^112 + 2^56)G
116 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
117 * 8 | 1 0 0 0 | 2^168G
118 * 9 | 1 0 0 1 | (2^168 + 1)G
119 * 10 | 1 0 1 0 | (2^168 + 2^56)G
120 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
121 * 12 | 1 1 0 0 | (2^168 + 2^112)G
122 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
123 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
124 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
125 * followed by a copy of this with each element multiplied by 2^28.
127 * The reason for this is so that we can clock bits into four different
128 * locations when doing simple scalar multiplies against the base point,
129 * and then another four locations using the second 16 elements.
131 static const felem gmul[2][16][3] = {
135 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
136 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
138 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
139 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
141 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
142 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
144 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
145 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
147 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
148 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
150 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
151 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
153 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
154 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
156 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
157 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
159 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
160 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
162 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
163 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
165 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
166 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
168 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
169 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
171 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
172 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
174 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
175 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
177 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
178 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
183 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
184 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
186 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
187 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
189 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
190 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
192 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
193 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
195 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
196 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
198 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
199 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
201 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
202 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
204 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
205 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
207 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
208 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
210 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
211 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
213 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
214 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
216 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
217 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
219 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
220 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
222 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
223 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
225 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
226 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
230 /* Precomputation for the group generator. */
231 struct nistp224_pre_comp_st {
232 felem g_pre_comp[2][16][3];
236 const EC_METHOD *EC_GFp_nistp224_method(void)
238 static const EC_METHOD ret = {
239 EC_FLAGS_DEFAULT_OCT,
240 NID_X9_62_prime_field,
241 ec_GFp_nistp224_group_init,
242 ec_GFp_simple_group_finish,
243 ec_GFp_simple_group_clear_finish,
244 ec_GFp_nist_group_copy,
245 ec_GFp_nistp224_group_set_curve,
246 ec_GFp_simple_group_get_curve,
247 ec_GFp_simple_group_get_degree,
248 0, /* group_order_bits */
249 ec_GFp_simple_group_check_discriminant,
250 ec_GFp_simple_point_init,
251 ec_GFp_simple_point_finish,
252 ec_GFp_simple_point_clear_finish,
253 ec_GFp_simple_point_copy,
254 ec_GFp_simple_point_set_to_infinity,
255 ec_GFp_simple_set_Jprojective_coordinates_GFp,
256 ec_GFp_simple_get_Jprojective_coordinates_GFp,
257 ec_GFp_simple_point_set_affine_coordinates,
258 ec_GFp_nistp224_point_get_affine_coordinates,
259 0 /* point_set_compressed_coordinates */ ,
264 ec_GFp_simple_invert,
265 ec_GFp_simple_is_at_infinity,
266 ec_GFp_simple_is_on_curve,
268 ec_GFp_simple_make_affine,
269 ec_GFp_simple_points_make_affine,
270 ec_GFp_nistp224_points_mul,
271 ec_GFp_nistp224_precompute_mult,
272 ec_GFp_nistp224_have_precompute_mult,
273 ec_GFp_nist_field_mul,
274 ec_GFp_nist_field_sqr,
276 0 /* field_encode */ ,
277 0 /* field_decode */ ,
278 0 /* field_set_to_one */
285 * Helper functions to convert field elements to/from internal representation
287 static void bin28_to_felem(felem out, const u8 in[28])
289 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
290 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
291 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
292 out[3] = (*((const uint64_t *)(in+20))) >> 8;
295 static void felem_to_bin28(u8 out[28], const felem in)
298 for (i = 0; i < 7; ++i) {
299 out[i] = in[0] >> (8 * i);
300 out[i + 7] = in[1] >> (8 * i);
301 out[i + 14] = in[2] >> (8 * i);
302 out[i + 21] = in[3] >> (8 * i);
306 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
307 static void flip_endian(u8 *out, const u8 *in, unsigned len)
310 for (i = 0; i < len; ++i)
311 out[i] = in[len - 1 - i];
314 /* From OpenSSL BIGNUM to internal representation */
315 static int BN_to_felem(felem out, const BIGNUM *bn)
317 felem_bytearray b_in;
318 felem_bytearray b_out;
321 /* BN_bn2bin eats leading zeroes */
322 memset(b_out, 0, sizeof(b_out));
323 num_bytes = BN_num_bytes(bn);
324 if (num_bytes > sizeof b_out) {
325 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
328 if (BN_is_negative(bn)) {
329 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
332 num_bytes = BN_bn2bin(bn, b_in);
333 flip_endian(b_out, b_in, num_bytes);
334 bin28_to_felem(out, b_out);
338 /* From internal representation to OpenSSL BIGNUM */
339 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
341 felem_bytearray b_in, b_out;
342 felem_to_bin28(b_in, in);
343 flip_endian(b_out, b_in, sizeof b_out);
344 return BN_bin2bn(b_out, sizeof b_out, out);
347 /******************************************************************************/
351 * Field operations, using the internal representation of field elements.
352 * NB! These operations are specific to our point multiplication and cannot be
353 * expected to be correct in general - e.g., multiplication with a large scalar
354 * will cause an overflow.
358 static void felem_one(felem out)
366 static void felem_assign(felem out, const felem in)
374 /* Sum two field elements: out += in */
375 static void felem_sum(felem out, const felem in)
383 /* Get negative value: out = -in */
384 /* Assumes in[i] < 2^57 */
385 static void felem_neg(felem out, const felem in)
387 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
388 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
389 static const limb two58m42m2 = (((limb) 1) << 58) -
390 (((limb) 1) << 42) - (((limb) 1) << 2);
392 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
393 out[0] = two58p2 - in[0];
394 out[1] = two58m42m2 - in[1];
395 out[2] = two58m2 - in[2];
396 out[3] = two58m2 - in[3];
399 /* Subtract field elements: out -= in */
400 /* Assumes in[i] < 2^57 */
401 static void felem_diff(felem out, const felem in)
403 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
404 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
405 static const limb two58m42m2 = (((limb) 1) << 58) -
406 (((limb) 1) << 42) - (((limb) 1) << 2);
408 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
410 out[1] += two58m42m2;
420 /* Subtract in unreduced 128-bit mode: out -= in */
421 /* Assumes in[i] < 2^119 */
422 static void widefelem_diff(widefelem out, const widefelem in)
424 static const widelimb two120 = ((widelimb) 1) << 120;
425 static const widelimb two120m64 = (((widelimb) 1) << 120) -
426 (((widelimb) 1) << 64);
427 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
428 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
430 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
435 out[4] += two120m104m64;
448 /* Subtract in mixed mode: out128 -= in64 */
450 static void felem_diff_128_64(widefelem out, const felem in)
452 static const widelimb two64p8 = (((widelimb) 1) << 64) +
453 (((widelimb) 1) << 8);
454 static const widelimb two64m8 = (((widelimb) 1) << 64) -
455 (((widelimb) 1) << 8);
456 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
457 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
459 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
461 out[1] += two64m48m8;
472 * Multiply a field element by a scalar: out = out * scalar The scalars we
473 * actually use are small, so results fit without overflow
475 static void felem_scalar(felem out, const limb scalar)
484 * Multiply an unreduced field element by a scalar: out = out * scalar The
485 * scalars we actually use are small, so results fit without overflow
487 static void widefelem_scalar(widefelem out, const widelimb scalar)
498 /* Square a field element: out = in^2 */
499 static void felem_square(widefelem out, const felem in)
501 limb tmp0, tmp1, tmp2;
505 out[0] = ((widelimb) in[0]) * in[0];
506 out[1] = ((widelimb) in[0]) * tmp1;
507 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
508 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
509 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
510 out[5] = ((widelimb) in[3]) * tmp2;
511 out[6] = ((widelimb) in[3]) * in[3];
514 /* Multiply two field elements: out = in1 * in2 */
515 static void felem_mul(widefelem out, const felem in1, const felem in2)
517 out[0] = ((widelimb) in1[0]) * in2[0];
518 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
519 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
520 ((widelimb) in1[2]) * in2[0];
521 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
522 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
523 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
524 ((widelimb) in1[3]) * in2[1];
525 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
526 out[6] = ((widelimb) in1[3]) * in2[3];
530 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
531 * Requires in[i] < 2^126,
532 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
533 static void felem_reduce(felem out, const widefelem in)
535 static const widelimb two127p15 = (((widelimb) 1) << 127) +
536 (((widelimb) 1) << 15);
537 static const widelimb two127m71 = (((widelimb) 1) << 127) -
538 (((widelimb) 1) << 71);
539 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
540 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
543 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
544 output[0] = in[0] + two127p15;
545 output[1] = in[1] + two127m71m55;
546 output[2] = in[2] + two127m71;
550 /* Eliminate in[4], in[5], in[6] */
551 output[4] += in[6] >> 16;
552 output[3] += (in[6] & 0xffff) << 40;
555 output[3] += in[5] >> 16;
556 output[2] += (in[5] & 0xffff) << 40;
559 output[2] += output[4] >> 16;
560 output[1] += (output[4] & 0xffff) << 40;
561 output[0] -= output[4];
563 /* Carry 2 -> 3 -> 4 */
564 output[3] += output[2] >> 56;
565 output[2] &= 0x00ffffffffffffff;
567 output[4] = output[3] >> 56;
568 output[3] &= 0x00ffffffffffffff;
570 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
572 /* Eliminate output[4] */
573 output[2] += output[4] >> 16;
574 /* output[2] < 2^56 + 2^56 = 2^57 */
575 output[1] += (output[4] & 0xffff) << 40;
576 output[0] -= output[4];
578 /* Carry 0 -> 1 -> 2 -> 3 */
579 output[1] += output[0] >> 56;
580 out[0] = output[0] & 0x00ffffffffffffff;
582 output[2] += output[1] >> 56;
583 /* output[2] < 2^57 + 2^72 */
584 out[1] = output[1] & 0x00ffffffffffffff;
585 output[3] += output[2] >> 56;
586 /* output[3] <= 2^56 + 2^16 */
587 out[2] = output[2] & 0x00ffffffffffffff;
590 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
591 * out[3] <= 2^56 + 2^16 (due to final carry),
597 static void felem_square_reduce(felem out, const felem in)
600 felem_square(tmp, in);
601 felem_reduce(out, tmp);
604 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
607 felem_mul(tmp, in1, in2);
608 felem_reduce(out, tmp);
612 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
613 * call felem_reduce first)
615 static void felem_contract(felem out, const felem in)
617 static const int64_t two56 = ((limb) 1) << 56;
618 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
619 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
625 /* Case 1: a = 1 iff in >= 2^224 */
629 tmp[3] &= 0x00ffffffffffffff;
631 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
632 * and the lower part is non-zero
634 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
635 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
636 a &= 0x00ffffffffffffff;
637 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
639 /* subtract 2^224 - 2^96 + 1 if a is all-one */
640 tmp[3] &= a ^ 0xffffffffffffffff;
641 tmp[2] &= a ^ 0xffffffffffffffff;
642 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
646 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
647 * non-zero, so we only need one step
653 /* carry 1 -> 2 -> 3 */
654 tmp[2] += tmp[1] >> 56;
655 tmp[1] &= 0x00ffffffffffffff;
657 tmp[3] += tmp[2] >> 56;
658 tmp[2] &= 0x00ffffffffffffff;
660 /* Now 0 <= out < p */
668 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
669 * elements are reduced to in < 2^225, so we only need to check three cases:
670 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
672 static limb felem_is_zero(const felem in)
674 limb zero, two224m96p1, two225m97p2;
676 zero = in[0] | in[1] | in[2] | in[3];
677 zero = (((int64_t) (zero) - 1) >> 63) & 1;
678 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
679 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
680 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
681 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
682 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
683 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
684 return (zero | two224m96p1 | two225m97p2);
687 static limb felem_is_zero_int(const felem in)
689 return (int)(felem_is_zero(in) & ((limb) 1));
692 /* Invert a field element */
693 /* Computation chain copied from djb's code */
694 static void felem_inv(felem out, const felem in)
696 felem ftmp, ftmp2, ftmp3, ftmp4;
700 felem_square(tmp, in);
701 felem_reduce(ftmp, tmp); /* 2 */
702 felem_mul(tmp, in, ftmp);
703 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
704 felem_square(tmp, ftmp);
705 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
706 felem_mul(tmp, in, ftmp);
707 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
708 felem_square(tmp, ftmp);
709 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
710 felem_square(tmp, ftmp2);
711 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
712 felem_square(tmp, ftmp2);
713 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
714 felem_mul(tmp, ftmp2, ftmp);
715 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
716 felem_square(tmp, ftmp);
717 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
718 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
719 felem_square(tmp, ftmp2);
720 felem_reduce(ftmp2, tmp);
722 felem_mul(tmp, ftmp2, ftmp);
723 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
724 felem_square(tmp, ftmp2);
725 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
726 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
727 felem_square(tmp, ftmp3);
728 felem_reduce(ftmp3, tmp);
730 felem_mul(tmp, ftmp3, ftmp2);
731 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
732 felem_square(tmp, ftmp2);
733 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
734 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
735 felem_square(tmp, ftmp3);
736 felem_reduce(ftmp3, tmp);
738 felem_mul(tmp, ftmp3, ftmp2);
739 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
740 felem_square(tmp, ftmp3);
741 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
742 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
743 felem_square(tmp, ftmp4);
744 felem_reduce(ftmp4, tmp);
746 felem_mul(tmp, ftmp3, ftmp4);
747 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
748 felem_square(tmp, ftmp3);
749 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
750 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
751 felem_square(tmp, ftmp4);
752 felem_reduce(ftmp4, tmp);
754 felem_mul(tmp, ftmp2, ftmp4);
755 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
756 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
757 felem_square(tmp, ftmp2);
758 felem_reduce(ftmp2, tmp);
760 felem_mul(tmp, ftmp2, ftmp);
761 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
762 felem_square(tmp, ftmp);
763 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
764 felem_mul(tmp, ftmp, in);
765 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
766 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
767 felem_square(tmp, ftmp);
768 felem_reduce(ftmp, tmp);
770 felem_mul(tmp, ftmp, ftmp3);
771 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
775 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
778 static void copy_conditional(felem out, const felem in, limb icopy)
782 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
784 const limb copy = -icopy;
785 for (i = 0; i < 4; ++i) {
786 const limb tmp = copy & (in[i] ^ out[i]);
791 /******************************************************************************/
793 * ELLIPTIC CURVE POINT OPERATIONS
795 * Points are represented in Jacobian projective coordinates:
796 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
797 * or to the point at infinity if Z == 0.
802 * Double an elliptic curve point:
803 * (X', Y', Z') = 2 * (X, Y, Z), where
804 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
805 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
806 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
807 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
808 * while x_out == y_in is not (maybe this works, but it's not tested).
811 point_double(felem x_out, felem y_out, felem z_out,
812 const felem x_in, const felem y_in, const felem z_in)
815 felem delta, gamma, beta, alpha, ftmp, ftmp2;
817 felem_assign(ftmp, x_in);
818 felem_assign(ftmp2, x_in);
821 felem_square(tmp, z_in);
822 felem_reduce(delta, tmp);
825 felem_square(tmp, y_in);
826 felem_reduce(gamma, tmp);
829 felem_mul(tmp, x_in, gamma);
830 felem_reduce(beta, tmp);
832 /* alpha = 3*(x-delta)*(x+delta) */
833 felem_diff(ftmp, delta);
834 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
835 felem_sum(ftmp2, delta);
836 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
837 felem_scalar(ftmp2, 3);
838 /* ftmp2[i] < 3 * 2^58 < 2^60 */
839 felem_mul(tmp, ftmp, ftmp2);
840 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
841 felem_reduce(alpha, tmp);
843 /* x' = alpha^2 - 8*beta */
844 felem_square(tmp, alpha);
845 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
846 felem_assign(ftmp, beta);
847 felem_scalar(ftmp, 8);
848 /* ftmp[i] < 8 * 2^57 = 2^60 */
849 felem_diff_128_64(tmp, ftmp);
850 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
851 felem_reduce(x_out, tmp);
853 /* z' = (y + z)^2 - gamma - delta */
854 felem_sum(delta, gamma);
855 /* delta[i] < 2^57 + 2^57 = 2^58 */
856 felem_assign(ftmp, y_in);
857 felem_sum(ftmp, z_in);
858 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
859 felem_square(tmp, ftmp);
860 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
861 felem_diff_128_64(tmp, delta);
862 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
863 felem_reduce(z_out, tmp);
865 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
866 felem_scalar(beta, 4);
867 /* beta[i] < 4 * 2^57 = 2^59 */
868 felem_diff(beta, x_out);
869 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
870 felem_mul(tmp, alpha, beta);
871 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
872 felem_square(tmp2, gamma);
873 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
874 widefelem_scalar(tmp2, 8);
875 /* tmp2[i] < 8 * 2^116 = 2^119 */
876 widefelem_diff(tmp, tmp2);
877 /* tmp[i] < 2^119 + 2^120 < 2^121 */
878 felem_reduce(y_out, tmp);
882 * Add two elliptic curve points:
883 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
884 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
885 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
886 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
887 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
888 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
890 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
894 * This function is not entirely constant-time: it includes a branch for
895 * checking whether the two input points are equal, (while not equal to the
896 * point at infinity). This case never happens during single point
897 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
899 static void point_add(felem x3, felem y3, felem z3,
900 const felem x1, const felem y1, const felem z1,
901 const int mixed, const felem x2, const felem y2,
904 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
906 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
910 felem_square(tmp, z2);
911 felem_reduce(ftmp2, tmp);
914 felem_mul(tmp, ftmp2, z2);
915 felem_reduce(ftmp4, tmp);
917 /* ftmp4 = z2^3*y1 */
918 felem_mul(tmp2, ftmp4, y1);
919 felem_reduce(ftmp4, tmp2);
921 /* ftmp2 = z2^2*x1 */
922 felem_mul(tmp2, ftmp2, x1);
923 felem_reduce(ftmp2, tmp2);
926 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
929 /* ftmp4 = z2^3*y1 */
930 felem_assign(ftmp4, y1);
932 /* ftmp2 = z2^2*x1 */
933 felem_assign(ftmp2, x1);
937 felem_square(tmp, z1);
938 felem_reduce(ftmp, tmp);
941 felem_mul(tmp, ftmp, z1);
942 felem_reduce(ftmp3, tmp);
945 felem_mul(tmp, ftmp3, y2);
946 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
948 /* ftmp3 = z1^3*y2 - z2^3*y1 */
949 felem_diff_128_64(tmp, ftmp4);
950 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
951 felem_reduce(ftmp3, tmp);
954 felem_mul(tmp, ftmp, x2);
955 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
957 /* ftmp = z1^2*x2 - z2^2*x1 */
958 felem_diff_128_64(tmp, ftmp2);
959 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
960 felem_reduce(ftmp, tmp);
963 * the formulae are incorrect if the points are equal so we check for
964 * this and do doubling if this happens
966 x_equal = felem_is_zero(ftmp);
967 y_equal = felem_is_zero(ftmp3);
968 z1_is_zero = felem_is_zero(z1);
969 z2_is_zero = felem_is_zero(z2);
970 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
971 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
972 point_double(x3, y3, z3, x1, y1, z1);
978 felem_mul(tmp, z1, z2);
979 felem_reduce(ftmp5, tmp);
981 /* special case z2 = 0 is handled later */
982 felem_assign(ftmp5, z1);
985 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
986 felem_mul(tmp, ftmp, ftmp5);
987 felem_reduce(z_out, tmp);
989 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
990 felem_assign(ftmp5, ftmp);
991 felem_square(tmp, ftmp);
992 felem_reduce(ftmp, tmp);
994 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
995 felem_mul(tmp, ftmp, ftmp5);
996 felem_reduce(ftmp5, tmp);
998 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
999 felem_mul(tmp, ftmp2, ftmp);
1000 felem_reduce(ftmp2, tmp);
1002 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1003 felem_mul(tmp, ftmp4, ftmp5);
1004 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1006 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1007 felem_square(tmp2, ftmp3);
1008 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1010 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1011 felem_diff_128_64(tmp2, ftmp5);
1012 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1014 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1015 felem_assign(ftmp5, ftmp2);
1016 felem_scalar(ftmp5, 2);
1017 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1020 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1021 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1023 felem_diff_128_64(tmp2, ftmp5);
1024 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1025 felem_reduce(x_out, tmp2);
1027 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1028 felem_diff(ftmp2, x_out);
1029 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1032 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1034 felem_mul(tmp2, ftmp3, ftmp2);
1035 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1038 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1039 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1041 widefelem_diff(tmp2, tmp);
1042 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1043 felem_reduce(y_out, tmp2);
1046 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1047 * the point at infinity, so we need to check for this separately
1051 * if point 1 is at infinity, copy point 2 to output, and vice versa
1053 copy_conditional(x_out, x2, z1_is_zero);
1054 copy_conditional(x_out, x1, z2_is_zero);
1055 copy_conditional(y_out, y2, z1_is_zero);
1056 copy_conditional(y_out, y1, z2_is_zero);
1057 copy_conditional(z_out, z2, z1_is_zero);
1058 copy_conditional(z_out, z1, z2_is_zero);
1059 felem_assign(x3, x_out);
1060 felem_assign(y3, y_out);
1061 felem_assign(z3, z_out);
1065 * select_point selects the |idx|th point from a precomputation table and
1067 * The pre_comp array argument should be size of |size| argument
1069 static void select_point(const u64 idx, unsigned int size,
1070 const felem pre_comp[][3], felem out[3])
1073 limb *outlimbs = &out[0][0];
1075 memset(out, 0, sizeof(*out) * 3);
1076 for (i = 0; i < size; i++) {
1077 const limb *inlimbs = &pre_comp[i][0][0];
1084 for (j = 0; j < 4 * 3; j++)
1085 outlimbs[j] |= inlimbs[j] & mask;
1089 /* get_bit returns the |i|th bit in |in| */
1090 static char get_bit(const felem_bytearray in, unsigned i)
1094 return (in[i >> 3] >> (i & 7)) & 1;
1098 * Interleaved point multiplication using precomputed point multiples: The
1099 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1100 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1101 * generator, using certain (large) precomputed multiples in g_pre_comp.
1102 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1104 static void batch_mul(felem x_out, felem y_out, felem z_out,
1105 const felem_bytearray scalars[],
1106 const unsigned num_points, const u8 *g_scalar,
1107 const int mixed, const felem pre_comp[][17][3],
1108 const felem g_pre_comp[2][16][3])
1112 unsigned gen_mul = (g_scalar != NULL);
1113 felem nq[3], tmp[4];
1117 /* set nq to the point at infinity */
1118 memset(nq, 0, sizeof(nq));
1121 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1122 * of the generator (two in each of the last 28 rounds) and additions of
1123 * other points multiples (every 5th round).
1125 skip = 1; /* save two point operations in the first
1127 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1130 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1132 /* add multiples of the generator */
1133 if (gen_mul && (i <= 27)) {
1134 /* first, look 28 bits upwards */
1135 bits = get_bit(g_scalar, i + 196) << 3;
1136 bits |= get_bit(g_scalar, i + 140) << 2;
1137 bits |= get_bit(g_scalar, i + 84) << 1;
1138 bits |= get_bit(g_scalar, i + 28);
1139 /* select the point to add, in constant time */
1140 select_point(bits, 16, g_pre_comp[1], tmp);
1143 /* value 1 below is argument for "mixed" */
1144 point_add(nq[0], nq[1], nq[2],
1145 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1147 memcpy(nq, tmp, 3 * sizeof(felem));
1151 /* second, look at the current position */
1152 bits = get_bit(g_scalar, i + 168) << 3;
1153 bits |= get_bit(g_scalar, i + 112) << 2;
1154 bits |= get_bit(g_scalar, i + 56) << 1;
1155 bits |= get_bit(g_scalar, i);
1156 /* select the point to add, in constant time */
1157 select_point(bits, 16, g_pre_comp[0], tmp);
1158 point_add(nq[0], nq[1], nq[2],
1159 nq[0], nq[1], nq[2],
1160 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1163 /* do other additions every 5 doublings */
1164 if (num_points && (i % 5 == 0)) {
1165 /* loop over all scalars */
1166 for (num = 0; num < num_points; ++num) {
1167 bits = get_bit(scalars[num], i + 4) << 5;
1168 bits |= get_bit(scalars[num], i + 3) << 4;
1169 bits |= get_bit(scalars[num], i + 2) << 3;
1170 bits |= get_bit(scalars[num], i + 1) << 2;
1171 bits |= get_bit(scalars[num], i) << 1;
1172 bits |= get_bit(scalars[num], i - 1);
1173 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1175 /* select the point to add or subtract */
1176 select_point(digit, 17, pre_comp[num], tmp);
1177 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1179 copy_conditional(tmp[1], tmp[3], sign);
1182 point_add(nq[0], nq[1], nq[2],
1183 nq[0], nq[1], nq[2],
1184 mixed, tmp[0], tmp[1], tmp[2]);
1186 memcpy(nq, tmp, 3 * sizeof(felem));
1192 felem_assign(x_out, nq[0]);
1193 felem_assign(y_out, nq[1]);
1194 felem_assign(z_out, nq[2]);
1197 /******************************************************************************/
1199 * FUNCTIONS TO MANAGE PRECOMPUTATION
1202 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1204 NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1207 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1210 ret->references = 1;
1214 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1217 CRYPTO_add(&p->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1221 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1224 || CRYPTO_add(&p->references, -1, CRYPTO_LOCK_EC_PRE_COMP) > 0)
1229 /******************************************************************************/
1231 * OPENSSL EC_METHOD FUNCTIONS
1234 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1237 ret = ec_GFp_simple_group_init(group);
1238 group->a_is_minus3 = 1;
1242 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1243 const BIGNUM *a, const BIGNUM *b,
1247 BN_CTX *new_ctx = NULL;
1248 BIGNUM *curve_p, *curve_a, *curve_b;
1251 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1254 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1255 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1256 ((curve_b = BN_CTX_get(ctx)) == NULL))
1258 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1259 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1260 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1261 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1262 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1263 EC_R_WRONG_CURVE_PARAMETERS);
1266 group->field_mod_func = BN_nist_mod_224;
1267 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1270 BN_CTX_free(new_ctx);
1275 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1278 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1279 const EC_POINT *point,
1280 BIGNUM *x, BIGNUM *y,
1283 felem z1, z2, x_in, y_in, x_out, y_out;
1286 if (EC_POINT_is_at_infinity(group, point)) {
1287 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1288 EC_R_POINT_AT_INFINITY);
1291 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1292 (!BN_to_felem(z1, point->Z)))
1295 felem_square(tmp, z2);
1296 felem_reduce(z1, tmp);
1297 felem_mul(tmp, x_in, z1);
1298 felem_reduce(x_in, tmp);
1299 felem_contract(x_out, x_in);
1301 if (!felem_to_BN(x, x_out)) {
1302 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1307 felem_mul(tmp, z1, z2);
1308 felem_reduce(z1, tmp);
1309 felem_mul(tmp, y_in, z1);
1310 felem_reduce(y_in, tmp);
1311 felem_contract(y_out, y_in);
1313 if (!felem_to_BN(y, y_out)) {
1314 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1322 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1323 felem tmp_felems[ /* num+1 */ ])
1326 * Runs in constant time, unless an input is the point at infinity (which
1327 * normally shouldn't happen).
1329 ec_GFp_nistp_points_make_affine_internal(num,
1333 (void (*)(void *))felem_one,
1334 (int (*)(const void *))
1336 (void (*)(void *, const void *))
1338 (void (*)(void *, const void *))
1339 felem_square_reduce, (void (*)
1346 (void (*)(void *, const void *))
1348 (void (*)(void *, const void *))
1353 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1354 * values Result is stored in r (r can equal one of the inputs).
1356 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1357 const BIGNUM *scalar, size_t num,
1358 const EC_POINT *points[],
1359 const BIGNUM *scalars[], BN_CTX *ctx)
1365 BN_CTX *new_ctx = NULL;
1366 BIGNUM *x, *y, *z, *tmp_scalar;
1367 felem_bytearray g_secret;
1368 felem_bytearray *secrets = NULL;
1369 felem (*pre_comp)[17][3] = NULL;
1370 felem *tmp_felems = NULL;
1371 felem_bytearray tmp;
1373 int have_pre_comp = 0;
1374 size_t num_points = num;
1375 felem x_in, y_in, z_in, x_out, y_out, z_out;
1376 NISTP224_PRE_COMP *pre = NULL;
1377 const felem(*g_pre_comp)[16][3] = NULL;
1378 EC_POINT *generator = NULL;
1379 const EC_POINT *p = NULL;
1380 const BIGNUM *p_scalar = NULL;
1383 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1386 if (((x = BN_CTX_get(ctx)) == NULL) ||
1387 ((y = BN_CTX_get(ctx)) == NULL) ||
1388 ((z = BN_CTX_get(ctx)) == NULL) ||
1389 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1392 if (scalar != NULL) {
1393 pre = group->pre_comp.nistp224;
1395 /* we have precomputation, try to use it */
1396 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1398 /* try to use the standard precomputation */
1399 g_pre_comp = &gmul[0];
1400 generator = EC_POINT_new(group);
1401 if (generator == NULL)
1403 /* get the generator from precomputation */
1404 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1405 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1406 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1407 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1410 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1414 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1415 /* precomputation matches generator */
1419 * we don't have valid precomputation: treat the generator as a
1422 num_points = num_points + 1;
1425 if (num_points > 0) {
1426 if (num_points >= 3) {
1428 * unless we precompute multiples for just one or two points,
1429 * converting those into affine form is time well spent
1433 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1434 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1437 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1438 if ((secrets == NULL) || (pre_comp == NULL)
1439 || (mixed && (tmp_felems == NULL))) {
1440 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1445 * we treat NULL scalars as 0, and NULL points as points at infinity,
1446 * i.e., they contribute nothing to the linear combination
1448 for (i = 0; i < num_points; ++i) {
1452 p = EC_GROUP_get0_generator(group);
1455 /* the i^th point */
1458 p_scalar = scalars[i];
1460 if ((p_scalar != NULL) && (p != NULL)) {
1461 /* reduce scalar to 0 <= scalar < 2^224 */
1462 if ((BN_num_bits(p_scalar) > 224)
1463 || (BN_is_negative(p_scalar))) {
1465 * this is an unusual input, and we don't guarantee
1468 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1469 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1472 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1474 num_bytes = BN_bn2bin(p_scalar, tmp);
1475 flip_endian(secrets[i], tmp, num_bytes);
1476 /* precompute multiples */
1477 if ((!BN_to_felem(x_out, p->X)) ||
1478 (!BN_to_felem(y_out, p->Y)) ||
1479 (!BN_to_felem(z_out, p->Z)))
1481 felem_assign(pre_comp[i][1][0], x_out);
1482 felem_assign(pre_comp[i][1][1], y_out);
1483 felem_assign(pre_comp[i][1][2], z_out);
1484 for (j = 2; j <= 16; ++j) {
1486 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1487 pre_comp[i][j][2], pre_comp[i][1][0],
1488 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1489 pre_comp[i][j - 1][0],
1490 pre_comp[i][j - 1][1],
1491 pre_comp[i][j - 1][2]);
1493 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1494 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1495 pre_comp[i][j / 2][1],
1496 pre_comp[i][j / 2][2]);
1502 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1505 /* the scalar for the generator */
1506 if ((scalar != NULL) && (have_pre_comp)) {
1507 memset(g_secret, 0, sizeof(g_secret));
1508 /* reduce scalar to 0 <= scalar < 2^224 */
1509 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1511 * this is an unusual input, and we don't guarantee
1514 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1515 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1518 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1520 num_bytes = BN_bn2bin(scalar, tmp);
1521 flip_endian(g_secret, tmp, num_bytes);
1522 /* do the multiplication with generator precomputation */
1523 batch_mul(x_out, y_out, z_out,
1524 (const felem_bytearray(*))secrets, num_points,
1526 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1528 /* do the multiplication without generator precomputation */
1529 batch_mul(x_out, y_out, z_out,
1530 (const felem_bytearray(*))secrets, num_points,
1531 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1532 /* reduce the output to its unique minimal representation */
1533 felem_contract(x_in, x_out);
1534 felem_contract(y_in, y_out);
1535 felem_contract(z_in, z_out);
1536 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1537 (!felem_to_BN(z, z_in))) {
1538 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1541 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1545 EC_POINT_free(generator);
1546 BN_CTX_free(new_ctx);
1547 OPENSSL_free(secrets);
1548 OPENSSL_free(pre_comp);
1549 OPENSSL_free(tmp_felems);
1553 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1556 NISTP224_PRE_COMP *pre = NULL;
1558 BN_CTX *new_ctx = NULL;
1560 EC_POINT *generator = NULL;
1561 felem tmp_felems[32];
1563 /* throw away old precomputation */
1564 EC_pre_comp_free(group);
1566 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1569 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1571 /* get the generator */
1572 if (group->generator == NULL)
1574 generator = EC_POINT_new(group);
1575 if (generator == NULL)
1577 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1578 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1579 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1581 if ((pre = nistp224_pre_comp_new()) == NULL)
1584 * if the generator is the standard one, use built-in precomputation
1586 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1587 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1590 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1591 (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1592 (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1595 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1596 * 2^140*G, 2^196*G for the second one
1598 for (i = 1; i <= 8; i <<= 1) {
1599 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1600 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1601 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1602 for (j = 0; j < 27; ++j) {
1603 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1604 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1605 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1609 point_double(pre->g_pre_comp[0][2 * i][0],
1610 pre->g_pre_comp[0][2 * i][1],
1611 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1612 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1613 for (j = 0; j < 27; ++j) {
1614 point_double(pre->g_pre_comp[0][2 * i][0],
1615 pre->g_pre_comp[0][2 * i][1],
1616 pre->g_pre_comp[0][2 * i][2],
1617 pre->g_pre_comp[0][2 * i][0],
1618 pre->g_pre_comp[0][2 * i][1],
1619 pre->g_pre_comp[0][2 * i][2]);
1622 for (i = 0; i < 2; i++) {
1623 /* g_pre_comp[i][0] is the point at infinity */
1624 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1625 /* the remaining multiples */
1626 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1627 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1628 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1629 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1630 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1631 pre->g_pre_comp[i][2][2]);
1632 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1633 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1634 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1635 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1636 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1637 pre->g_pre_comp[i][2][2]);
1638 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1639 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1640 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1641 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1642 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1643 pre->g_pre_comp[i][4][2]);
1645 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1647 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1648 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1649 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1650 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1651 pre->g_pre_comp[i][2][2]);
1652 for (j = 1; j < 8; ++j) {
1653 /* odd multiples: add G resp. 2^28*G */
1654 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1655 pre->g_pre_comp[i][2 * j + 1][1],
1656 pre->g_pre_comp[i][2 * j + 1][2],
1657 pre->g_pre_comp[i][2 * j][0],
1658 pre->g_pre_comp[i][2 * j][1],
1659 pre->g_pre_comp[i][2 * j][2], 0,
1660 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1661 pre->g_pre_comp[i][1][2]);
1664 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1667 SETPRECOMP(group, nistp224, pre);
1672 EC_POINT_free(generator);
1673 BN_CTX_free(new_ctx);
1674 EC_nistp224_pre_comp_free(pre);
1678 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1680 return HAVEPRECOMP(group, nistp224);