/*
 * Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */
/* Copyright 2011 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * ECDSA low level APIs are deprecated for public use, but still ok for
 * internal use.
 */
30 #include "internal/deprecated.h"
/*-
 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
 *
 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
 * and Adam Langley's public domain 64-bit C implementation of curve25519
 */
39 #include <openssl/opensslconf.h>
43 #include <openssl/err.h>
46 #if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
47 /* even with gcc, the typedef won't work for 32-bit platforms */
48 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
51 # error "Your compiler doesn't appear to support 128-bit integer types"
/******************************************************************************/
/*-
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
 *
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
 * using 64-bit coefficients called 'limbs',
 * and sometimes (for multiplication results) as
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
 * using 128-bit coefficients called 'widelimbs'.
 * A 4-limb representation is an 'felem';
 * a 7-widelimb representation is a 'widefelem'.
 * Even within felems, bits of adjacent limbs overlap, and we don't always
 * reduce the representations: we ensure that inputs to each felem
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
 * and fit into a 128-bit word without overflow. The coefficients are then
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
 * We only reduce to the unique minimal representation at the end of the
 * computation.
 */
77 typedef uint64_t limb;
78 typedef uint64_t limb_aX __attribute((__aligned__(1)));
79 typedef uint128_t widelimb;
81 typedef limb felem[4];
82 typedef widelimb widefelem[7];
85 * Field element represented as a byte array. 28*8 = 224 bits is also the
86 * group order size for the elliptic curve, and we also use this type for
87 * scalars for point multiplication.
89 typedef u8 felem_bytearray[28];
91 static const felem_bytearray nistp224_curve_params[5] = {
92 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
93 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
94 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
95 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
96 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
97 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
98 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
99 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
100 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
101 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
102 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
103 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
104 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
105 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
106 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
/*-
 * Precomputed multiples of the standard generator
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
 * (0 for the point at infinity).
 * For each field element, slice a_0 is word 0, etc.
 *
 * The table has 2 * 16 elements, starting with the following:
 * index | bits    | point
 * ------+---------+------------------------------
 *     0 | 0 0 0 0 | 0G
 *     1 | 0 0 0 1 | 1G
 *     2 | 0 0 1 0 | 2^56G
 *     3 | 0 0 1 1 | (2^56 + 1)G
 *     4 | 0 1 0 0 | 2^112G
 *     5 | 0 1 0 1 | (2^112 + 1)G
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
 *     8 | 1 0 0 0 | 2^168G
 *     9 | 1 0 0 1 | (2^168 + 1)G
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
 * followed by a copy of this with each element multiplied by 2^28.
 *
 * The reason for this is so that we can clock bits into four different
 * locations when doing simple scalar multiplies against the base point,
 * and then another four locations using the second 16 elements.
 */
140 static const felem gmul[2][16][3] = {
144 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
145 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
147 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
148 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
150 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
151 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
153 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
154 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
156 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
157 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
159 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
160 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
162 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
163 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
165 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
166 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
168 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
169 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
171 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
172 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
174 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
175 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
177 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
178 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
180 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
181 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
183 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
184 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
186 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
187 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
192 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
193 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
195 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
196 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
198 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
199 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
201 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
202 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
204 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
205 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
207 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
208 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
210 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
211 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
213 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
214 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
216 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
217 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
219 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
220 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
222 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
223 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
225 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
226 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
228 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
229 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
231 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
232 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
234 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
235 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
239 /* Precomputation for the group generator. */
240 struct nistp224_pre_comp_st {
241 felem g_pre_comp[2][16][3];
242 CRYPTO_REF_COUNT references;
246 const EC_METHOD *EC_GFp_nistp224_method(void)
248 static const EC_METHOD ret = {
249 EC_FLAGS_DEFAULT_OCT,
250 NID_X9_62_prime_field,
251 ec_GFp_nistp224_group_init,
252 ec_GFp_simple_group_finish,
253 ec_GFp_simple_group_clear_finish,
254 ec_GFp_nist_group_copy,
255 ec_GFp_nistp224_group_set_curve,
256 ec_GFp_simple_group_get_curve,
257 ec_GFp_simple_group_get_degree,
258 ec_group_simple_order_bits,
259 ec_GFp_simple_group_check_discriminant,
260 ec_GFp_simple_point_init,
261 ec_GFp_simple_point_finish,
262 ec_GFp_simple_point_clear_finish,
263 ec_GFp_simple_point_copy,
264 ec_GFp_simple_point_set_to_infinity,
265 ec_GFp_simple_point_set_affine_coordinates,
266 ec_GFp_nistp224_point_get_affine_coordinates,
267 0 /* point_set_compressed_coordinates */ ,
272 ec_GFp_simple_invert,
273 ec_GFp_simple_is_at_infinity,
274 ec_GFp_simple_is_on_curve,
276 ec_GFp_simple_make_affine,
277 ec_GFp_simple_points_make_affine,
278 ec_GFp_nistp224_points_mul,
279 ec_GFp_nistp224_precompute_mult,
280 ec_GFp_nistp224_have_precompute_mult,
281 ec_GFp_nist_field_mul,
282 ec_GFp_nist_field_sqr,
284 ec_GFp_simple_field_inv,
285 0 /* field_encode */ ,
286 0 /* field_decode */ ,
287 0, /* field_set_to_one */
288 ec_key_simple_priv2oct,
289 ec_key_simple_oct2priv,
291 ec_key_simple_generate_key,
292 ec_key_simple_check_key,
293 ec_key_simple_generate_public_key,
296 ecdh_simple_compute_key,
297 ecdsa_simple_sign_setup,
298 ecdsa_simple_sign_sig,
299 ecdsa_simple_verify_sig,
300 0, /* field_inverse_mod_ord */
301 0, /* blind_coordinates */
311 * Helper functions to convert field elements to/from internal representation
313 static void bin28_to_felem(felem out, const u8 in[28])
315 out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
316 out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
317 out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
318 out[3] = (*((const limb_aX *)(in + 20))) >> 8;
321 static void felem_to_bin28(u8 out[28], const felem in)
324 for (i = 0; i < 7; ++i) {
325 out[i] = in[0] >> (8 * i);
326 out[i + 7] = in[1] >> (8 * i);
327 out[i + 14] = in[2] >> (8 * i);
328 out[i + 21] = in[3] >> (8 * i);
332 /* From OpenSSL BIGNUM to internal representation */
333 static int BN_to_felem(felem out, const BIGNUM *bn)
335 felem_bytearray b_out;
338 if (BN_is_negative(bn)) {
339 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
342 num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
344 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
347 bin28_to_felem(out, b_out);
351 /* From internal representation to OpenSSL BIGNUM */
352 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
354 felem_bytearray b_out;
355 felem_to_bin28(b_out, in);
356 return BN_lebin2bn(b_out, sizeof(b_out), out);
359 /******************************************************************************/
363 * Field operations, using the internal representation of field elements.
364 * NB! These operations are specific to our point multiplication and cannot be
365 * expected to be correct in general - e.g., multiplication with a large scalar
366 * will cause an overflow.
370 static void felem_one(felem out)
378 static void felem_assign(felem out, const felem in)
386 /* Sum two field elements: out += in */
387 static void felem_sum(felem out, const felem in)
395 /* Subtract field elements: out -= in */
396 /* Assumes in[i] < 2^57 */
397 static void felem_diff(felem out, const felem in)
399 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
400 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
401 static const limb two58m42m2 = (((limb) 1) << 58) -
402 (((limb) 1) << 42) - (((limb) 1) << 2);
404 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
406 out[1] += two58m42m2;
416 /* Subtract in unreduced 128-bit mode: out -= in */
417 /* Assumes in[i] < 2^119 */
418 static void widefelem_diff(widefelem out, const widefelem in)
420 static const widelimb two120 = ((widelimb) 1) << 120;
421 static const widelimb two120m64 = (((widelimb) 1) << 120) -
422 (((widelimb) 1) << 64);
423 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
424 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
426 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
431 out[4] += two120m104m64;
444 /* Subtract in mixed mode: out128 -= in64 */
446 static void felem_diff_128_64(widefelem out, const felem in)
448 static const widelimb two64p8 = (((widelimb) 1) << 64) +
449 (((widelimb) 1) << 8);
450 static const widelimb two64m8 = (((widelimb) 1) << 64) -
451 (((widelimb) 1) << 8);
452 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
453 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
455 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
457 out[1] += two64m48m8;
468 * Multiply a field element by a scalar: out = out * scalar The scalars we
469 * actually use are small, so results fit without overflow
471 static void felem_scalar(felem out, const limb scalar)
480 * Multiply an unreduced field element by a scalar: out = out * scalar The
481 * scalars we actually use are small, so results fit without overflow
483 static void widefelem_scalar(widefelem out, const widelimb scalar)
494 /* Square a field element: out = in^2 */
495 static void felem_square(widefelem out, const felem in)
497 limb tmp0, tmp1, tmp2;
501 out[0] = ((widelimb) in[0]) * in[0];
502 out[1] = ((widelimb) in[0]) * tmp1;
503 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
504 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
505 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
506 out[5] = ((widelimb) in[3]) * tmp2;
507 out[6] = ((widelimb) in[3]) * in[3];
510 /* Multiply two field elements: out = in1 * in2 */
511 static void felem_mul(widefelem out, const felem in1, const felem in2)
513 out[0] = ((widelimb) in1[0]) * in2[0];
514 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
515 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
516 ((widelimb) in1[2]) * in2[0];
517 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
518 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
519 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
520 ((widelimb) in1[3]) * in2[1];
521 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
522 out[6] = ((widelimb) in1[3]) * in2[3];
526 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
527 * Requires in[i] < 2^126,
528 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
529 static void felem_reduce(felem out, const widefelem in)
531 static const widelimb two127p15 = (((widelimb) 1) << 127) +
532 (((widelimb) 1) << 15);
533 static const widelimb two127m71 = (((widelimb) 1) << 127) -
534 (((widelimb) 1) << 71);
535 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
536 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
539 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
540 output[0] = in[0] + two127p15;
541 output[1] = in[1] + two127m71m55;
542 output[2] = in[2] + two127m71;
546 /* Eliminate in[4], in[5], in[6] */
547 output[4] += in[6] >> 16;
548 output[3] += (in[6] & 0xffff) << 40;
551 output[3] += in[5] >> 16;
552 output[2] += (in[5] & 0xffff) << 40;
555 output[2] += output[4] >> 16;
556 output[1] += (output[4] & 0xffff) << 40;
557 output[0] -= output[4];
559 /* Carry 2 -> 3 -> 4 */
560 output[3] += output[2] >> 56;
561 output[2] &= 0x00ffffffffffffff;
563 output[4] = output[3] >> 56;
564 output[3] &= 0x00ffffffffffffff;
566 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
568 /* Eliminate output[4] */
569 output[2] += output[4] >> 16;
570 /* output[2] < 2^56 + 2^56 = 2^57 */
571 output[1] += (output[4] & 0xffff) << 40;
572 output[0] -= output[4];
574 /* Carry 0 -> 1 -> 2 -> 3 */
575 output[1] += output[0] >> 56;
576 out[0] = output[0] & 0x00ffffffffffffff;
578 output[2] += output[1] >> 56;
579 /* output[2] < 2^57 + 2^72 */
580 out[1] = output[1] & 0x00ffffffffffffff;
581 output[3] += output[2] >> 56;
582 /* output[3] <= 2^56 + 2^16 */
583 out[2] = output[2] & 0x00ffffffffffffff;
586 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
587 * out[3] <= 2^56 + 2^16 (due to final carry),
593 static void felem_square_reduce(felem out, const felem in)
596 felem_square(tmp, in);
597 felem_reduce(out, tmp);
600 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
603 felem_mul(tmp, in1, in2);
604 felem_reduce(out, tmp);
608 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
609 * call felem_reduce first)
611 static void felem_contract(felem out, const felem in)
613 static const int64_t two56 = ((limb) 1) << 56;
614 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
615 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
621 /* Case 1: a = 1 iff in >= 2^224 */
625 tmp[3] &= 0x00ffffffffffffff;
627 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
628 * and the lower part is non-zero
630 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
631 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
632 a &= 0x00ffffffffffffff;
633 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
635 /* subtract 2^224 - 2^96 + 1 if a is all-one */
636 tmp[3] &= a ^ 0xffffffffffffffff;
637 tmp[2] &= a ^ 0xffffffffffffffff;
638 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
642 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
643 * non-zero, so we only need one step
649 /* carry 1 -> 2 -> 3 */
650 tmp[2] += tmp[1] >> 56;
651 tmp[1] &= 0x00ffffffffffffff;
653 tmp[3] += tmp[2] >> 56;
654 tmp[2] &= 0x00ffffffffffffff;
656 /* Now 0 <= out < p */
664 * Get negative value: out = -in
665 * Requires in[i] < 2^63,
666 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
668 static void felem_neg(felem out, const felem in)
672 memset(tmp, 0, sizeof(tmp));
673 felem_diff_128_64(tmp, in);
674 felem_reduce(out, tmp);
678 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
679 * elements are reduced to in < 2^225, so we only need to check three cases:
680 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
682 static limb felem_is_zero(const felem in)
684 limb zero, two224m96p1, two225m97p2;
686 zero = in[0] | in[1] | in[2] | in[3];
687 zero = (((int64_t) (zero) - 1) >> 63) & 1;
688 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
689 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
690 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
691 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
692 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
693 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
694 return (zero | two224m96p1 | two225m97p2);
697 static int felem_is_zero_int(const void *in)
699 return (int)(felem_is_zero(in) & ((limb) 1));
702 /* Invert a field element */
703 /* Computation chain copied from djb's code */
704 static void felem_inv(felem out, const felem in)
706 felem ftmp, ftmp2, ftmp3, ftmp4;
710 felem_square(tmp, in);
711 felem_reduce(ftmp, tmp); /* 2 */
712 felem_mul(tmp, in, ftmp);
713 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
714 felem_square(tmp, ftmp);
715 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
716 felem_mul(tmp, in, ftmp);
717 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
718 felem_square(tmp, ftmp);
719 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
720 felem_square(tmp, ftmp2);
721 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
722 felem_square(tmp, ftmp2);
723 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
724 felem_mul(tmp, ftmp2, ftmp);
725 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
726 felem_square(tmp, ftmp);
727 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
728 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
729 felem_square(tmp, ftmp2);
730 felem_reduce(ftmp2, tmp);
732 felem_mul(tmp, ftmp2, ftmp);
733 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
734 felem_square(tmp, ftmp2);
735 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
736 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
737 felem_square(tmp, ftmp3);
738 felem_reduce(ftmp3, tmp);
740 felem_mul(tmp, ftmp3, ftmp2);
741 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
742 felem_square(tmp, ftmp2);
743 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
744 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
745 felem_square(tmp, ftmp3);
746 felem_reduce(ftmp3, tmp);
748 felem_mul(tmp, ftmp3, ftmp2);
749 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
750 felem_square(tmp, ftmp3);
751 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
752 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
753 felem_square(tmp, ftmp4);
754 felem_reduce(ftmp4, tmp);
756 felem_mul(tmp, ftmp3, ftmp4);
757 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
758 felem_square(tmp, ftmp3);
759 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
760 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
761 felem_square(tmp, ftmp4);
762 felem_reduce(ftmp4, tmp);
764 felem_mul(tmp, ftmp2, ftmp4);
765 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
766 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
767 felem_square(tmp, ftmp2);
768 felem_reduce(ftmp2, tmp);
770 felem_mul(tmp, ftmp2, ftmp);
771 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
772 felem_square(tmp, ftmp);
773 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
774 felem_mul(tmp, ftmp, in);
775 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
776 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
777 felem_square(tmp, ftmp);
778 felem_reduce(ftmp, tmp);
780 felem_mul(tmp, ftmp, ftmp3);
781 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
785 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
788 static void copy_conditional(felem out, const felem in, limb icopy)
792 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
794 const limb copy = -icopy;
795 for (i = 0; i < 4; ++i) {
796 const limb tmp = copy & (in[i] ^ out[i]);
/******************************************************************************/
/*-
 * ELLIPTIC CURVE POINT OPERATIONS
 *
 * Points are represented in Jacobian projective coordinates:
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
 * or to the point at infinity if Z == 0.
 */
812 * Double an elliptic curve point:
813 * (X', Y', Z') = 2 * (X, Y, Z), where
814 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
815 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
816 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
817 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
818 * while x_out == y_in is not (maybe this works, but it's not tested).
821 point_double(felem x_out, felem y_out, felem z_out,
822 const felem x_in, const felem y_in, const felem z_in)
825 felem delta, gamma, beta, alpha, ftmp, ftmp2;
827 felem_assign(ftmp, x_in);
828 felem_assign(ftmp2, x_in);
831 felem_square(tmp, z_in);
832 felem_reduce(delta, tmp);
835 felem_square(tmp, y_in);
836 felem_reduce(gamma, tmp);
839 felem_mul(tmp, x_in, gamma);
840 felem_reduce(beta, tmp);
842 /* alpha = 3*(x-delta)*(x+delta) */
843 felem_diff(ftmp, delta);
844 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
845 felem_sum(ftmp2, delta);
846 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
847 felem_scalar(ftmp2, 3);
848 /* ftmp2[i] < 3 * 2^58 < 2^60 */
849 felem_mul(tmp, ftmp, ftmp2);
850 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
851 felem_reduce(alpha, tmp);
853 /* x' = alpha^2 - 8*beta */
854 felem_square(tmp, alpha);
855 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
856 felem_assign(ftmp, beta);
857 felem_scalar(ftmp, 8);
858 /* ftmp[i] < 8 * 2^57 = 2^60 */
859 felem_diff_128_64(tmp, ftmp);
860 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
861 felem_reduce(x_out, tmp);
863 /* z' = (y + z)^2 - gamma - delta */
864 felem_sum(delta, gamma);
865 /* delta[i] < 2^57 + 2^57 = 2^58 */
866 felem_assign(ftmp, y_in);
867 felem_sum(ftmp, z_in);
868 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
869 felem_square(tmp, ftmp);
870 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
871 felem_diff_128_64(tmp, delta);
872 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
873 felem_reduce(z_out, tmp);
875 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
876 felem_scalar(beta, 4);
877 /* beta[i] < 4 * 2^57 = 2^59 */
878 felem_diff(beta, x_out);
879 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
880 felem_mul(tmp, alpha, beta);
881 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
882 felem_square(tmp2, gamma);
883 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
884 widefelem_scalar(tmp2, 8);
885 /* tmp2[i] < 8 * 2^116 = 2^119 */
886 widefelem_diff(tmp, tmp2);
887 /* tmp[i] < 2^119 + 2^120 < 2^121 */
888 felem_reduce(y_out, tmp);
892 * Add two elliptic curve points:
893 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
894 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
895 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
896 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
897 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
898 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
900 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
904 * This function is not entirely constant-time: it includes a branch for
905 * checking whether the two input points are equal, (while not equal to the
906 * point at infinity). This case never happens during single point
907 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
909 static void point_add(felem x3, felem y3, felem z3,
910 const felem x1, const felem y1, const felem z1,
911 const int mixed, const felem x2, const felem y2,
914 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
916 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
921 felem_square(tmp, z2);
922 felem_reduce(ftmp2, tmp);
925 felem_mul(tmp, ftmp2, z2);
926 felem_reduce(ftmp4, tmp);
928 /* ftmp4 = z2^3*y1 */
929 felem_mul(tmp2, ftmp4, y1);
930 felem_reduce(ftmp4, tmp2);
932 /* ftmp2 = z2^2*x1 */
933 felem_mul(tmp2, ftmp2, x1);
934 felem_reduce(ftmp2, tmp2);
937 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
940 /* ftmp4 = z2^3*y1 */
941 felem_assign(ftmp4, y1);
943 /* ftmp2 = z2^2*x1 */
944 felem_assign(ftmp2, x1);
948 felem_square(tmp, z1);
949 felem_reduce(ftmp, tmp);
952 felem_mul(tmp, ftmp, z1);
953 felem_reduce(ftmp3, tmp);
956 felem_mul(tmp, ftmp3, y2);
957 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
959 /* ftmp3 = z1^3*y2 - z2^3*y1 */
960 felem_diff_128_64(tmp, ftmp4);
961 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
962 felem_reduce(ftmp3, tmp);
965 felem_mul(tmp, ftmp, x2);
966 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
968 /* ftmp = z1^2*x2 - z2^2*x1 */
969 felem_diff_128_64(tmp, ftmp2);
970 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
971 felem_reduce(ftmp, tmp);
974 * The formulae are incorrect if the points are equal, in affine coordinates
975 * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
978 * We use bitwise operations to avoid potential side-channels introduced by
979 * the short-circuiting behaviour of boolean operators.
981 x_equal = felem_is_zero(ftmp);
982 y_equal = felem_is_zero(ftmp3);
984 * The special case of either point being the point at infinity (z1 and/or
985 * z2 are zero), is handled separately later on in this function, so we
986 * avoid jumping to point_double here in those special cases.
988 z1_is_zero = felem_is_zero(z1);
989 z2_is_zero = felem_is_zero(z2);
992 * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
993 * specific implementation `felem_is_zero()` returns truth as `0x1`
994 * (rather than `0xff..ff`).
996 * This implies that `~true` in this implementation becomes
997 * `0xff..fe` (rather than `0x0`): for this reason, to be used in
998 * the if expression, we mask out only the last bit in the next
1001 points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
1005 * This is obviously not constant-time but, as mentioned before, this
1006 * case never happens during single point multiplication, so there is no
1007 * timing leak for ECDH or ECDSA signing.
1009 point_double(x3, y3, z3, x1, y1, z1);
1015 felem_mul(tmp, z1, z2);
1016 felem_reduce(ftmp5, tmp);
1018 /* special case z2 = 0 is handled later */
1019 felem_assign(ftmp5, z1);
1022 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1023 felem_mul(tmp, ftmp, ftmp5);
1024 felem_reduce(z_out, tmp);
1026 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1027 felem_assign(ftmp5, ftmp);
1028 felem_square(tmp, ftmp);
1029 felem_reduce(ftmp, tmp);
1031 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1032 felem_mul(tmp, ftmp, ftmp5);
1033 felem_reduce(ftmp5, tmp);
1035 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1036 felem_mul(tmp, ftmp2, ftmp);
1037 felem_reduce(ftmp2, tmp);
1039 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1040 felem_mul(tmp, ftmp4, ftmp5);
1041 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1043 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1044 felem_square(tmp2, ftmp3);
1045 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1047 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1048 felem_diff_128_64(tmp2, ftmp5);
1049 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1051 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1052 felem_assign(ftmp5, ftmp2);
1053 felem_scalar(ftmp5, 2);
1054 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1057 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1058 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1060 felem_diff_128_64(tmp2, ftmp5);
1061 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1062 felem_reduce(x_out, tmp2);
1064 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1065 felem_diff(ftmp2, x_out);
1066 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1069 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1071 felem_mul(tmp2, ftmp3, ftmp2);
1072 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1075 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1076 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1078 widefelem_diff(tmp2, tmp);
1079 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1080 felem_reduce(y_out, tmp2);
1083 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1084 * the point at infinity, so we need to check for this separately
1088 * if point 1 is at infinity, copy point 2 to output, and vice versa
1090 copy_conditional(x_out, x2, z1_is_zero);
1091 copy_conditional(x_out, x1, z2_is_zero);
1092 copy_conditional(y_out, y2, z1_is_zero);
1093 copy_conditional(y_out, y1, z2_is_zero);
1094 copy_conditional(z_out, z2, z1_is_zero);
1095 copy_conditional(z_out, z1, z2_is_zero);
1096 felem_assign(x3, x_out);
1097 felem_assign(y3, y_out);
1098 felem_assign(z3, z_out);
1102 * select_point selects the |idx|th point from a precomputation table and
1104 * The pre_comp array argument should have |size| entries
/*
 * select_point: copy the |idx|th precomputed point (X, Y, Z triple of
 * felems) from |pre_comp| into |out| without a data-dependent memory
 * access pattern: every table entry is read, and only the wanted one is
 * OR-ed into the zeroed output under a mask.
 */
1106 static void select_point(const u64 idx, unsigned int size,
1107 const felem pre_comp[][3], felem out[3])
1110 limb *outlimbs = &out[0][0];
/* start from all-zero so only the masked-in entry survives */
1112 memset(out, 0, sizeof(*out) * 3);
1113 for (i = 0; i < size; i++) {
1114 const limb *inlimbs = &pre_comp[i][0][0];
/*
 * NOTE(review): the computation of |mask| is elided in this excerpt;
 * presumably it is all-ones iff i == idx — confirm against the full file.
 */
1121 for (j = 0; j < 4 * 3; j++)
1122 outlimbs[j] |= inlimbs[j] & mask;
1126 /* get_bit returns the |i|th bit in |in| */
/*
 * get_bit returns the |i|th bit of the little-endian byte array |in|
 * (bit 0 is the least significant bit of in[0]).
 * NOTE(review): the body between the signature and the return is elided
 * here; presumably it bounds-checks |i| — confirm against the full file.
 */
1127 static char get_bit(const felem_bytearray in, unsigned i)
1131 return (in[i >> 3] >> (i & 7)) & 1;
1135 * Interleaved point multiplication using precomputed point multiples: The
1136 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1137 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1138 * generator, using certain (large) precomputed multiples in g_pre_comp.
1139 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
/*
 * batch_mul: interleaved multi-scalar multiplication.  Computes
 * sum(scalars[i]*points[i]) plus, when g_scalar != NULL, g_scalar*G
 * using the per-point tables pre_comp[][17][3] and the generator tables
 * g_pre_comp[2][16][3].  Result is returned in Jacobian coordinates
 * (x_out, y_out, z_out).  |mixed| selects mixed (affine) addition for
 * the precomputed point multiples.
 */
1141 static void batch_mul(felem x_out, felem y_out, felem z_out,
1142 const felem_bytearray scalars[],
1143 const unsigned num_points, const u8 *g_scalar,
1144 const int mixed, const felem pre_comp[][17][3],
1145 const felem g_pre_comp[2][16][3])
/* gen_mul is nonzero iff a generator multiple must be folded in */
1149 unsigned gen_mul = (g_scalar != NULL);
1150 felem nq[3], tmp[4];
1154 /* set nq to the point at infinity */
1155 memset(nq, 0, sizeof(nq));
1158 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1159 * of the generator (two in each of the last 28 rounds) and additions of
1160 * other points multiples (every 5th round).
/*
 * NOTE(review): 'skip' suppresses additions while nq is still the point
 * at infinity; its remaining updates/uses are elided in this excerpt.
 */
1162 skip = 1; /* save two point operations in the first
/* 221 rounds when arbitrary points are present, 28 for generator only */
1164 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1167 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1169 /* add multiples of the generator */
1170 if (gen_mul && (i <= 27)) {
1171 /* first, look 28 bits upwards */
1172 bits = get_bit(g_scalar, i + 196) << 3;
1173 bits |= get_bit(g_scalar, i + 140) << 2;
1174 bits |= get_bit(g_scalar, i + 84) << 1;
1175 bits |= get_bit(g_scalar, i + 28);
1176 /* select the point to add, in constant time */
1177 select_point(bits, 16, g_pre_comp[1], tmp);
1180 /* value 1 below is argument for "mixed" */
1181 point_add(nq[0], nq[1], nq[2],
1182 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
/* presumably the 'skip' branch: nq is still infinity, so just copy */
1184 memcpy(nq, tmp, 3 * sizeof(felem));
1188 /* second, look at the current position */
1189 bits = get_bit(g_scalar, i + 168) << 3;
1190 bits |= get_bit(g_scalar, i + 112) << 2;
1191 bits |= get_bit(g_scalar, i + 56) << 1;
1192 bits |= get_bit(g_scalar, i);
1193 /* select the point to add, in constant time */
1194 select_point(bits, 16, g_pre_comp[0], tmp);
1195 point_add(nq[0], nq[1], nq[2],
1196 nq[0], nq[1], nq[2],
1197 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1200 /* do other additions every 5 doublings */
1201 if (num_points && (i % 5 == 0)) {
1202 /* loop over all scalars */
1203 for (num = 0; num < num_points; ++num) {
/* gather a signed window of 6 scalar bits around position i */
1204 bits = get_bit(scalars[num], i + 4) << 5;
1205 bits |= get_bit(scalars[num], i + 3) << 4;
1206 bits |= get_bit(scalars[num], i + 2) << 3;
1207 bits |= get_bit(scalars[num], i + 1) << 2;
1208 bits |= get_bit(scalars[num], i) << 1;
1209 bits |= get_bit(scalars[num], i - 1);
1210 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1212 /* select the point to add or subtract */
1213 select_point(digit, 17, pre_comp[num], tmp);
1214 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
/* conditionally substitute -Y, keeping the selection constant-time */
1216 copy_conditional(tmp[1], tmp[3], sign);
1219 point_add(nq[0], nq[1], nq[2],
1220 nq[0], nq[1], nq[2],
1221 mixed, tmp[0], tmp[1], tmp[2]);
/* presumably the 'skip' branch again: first real addition */
1223 memcpy(nq, tmp, 3 * sizeof(felem));
/* hand the accumulated Jacobian point back to the caller */
1229 felem_assign(x_out, nq[0]);
1230 felem_assign(y_out, nq[1]);
1231 felem_assign(z_out, nq[2]);
1234 /******************************************************************************/
1236 * FUNCTIONS TO MANAGE PRECOMPUTATION
/*
 * nistp224_pre_comp_new: allocate a zero-initialized precomputation
 * structure with reference count 1 and a fresh lock.  Raises
 * ERR_R_MALLOC_FAILURE on either allocation failing; the failure
 * cleanup paths are elided in this excerpt.
 */
1239 static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1241 NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1244 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
/* caller holds the first reference */
1248 ret->references = 1;
1250 ret->lock = CRYPTO_THREAD_lock_new();
1251 if (ret->lock == NULL) {
1252 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
/*
 * EC_nistp224_pre_comp_dup: share |p| by bumping its reference count
 * under p->lock; presumably returns |p| (the return statement is elided
 * in this excerpt).
 */
1259 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1263 CRYPTO_UP_REF(&p->references, &i, p->lock);
/*
 * EC_nistp224_pre_comp_free: drop one reference; when the count reaches
 * zero (the early-return guard is elided in this excerpt) the lock — and
 * presumably the structure itself — is freed.
 */
1267 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1274 CRYPTO_DOWN_REF(&p->references, &i, p->lock);
/*
 * NOTE(review): second argument 'x' looks wrong — the parameter is 'p'.
 * Harmless when REF_PRINT_COUNT expands to nothing, but verify against
 * debug-refcount builds.
 */
1275 REF_PRINT_COUNT("EC_nistp224", x);
1278 REF_ASSERT_ISNT(i < 0);
1280 CRYPTO_THREAD_lock_free(p->lock);
1284 /******************************************************************************/
1286 * OPENSSL EC_METHOD FUNCTIONS
/*
 * ec_GFp_nistp224_group_init: delegate to the generic GFp init, then
 * record that this curve's 'a' coefficient is -3 (enables the faster
 * a = -3 doubling formulas elsewhere).
 */
1289 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1292 ret = ec_GFp_simple_group_init(group);
1293 group->a_is_minus3 = 1;
/*
 * ec_GFp_nistp224_group_set_curve: accept only the exact NIST P-224
 * parameters.  The caller's p/a/b are compared against the built-in
 * nistp224_curve_params; a mismatch raises EC_R_WRONG_CURVE_PARAMETERS.
 * On success the fast NIST reduction is installed and the generic
 * set_curve does the rest.
 */
1297 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1298 const BIGNUM *a, const BIGNUM *b,
1302 BIGNUM *curve_p, *curve_a, *curve_b;
/* new_ctx is only non-NULL when we had to allocate our own BN_CTX */
1304 BN_CTX *new_ctx = NULL;
1307 ctx = new_ctx = BN_CTX_new();
1313 curve_p = BN_CTX_get(ctx);
1314 curve_a = BN_CTX_get(ctx);
1315 curve_b = BN_CTX_get(ctx);
/* only the last BN_CTX_get needs checking: earlier failures propagate */
1316 if (curve_b == NULL)
1318 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1319 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1320 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1321 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1322 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1323 EC_R_WRONG_CURVE_PARAMETERS);
/* use the dedicated NIST P-224 modular reduction for field arithmetic */
1326 group->field_mod_func = BN_nist_mod_224;
1327 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
/* no-op when the caller supplied the ctx */
1331 BN_CTX_free(new_ctx);
1337 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
/*
 * Convert |point| from Jacobian (X, Y, Z) to affine coordinates:
 * x = X/Z^2, y = Y/Z^3, computed in felem arithmetic and contracted to
 * the unique minimal representation before conversion back to BIGNUMs.
 * Fails with EC_R_POINT_AT_INFINITY for the point at infinity.
 */
1340 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1341 const EC_POINT *point,
1342 BIGNUM *x, BIGNUM *y,
1345 felem z1, z2, x_in, y_in, x_out, y_out;
1348 if (EC_POINT_is_at_infinity(group, point)) {
1349 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1350 EC_R_POINT_AT_INFINITY);
1353 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1354 (!BN_to_felem(z1, point->Z)))
/*
 * NOTE(review): the line computing z2 is elided in this excerpt;
 * presumably z2 = z1^-1 (a felem_inv call), making z1 below Z^-2 —
 * confirm against the full file.
 */
1357 felem_square(tmp, z2);
1358 felem_reduce(z1, tmp);
1359 felem_mul(tmp, x_in, z1);
1360 felem_reduce(x_in, tmp);
/* contract to the canonical representative before exporting */
1361 felem_contract(x_out, x_in);
1363 if (!felem_to_BN(x, x_out)) {
1364 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
/* extend Z^-2 by another Z^-1 factor to obtain Z^-3 for y */
1369 felem_mul(tmp, z1, z2);
1370 felem_reduce(z1, tmp);
1371 felem_mul(tmp, y_in, z1);
1372 felem_reduce(y_in, tmp);
1373 felem_contract(y_out, y_in);
1375 if (!felem_to_BN(y, y_out)) {
1376 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
/*
 * make_points_affine: batch-convert |num| Jacobian points to affine form
 * by delegating to the shared nistp helper, passing this file's felem
 * primitives as type-erased callbacks.  tmp_felems must hold num+1
 * scratch elements.  (Several callback arguments are elided in this
 * excerpt.)
 */
1384 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1385 felem tmp_felems[ /* num+1 */ ])
1388 * Runs in constant time, unless an input is the point at infinity (which
1389 * normally shouldn't happen).
1391 ec_GFp_nistp_points_make_affine_internal(num,
1395 (void (*)(void *))felem_one,
1397 (void (*)(void *, const void *))
1399 (void (*)(void *, const void *))
1400 felem_square_reduce, (void (*)
1407 (void (*)(void *, const void *))
1409 (void (*)(void *, const void *))
1414 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1415 * values Result is stored in r (r can equal one of the inputs).
/*
 * ec_GFp_nistp224_points_mul: compute
 *     r = scalar*G + sum(scalars[i]*points[i])
 * ignoring NULL scalars/points.  Uses stored or built-in generator
 * precomputation when available; otherwise the generator is appended to
 * the list and handled like an ordinary point.  Scalars outside
 * [0, 2^224) are first reduced mod the group order.
 */
1417 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1418 const BIGNUM *scalar, size_t num,
1419 const EC_POINT *points[],
1420 const BIGNUM *scalars[], BN_CTX *ctx)
1426 BIGNUM *x, *y, *z, *tmp_scalar;
1427 felem_bytearray g_secret;
/* heap buffers sized by num_points; freed on the common exit path */
1428 felem_bytearray *secrets = NULL;
1429 felem (*pre_comp)[17][3] = NULL;
1430 felem *tmp_felems = NULL;
1432 int have_pre_comp = 0;
1433 size_t num_points = num;
1434 felem x_in, y_in, z_in, x_out, y_out, z_out;
1435 NISTP224_PRE_COMP *pre = NULL;
1436 const felem(*g_pre_comp)[16][3] = NULL;
1437 EC_POINT *generator = NULL;
1438 const EC_POINT *p = NULL;
1439 const BIGNUM *p_scalar = NULL;
1442 x = BN_CTX_get(ctx);
1443 y = BN_CTX_get(ctx);
1444 z = BN_CTX_get(ctx);
1445 tmp_scalar = BN_CTX_get(ctx);
/* only the last BN_CTX_get needs checking: earlier failures propagate */
1446 if (tmp_scalar == NULL)
1449 if (scalar != NULL) {
1450 pre = group->pre_comp.nistp224;
1452 /* we have precomputation, try to use it */
1453 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1455 /* try to use the standard precomputation */
1456 g_pre_comp = &gmul[0];
1457 generator = EC_POINT_new(group);
1458 if (generator == NULL)
1460 /* get the generator from precomputation */
1461 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1462 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1463 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1464 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1467 if (!ec_GFp_simple_set_Jprojective_coordinates_GFp(group, generator, x,
1470 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1471 /* precomputation matches generator */
1475 * we don't have valid precomputation: treat the generator as a
/* append the generator as an ordinary point in that case */
1478 num_points = num_points + 1;
1481 if (num_points > 0) {
1482 if (num_points >= 3) {
1484 * unless we precompute multiples for just one or two points,
1485 * converting those into affine form is time well spent
1489 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1490 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
/* tmp_felems is only needed (and only checked) when mixed is set */
1493 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1494 if ((secrets == NULL) || (pre_comp == NULL)
1495 || (mixed && (tmp_felems == NULL))) {
1496 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1501 * we treat NULL scalars as 0, and NULL points as points at infinity,
1502 * i.e., they contribute nothing to the linear combination
1504 for (i = 0; i < num_points; ++i) {
/* the extra slot (i == num) carries the generator, per the branch above */
1507 p = EC_GROUP_get0_generator(group);
1510 /* the i^th point */
1512 p_scalar = scalars[i];
1514 if ((p_scalar != NULL) && (p != NULL)) {
1515 /* reduce scalar to 0 <= scalar < 2^224 */
1516 if ((BN_num_bits(p_scalar) > 224)
1517 || (BN_is_negative(p_scalar))) {
1519 * this is an unusual input, and we don't guarantee
1522 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1523 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
/* export little-endian, zero-padded to the fixed secret size */
1526 num_bytes = BN_bn2lebinpad(tmp_scalar,
1527 secrets[i], sizeof(secrets[i]));
1529 num_bytes = BN_bn2lebinpad(p_scalar,
1530 secrets[i], sizeof(secrets[i]));
1532 if (num_bytes < 0) {
1533 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1536 /* precompute multiples */
1537 if ((!BN_to_felem(x_out, p->X)) ||
1538 (!BN_to_felem(y_out, p->Y)) ||
1539 (!BN_to_felem(z_out, p->Z)))
1541 felem_assign(pre_comp[i][1][0], x_out);
1542 felem_assign(pre_comp[i][1][1], y_out);
1543 felem_assign(pre_comp[i][1][2], z_out);
/* build j*P for j = 2..16: add P for odd j, double j/2*P for even j */
1544 for (j = 2; j <= 16; ++j) {
1546 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1547 pre_comp[i][j][2], pre_comp[i][1][0],
1548 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1549 pre_comp[i][j - 1][0],
1550 pre_comp[i][j - 1][1],
1551 pre_comp[i][j - 1][2]);
1553 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1554 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1555 pre_comp[i][j / 2][1],
1556 pre_comp[i][j / 2][2]);
/* only reached when mixed was chosen above */
1562 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1565 /* the scalar for the generator */
1566 if ((scalar != NULL) && (have_pre_comp)) {
1567 memset(g_secret, 0, sizeof(g_secret));
1568 /* reduce scalar to 0 <= scalar < 2^224 */
1569 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1571 * this is an unusual input, and we don't guarantee
1574 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1575 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1578 num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1580 num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1582 /* do the multiplication with generator precomputation */
1583 batch_mul(x_out, y_out, z_out,
1584 (const felem_bytearray(*))secrets, num_points,
1586 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1588 /* do the multiplication without generator precomputation */
1589 batch_mul(x_out, y_out, z_out,
1590 (const felem_bytearray(*))secrets, num_points,
1591 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1593 /* reduce the output to its unique minimal representation */
1594 felem_contract(x_in, x_out);
1595 felem_contract(y_in, y_out);
1596 felem_contract(z_in, z_out);
1597 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1598 (!felem_to_BN(z, z_in))) {
1599 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1602 ret = ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
/* common exit: release everything allocated above (free(NULL) is a no-op) */
1606 EC_POINT_free(generator);
1607 OPENSSL_free(secrets);
1608 OPENSSL_free(pre_comp);
1609 OPENSSL_free(tmp_felems);
/*
 * ec_GFp_nistp224_precompute_mult: (re)build the generator
 * precomputation tables for |group|.  For the standard P-224 generator
 * the compiled-in 'gmul' tables are copied; otherwise two 16-entry
 * tables of small combinations of 2^(28k)*G are computed from scratch,
 * made affine, and attached to the group.
 */
1613 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1616 NISTP224_PRE_COMP *pre = NULL;
1619 EC_POINT *generator = NULL;
/* scratch for make_points_affine: 31 table points + 1 extra */
1620 felem tmp_felems[32];
1622 BN_CTX *new_ctx = NULL;
1625 /* throw away old precomputation */
1626 EC_pre_comp_free(group);
/* allocate our own ctx only when the caller passed none */
1630 ctx = new_ctx = BN_CTX_new();
1636 x = BN_CTX_get(ctx);
1637 y = BN_CTX_get(ctx);
1640 /* get the generator */
1641 if (group->generator == NULL)
1643 generator = EC_POINT_new(group);
1644 if (generator == NULL)
/* the standard generator's affine coordinates, from the built-in params */
1646 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1647 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1648 if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1650 if ((pre = nistp224_pre_comp_new()) == NULL)
1653 * if the generator is the standard one, use built-in precomputation
1655 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1656 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
/* custom generator: seed table slot [0][1] with G itself */
1659 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1660 (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1661 (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1664 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1665 * 2^140*G, 2^196*G for the second one
/* i walks the powers-of-two slots 1, 2, 4, 8 of each table */
1667 for (i = 1; i <= 8; i <<= 1) {
/* 28 doublings total: first one below, 27 more in the loop */
1668 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1669 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1670 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1671 for (j = 0; j < 27; ++j) {
1672 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1673 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1674 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
/* another 28 doublings to reach the next power-of-two slot of table 0 */
1678 point_double(pre->g_pre_comp[0][2 * i][0],
1679 pre->g_pre_comp[0][2 * i][1],
1680 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1681 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1682 for (j = 0; j < 27; ++j) {
1683 point_double(pre->g_pre_comp[0][2 * i][0],
1684 pre->g_pre_comp[0][2 * i][1],
1685 pre->g_pre_comp[0][2 * i][2],
1686 pre->g_pre_comp[0][2 * i][0],
1687 pre->g_pre_comp[0][2 * i][1],
1688 pre->g_pre_comp[0][2 * i][2]);
/* fill in the remaining table entries as sums of the power-of-two ones */
1691 for (i = 0; i < 2; i++) {
1692 /* g_pre_comp[i][0] is the point at infinity */
1693 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1694 /* the remaining multiples */
1695 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1696 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1697 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1698 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1699 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1700 pre->g_pre_comp[i][2][2]);
1701 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1702 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1703 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1704 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1705 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1706 pre->g_pre_comp[i][2][2]);
1707 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1708 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1709 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1710 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1711 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1712 pre->g_pre_comp[i][4][2]);
1714 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1716 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1717 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1718 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1719 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1720 pre->g_pre_comp[i][2][2]);
1721 for (j = 1; j < 8; ++j) {
1722 /* odd multiples: add G resp. 2^28*G */
1723 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1724 pre->g_pre_comp[i][2 * j + 1][1],
1725 pre->g_pre_comp[i][2 * j + 1][2],
1726 pre->g_pre_comp[i][2 * j][0],
1727 pre->g_pre_comp[i][2 * j][1],
1728 pre->g_pre_comp[i][2 * j][2], 0,
1729 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1730 pre->g_pre_comp[i][1][2]);
/* all 31 non-infinity entries (skipping the two [i][0] slots) */
1733 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
/* transfer ownership of pre to the group */
1736 SETPRECOMP(group, nistp224, pre);
1741 EC_POINT_free(generator);
/* no-op when the caller supplied the ctx */
1743 BN_CTX_free(new_ctx);
/* on the error path, drops the reference we still hold */
1745 EC_nistp224_pre_comp_free(pre);
1749 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1751 return HAVEPRECOMP(group, nistp224);