2 * Copyright 2010-2023 The OpenSSL Project Authors. All Rights Reserved.
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
10 /* Copyright 2011 Google Inc.
12 * Licensed under the Apache License, Version 2.0 (the "License");
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
17 * http://www.apache.org/licenses/LICENSE-2.0
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
27 * ECDSA low level APIs are deprecated for public use, but still ok for
30 #include "internal/deprecated.h"
33 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
35 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
36 * and Adam Langley's public domain 64-bit C implementation of curve25519
39 #include <openssl/opensslconf.h>
43 #include <openssl/err.h>
46 #include "internal/numbers.h"
49 # error "Your compiler doesn't appear to support 128-bit integer types"
55 /******************************************************************************/
57 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
59 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
60 * using 64-bit coefficients called 'limbs',
61 * and sometimes (for multiplication results) as
62 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
63 * using 128-bit coefficients called 'widelimbs'.
64 * A 4-limb representation is an 'felem';
65 * a 7-widelimb representation is a 'widefelem'.
66 * Even within felems, bits of adjacent limbs overlap, and we don't always
67 * reduce the representations: we ensure that inputs to each felem
68 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
69 * and fit into a 128-bit word without overflow. The coefficients are then
70 * again partially reduced to obtain an felem satisfying a_i < 2^57.
71 * We only reduce to the unique minimal representation at the end of the
75 typedef uint64_t limb;
76 typedef uint64_t limb_aX __attribute((__aligned__(1)));
77 typedef uint128_t widelimb;
79 typedef limb felem[4];
80 typedef widelimb widefelem[7];
83 * Field element represented as a byte array. 28*8 = 224 bits is also the
84 * group order size for the elliptic curve, and we also use this type for
85 * scalars for point multiplication.
87 typedef u8 felem_bytearray[28];
89 static const felem_bytearray nistp224_curve_params[5] = {
90 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
91 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
92 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
93 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
94 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
95 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
96 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
97 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
98 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
99 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
100 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
101 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
102 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
103 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
104 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
108 * Precomputed multiples of the standard generator
109 * Points are given in coordinates (X, Y, Z) where Z normally is 1
110 * (0 for the point at infinity).
111 * For each field element, slice a_0 is word 0, etc.
113 * The table has 2 * 16 elements, starting with the following:
114 * index | bits | point
115 * ------+---------+------------------------------
118 * 2 | 0 0 1 0 | 2^56G
119 * 3 | 0 0 1 1 | (2^56 + 1)G
120 * 4 | 0 1 0 0 | 2^112G
121 * 5 | 0 1 0 1 | (2^112 + 1)G
122 * 6 | 0 1 1 0 | (2^112 + 2^56)G
123 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
124 * 8 | 1 0 0 0 | 2^168G
125 * 9 | 1 0 0 1 | (2^168 + 1)G
126 * 10 | 1 0 1 0 | (2^168 + 2^56)G
127 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
128 * 12 | 1 1 0 0 | (2^168 + 2^112)G
129 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
130 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
131 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
132 * followed by a copy of this with each element multiplied by 2^28.
134 * The reason for this is so that we can clock bits into four different
135 * locations when doing simple scalar multiplies against the base point,
136 * and then another four locations using the second 16 elements.
138 static const felem gmul[2][16][3] = {
142 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
143 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
145 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
146 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
148 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
149 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
151 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
152 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
154 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
155 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
157 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
158 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
160 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
161 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
163 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
164 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
166 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
167 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
169 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
170 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
172 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
173 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
175 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
176 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
178 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
179 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
181 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
182 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
184 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
185 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
190 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
191 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
193 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
194 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
196 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
197 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
199 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
200 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
202 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
203 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
205 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
206 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
208 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
209 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
211 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
212 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
214 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
215 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
217 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
218 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
220 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
221 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
223 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
224 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
226 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
227 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
229 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
230 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
232 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
233 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
237 /* Precomputation for the group generator. */
238 struct nistp224_pre_comp_st {
239 felem g_pre_comp[2][16][3];
240 CRYPTO_REF_COUNT references;
243 const EC_METHOD *EC_GFp_nistp224_method(void)
245 static const EC_METHOD ret = {
246 EC_FLAGS_DEFAULT_OCT,
247 NID_X9_62_prime_field,
248 ossl_ec_GFp_nistp224_group_init,
249 ossl_ec_GFp_simple_group_finish,
250 ossl_ec_GFp_simple_group_clear_finish,
251 ossl_ec_GFp_nist_group_copy,
252 ossl_ec_GFp_nistp224_group_set_curve,
253 ossl_ec_GFp_simple_group_get_curve,
254 ossl_ec_GFp_simple_group_get_degree,
255 ossl_ec_group_simple_order_bits,
256 ossl_ec_GFp_simple_group_check_discriminant,
257 ossl_ec_GFp_simple_point_init,
258 ossl_ec_GFp_simple_point_finish,
259 ossl_ec_GFp_simple_point_clear_finish,
260 ossl_ec_GFp_simple_point_copy,
261 ossl_ec_GFp_simple_point_set_to_infinity,
262 ossl_ec_GFp_simple_point_set_affine_coordinates,
263 ossl_ec_GFp_nistp224_point_get_affine_coordinates,
264 0 /* point_set_compressed_coordinates */ ,
267 ossl_ec_GFp_simple_add,
268 ossl_ec_GFp_simple_dbl,
269 ossl_ec_GFp_simple_invert,
270 ossl_ec_GFp_simple_is_at_infinity,
271 ossl_ec_GFp_simple_is_on_curve,
272 ossl_ec_GFp_simple_cmp,
273 ossl_ec_GFp_simple_make_affine,
274 ossl_ec_GFp_simple_points_make_affine,
275 ossl_ec_GFp_nistp224_points_mul,
276 ossl_ec_GFp_nistp224_precompute_mult,
277 ossl_ec_GFp_nistp224_have_precompute_mult,
278 ossl_ec_GFp_nist_field_mul,
279 ossl_ec_GFp_nist_field_sqr,
281 ossl_ec_GFp_simple_field_inv,
282 0 /* field_encode */ ,
283 0 /* field_decode */ ,
284 0, /* field_set_to_one */
285 ossl_ec_key_simple_priv2oct,
286 ossl_ec_key_simple_oct2priv,
288 ossl_ec_key_simple_generate_key,
289 ossl_ec_key_simple_check_key,
290 ossl_ec_key_simple_generate_public_key,
293 ossl_ecdh_simple_compute_key,
294 ossl_ecdsa_simple_sign_setup,
295 ossl_ecdsa_simple_sign_sig,
296 ossl_ecdsa_simple_verify_sig,
297 0, /* field_inverse_mod_ord */
298 0, /* blind_coordinates */
308 * Helper functions to convert field elements to/from internal representation
310 static void bin28_to_felem(felem out, const u8 in[28])
312 out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
313 out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
314 out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
315 out[3] = (*((const limb_aX *)(in + 20))) >> 8;
318 static void felem_to_bin28(u8 out[28], const felem in)
321 for (i = 0; i < 7; ++i) {
322 out[i] = in[0] >> (8 * i);
323 out[i + 7] = in[1] >> (8 * i);
324 out[i + 14] = in[2] >> (8 * i);
325 out[i + 21] = in[3] >> (8 * i);
329 /* From OpenSSL BIGNUM to internal representation */
330 static int BN_to_felem(felem out, const BIGNUM *bn)
332 felem_bytearray b_out;
335 if (BN_is_negative(bn)) {
336 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
339 num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
341 ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
344 bin28_to_felem(out, b_out);
348 /* From internal representation to OpenSSL BIGNUM */
349 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
351 felem_bytearray b_out;
352 felem_to_bin28(b_out, in);
353 return BN_lebin2bn(b_out, sizeof(b_out), out);
356 /******************************************************************************/
360 * Field operations, using the internal representation of field elements.
361 * NB! These operations are specific to our point multiplication and cannot be
362 * expected to be correct in general - e.g., multiplication with a large scalar
363 * will cause an overflow.
367 static void felem_one(felem out)
375 static void felem_assign(felem out, const felem in)
383 /* Sum two field elements: out += in */
384 static void felem_sum(felem out, const felem in)
392 /* Subtract field elements: out -= in */
393 /* Assumes in[i] < 2^57 */
394 static void felem_diff(felem out, const felem in)
396 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
397 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
398 static const limb two58m42m2 = (((limb) 1) << 58) -
399 (((limb) 1) << 42) - (((limb) 1) << 2);
401 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
403 out[1] += two58m42m2;
413 /* Subtract in unreduced 128-bit mode: out -= in */
414 /* Assumes in[i] < 2^119 */
415 static void widefelem_diff(widefelem out, const widefelem in)
417 static const widelimb two120 = ((widelimb) 1) << 120;
418 static const widelimb two120m64 = (((widelimb) 1) << 120) -
419 (((widelimb) 1) << 64);
420 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
421 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
423 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
428 out[4] += two120m104m64;
441 /* Subtract in mixed mode: out128 -= in64 */
443 static void felem_diff_128_64(widefelem out, const felem in)
445 static const widelimb two64p8 = (((widelimb) 1) << 64) +
446 (((widelimb) 1) << 8);
447 static const widelimb two64m8 = (((widelimb) 1) << 64) -
448 (((widelimb) 1) << 8);
449 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
450 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
452 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
454 out[1] += two64m48m8;
465 * Multiply a field element by a scalar: out = out * scalar The scalars we
466 * actually use are small, so results fit without overflow
468 static void felem_scalar(felem out, const limb scalar)
477 * Multiply an unreduced field element by a scalar: out = out * scalar The
478 * scalars we actually use are small, so results fit without overflow
480 static void widefelem_scalar(widefelem out, const widelimb scalar)
491 /* Square a field element: out = in^2 */
492 static void felem_square(widefelem out, const felem in)
494 limb tmp0, tmp1, tmp2;
498 out[0] = ((widelimb) in[0]) * in[0];
499 out[1] = ((widelimb) in[0]) * tmp1;
500 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
501 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
502 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
503 out[5] = ((widelimb) in[3]) * tmp2;
504 out[6] = ((widelimb) in[3]) * in[3];
507 /* Multiply two field elements: out = in1 * in2 */
508 static void felem_mul(widefelem out, const felem in1, const felem in2)
510 out[0] = ((widelimb) in1[0]) * in2[0];
511 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
512 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
513 ((widelimb) in1[2]) * in2[0];
514 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
515 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
516 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
517 ((widelimb) in1[3]) * in2[1];
518 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
519 out[6] = ((widelimb) in1[3]) * in2[3];
523 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
524 * Requires in[i] < 2^126,
525 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
526 static void felem_reduce(felem out, const widefelem in)
528 static const widelimb two127p15 = (((widelimb) 1) << 127) +
529 (((widelimb) 1) << 15);
530 static const widelimb two127m71 = (((widelimb) 1) << 127) -
531 (((widelimb) 1) << 71);
532 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
533 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
536 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
537 output[0] = in[0] + two127p15;
538 output[1] = in[1] + two127m71m55;
539 output[2] = in[2] + two127m71;
543 /* Eliminate in[4], in[5], in[6] */
544 output[4] += in[6] >> 16;
545 output[3] += (in[6] & 0xffff) << 40;
548 output[3] += in[5] >> 16;
549 output[2] += (in[5] & 0xffff) << 40;
552 output[2] += output[4] >> 16;
553 output[1] += (output[4] & 0xffff) << 40;
554 output[0] -= output[4];
556 /* Carry 2 -> 3 -> 4 */
557 output[3] += output[2] >> 56;
558 output[2] &= 0x00ffffffffffffff;
560 output[4] = output[3] >> 56;
561 output[3] &= 0x00ffffffffffffff;
563 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
565 /* Eliminate output[4] */
566 output[2] += output[4] >> 16;
567 /* output[2] < 2^56 + 2^56 = 2^57 */
568 output[1] += (output[4] & 0xffff) << 40;
569 output[0] -= output[4];
571 /* Carry 0 -> 1 -> 2 -> 3 */
572 output[1] += output[0] >> 56;
573 out[0] = output[0] & 0x00ffffffffffffff;
575 output[2] += output[1] >> 56;
576 /* output[2] < 2^57 + 2^72 */
577 out[1] = output[1] & 0x00ffffffffffffff;
578 output[3] += output[2] >> 56;
579 /* output[3] <= 2^56 + 2^16 */
580 out[2] = output[2] & 0x00ffffffffffffff;
583 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
584 * out[3] <= 2^56 + 2^16 (due to final carry),
590 static void felem_square_reduce(felem out, const felem in)
593 felem_square(tmp, in);
594 felem_reduce(out, tmp);
597 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
600 felem_mul(tmp, in1, in2);
601 felem_reduce(out, tmp);
605 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
606 * call felem_reduce first)
608 static void felem_contract(felem out, const felem in)
610 static const int64_t two56 = ((limb) 1) << 56;
611 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
612 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
618 /* Case 1: a = 1 iff in >= 2^224 */
622 tmp[3] &= 0x00ffffffffffffff;
624 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
625 * and the lower part is non-zero
627 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
628 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
629 a &= 0x00ffffffffffffff;
630 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
632 /* subtract 2^224 - 2^96 + 1 if a is all-one */
633 tmp[3] &= a ^ 0xffffffffffffffff;
634 tmp[2] &= a ^ 0xffffffffffffffff;
635 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
639 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
640 * non-zero, so we only need one step
646 /* carry 1 -> 2 -> 3 */
647 tmp[2] += tmp[1] >> 56;
648 tmp[1] &= 0x00ffffffffffffff;
650 tmp[3] += tmp[2] >> 56;
651 tmp[2] &= 0x00ffffffffffffff;
653 /* Now 0 <= out < p */
661 * Get negative value: out = -in
662 * Requires in[i] < 2^63,
663 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
665 static void felem_neg(felem out, const felem in)
669 memset(tmp, 0, sizeof(tmp));
670 felem_diff_128_64(tmp, in);
671 felem_reduce(out, tmp);
675 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
676 * elements are reduced to in < 2^225, so we only need to check three cases:
677 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
679 static limb felem_is_zero(const felem in)
681 limb zero, two224m96p1, two225m97p2;
683 zero = in[0] | in[1] | in[2] | in[3];
684 zero = (((int64_t) (zero) - 1) >> 63) & 1;
685 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
686 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
687 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
688 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
689 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
690 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
691 return (zero | two224m96p1 | two225m97p2);
694 static int felem_is_zero_int(const void *in)
696 return (int)(felem_is_zero(in) & ((limb) 1));
699 /* Invert a field element */
700 /* Computation chain copied from djb's code */
701 static void felem_inv(felem out, const felem in)
703 felem ftmp, ftmp2, ftmp3, ftmp4;
707 felem_square(tmp, in);
708 felem_reduce(ftmp, tmp); /* 2 */
709 felem_mul(tmp, in, ftmp);
710 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
711 felem_square(tmp, ftmp);
712 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
713 felem_mul(tmp, in, ftmp);
714 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
715 felem_square(tmp, ftmp);
716 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
717 felem_square(tmp, ftmp2);
718 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
719 felem_square(tmp, ftmp2);
720 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
721 felem_mul(tmp, ftmp2, ftmp);
722 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
723 felem_square(tmp, ftmp);
724 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
725 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
726 felem_square(tmp, ftmp2);
727 felem_reduce(ftmp2, tmp);
729 felem_mul(tmp, ftmp2, ftmp);
730 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
731 felem_square(tmp, ftmp2);
732 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
733 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
734 felem_square(tmp, ftmp3);
735 felem_reduce(ftmp3, tmp);
737 felem_mul(tmp, ftmp3, ftmp2);
738 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
739 felem_square(tmp, ftmp2);
740 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
741 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
742 felem_square(tmp, ftmp3);
743 felem_reduce(ftmp3, tmp);
745 felem_mul(tmp, ftmp3, ftmp2);
746 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
747 felem_square(tmp, ftmp3);
748 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
749 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
750 felem_square(tmp, ftmp4);
751 felem_reduce(ftmp4, tmp);
753 felem_mul(tmp, ftmp3, ftmp4);
754 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
755 felem_square(tmp, ftmp3);
756 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
757 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
758 felem_square(tmp, ftmp4);
759 felem_reduce(ftmp4, tmp);
761 felem_mul(tmp, ftmp2, ftmp4);
762 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
763 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
764 felem_square(tmp, ftmp2);
765 felem_reduce(ftmp2, tmp);
767 felem_mul(tmp, ftmp2, ftmp);
768 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
769 felem_square(tmp, ftmp);
770 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
771 felem_mul(tmp, ftmp, in);
772 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
773 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
774 felem_square(tmp, ftmp);
775 felem_reduce(ftmp, tmp);
777 felem_mul(tmp, ftmp, ftmp3);
778 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
782 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
785 static void copy_conditional(felem out, const felem in, limb icopy)
789 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
791 const limb copy = -icopy;
792 for (i = 0; i < 4; ++i) {
793 const limb tmp = copy & (in[i] ^ out[i]);
798 /******************************************************************************/
800 * ELLIPTIC CURVE POINT OPERATIONS
802 * Points are represented in Jacobian projective coordinates:
803 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
804 * or to the point at infinity if Z == 0.
809 * Double an elliptic curve point:
810 * (X', Y', Z') = 2 * (X, Y, Z), where
811 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
812 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
813 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
814 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
815 * while x_out == y_in is not (maybe this works, but it's not tested).
818 point_double(felem x_out, felem y_out, felem z_out,
819 const felem x_in, const felem y_in, const felem z_in)
822 felem delta, gamma, beta, alpha, ftmp, ftmp2;
824 felem_assign(ftmp, x_in);
825 felem_assign(ftmp2, x_in);
828 felem_square(tmp, z_in);
829 felem_reduce(delta, tmp);
832 felem_square(tmp, y_in);
833 felem_reduce(gamma, tmp);
836 felem_mul(tmp, x_in, gamma);
837 felem_reduce(beta, tmp);
839 /* alpha = 3*(x-delta)*(x+delta) */
840 felem_diff(ftmp, delta);
841 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
842 felem_sum(ftmp2, delta);
843 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
844 felem_scalar(ftmp2, 3);
845 /* ftmp2[i] < 3 * 2^58 < 2^60 */
846 felem_mul(tmp, ftmp, ftmp2);
847 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
848 felem_reduce(alpha, tmp);
850 /* x' = alpha^2 - 8*beta */
851 felem_square(tmp, alpha);
852 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
853 felem_assign(ftmp, beta);
854 felem_scalar(ftmp, 8);
855 /* ftmp[i] < 8 * 2^57 = 2^60 */
856 felem_diff_128_64(tmp, ftmp);
857 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
858 felem_reduce(x_out, tmp);
860 /* z' = (y + z)^2 - gamma - delta */
861 felem_sum(delta, gamma);
862 /* delta[i] < 2^57 + 2^57 = 2^58 */
863 felem_assign(ftmp, y_in);
864 felem_sum(ftmp, z_in);
865 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
866 felem_square(tmp, ftmp);
867 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
868 felem_diff_128_64(tmp, delta);
869 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
870 felem_reduce(z_out, tmp);
872 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
873 felem_scalar(beta, 4);
874 /* beta[i] < 4 * 2^57 = 2^59 */
875 felem_diff(beta, x_out);
876 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
877 felem_mul(tmp, alpha, beta);
878 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
879 felem_square(tmp2, gamma);
880 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
881 widefelem_scalar(tmp2, 8);
882 /* tmp2[i] < 8 * 2^116 = 2^119 */
883 widefelem_diff(tmp, tmp2);
884 /* tmp[i] < 2^119 + 2^120 < 2^121 */
885 felem_reduce(y_out, tmp);
889 * Add two elliptic curve points:
890 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
891 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
892 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
893 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
894 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
895 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
897 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
901 * This function is not entirely constant-time: it includes a branch for
902 * checking whether the two input points are equal, (while not equal to the
903 * point at infinity). This case never happens during single point
904 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
906 static void point_add(felem x3, felem y3, felem z3,
907 const felem x1, const felem y1, const felem z1,
908 const int mixed, const felem x2, const felem y2,
911 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
913 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
918 felem_square(tmp, z2);
919 felem_reduce(ftmp2, tmp);
922 felem_mul(tmp, ftmp2, z2);
923 felem_reduce(ftmp4, tmp);
925 /* ftmp4 = z2^3*y1 */
926 felem_mul(tmp2, ftmp4, y1);
927 felem_reduce(ftmp4, tmp2);
929 /* ftmp2 = z2^2*x1 */
930 felem_mul(tmp2, ftmp2, x1);
931 felem_reduce(ftmp2, tmp2);
934 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
937 /* ftmp4 = z2^3*y1 */
938 felem_assign(ftmp4, y1);
940 /* ftmp2 = z2^2*x1 */
941 felem_assign(ftmp2, x1);
945 felem_square(tmp, z1);
946 felem_reduce(ftmp, tmp);
949 felem_mul(tmp, ftmp, z1);
950 felem_reduce(ftmp3, tmp);
953 felem_mul(tmp, ftmp3, y2);
954 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
956 /* ftmp3 = z1^3*y2 - z2^3*y1 */
957 felem_diff_128_64(tmp, ftmp4);
958 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
959 felem_reduce(ftmp3, tmp);
962 felem_mul(tmp, ftmp, x2);
963 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
965 /* ftmp = z1^2*x2 - z2^2*x1 */
966 felem_diff_128_64(tmp, ftmp2);
967 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
968 felem_reduce(ftmp, tmp);
971 * The formulae are incorrect if the points are equal, in affine coordinates
972 * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
975 * We use bitwise operations to avoid potential side-channels introduced by
976 * the short-circuiting behaviour of boolean operators.
978 x_equal = felem_is_zero(ftmp);
979 y_equal = felem_is_zero(ftmp3);
981 * The special case of either point being the point at infinity (z1 and/or
982 * z2 are zero), is handled separately later on in this function, so we
983 * avoid jumping to point_double here in those special cases.
985 z1_is_zero = felem_is_zero(z1);
986 z2_is_zero = felem_is_zero(z2);
989 * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
990 * specific implementation `felem_is_zero()` returns truth as `0x1`
991 * (rather than `0xff..ff`).
993 * This implies that `~true` in this implementation becomes
994 * `0xff..fe` (rather than `0x0`): for this reason, to be used in
995 * the if expression, we mask out only the last bit in the next
998 points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
1002 * This is obviously not constant-time but, as mentioned before, this
1003 * case never happens during single point multiplication, so there is no
1004 * timing leak for ECDH or ECDSA signing.
1006 point_double(x3, y3, z3, x1, y1, z1);
1012 felem_mul(tmp, z1, z2);
1013 felem_reduce(ftmp5, tmp);
1015 /* special case z2 = 0 is handled later */
1016 felem_assign(ftmp5, z1);
1019 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1020 felem_mul(tmp, ftmp, ftmp5);
1021 felem_reduce(z_out, tmp);
1023 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1024 felem_assign(ftmp5, ftmp);
1025 felem_square(tmp, ftmp);
1026 felem_reduce(ftmp, tmp);
1028 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1029 felem_mul(tmp, ftmp, ftmp5);
1030 felem_reduce(ftmp5, tmp);
1032 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1033 felem_mul(tmp, ftmp2, ftmp);
1034 felem_reduce(ftmp2, tmp);
1036 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1037 felem_mul(tmp, ftmp4, ftmp5);
1038 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1040 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1041 felem_square(tmp2, ftmp3);
1042 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1044 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1045 felem_diff_128_64(tmp2, ftmp5);
1046 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1048 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1049 felem_assign(ftmp5, ftmp2);
1050 felem_scalar(ftmp5, 2);
1051 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1054 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1055 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1057 felem_diff_128_64(tmp2, ftmp5);
1058 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1059 felem_reduce(x_out, tmp2);
1061 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1062 felem_diff(ftmp2, x_out);
1063 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1066 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1068 felem_mul(tmp2, ftmp3, ftmp2);
1069 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1072 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1073 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1075 widefelem_diff(tmp2, tmp);
1076 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1077 felem_reduce(y_out, tmp2);
1080 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1081 * the point at infinity, so we need to check for this separately
1085 * if point 1 is at infinity, copy point 2 to output, and vice versa
1087 copy_conditional(x_out, x2, z1_is_zero);
1088 copy_conditional(x_out, x1, z2_is_zero);
1089 copy_conditional(y_out, y2, z1_is_zero);
1090 copy_conditional(y_out, y1, z2_is_zero);
1091 copy_conditional(z_out, z2, z1_is_zero);
1092 copy_conditional(z_out, z1, z2_is_zero);
1093 felem_assign(x3, x_out);
1094 felem_assign(y3, y_out);
1095 felem_assign(z3, z_out);
1099 * select_point selects the |idx|th point from a precomputation table and
1101 * The pre_comp array argument should be size of |size| argument
1103 static void select_point(const u64 idx, unsigned int size,
1104 const felem pre_comp[][3], felem out[3])
1107 limb *outlimbs = &out[0][0];
1109 memset(out, 0, sizeof(*out) * 3);
1110 for (i = 0; i < size; i++) {
1111 const limb *inlimbs = &pre_comp[i][0][0];
1118 for (j = 0; j < 4 * 3; j++)
1119 outlimbs[j] |= inlimbs[j] & mask;
1123 /* get_bit returns the |i|th bit in |in| */
1124 static char get_bit(const felem_bytearray in, unsigned i)
1128 return (in[i >> 3] >> (i & 7)) & 1;
1132 * Interleaved point multiplication using precomputed point multiples: The
1133 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1134 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1135 * generator, using certain (large) precomputed multiples in g_pre_comp.
1136 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1138 static void batch_mul(felem x_out, felem y_out, felem z_out,
1139 const felem_bytearray scalars[],
1140 const unsigned num_points, const u8 *g_scalar,
1141 const int mixed, const felem pre_comp[][17][3],
1142 const felem g_pre_comp[2][16][3])
1146 unsigned gen_mul = (g_scalar != NULL);
1147 felem nq[3], tmp[4];
1151 /* set nq to the point at infinity */
1152 memset(nq, 0, sizeof(nq));
1155 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1156 * of the generator (two in each of the last 28 rounds) and additions of
1157 * other points multiples (every 5th round).
1159 skip = 1; /* save two point operations in the first
1161 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1164 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1166 /* add multiples of the generator */
1167 if (gen_mul && (i <= 27)) {
1168 /* first, look 28 bits upwards */
1169 bits = get_bit(g_scalar, i + 196) << 3;
1170 bits |= get_bit(g_scalar, i + 140) << 2;
1171 bits |= get_bit(g_scalar, i + 84) << 1;
1172 bits |= get_bit(g_scalar, i + 28);
1173 /* select the point to add, in constant time */
1174 select_point(bits, 16, g_pre_comp[1], tmp);
1177 /* value 1 below is argument for "mixed" */
1178 point_add(nq[0], nq[1], nq[2],
1179 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1181 memcpy(nq, tmp, 3 * sizeof(felem));
1185 /* second, look at the current position */
1186 bits = get_bit(g_scalar, i + 168) << 3;
1187 bits |= get_bit(g_scalar, i + 112) << 2;
1188 bits |= get_bit(g_scalar, i + 56) << 1;
1189 bits |= get_bit(g_scalar, i);
1190 /* select the point to add, in constant time */
1191 select_point(bits, 16, g_pre_comp[0], tmp);
1192 point_add(nq[0], nq[1], nq[2],
1193 nq[0], nq[1], nq[2],
1194 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1197 /* do other additions every 5 doublings */
1198 if (num_points && (i % 5 == 0)) {
1199 /* loop over all scalars */
1200 for (num = 0; num < num_points; ++num) {
1201 bits = get_bit(scalars[num], i + 4) << 5;
1202 bits |= get_bit(scalars[num], i + 3) << 4;
1203 bits |= get_bit(scalars[num], i + 2) << 3;
1204 bits |= get_bit(scalars[num], i + 1) << 2;
1205 bits |= get_bit(scalars[num], i) << 1;
1206 bits |= get_bit(scalars[num], i - 1);
1207 ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1209 /* select the point to add or subtract */
1210 select_point(digit, 17, pre_comp[num], tmp);
1211 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1213 copy_conditional(tmp[1], tmp[3], sign);
1216 point_add(nq[0], nq[1], nq[2],
1217 nq[0], nq[1], nq[2],
1218 mixed, tmp[0], tmp[1], tmp[2]);
1220 memcpy(nq, tmp, 3 * sizeof(felem));
1226 felem_assign(x_out, nq[0]);
1227 felem_assign(y_out, nq[1]);
1228 felem_assign(z_out, nq[2]);
1231 /******************************************************************************/
1233 * FUNCTIONS TO MANAGE PRECOMPUTATION
1236 static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1238 NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1244 if (!CRYPTO_NEW_REF(&ret->references, 1)) {
1251 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1255 CRYPTO_UP_REF(&p->references, &i);
1259 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1266 CRYPTO_DOWN_REF(&p->references, &i);
1267 REF_PRINT_COUNT("EC_nistp224", p);
1270 REF_ASSERT_ISNT(i < 0);
1272 CRYPTO_FREE_REF(&p->references);
1276 /******************************************************************************/
1278 * OPENSSL EC_METHOD FUNCTIONS
1281 int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
1284 ret = ossl_ec_GFp_simple_group_init(group);
1285 group->a_is_minus3 = 1;
1289 int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1290 const BIGNUM *a, const BIGNUM *b,
1294 BIGNUM *curve_p, *curve_a, *curve_b;
1296 BN_CTX *new_ctx = NULL;
1299 ctx = new_ctx = BN_CTX_new();
1305 curve_p = BN_CTX_get(ctx);
1306 curve_a = BN_CTX_get(ctx);
1307 curve_b = BN_CTX_get(ctx);
1308 if (curve_b == NULL)
1310 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1311 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1312 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1313 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1314 ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1317 group->field_mod_func = BN_nist_mod_224;
1318 ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1322 BN_CTX_free(new_ctx);
1328 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1331 int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1332 const EC_POINT *point,
1333 BIGNUM *x, BIGNUM *y,
1336 felem z1, z2, x_in, y_in, x_out, y_out;
1339 if (EC_POINT_is_at_infinity(group, point)) {
1340 ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1343 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1344 (!BN_to_felem(z1, point->Z)))
1347 felem_square(tmp, z2);
1348 felem_reduce(z1, tmp);
1349 felem_mul(tmp, x_in, z1);
1350 felem_reduce(x_in, tmp);
1351 felem_contract(x_out, x_in);
1353 if (!felem_to_BN(x, x_out)) {
1354 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1358 felem_mul(tmp, z1, z2);
1359 felem_reduce(z1, tmp);
1360 felem_mul(tmp, y_in, z1);
1361 felem_reduce(y_in, tmp);
1362 felem_contract(y_out, y_in);
1364 if (!felem_to_BN(y, y_out)) {
1365 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1372 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1373 felem tmp_felems[ /* num+1 */ ])
1376 * Runs in constant time, unless an input is the point at infinity (which
1377 * normally shouldn't happen).
1379 ossl_ec_GFp_nistp_points_make_affine_internal(num,
1383 (void (*)(void *))felem_one,
1385 (void (*)(void *, const void *))
1387 (void (*)(void *, const void *))
1388 felem_square_reduce, (void (*)
1395 (void (*)(void *, const void *))
1397 (void (*)(void *, const void *))
1402 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1403 * values Result is stored in r (r can equal one of the inputs).
1405 int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1406 const BIGNUM *scalar, size_t num,
1407 const EC_POINT *points[],
1408 const BIGNUM *scalars[], BN_CTX *ctx)
1414 BIGNUM *x, *y, *z, *tmp_scalar;
1415 felem_bytearray g_secret;
1416 felem_bytearray *secrets = NULL;
1417 felem (*pre_comp)[17][3] = NULL;
1418 felem *tmp_felems = NULL;
1420 int have_pre_comp = 0;
1421 size_t num_points = num;
1422 felem x_in, y_in, z_in, x_out, y_out, z_out;
1423 NISTP224_PRE_COMP *pre = NULL;
1424 const felem(*g_pre_comp)[16][3] = NULL;
1425 EC_POINT *generator = NULL;
1426 const EC_POINT *p = NULL;
1427 const BIGNUM *p_scalar = NULL;
1430 x = BN_CTX_get(ctx);
1431 y = BN_CTX_get(ctx);
1432 z = BN_CTX_get(ctx);
1433 tmp_scalar = BN_CTX_get(ctx);
1434 if (tmp_scalar == NULL)
1437 if (scalar != NULL) {
1438 pre = group->pre_comp.nistp224;
1440 /* we have precomputation, try to use it */
1441 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1443 /* try to use the standard precomputation */
1444 g_pre_comp = &gmul[0];
1445 generator = EC_POINT_new(group);
1446 if (generator == NULL)
1448 /* get the generator from precomputation */
1449 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1450 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1451 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1452 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1455 if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1459 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1460 /* precomputation matches generator */
1464 * we don't have valid precomputation: treat the generator as a
1467 num_points = num_points + 1;
1470 if (num_points > 0) {
1471 if (num_points >= 3) {
1473 * unless we precompute multiples for just one or two points,
1474 * converting those into affine form is time well spent
1478 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1479 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1482 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1483 if ((secrets == NULL) || (pre_comp == NULL)
1484 || (mixed && (tmp_felems == NULL)))
1488 * we treat NULL scalars as 0, and NULL points as points at infinity,
1489 * i.e., they contribute nothing to the linear combination
1491 for (i = 0; i < num_points; ++i) {
1494 p = EC_GROUP_get0_generator(group);
1497 /* the i^th point */
1499 p_scalar = scalars[i];
1501 if ((p_scalar != NULL) && (p != NULL)) {
1502 /* reduce scalar to 0 <= scalar < 2^224 */
1503 if ((BN_num_bits(p_scalar) > 224)
1504 || (BN_is_negative(p_scalar))) {
1506 * this is an unusual input, and we don't guarantee
1509 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1510 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1513 num_bytes = BN_bn2lebinpad(tmp_scalar,
1514 secrets[i], sizeof(secrets[i]));
1516 num_bytes = BN_bn2lebinpad(p_scalar,
1517 secrets[i], sizeof(secrets[i]));
1519 if (num_bytes < 0) {
1520 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1523 /* precompute multiples */
1524 if ((!BN_to_felem(x_out, p->X)) ||
1525 (!BN_to_felem(y_out, p->Y)) ||
1526 (!BN_to_felem(z_out, p->Z)))
1528 felem_assign(pre_comp[i][1][0], x_out);
1529 felem_assign(pre_comp[i][1][1], y_out);
1530 felem_assign(pre_comp[i][1][2], z_out);
1531 for (j = 2; j <= 16; ++j) {
1533 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1534 pre_comp[i][j][2], pre_comp[i][1][0],
1535 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1536 pre_comp[i][j - 1][0],
1537 pre_comp[i][j - 1][1],
1538 pre_comp[i][j - 1][2]);
1540 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1541 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1542 pre_comp[i][j / 2][1],
1543 pre_comp[i][j / 2][2]);
1549 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1552 /* the scalar for the generator */
1553 if ((scalar != NULL) && (have_pre_comp)) {
1554 memset(g_secret, 0, sizeof(g_secret));
1555 /* reduce scalar to 0 <= scalar < 2^224 */
1556 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1558 * this is an unusual input, and we don't guarantee
1561 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1562 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1565 num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1567 num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1569 /* do the multiplication with generator precomputation */
1570 batch_mul(x_out, y_out, z_out,
1571 (const felem_bytearray(*))secrets, num_points,
1573 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1575 /* do the multiplication without generator precomputation */
1576 batch_mul(x_out, y_out, z_out,
1577 (const felem_bytearray(*))secrets, num_points,
1578 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1580 /* reduce the output to its unique minimal representation */
1581 felem_contract(x_in, x_out);
1582 felem_contract(y_in, y_out);
1583 felem_contract(z_in, z_out);
1584 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1585 (!felem_to_BN(z, z_in))) {
1586 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1589 ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1594 EC_POINT_free(generator);
1595 OPENSSL_free(secrets);
1596 OPENSSL_free(pre_comp);
1597 OPENSSL_free(tmp_felems);
1601 int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1604 NISTP224_PRE_COMP *pre = NULL;
1607 EC_POINT *generator = NULL;
1608 felem tmp_felems[32];
1610 BN_CTX *new_ctx = NULL;
1613 /* throw away old precomputation */
1614 EC_pre_comp_free(group);
1618 ctx = new_ctx = BN_CTX_new();
1624 x = BN_CTX_get(ctx);
1625 y = BN_CTX_get(ctx);
1628 /* get the generator */
1629 if (group->generator == NULL)
1631 generator = EC_POINT_new(group);
1632 if (generator == NULL)
1634 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1635 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1636 if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1638 if ((pre = nistp224_pre_comp_new()) == NULL)
1641 * if the generator is the standard one, use built-in precomputation
1643 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1644 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1647 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1648 (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1649 (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1652 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1653 * 2^140*G, 2^196*G for the second one
1655 for (i = 1; i <= 8; i <<= 1) {
1656 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1657 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1658 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1659 for (j = 0; j < 27; ++j) {
1660 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1661 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1662 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1666 point_double(pre->g_pre_comp[0][2 * i][0],
1667 pre->g_pre_comp[0][2 * i][1],
1668 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1669 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1670 for (j = 0; j < 27; ++j) {
1671 point_double(pre->g_pre_comp[0][2 * i][0],
1672 pre->g_pre_comp[0][2 * i][1],
1673 pre->g_pre_comp[0][2 * i][2],
1674 pre->g_pre_comp[0][2 * i][0],
1675 pre->g_pre_comp[0][2 * i][1],
1676 pre->g_pre_comp[0][2 * i][2]);
1679 for (i = 0; i < 2; i++) {
1680 /* g_pre_comp[i][0] is the point at infinity */
1681 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1682 /* the remaining multiples */
1683 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1684 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1685 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1686 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1687 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1688 pre->g_pre_comp[i][2][2]);
1689 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1690 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1691 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1692 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1693 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1694 pre->g_pre_comp[i][2][2]);
1695 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1696 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1697 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1698 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1699 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1700 pre->g_pre_comp[i][4][2]);
1702 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1704 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1705 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1706 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1707 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1708 pre->g_pre_comp[i][2][2]);
1709 for (j = 1; j < 8; ++j) {
1710 /* odd multiples: add G resp. 2^28*G */
1711 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1712 pre->g_pre_comp[i][2 * j + 1][1],
1713 pre->g_pre_comp[i][2 * j + 1][2],
1714 pre->g_pre_comp[i][2 * j][0],
1715 pre->g_pre_comp[i][2 * j][1],
1716 pre->g_pre_comp[i][2 * j][2], 0,
1717 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1718 pre->g_pre_comp[i][1][2]);
1721 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1724 SETPRECOMP(group, nistp224, pre);
1729 EC_POINT_free(generator);
1731 BN_CTX_free(new_ctx);
1733 EC_nistp224_pre_comp_free(pre);
1737 int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1739 return HAVEPRECOMP(group, nistp224);