1 /* crypto/ec/ecp_nistp224.c */
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
#include <openssl/opensslconf.h>
#include <string.h> /* memcpy/memset used by the felem conversion helpers */
#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
#ifndef OPENSSL_SYS_VMS
#include <openssl/err.h>
41 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
42 /* even with gcc, the typedef won't work for 32-bit platforms */
43 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
45 #error "Need GCC 3.1 or later to define type uint128_t"
53 /******************************************************************************/
55 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
57 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
58 * using 64-bit coefficients called 'limbs',
59 * and sometimes (for multiplication results) as
60 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
61 * using 128-bit coefficients called 'widelimbs'.
62 * A 4-limb representation is an 'felem';
63 * a 7-widelimb representation is a 'widefelem'.
64 * Even within felems, bits of adjacent limbs overlap, and we don't always
65 * reduce the representations: we ensure that inputs to each felem
66 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
67 * and fit into a 128-bit word without overflow. The coefficients are then
68 * again partially reduced to obtain an felem satisfying a_i < 2^57.
69 * We only reduce to the unique minimal representation at the end of the
73 typedef uint64_t limb;
74 typedef uint128_t widelimb;
76 typedef limb felem[4];
77 typedef widelimb widefelem[7];
/* Field element represented as a byte array.
80 * 28*8 = 224 bits is also the group order size for the elliptic curve,
81 * and we also use this type for scalars for point multiplication.
83 typedef u8 felem_bytearray[28];
/* NIST P-224 domain parameters, each as a 28-byte little-endian array:
 * [0] = p (field prime), [1] = a, [2] = b, [3] = generator x, [4] = generator y. */
static const felem_bytearray nistp224_curve_params[5] = {
	{0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* p */
	 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,
	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01},
	{0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* a */
	 0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFF,0xFF,
	 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE},
	{0xB4,0x05,0x0A,0x85,0x0C,0x04,0xB3,0xAB,0xF5,0x41, /* b */
	 0x32,0x56,0x50,0x44,0xB0,0xB7,0xD7,0xBF,0xD8,0xBA,
	 0x27,0x0B,0x39,0x43,0x23,0x55,0xFF,0xB4},
	{0xB7,0x0E,0x0C,0xBD,0x6B,0xB4,0xBF,0x7F,0x32,0x13, /* x */
	 0x90,0xB9,0x4A,0x03,0xC1,0xD3,0x56,0xC2,0x11,0x22,
	 0x34,0x32,0x80,0xD6,0x11,0x5C,0x1D,0x21},
	{0xbd,0x37,0x63,0x88,0xb5,0xf7,0x23,0xfb,0x4c,0x22, /* y */
	 0xdf,0xe6,0xcd,0x43,0x75,0xa0,0x5a,0x07,0x47,0x64,
	 0x44,0xd5,0x81,0x99,0x85,0x00,0x7e,0x34}
104 * Precomputed multiples of the standard generator
105 * Points are given in coordinates (X, Y, Z) where Z normally is 1
106 * (0 for the point at infinity).
107 * For each field element, slice a_0 is word 0, etc.
109 * The table has 2 * 16 elements, starting with the following:
110 * index | bits | point
111 * ------+---------+------------------------------
114 * 2 | 0 0 1 0 | 2^56G
115 * 3 | 0 0 1 1 | (2^56 + 1)G
116 * 4 | 0 1 0 0 | 2^112G
117 * 5 | 0 1 0 1 | (2^112 + 1)G
118 * 6 | 0 1 1 0 | (2^112 + 2^56)G
119 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
120 * 8 | 1 0 0 0 | 2^168G
121 * 9 | 1 0 0 1 | (2^168 + 1)G
122 * 10 | 1 0 1 0 | (2^168 + 2^56)G
123 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
124 * 12 | 1 1 0 0 | (2^168 + 2^112)G
125 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
126 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
127 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
128 * followed by a copy of this with each element multiplied by 2^28.
130 * The reason for this is so that we can clock bits into four different
131 * locations when doing simple scalar multiplies against the base point,
132 * and then another four locations using the second 16 elements.
134 static const felem gmul[2][16][3] =
138 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
139 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
141 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
142 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
144 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
145 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
147 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
148 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
150 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
151 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
153 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
154 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
156 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
157 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
159 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
160 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
162 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
163 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
165 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
166 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
168 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
169 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
171 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
172 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
174 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
175 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
177 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
178 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
180 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
181 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
186 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
187 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
189 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
190 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
192 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
193 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
195 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
196 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
198 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
199 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
201 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
202 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
204 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
205 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
207 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
208 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
210 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
211 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
213 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
214 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
216 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
217 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
219 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
220 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
222 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
223 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
225 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
226 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
228 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
229 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
232 /* Precomputation for the group generator. */
234 felem g_pre_comp[2][16][3];
/* Return the custom EC_METHOD for NIST P-224.
 * Only operations that benefit from the 64/128-bit field representation
 * are overridden with ec_GFp_nistp224_* implementations; everything else
 * is delegated to the generic ec_GFp_simple_* / ec_GFp_nist_* routines.
 * NOTE(review): the trailing `return &ret;` and some initializer entries
 * are not visible in this excerpt — confirm against the full file. */
const EC_METHOD *EC_GFp_nistp224_method(void)
	static const EC_METHOD ret = {
	EC_FLAGS_DEFAULT_OCT,
	NID_X9_62_prime_field,
	ec_GFp_nistp224_group_init,
	ec_GFp_simple_group_finish,
	ec_GFp_simple_group_clear_finish,
	ec_GFp_nist_group_copy,
	ec_GFp_nistp224_group_set_curve,
	ec_GFp_simple_group_get_curve,
	ec_GFp_simple_group_get_degree,
	ec_GFp_simple_group_check_discriminant,
	ec_GFp_simple_point_init,
	ec_GFp_simple_point_finish,
	ec_GFp_simple_point_clear_finish,
	ec_GFp_simple_point_copy,
	ec_GFp_simple_point_set_to_infinity,
	ec_GFp_simple_set_Jprojective_coordinates_GFp,
	ec_GFp_simple_get_Jprojective_coordinates_GFp,
	ec_GFp_simple_point_set_affine_coordinates,
	ec_GFp_nistp224_point_get_affine_coordinates,
	0 /* point_set_compressed_coordinates */,
	ec_GFp_simple_invert,
	ec_GFp_simple_is_at_infinity,
	ec_GFp_simple_is_on_curve,
	ec_GFp_simple_make_affine,
	ec_GFp_simple_points_make_affine,
	ec_GFp_nistp224_points_mul,
	ec_GFp_nistp224_precompute_mult,
	ec_GFp_nistp224_have_precompute_mult,
	ec_GFp_nist_field_mul,
	ec_GFp_nist_field_sqr,
	0 /* field_encode */,
	0 /* field_decode */,
	0 /* field_set_to_one */ };
284 /* Helper functions to convert field elements to/from internal representation */
285 static void bin28_to_felem(felem out, const u8 in[28])
287 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
288 out[1] = (*((const uint64_t *)(in+7))) & 0x00ffffffffffffff;
289 out[2] = (*((const uint64_t *)(in+14))) & 0x00ffffffffffffff;
290 out[3] = (*((const uint64_t *)(in+21))) & 0x00ffffffffffffff;
293 static void felem_to_bin28(u8 out[28], const felem in)
296 for (i = 0; i < 7; ++i)
298 out[i] = in[0]>>(8*i);
299 out[i+7] = in[1]>>(8*i);
300 out[i+14] = in[2]>>(8*i);
301 out[i+21] = in[3]>>(8*i);
305 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
306 static void flip_endian(u8 *out, const u8 *in, unsigned len)
309 for (i = 0; i < len; ++i)
310 out[i] = in[len-1-i];
313 /* From OpenSSL BIGNUM to internal representation */
/* Convert an OpenSSL BIGNUM into the internal 4x56-bit representation.
 * Values that are negative or wider than 28 bytes are rejected with
 * EC_R_BIGNUM_OUT_OF_RANGE.
 * NOTE(review): braces, the num_bytes declaration, and the early-return /
 * final return lines are not visible in this excerpt. */
static int BN_to_felem(felem out, const BIGNUM *bn)
	felem_bytearray b_in;
	felem_bytearray b_out;
	/* BN_bn2bin eats leading zeroes */
	memset(b_out, 0, sizeof b_out);
	num_bytes = BN_num_bytes(bn);
	if (num_bytes > sizeof b_out)
		ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
	if (BN_is_negative(bn))
		ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
	/* BN_bn2bin writes big-endian; flip to the little-endian order that
	 * bin28_to_felem expects */
	num_bytes = BN_bn2bin(bn, b_in);
	flip_endian(b_out, b_in, num_bytes);
	bin28_to_felem(out, b_out);
339 /* From internal representation to OpenSSL BIGNUM */
/* Convert the internal representation back to an OpenSSL BIGNUM.
 * Returns the result of BN_bin2bn (NULL on allocation failure). */
static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
	felem_bytearray b_in, b_out;
	felem_to_bin28(b_in, in);
	/* felem_to_bin28 emits little-endian; BN_bin2bn consumes big-endian */
	flip_endian(b_out, b_in, sizeof b_out);
	return BN_bin2bn(b_out, sizeof b_out, out);
348 /******************************************************************************/
352 * Field operations, using the internal representation of field elements.
353 * NB! These operations are specific to our point multiplication and cannot be
354 * expected to be correct in general - e.g., multiplication with a large scalar
355 * will cause an overflow.
359 static void felem_one(felem out)
367 static void felem_assign(felem out, const felem in)
375 /* Sum two field elements: out += in */
376 static void felem_sum(felem out, const felem in)
384 /* Get negative value: out = -in */
385 /* Assumes in[i] < 2^57 */
386 static void felem_neg(felem out, const felem in)
388 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
389 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
390 static const limb two58m42m2 = (((limb) 1) << 58) -
391 (((limb) 1) << 42) - (((limb) 1) << 2);
393 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
394 out[0] = two58p2 - in[0];
395 out[1] = two58m42m2 - in[1];
396 out[2] = two58m2 - in[2];
397 out[3] = two58m2 - in[3];
400 /* Subtract field elements: out -= in */
401 /* Assumes in[i] < 2^57 */
/* Subtract field elements: out -= in (mod p).
 * Assumes in[i] < 2^57; a per-limb constant that is 0 mod p is added
 * first so no limb of out can underflow.
 * NOTE(review): only part of the body is visible in this excerpt — the
 * additions to out[0]/out[2]/out[3] and the four subtractions are
 * missing here. */
static void felem_diff(felem out, const felem in)
	static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
	static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
	static const limb two58m42m2 = (((limb) 1) << 58) -
		(((limb) 1) << 42) - (((limb) 1) << 2);
	/* Add 0 mod 2^224-2^96+1 to ensure out > in */
	out[1] += two58m42m2;
421 /* Subtract in unreduced 128-bit mode: out -= in */
422 /* Assumes in[i] < 2^119 */
/* Subtract two unreduced (128-bit limb) field elements: out -= in.
 * Assumes in[i] < 2^119; constants that sum to 0 mod p are added per
 * limb so no widelimb underflows.
 * NOTE(review): only the out[4] addend is visible in this excerpt; the
 * constants added to the other limbs and the seven subtractions are
 * missing here. */
static void widefelem_diff(widefelem out, const widefelem in)
	static const widelimb two120 = ((widelimb) 1) << 120;
	static const widelimb two120m64 = (((widelimb) 1) << 120) -
		(((widelimb) 1) << 64);
	static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
		(((widelimb) 1) << 104) - (((widelimb) 1) << 64);
	/* Add 0 mod 2^224-2^96+1 to ensure out > in */
	out[4] += two120m104m64;
449 /* Subtract in mixed mode: out128 -= in64 */
/* Mixed-mode subtraction: subtract a 64-bit-limb felem from a 128-bit-limb
 * widefelem: out -= in.  Constants that sum to 0 mod p are added per limb
 * so no widelimb underflows.
 * NOTE(review): the addends for out[0]/out[2]/out[3] and the four
 * subtractions are not visible in this excerpt. */
static void felem_diff_128_64(widefelem out, const felem in)
	static const widelimb two64p8 = (((widelimb) 1) << 64) +
		(((widelimb) 1) << 8);
	static const widelimb two64m8 = (((widelimb) 1) << 64) -
		(((widelimb) 1) << 8);
	static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
		(((widelimb) 1) << 48) - (((widelimb) 1) << 8);
	/* Add 0 mod 2^224-2^96+1 to ensure out > in */
	out[1] += two64m48m8;
472 /* Multiply a field element by a scalar: out = out * scalar
473 * The scalars we actually use are small, so results fit without overflow */
474 static void felem_scalar(felem out, const limb scalar)
482 /* Multiply an unreduced field element by a scalar: out = out * scalar
483 * The scalars we actually use are small, so results fit without overflow */
484 static void widefelem_scalar(widefelem out, const widelimb scalar)
495 /* Square a field element: out = in^2 */
496 static void felem_square(widefelem out, const felem in)
498 limb tmp0, tmp1, tmp2;
499 tmp0 = 2 * in[0]; tmp1 = 2 * in[1]; tmp2 = 2 * in[2];
500 out[0] = ((widelimb) in[0]) * in[0];
501 out[1] = ((widelimb) in[0]) * tmp1;
502 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
503 out[3] = ((widelimb) in[3]) * tmp0 +
504 ((widelimb) in[1]) * tmp2;
505 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
506 out[5] = ((widelimb) in[3]) * tmp2;
507 out[6] = ((widelimb) in[3]) * in[3];
510 /* Multiply two field elements: out = in1 * in2 */
511 static void felem_mul(widefelem out, const felem in1, const felem in2)
513 out[0] = ((widelimb) in1[0]) * in2[0];
514 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
515 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
516 ((widelimb) in1[2]) * in2[0];
517 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
518 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
519 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
520 ((widelimb) in1[3]) * in2[1];
521 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
522 out[6] = ((widelimb) in1[3]) * in2[3];
526 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
527 * Requires in[i] < 2^126,
528 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
/* Reduce seven 128-bit coefficients to four 64-bit coefficients, using
 * 2^224 == 2^96 - 1 (mod p) to fold the high limbs down.
 * Requires in[i] < 2^126; ensures out[0..2] < 2^56, out[3] <= 2^56 + 2^16.
 * NOTE(review): the declaration of the scratch array `output` (presumably
 * widelimb output[5]), the initialisation of output[3]/output[4], and the
 * final store of out[3] are not visible in this excerpt — confirm against
 * the full file. */
static void felem_reduce(felem out, const widefelem in)
	static const widelimb two127p15 = (((widelimb) 1) << 127) +
		(((widelimb) 1) << 15);
	static const widelimb two127m71 = (((widelimb) 1) << 127) -
		(((widelimb) 1) << 71);
	static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
		(((widelimb) 1) << 71) - (((widelimb) 1) << 55);
	/* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
	output[0] = in[0] + two127p15;
	output[1] = in[1] + two127m71m55;
	output[2] = in[2] + two127m71;
	/* Eliminate in[4], in[5], in[6] */
	output[4] += in[6] >> 16;
	output[3] += (in[6] & 0xffff) << 40;
	output[3] += in[5] >> 16;
	output[2] += (in[5] & 0xffff) << 40;
	output[2] += output[4] >> 16;
	output[1] += (output[4] & 0xffff) << 40;
	output[0] -= output[4];
	/* Carry 2 -> 3 -> 4 */
	output[3] += output[2] >> 56;
	output[2] &= 0x00ffffffffffffff;
	output[4] = output[3] >> 56;
	output[3] &= 0x00ffffffffffffff;
	/* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
	/* Eliminate output[4] */
	output[2] += output[4] >> 16;
	/* output[2] < 2^56 + 2^56 = 2^57 */
	output[1] += (output[4] & 0xffff) << 40;
	output[0] -= output[4];
	/* Carry 0 -> 1 -> 2 -> 3 */
	output[1] += output[0] >> 56;
	out[0] = output[0] & 0x00ffffffffffffff;
	output[2] += output[1] >> 56;
	/* output[2] < 2^57 + 2^72 */
	out[1] = output[1] & 0x00ffffffffffffff;
	output[3] += output[2] >> 56;
	/* output[3] <= 2^56 + 2^16 */
	out[2] = output[2] & 0x00ffffffffffffff;
	* out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
	* out[3] <= 2^56 + 2^16 (due to final carry),
593 static void felem_square_reduce(felem out, const felem in)
596 felem_square(tmp, in);
597 felem_reduce(out, tmp);
600 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
603 felem_mul(tmp, in1, in2);
604 felem_reduce(out, tmp);
607 /* Reduce to unique minimal representation.
608 * Requires 0 <= in < 2*p (always call felem_reduce first) */
/* Reduce to the unique minimal representation 0 <= out < p, in constant
 * time.  Requires 0 <= in < 2*p (always call felem_reduce first).
 * NOTE(review): the declarations of `tmp` and `a`, the copy of `in` into
 * `tmp`, the "Case 1" mask computation, the mask-broadcast step, the
 * negative-coefficient fix-up, and the final stores to `out` are not
 * visible in this excerpt — confirm against the full file. */
static void felem_contract(felem out, const felem in)
	static const int64_t two56 = ((limb) 1) << 56;
	/* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
	/* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
	/* Case 1: a = 1 iff in >= 2^224 */
	tmp[3] &= 0x00ffffffffffffff;
	/* Case 2: a = 0 iff p <= in < 2^224, i.e.,
	 * the high 128 bits are all 1 and the lower part is non-zero */
	a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
		(((int64_t)(in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
	a &= 0x00ffffffffffffff;
	/* turn a into an all-one mask (if a = 0) or an all-zero mask */
	/* subtract 2^224 - 2^96 + 1 if a is all-one*/
	tmp[3] &= a ^ 0xffffffffffffffff;
	tmp[2] &= a ^ 0xffffffffffffffff;
	tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
	/* eliminate negative coefficients: if tmp[0] is negative, tmp[1] must
	 * be non-zero, so we only need one step */
	/* carry 1 -> 2 -> 3 */
	tmp[2] += tmp[1] >> 56;
	tmp[1] &= 0x00ffffffffffffff;
	tmp[3] += tmp[2] >> 56;
	tmp[2] &= 0x00ffffffffffffff;
	/* Now 0 <= out < p */
657 /* Zero-check: returns 1 if input is 0, and 0 otherwise.
658 * We know that field elements are reduced to in < 2^225,
659 * so we only need to check three cases: 0, 2^224 - 2^96 + 1,
660 * and 2^225 - 2^97 + 2 */
661 static limb felem_is_zero(const felem in)
663 limb zero, two224m96p1, two225m97p2;
665 zero = in[0] | in[1] | in[2] | in[3];
666 zero = (((int64_t)(zero) - 1) >> 63) & 1;
667 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
668 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
669 two224m96p1 = (((int64_t)(two224m96p1) - 1) >> 63) & 1;
670 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
671 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
672 two225m97p2 = (((int64_t)(two225m97p2) - 1) >> 63) & 1;
673 return (zero | two224m96p1 | two225m97p2);
676 static limb felem_is_zero_int(const felem in)
678 return (int) (felem_is_zero(in) & ((limb)1));
681 /* Invert a field element */
682 /* Computation chain copied from djb's code */
/* Invert a field element via Fermat's little theorem:
 * out = in^(p-2) mod p, p = 2^224 - 2^96 + 1, using djb's addition chain.
 * The exponent annotations on each line track which power of `in` the
 * working value holds after that step.
 * NOTE(review): the declarations of the widefelem scratch `tmp` and the
 * loop counter `i`, plus the surrounding braces, are not visible in this
 * excerpt. */
static void felem_inv(felem out, const felem in)
	felem ftmp, ftmp2, ftmp3, ftmp4;
	felem_square(tmp, in); felem_reduce(ftmp, tmp);		/* 2 */
	felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp);	/* 2^2 - 1 */
	felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);	/* 2^3 - 2 */
	felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp);	/* 2^3 - 1 */
	felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp);	/* 2^4 - 2 */
	felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);	/* 2^5 - 4 */
	felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);	/* 2^6 - 8 */
	felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp);	/* 2^6 - 1 */
	felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp);	/* 2^7 - 2 */
	for (i = 0; i < 5; ++i)					/* 2^12 - 2^6 */
		felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
	felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp2, tmp);	/* 2^12 - 1 */
	felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp);	/* 2^13 - 2 */
	for (i = 0; i < 11; ++i)				/* 2^24 - 2^12 */
		felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
	felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp2, tmp);	/* 2^24 - 1 */
	felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp);	/* 2^25 - 2 */
	for (i = 0; i < 23; ++i)				/* 2^48 - 2^24 */
		felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
	felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);	/* 2^48 - 1 */
	felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp);	/* 2^49 - 2 */
	for (i = 0; i < 47; ++i)				/* 2^96 - 2^48 */
		felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);
	felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp);	/* 2^96 - 1 */
	felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp);	/* 2^97 - 2 */
	for (i = 0; i < 23; ++i)				/* 2^120 - 2^24 */
		felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);
	felem_mul(tmp, ftmp2, ftmp4); felem_reduce(ftmp2, tmp);	/* 2^120 - 1 */
	for (i = 0; i < 6; ++i)					/* 2^126 - 2^6 */
		felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
	felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp);	/* 2^126 - 1 */
	felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);	/* 2^127 - 2 */
	felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp);	/* 2^127 - 1 */
	for (i = 0; i < 97; ++i)				/* 2^224 - 2^97 */
		felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
	felem_mul(tmp, ftmp, ftmp3); felem_reduce(out, tmp);	/* 2^224 - 2^96 - 1 */
741 /* Copy in constant time:
742 * if icopy == 1, copy in to out,
743 * if icopy == 0, copy out to itself. */
/* Constant-time conditional copy: if icopy == 1, out = in; if icopy == 0,
 * out is left unchanged.  No data-dependent branches or memory accesses.
 * NOTE(review): the store that applies `tmp` to out[i] (presumably
 * `out[i] ^= tmp;`) and the loop braces are not visible in this excerpt.
 * The `static void` of the signature is on a preceding elided line. */
copy_conditional(felem out, const felem in, limb icopy)
	/* icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one */
	const limb copy = -icopy;
	for (i = 0; i < 4; ++i)
		/* XOR-mask trick: tmp is in[i]^out[i] when copying, else 0 */
		const limb tmp = copy & (in[i] ^ out[i]);
757 /******************************************************************************/
759 * ELLIPTIC CURVE POINT OPERATIONS
761 * Points are represented in Jacobian projective coordinates:
762 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
763 * or to the point at infinity if Z == 0.
768 * Double an elliptic curve point:
769 * (X', Y', Z') = 2 * (X, Y, Z), where
770 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
771 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
772 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
773 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
774 * while x_out == y_in is not (maybe this works, but it's not tested).
/* Double a Jacobian-projective point: (x_out, y_out, z_out) = 2*(x_in,
 * y_in, z_in), using the dbl-2001-b-style formulas in the comment above.
 * Outputs may alias the corresponding inputs (x_out == x_in etc.), but
 * not cross-alias (x_out == y_in is untested).  The bracketed bounds on
 * each line track limb magnitudes to show no 128-bit overflow occurs.
 * NOTE(review): the `static void` of the signature, the declarations of
 * the widefelem scratch values tmp/tmp2, and the braces are on elided
 * lines not visible in this excerpt. */
point_double(felem x_out, felem y_out, felem z_out,
	const felem x_in, const felem y_in, const felem z_in)
	felem delta, gamma, beta, alpha, ftmp, ftmp2;
	felem_assign(ftmp, x_in);
	felem_assign(ftmp2, x_in);
	/* delta = z_in^2 */
	felem_square(tmp, z_in);
	felem_reduce(delta, tmp);
	/* gamma = y_in^2 */
	felem_square(tmp, y_in);
	felem_reduce(gamma, tmp);
	/* beta = x_in * gamma */
	felem_mul(tmp, x_in, gamma);
	felem_reduce(beta, tmp);
	/* alpha = 3*(x-delta)*(x+delta) */
	felem_diff(ftmp, delta);
	/* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
	felem_sum(ftmp2, delta);
	/* ftmp2[i] < 2^57 + 2^57 = 2^58 */
	felem_scalar(ftmp2, 3);
	/* ftmp2[i] < 3 * 2^58 < 2^60 */
	felem_mul(tmp, ftmp, ftmp2);
	/* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
	felem_reduce(alpha, tmp);
	/* x' = alpha^2 - 8*beta */
	felem_square(tmp, alpha);
	/* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
	felem_assign(ftmp, beta);
	felem_scalar(ftmp, 8);
	/* ftmp[i] < 8 * 2^57 = 2^60 */
	felem_diff_128_64(tmp, ftmp);
	/* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
	felem_reduce(x_out, tmp);
	/* z' = (y + z)^2 - gamma - delta */
	felem_sum(delta, gamma);
	/* delta[i] < 2^57 + 2^57 = 2^58 */
	felem_assign(ftmp, y_in);
	felem_sum(ftmp, z_in);
	/* ftmp[i] < 2^57 + 2^57 = 2^58 */
	felem_square(tmp, ftmp);
	/* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
	felem_diff_128_64(tmp, delta);
	/* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
	felem_reduce(z_out, tmp);
	/* y' = alpha*(4*beta - x') - 8*gamma^2 */
	felem_scalar(beta, 4);
	/* beta[i] < 4 * 2^57 = 2^59 */
	felem_diff(beta, x_out);
	/* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
	felem_mul(tmp, alpha, beta);
	/* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
	felem_square(tmp2, gamma);
	/* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
	widefelem_scalar(tmp2, 8);
	/* tmp2[i] < 8 * 2^116 = 2^119 */
	widefelem_diff(tmp, tmp2);
	/* tmp[i] < 2^119 + 2^120 < 2^121 */
	felem_reduce(y_out, tmp);
848 * Add two elliptic curve points:
849 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
850 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
851 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
852 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
853 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
854 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
856 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
859 /* This function is not entirely constant-time:
860 * it includes a branch for checking whether the two input points are equal,
861 * (while not equal to the point at infinity).
862 * This case never happens during single point multiplication,
863 * so there is no timing leak for ECDH or ECDSA signing. */
/* Add two Jacobian-projective points per the formulas in the comment
 * above.  If 'mixed' is set, z2 must be 1 or 0 (affine or infinity),
 * which skips the z2-dependent products.  Mostly constant time; the one
 * branch handles the equal-points case by falling back to point_double,
 * which cannot occur during single-point multiplication (no leak for
 * ECDH/ECDSA signing).  Bracketed bounds track limb magnitudes.
 * NOTE(review): the declarations of tmp/tmp2 (widefelems), the
 * if (!mixed) / else structure around the z2 products, the `return`
 * after point_double, and all braces are on elided lines not visible in
 * this excerpt. */
static void point_add(felem x3, felem y3, felem z3,
	const felem x1, const felem y1, const felem z1,
	const int mixed, const felem x2, const felem y2, const felem z2)
	felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
	limb z1_is_zero, z2_is_zero, x_equal, y_equal;
	/* general case: compute the z2-dependent products */
	felem_square(tmp, z2);
	felem_reduce(ftmp2, tmp);
	felem_mul(tmp, ftmp2, z2);
	felem_reduce(ftmp4, tmp);
	/* ftmp4 = z2^3*y1 */
	felem_mul(tmp2, ftmp4, y1);
	felem_reduce(ftmp4, tmp2);
	/* ftmp2 = z2^2*x1 */
	felem_mul(tmp2, ftmp2, x1);
	felem_reduce(ftmp2, tmp2);
	/* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
	/* ftmp4 = z2^3*y1 */
	felem_assign(ftmp4, y1);
	/* ftmp2 = z2^2*x1 */
	felem_assign(ftmp2, x1);
	/* ftmp = z1^2 */
	felem_square(tmp, z1);
	felem_reduce(ftmp, tmp);
	/* ftmp3 = z1^3 */
	felem_mul(tmp, ftmp, z1);
	felem_reduce(ftmp3, tmp);
	/* tmp = z1^3*y2 */
	felem_mul(tmp, ftmp3, y2);
	/* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
	/* ftmp3 = z1^3*y2 - z2^3*y1 */
	felem_diff_128_64(tmp, ftmp4);
	/* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
	felem_reduce(ftmp3, tmp);
	/* tmp = z1^2*x2 */
	felem_mul(tmp, ftmp, x2);
	/* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
	/* ftmp = z1^2*x2 - z2^2*x1 */
	felem_diff_128_64(tmp, ftmp2);
	/* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
	felem_reduce(ftmp, tmp);
	/* the formulae are incorrect if the points are equal
	 * so we check for this and do doubling if this happens */
	x_equal = felem_is_zero(ftmp);
	y_equal = felem_is_zero(ftmp3);
	z1_is_zero = felem_is_zero(z1);
	z2_is_zero = felem_is_zero(z2);
	/* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
	if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
		point_double(x3, y3, z3, x1, y1, z1);
	/* ftmp5 = z1*z2 */
	felem_mul(tmp, z1, z2);
	felem_reduce(ftmp5, tmp);
	/* special case z2 = 0 is handled later */
	felem_assign(ftmp5, z1);
	/* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
	felem_mul(tmp, ftmp, ftmp5);
	felem_reduce(z_out, tmp);
	/* ftmp = (z1^2*x2 - z2^2*x1)^2 */
	felem_assign(ftmp5, ftmp);
	felem_square(tmp, ftmp);
	felem_reduce(ftmp, tmp);
	/* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
	felem_mul(tmp, ftmp, ftmp5);
	felem_reduce(ftmp5, tmp);
	/* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
	felem_mul(tmp, ftmp2, ftmp);
	felem_reduce(ftmp2, tmp);
	/* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
	felem_mul(tmp, ftmp4, ftmp5);
	/* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
	/* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
	felem_square(tmp2, ftmp3);
	/* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
	/* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
	felem_diff_128_64(tmp2, ftmp5);
	/* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
	/* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
	felem_assign(ftmp5, ftmp2);
	felem_scalar(ftmp5, 2);
	/* ftmp5[i] < 2 * 2^57 = 2^58 */
	* x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
	* 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
	felem_diff_128_64(tmp2, ftmp5);
	/* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
	felem_reduce(x_out, tmp2);
	/* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
	felem_diff(ftmp2, x_out);
	/* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
	/* tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) */
	felem_mul(tmp2, ftmp3, ftmp2);
	/* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
	* y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
	* z2^3*y1*(z1^2*x2 - z2^2*x1)^3
	widefelem_diff(tmp2, tmp);
	/* tmp2[i] < 2^118 + 2^120 < 2^121 */
	felem_reduce(y_out, tmp2);
	/* the result (x_out, y_out, z_out) is incorrect if one of the inputs is
	 * the point at infinity, so we need to check for this separately */
	/* if point 1 is at infinity, copy point 2 to output, and vice versa */
	copy_conditional(x_out, x2, z1_is_zero);
	copy_conditional(x_out, x1, z2_is_zero);
	copy_conditional(y_out, y2, z1_is_zero);
	copy_conditional(y_out, y1, z2_is_zero);
	copy_conditional(z_out, z2, z1_is_zero);
	copy_conditional(z_out, z1, z2_is_zero);
	felem_assign(x3, x_out);
	felem_assign(y3, y_out);
	felem_assign(z3, z_out);
1026 * select_point selects the |idx|th point from a precomputation table and
1028 * The pre_comp array argument should be size of |size| argument
/* Constant-time table lookup: copy entry |idx| of the |size|-entry
 * precomputation table into out, touching every entry so the access
 * pattern is independent of idx.
 * NOTE(review): the declarations of i/j and the per-entry `mask`
 * computation (presumably an all-ones mask iff i == idx) are on elided
 * lines not visible in this excerpt. */
static void select_point(const u64 idx, unsigned int size, const felem pre_comp[][3], felem out[3])
	limb *outlimbs = &out[0][0];
	/* start from the point at infinity (all-zero limbs) */
	memset(outlimbs, 0, 3 * sizeof(felem));
	for (i = 0; i < size; i++)
		const limb *inlimbs = &pre_comp[i][0][0];
		/* OR in this entry's limbs only when mask is all-ones */
		for (j = 0; j < 4 * 3; j++)
			outlimbs[j] |= inlimbs[j] & mask;
1050 /* get_bit returns the |i|th bit in |in| */
1051 static char get_bit(const felem_bytearray in, unsigned i)
1055 return (in[i >> 3] >> (i & 7)) & 1;
/*
 * batch_mul: interleaved multi-scalar point multiplication.
 *   (x_out, y_out, z_out) = sum_i scalars[i]*P_i  (+ g_scalar*G if g_scalar
 *   is non-NULL, using the fixed comb tables in g_pre_comp).
 * Scalars are consumed msb-to-lsb. Generator additions use two 4-bit combs
 * with 28/56-bit window spacing during the last 28 rounds; other points use
 * signed windows processed every 5th doubling. All table lookups go through
 * select_point and sign flips through copy_conditional, keeping the point
 * selection constant-time in the scalar bits.
 * NOTE(review): lossy listing -- braces, local declarations (i, num, bits,
 * sign, digit, the skip/mixed bookkeeping) and some statements are elided;
 * line order follows the embedded original numbering.
 */
1058 /* Interleaved point multiplication using precomputed point multiples:
1059 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1060 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1061 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1062 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1063 static void batch_mul(felem x_out, felem y_out, felem z_out,
1064 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1065 const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[2][16][3])
1069 unsigned gen_mul = (g_scalar != NULL);
1070 felem nq[3], tmp[4];
1074 /* set nq to the point at infinity */
1075 memset(nq, 0, 3 * sizeof(felem))
1077 /* Loop over all scalars msb-to-lsb, interleaving additions
1078 * of multiples of the generator (two in each of the last 28 rounds)
1079 * and additions of other points multiples (every 5th round).
/* while skip is set, nq is still infinity: the first selected point is
 * copied in (memcpy below) rather than added, avoiding add-with-infinity */
1081 skip = 1; /* save two point operations in the first round */
/* 220 = 4*55 rounds when arbitrary points are present; 27 when only the
 * generator comb (28 rounds, i = 27..0) is needed */
1082 for (i = (num_points ? 220 : 27); i >= 0; --i)
1086 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1088 /* add multiples of the generator */
1089 if (gen_mul && (i <= 27))
1091 /* first, look 28 bits upwards */
1092 bits = get_bit(g_scalar, i + 196) << 3;
1093 bits |= get_bit(g_scalar, i + 140) << 2;
1094 bits |= get_bit(g_scalar, i + 84) << 1;
1095 bits |= get_bit(g_scalar, i + 28);
1096 /* select the point to add, in constant time */
1097 select_point(bits, 16, g_pre_comp[1], tmp);
/* mixed addition is valid here: g_pre_comp entries are stored affine */
1101 point_add(nq[0], nq[1], nq[2],
1102 nq[0], nq[1], nq[2],
1103 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
/* first round: nq was infinity, so just copy the selected point */
1107 memcpy(nq, tmp, 3 * sizeof(felem));
1111 /* second, look at the current position */
1112 bits = get_bit(g_scalar, i + 168) << 3;
1113 bits |= get_bit(g_scalar, i + 112) << 2;
1114 bits |= get_bit(g_scalar, i + 56) << 1;
1115 bits |= get_bit(g_scalar, i);
1116 /* select the point to add, in constant time */
1117 select_point(bits, 16, g_pre_comp[0], tmp);
1118 point_add(nq[0], nq[1], nq[2],
1119 nq[0], nq[1], nq[2],
1120 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1123 /* do other additions every 5 doublings */
1124 if (num_points && (i % 5 == 0))
1126 /* loop over all scalars */
1127 for (num = 0; num < num_points; ++num)
/* assemble a signed window: bits i+4..i-1 of this scalar */
1129 bits = get_bit(scalars[num], i + 4) << 5;
1130 bits |= get_bit(scalars[num], i + 3) << 4;
1131 bits |= get_bit(scalars[num], i + 2) << 3;
1132 bits |= get_bit(scalars[num], i + 1) << 2;
1133 bits |= get_bit(scalars[num], i) << 1;
1134 bits |= get_bit(scalars[num], i - 1);
/* recode into (sign, digit) with digit in [0,16] */
1135 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1137 /* select the point to add or subtract */
1138 select_point(digit, 17, pre_comp[num], tmp);
1139 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative point */
/* constant-time negate: conditionally overwrite Y with -Y */
1140 copy_conditional(tmp[1], tmp[3], sign);
1144 point_add(nq[0], nq[1], nq[2],
1145 nq[0], nq[1], nq[2],
1146 mixed, tmp[0], tmp[1], tmp[2]);
/* first addition overall: copy instead of adding to infinity */
1150 memcpy(nq, tmp, 3 * sizeof(felem));
1156 felem_assign(x_out, nq[0]);
1157 felem_assign(y_out, nq[1]);
1158 felem_assign(z_out, nq[2]);
1161 /******************************************************************************/
1162 /* FUNCTIONS TO MANAGE PRECOMPUTATION
/*
 * nistp224_pre_comp_new: allocate a fresh, zeroed, reference-counted
 * precomputation object.
 * NOTE(review): lossy listing -- the NULL-check branch around the ECerr call
 * and the final return are elided here.
 */
1165 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1167 NISTP224_PRE_COMP *ret = NULL;
1168 ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
/* raised when the allocation above fails (guard elided in this listing) */
1171 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
/* start with an all-zero generator table; filled in by precompute_mult */
1174 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1175 ret->references = 1;
/*
 * nistp224_pre_comp_dup: "duplicate" by bumping the reference count; the
 * table itself is immutable once published, so sharing is safe.
 * NOTE(review): the final `return src_;` is elided in this listing.
 */
1179 static void *nistp224_pre_comp_dup(void *src_)
1181 NISTP224_PRE_COMP *src = src_;
1183 /* no need to actually copy, these objects never change! */
/* atomic increment under the EC_PRE_COMP lock */
1184 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
/*
 * nistp224_pre_comp_free: drop one reference; the object is freed when the
 * count reaches zero.
 * NOTE(review): lossy listing -- the NULL check, the `i > 0` early return
 * and the OPENSSL_free call are elided here.
 */
1189 static void nistp224_pre_comp_free(void *pre_)
1192 NISTP224_PRE_COMP *pre = pre_;
/* CRYPTO_add returns the new count; remaining logic elided in listing */
1197 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
/*
 * nistp224_pre_comp_clear_free: like nistp224_pre_comp_free, but wipes the
 * object with OPENSSL_cleanse before releasing it, so precomputed secrets
 * do not linger in freed memory.
 * NOTE(review): lossy listing -- NULL check, refcount early return and the
 * final OPENSSL_free are elided here.
 */
1204 static void nistp224_pre_comp_clear_free(void *pre_)
1207 NISTP224_PRE_COMP *pre = pre_;
1212 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
/* scrub the whole structure before the (elided) free */
1216 OPENSSL_cleanse(pre, sizeof *pre);
1220 /******************************************************************************/
1221 /* OPENSSL EC_METHOD FUNCTIONS
/*
 * ec_GFp_nistp224_group_init: standard simple-GFp init, then record that the
 * curve coefficient a is -3 (true for P-224), which enables the faster
 * doubling formulas used by this implementation.
 * NOTE(review): the `return ret;` line is elided in this listing.
 */
1224 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1227 ret = ec_GFp_simple_group_init(group);
1228 group->a_is_minus3 = 1;
/*
 * ec_GFp_nistp224_group_set_curve: accept curve parameters only if they are
 * exactly the NIST P-224 constants (this EC_METHOD hard-codes that curve),
 * then install the fast NIST reduction and delegate to the generic setter.
 * Returns 1 on success, 0 on error (elided `return ret;` / `goto err` paths).
 * NOTE(review): lossy listing -- braces, the `int ret = 0;` declaration and
 * the err label are elided.
 */
1232 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1233 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1236 BN_CTX *new_ctx = NULL;
1237 BIGNUM *curve_p, *curve_a, *curve_b;
/* allocate a scratch context only if the caller did not supply one */
1240 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1242 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1243 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1244 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
/* load the canonical P-224 p, a, b constants for comparison */
1245 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1246 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1247 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
/* reject any parameters that are not exactly NIST P-224 */
1248 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1249 (BN_cmp(curve_b, b)))
1251 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1252 EC_R_WRONG_CURVE_PARAMETERS);
/* fast dedicated reduction mod the P-224 prime */
1255 group->field_mod_func = BN_nist_mod_224;
1256 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
/* free the context only if we created it ourselves */
1259 if (new_ctx != NULL)
1260 BN_CTX_free(new_ctx);
1264 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1265 * (X', Y') = (X/Z^2, Y/Z^3) */
/*
 * Returns 1 on success, 0 on error (point at infinity, conversion failure).
 * NOTE(review): lossy listing -- the felem_inv(z2, z1) call that inverts Z
 * (original line around 1280) is elided; without it the z2/z1 algebra below
 * does not read as a division. Braces, the widefelem tmp declaration and the
 * final `return 1;` are also elided.
 */
1266 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1267 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1269 felem z1, z2, x_in, y_in, x_out, y_out;
/* the affine coordinates of infinity are undefined */
1272 if (EC_POINT_is_at_infinity(group, point))
1274 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1275 EC_R_POINT_AT_INFINITY);
1278 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1279 (!BN_to_felem(z1, &point->Z))) return 0;
/* x' = X * (1/Z)^2, computed with reduced intermediate results */
1281 felem_square(tmp, z2); felem_reduce(z1, tmp);
1282 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
/* contract to the unique minimal representation before BN conversion */
1283 felem_contract(x_out, x_in);
1286 if (!felem_to_BN(x, x_out)) {
1287 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
/* y' = Y * (1/Z)^3 */
1292 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1293 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1294 felem_contract(y_out, y_in);
1297 if (!felem_to_BN(y, y_out)) {
1298 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
/*
 * make_points_affine: batch-convert |num| Jacobian points to affine form
 * (Z = 1) using the shared nistp helper, which needs num+1 scratch felems.
 * Affine entries allow the cheaper "mixed" addition path in point_add.
 * NOTE(review): lossy listing -- the first argument lines of the internal
 * call (num, points, tmp_felems, and the felem-size/zero callbacks around
 * original lines 1311-1314) are elided.
 */
1306 static void make_points_affine(size_t num, felem points[/*num*/][3], felem tmp_felems[/*num+1*/])
1308 /* Runs in constant time, unless an input is the point at infinity
1309 * (which normally shouldn't happen). */
/* generic batched-inversion driver, parameterized with the P-224 field ops */
1310 ec_GFp_nistp_points_make_affine_internal(
1315 (void (*)(void *)) felem_one,
1316 (int (*)(const void *)) felem_is_zero_int,
1317 (void (*)(void *, const void *)) felem_assign,
1318 (void (*)(void *, const void *)) felem_square_reduce,
1319 (void (*)(void *, const void *, const void *)) felem_mul_reduce,
1320 (void (*)(void *, const void *)) felem_inv,
1321 (void (*)(void *, const void *)) felem_contract);
/*
 * ec_GFp_nistp224_points_mul: r = scalar*G + sum_i scalars[i]*points[i].
 * NULL scalar/point entries contribute nothing. Uses stored (or built-in)
 * generator precomputation when available; otherwise the generator is folded
 * in as an extra "random" point. Scalars outside [0, 2^224) are first
 * reduced mod the group order -- explicitly not constant-time, per the
 * original comments. Returns 1 on success, 0 on error.
 * NOTE(review): lossy listing -- braces, several declarations (ret, i, j,
 * mixed, num_bytes), the err: label and the final return are elided.
 */
1324 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1325 * Result is stored in r (r can equal one of the inputs). */
1326 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1327 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1328 const BIGNUM *scalars[], BN_CTX *ctx)
1334 BN_CTX *new_ctx = NULL;
1335 BIGNUM *x, *y, *z, *tmp_scalar;
1336 felem_bytearray g_secret;
1337 felem_bytearray *secrets = NULL;
/* per point: multiples 0*P .. 16*P, each (X, Y, Z) */
1338 felem (*pre_comp)[17][3] = NULL;
1339 felem *tmp_felems = NULL;
1340 felem_bytearray tmp;
1342 int have_pre_comp = 0;
1343 size_t num_points = num;
1344 felem x_in, y_in, z_in, x_out, y_out, z_out;
1345 NISTP224_PRE_COMP *pre = NULL;
1346 const felem (*g_pre_comp)[16][3] = NULL;
1347 EC_POINT *generator = NULL;
1348 const EC_POINT *p = NULL;
1349 const BIGNUM *p_scalar = NULL;
1352 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1354 if (((x = BN_CTX_get(ctx)) == NULL) ||
1355 ((y = BN_CTX_get(ctx)) == NULL) ||
1356 ((z = BN_CTX_get(ctx)) == NULL) ||
1357 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
/* look for generator precomputation attached to the group */
1362 pre = EC_EX_DATA_get_data(group->extra_data,
1363 nistp224_pre_comp_dup, nistp224_pre_comp_free,
1364 nistp224_pre_comp_clear_free);
1366 /* we have precomputation, try to use it */
1367 g_pre_comp = (const felem (*)[16][3]) pre->g_pre_comp;
1369 /* try to use the standard precomputation */
1370 g_pre_comp = &gmul[0];
1371 generator = EC_POINT_new(group);
1372 if (generator == NULL)
1374 /* get the generator from precomputation */
1375 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1376 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1377 !felem_to_BN(z, g_pre_comp[0][1][2]))
1379 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1382 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1383 generator, x, y, z, ctx))
/* use built-in tables only if the group's generator is the standard one */
1385 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1386 /* precomputation matches generator */
1389 /* we don't have valid precomputation:
1390 * treat the generator as a random point */
1391 num_points = num_points + 1;
1396 if (num_points >= 3)
1398 /* unless we precompute multiples for just one or two points,
1399 * converting those into affine form is time well spent */
/* (the `mixed = 1;` assignment is elided in this listing) */
1402 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1403 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
/* scratch space only needed for the batched affine conversion */
1405 tmp_felems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1406 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL)))
1408 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1412 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1413 * i.e., they contribute nothing to the linear combination */
1414 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1415 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1416 for (i = 0; i < num_points; ++i)
/* index num (== original num) is the appended generator-as-random-point */
1421 p = EC_GROUP_get0_generator(group);
1425 /* the i^th point */
1428 p_scalar = scalars[i];
1430 if ((p_scalar != NULL) && (p != NULL))
1432 /* reduce scalar to 0 <= scalar < 2^224 */
1433 if ((BN_num_bits(p_scalar) > 224) || (BN_is_negative(p_scalar)))
1435 /* this is an unusual input, and we don't guarantee
1436 * constant-timeness */
1437 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1439 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1442 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1445 num_bytes = BN_bn2bin(p_scalar, tmp);
/* batch_mul wants little-endian scalar bytes */
1446 flip_endian(secrets[i], tmp, num_bytes);
1447 /* precompute multiples */
1448 if ((!BN_to_felem(x_out, &p->X)) ||
1449 (!BN_to_felem(y_out, &p->Y)) ||
1450 (!BN_to_felem(z_out, &p->Z))) goto err;
1451 felem_assign(pre_comp[i][1][0], x_out);
1452 felem_assign(pre_comp[i][1][1], y_out);
1453 felem_assign(pre_comp[i][1][2], z_out);
/* odd multiples via add, even multiples via double (branch elided) */
1454 for (j = 2; j <= 16; ++j)
1459 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1460 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1461 0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1466 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1467 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
/* affine tables enable mixed addition inside batch_mul */
1473 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1476 /* the scalar for the generator */
1477 if ((scalar != NULL) && (have_pre_comp))
1479 memset(g_secret, 0, sizeof g_secret);
1480 /* reduce scalar to 0 <= scalar < 2^224 */
1481 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar)))
1483 /* this is an unusual input, and we don't guarantee
1484 * constant-timeness */
1485 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
1487 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1490 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1493 num_bytes = BN_bn2bin(scalar, tmp);
1494 flip_endian(g_secret, tmp, num_bytes);
1495 /* do the multiplication with generator precomputation*/
1496 batch_mul(x_out, y_out, z_out,
1497 (const felem_bytearray (*)) secrets, num_points,
1499 mixed, (const felem (*)[17][3]) pre_comp,
1503 /* do the multiplication without generator precomputation */
1504 batch_mul(x_out, y_out, z_out,
1505 (const felem_bytearray (*)) secrets, num_points,
1506 NULL, mixed, (const felem (*)[17][3]) pre_comp, NULL);
1507 /* reduce the output to its unique minimal representation */
1508 felem_contract(x_in, x_out);
1509 felem_contract(y_in, y_out);
1510 felem_contract(z_in, z_out);
1511 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1512 (!felem_to_BN(z, z_in)))
1514 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1517 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
/* cleanup runs on both success and error paths (err: label elided) */
1521 if (generator != NULL)
1522 EC_POINT_free(generator);
1523 if (new_ctx != NULL)
1524 BN_CTX_free(new_ctx);
1525 if (secrets != NULL)
1526 OPENSSL_free(secrets);
1527 if (pre_comp != NULL)
1528 OPENSSL_free(pre_comp);
1529 if (tmp_felems != NULL)
1530 OPENSSL_free(tmp_felems);
/*
 * ec_GFp_nistp224_precompute_mult: build the generator comb tables and
 * attach them to |group| as extra data (replacing any previous tables).
 * Table layout: g_pre_comp[0][k] holds combinations of G, 2^56*G, 2^112*G,
 * 2^168*G selected by the bits of k; g_pre_comp[1][k] the same with the
 * 2^28-shifted multiples. When the group uses the standard generator, the
 * built-in gmul table is copied instead of recomputing.
 * Returns 1 on success, 0 on error.
 * NOTE(review): lossy listing -- braces, declarations (ret, i, j, x, y),
 * several point_double/point_add call names (only their argument lines
 * survive), the `done:` / `err:` labels and final return are elided.
 */
1534 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1537 NISTP224_PRE_COMP *pre = NULL;
1539 BN_CTX *new_ctx = NULL;
1541 EC_POINT *generator = NULL;
/* scratch for make_points_affine over the 31 non-infinity table entries */
1542 felem tmp_felems[32];
1544 /* throw away old precomputation */
1545 EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1546 nistp224_pre_comp_free, nistp224_pre_comp_clear_free);
1548 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1550 if (((x = BN_CTX_get(ctx)) == NULL) ||
1551 ((y = BN_CTX_get(ctx)) == NULL))
1553 /* get the generator */
1554 if (group->generator == NULL) goto err;
1555 generator = EC_POINT_new(group);
1556 if (generator == NULL)
/* construct the standard P-224 base point for comparison */
1558 BN_bin2bn(nistp224_curve_params[3], sizeof (felem_bytearray), x);
1559 BN_bin2bn(nistp224_curve_params[4], sizeof (felem_bytearray), y);
1560 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1562 if ((pre = nistp224_pre_comp_new()) == NULL)
1564 /* if the generator is the standard one, use built-in precomputation */
1565 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1567 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
/* otherwise compute the tables from the group's own generator */
1571 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1572 (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1573 (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1575 /* compute 2^56*G, 2^112*G, 2^168*G for the first table,
1576 * 2^28*G, 2^84*G, 2^140*G, 2^196*G for the second one
/* i walks the single-bit indices 1, 2, 4, 8 */
1578 for (i = 1; i <= 8; i <<= 1)
/* table[1][i] = 2^28 * table[0][i], via 28 doublings (call name elided) */
1581 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
1582 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1583 for (j = 0; j < 27; ++j)
1586 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
1587 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
/* table[0][2i] = 2^28 * table[1][i], i.e. another 28 doublings */
1592 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
1593 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1594 for (j = 0; j < 27; ++j)
1597 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
1598 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
1601 for (i = 0; i < 2; i++)
1603 /* g_pre_comp[i][0] is the point at infinity */
1604 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1605 /* the remaining multiples */
1606 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1608 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1609 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1610 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1611 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1612 pre->g_pre_comp[i][2][2]);
1613 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1615 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1616 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1617 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1618 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1619 pre->g_pre_comp[i][2][2]);
1620 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1622 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1623 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1624 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1625 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1626 pre->g_pre_comp[i][4][2]);
1627 /* 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G */
1629 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1630 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1631 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1632 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1633 pre->g_pre_comp[i][2][2]);
1634 for (j = 1; j < 8; ++j)
1636 /* odd multiples: add G resp. 2^28*G */
1638 pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1],
1639 pre->g_pre_comp[i][2*j+1][2], pre->g_pre_comp[i][2*j][0],
1640 pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
1641 0, pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1642 pre->g_pre_comp[i][1][2]);
/* skip entry 0 (infinity); convert the other 31 points to affine */
1645 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
/* publish; on success ownership passes to the group's extra data */
1647 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1648 nistp224_pre_comp_free, nistp224_pre_comp_clear_free))
1654 if (generator != NULL)
1655 EC_POINT_free(generator);
1656 if (new_ctx != NULL)
1657 BN_CTX_free(new_ctx);
/* reached with pre != NULL only on the (elided) error path */
1659 nistp224_pre_comp_free(pre);
/*
 * ec_GFp_nistp224_have_precompute_mult: report whether generator
 * precomputation is attached to |group| (1 if present, 0 otherwise).
 * NOTE(review): lossy listing -- the return statements of the if/else are
 * elided after the condition below.
 */
1663 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1665 if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1666 nistp224_pre_comp_free, nistp224_pre_comp_clear_free)
/* NOTE(review): presumably inside the #else branch of the
 * OPENSSL_NO_EC_NISTP_64_GCC_128 guard (preprocessor lines elided in this
 * listing) -- a self-referential dummy so the translation unit is not empty
 * when the implementation is compiled out. Confirm against the full file. */
1674 static void *dummy=&dummy;