1 /* crypto/ec/ecp_nistp224.c */
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
28 #include <openssl/opensslconf.h>
29 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
31 #ifndef OPENSSL_SYS_VMS
#include <stdint.h>
#include <string.h>
#include <openssl/err.h>
41 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
42 /* even with gcc, the typedef won't work for 32-bit platforms */
43 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
45 #error "Need GCC 3.1 or later to define type uint128_t"
53 /******************************************************************************/
55 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
57 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
58 * using 64-bit coefficients called 'limbs',
59 * and sometimes (for multiplication results) as
60 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
61 * using 128-bit coefficients called 'widelimbs'.
62 * A 4-limb representation is an 'felem';
63 * a 7-widelimb representation is a 'widefelem'.
64 * Even within felems, bits of adjacent limbs overlap, and we don't always
65 * reduce the representations: we ensure that inputs to each felem
66 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
67 * and fit into a 128-bit word without overflow. The coefficients are then
68 * again partially reduced to obtain an felem satisfying a_i < 2^57.
69 * We only reduce to the unique minimal representation at the end of the
73 typedef uint64_t limb;
74 typedef uint128_t widelimb;
76 typedef limb felem[4];
77 typedef widelimb widefelem[7];
79 /* Field element represented as a byte array.
80 * 28*8 = 224 bits is also the group order size for the elliptic curve,
81 * and we also use this type for scalars for point multiplication.
83 typedef u8 felem_bytearray[28];
85 static const felem_bytearray nistp224_curve_params[5] = {
86 {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* p */
87 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,
88 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01},
89 {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* a */
90 0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFF,0xFF,
91 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE},
92 {0xB4,0x05,0x0A,0x85,0x0C,0x04,0xB3,0xAB,0xF5,0x41, /* b */
93 0x32,0x56,0x50,0x44,0xB0,0xB7,0xD7,0xBF,0xD8,0xBA,
94 0x27,0x0B,0x39,0x43,0x23,0x55,0xFF,0xB4},
95 {0xB7,0x0E,0x0C,0xBD,0x6B,0xB4,0xBF,0x7F,0x32,0x13, /* x */
96 0x90,0xB9,0x4A,0x03,0xC1,0xD3,0x56,0xC2,0x11,0x22,
97 0x34,0x32,0x80,0xD6,0x11,0x5C,0x1D,0x21},
98 {0xbd,0x37,0x63,0x88,0xb5,0xf7,0x23,0xfb,0x4c,0x22, /* y */
99 0xdf,0xe6,0xcd,0x43,0x75,0xa0,0x5a,0x07,0x47,0x64,
100 0x44,0xd5,0x81,0x99,0x85,0x00,0x7e,0x34}
104 * Precomputed multiples of the standard generator
105 * Points are given in coordinates (X, Y, Z) where Z normally is 1
106 * (0 for the point at infinity).
107 * For each field element, slice a_0 is word 0, etc.
109 * The table has 2 * 16 elements, starting with the following:
110 * index | bits | point
111 * ------+---------+------------------------------
114 * 2 | 0 0 1 0 | 2^56G
115 * 3 | 0 0 1 1 | (2^56 + 1)G
116 * 4 | 0 1 0 0 | 2^112G
117 * 5 | 0 1 0 1 | (2^112 + 1)G
118 * 6 | 0 1 1 0 | (2^112 + 2^56)G
119 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
120 * 8 | 1 0 0 0 | 2^168G
121 * 9 | 1 0 0 1 | (2^168 + 1)G
122 * 10 | 1 0 1 0 | (2^168 + 2^56)G
123 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
124 * 12 | 1 1 0 0 | (2^168 + 2^112)G
125 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
126 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
127 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
128 * followed by a copy of this with each element multiplied by 2^28.
130 * The reason for this is so that we can clock bits into four different
131 * locations when doing simple scalar multiplies against the base point,
132 * and then another four locations using the second 16 elements.
134 static const felem gmul[2][16][3] =
138 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
139 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
141 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
142 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
144 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
145 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
147 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
148 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
150 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
151 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
153 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
154 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
156 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
157 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
159 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
160 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
162 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
163 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
165 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
166 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
168 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
169 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
171 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
172 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
174 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
175 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
177 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
178 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
180 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
181 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
186 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
187 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
189 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
190 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
192 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
193 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
195 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
196 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
198 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
199 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
201 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
202 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
204 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
205 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
207 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
208 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
210 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
211 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
213 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
214 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
216 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
217 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
219 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
220 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
222 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
223 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
225 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
226 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
228 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
229 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
232 /* Precomputation for the group generator. */
234 felem g_pre_comp[2][16][3];
238 const EC_METHOD *EC_GFp_nistp224_method(void)
240 static const EC_METHOD ret = {
241 EC_FLAGS_DEFAULT_OCT,
242 NID_X9_62_prime_field,
243 ec_GFp_nistp224_group_init,
244 ec_GFp_simple_group_finish,
245 ec_GFp_simple_group_clear_finish,
246 ec_GFp_nist_group_copy,
247 ec_GFp_nistp224_group_set_curve,
248 ec_GFp_simple_group_get_curve,
249 ec_GFp_simple_group_get_degree,
250 ec_GFp_simple_group_check_discriminant,
251 ec_GFp_simple_point_init,
252 ec_GFp_simple_point_finish,
253 ec_GFp_simple_point_clear_finish,
254 ec_GFp_simple_point_copy,
255 ec_GFp_simple_point_set_to_infinity,
256 ec_GFp_simple_set_Jprojective_coordinates_GFp,
257 ec_GFp_simple_get_Jprojective_coordinates_GFp,
258 ec_GFp_simple_point_set_affine_coordinates,
259 ec_GFp_nistp224_point_get_affine_coordinates,
260 0 /* point_set_compressed_coordinates */,
265 ec_GFp_simple_invert,
266 ec_GFp_simple_is_at_infinity,
267 ec_GFp_simple_is_on_curve,
269 ec_GFp_simple_make_affine,
270 ec_GFp_simple_points_make_affine,
271 ec_GFp_nistp224_points_mul,
272 ec_GFp_nistp224_precompute_mult,
273 ec_GFp_nistp224_have_precompute_mult,
274 ec_GFp_nist_field_mul,
275 ec_GFp_nist_field_sqr,
277 0 /* field_encode */,
278 0 /* field_decode */,
279 0 /* field_set_to_one */ };
284 /* Helper functions to convert field elements to/from internal representation */
285 static void bin28_to_felem(felem out, const u8 in[28])
287 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
288 out[1] = (*((const uint64_t *)(in+7))) & 0x00ffffffffffffff;
289 out[2] = (*((const uint64_t *)(in+14))) & 0x00ffffffffffffff;
290 out[3] = (*((const uint64_t *)(in+21))) & 0x00ffffffffffffff;
293 static void felem_to_bin28(u8 out[28], const felem in)
296 for (i = 0; i < 7; ++i)
298 out[i] = in[0]>>(8*i);
299 out[i+7] = in[1]>>(8*i);
300 out[i+14] = in[2]>>(8*i);
301 out[i+21] = in[3]>>(8*i);
305 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
306 static void flip_endian(u8 *out, const u8 *in, unsigned len)
309 for (i = 0; i < len; ++i)
310 out[i] = in[len-1-i];
313 /* From OpenSSL BIGNUM to internal representation */
314 static int BN_to_felem(felem out, const BIGNUM *bn)
316 felem_bytearray b_in;
317 felem_bytearray b_out;
320 /* BN_bn2bin eats leading zeroes */
321 memset(b_out, 0, sizeof b_out);
322 num_bytes = BN_num_bytes(bn);
323 if (num_bytes > sizeof b_out)
325 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
328 if (BN_is_negative(bn))
330 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
333 num_bytes = BN_bn2bin(bn, b_in);
334 flip_endian(b_out, b_in, num_bytes);
335 bin28_to_felem(out, b_out);
339 /* From internal representation to OpenSSL BIGNUM */
340 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
342 felem_bytearray b_in, b_out;
343 felem_to_bin28(b_in, in);
344 flip_endian(b_out, b_in, sizeof b_out);
345 return BN_bin2bn(b_out, sizeof b_out, out);
348 /******************************************************************************/
352 * Field operations, using the internal representation of field elements.
353 * NB! These operations are specific to our point multiplication and cannot be
354 * expected to be correct in general - e.g., multiplication with a large scalar
355 * will cause an overflow.
359 static void felem_one(felem out)
367 static void felem_assign(felem out, const felem in)
375 /* Sum two field elements: out += in */
376 static void felem_sum(felem out, const felem in)
384 /* Get negative value: out = -in */
385 /* Assumes in[i] < 2^57 */
386 static void felem_neg(felem out, const felem in)
388 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
389 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
390 static const limb two58m42m2 = (((limb) 1) << 58) -
391 (((limb) 1) << 42) - (((limb) 1) << 2);
393 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
394 out[0] = two58p2 - in[0];
395 out[1] = two58m42m2 - in[1];
396 out[2] = two58m2 - in[2];
397 out[3] = two58m2 - in[3];
400 /* Subtract field elements: out -= in */
401 /* Assumes in[i] < 2^57 */
402 static void felem_diff(felem out, const felem in)
404 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
405 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
406 static const limb two58m42m2 = (((limb) 1) << 58) -
407 (((limb) 1) << 42) - (((limb) 1) << 2);
409 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
411 out[1] += two58m42m2;
421 /* Subtract in unreduced 128-bit mode: out -= in */
422 /* Assumes in[i] < 2^119 */
423 static void widefelem_diff(widefelem out, const widefelem in)
425 static const widelimb two120 = ((widelimb) 1) << 120;
426 static const widelimb two120m64 = (((widelimb) 1) << 120) -
427 (((widelimb) 1) << 64);
428 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
429 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
431 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
436 out[4] += two120m104m64;
449 /* Subtract in mixed mode: out128 -= in64 */
451 static void felem_diff_128_64(widefelem out, const felem in)
453 static const widelimb two64p8 = (((widelimb) 1) << 64) +
454 (((widelimb) 1) << 8);
455 static const widelimb two64m8 = (((widelimb) 1) << 64) -
456 (((widelimb) 1) << 8);
457 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
458 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
460 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
462 out[1] += two64m48m8;
472 /* Multiply a field element by a scalar: out = out * scalar
473 * The scalars we actually use are small, so results fit without overflow */
474 static void felem_scalar(felem out, const limb scalar)
482 /* Multiply an unreduced field element by a scalar: out = out * scalar
483 * The scalars we actually use are small, so results fit without overflow */
484 static void widefelem_scalar(widefelem out, const widelimb scalar)
495 /* Square a field element: out = in^2 */
496 static void felem_square(widefelem out, const felem in)
498 limb tmp0, tmp1, tmp2;
499 tmp0 = 2 * in[0]; tmp1 = 2 * in[1]; tmp2 = 2 * in[2];
500 out[0] = ((widelimb) in[0]) * in[0];
501 out[1] = ((widelimb) in[0]) * tmp1;
502 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
503 out[3] = ((widelimb) in[3]) * tmp0 +
504 ((widelimb) in[1]) * tmp2;
505 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
506 out[5] = ((widelimb) in[3]) * tmp2;
507 out[6] = ((widelimb) in[3]) * in[3];
510 /* Multiply two field elements: out = in1 * in2 */
511 static void felem_mul(widefelem out, const felem in1, const felem in2)
513 out[0] = ((widelimb) in1[0]) * in2[0];
514 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
515 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
516 ((widelimb) in1[2]) * in2[0];
517 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
518 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
519 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
520 ((widelimb) in1[3]) * in2[1];
521 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
522 out[6] = ((widelimb) in1[3]) * in2[3];
526 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
527 * Requires in[i] < 2^126,
528 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
529 static void felem_reduce(felem out, const widefelem in)
531 static const widelimb two127p15 = (((widelimb) 1) << 127) +
532 (((widelimb) 1) << 15);
533 static const widelimb two127m71 = (((widelimb) 1) << 127) -
534 (((widelimb) 1) << 71);
535 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
536 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
539 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
540 output[0] = in[0] + two127p15;
541 output[1] = in[1] + two127m71m55;
542 output[2] = in[2] + two127m71;
546 /* Eliminate in[4], in[5], in[6] */
547 output[4] += in[6] >> 16;
548 output[3] += (in[6] & 0xffff) << 40;
551 output[3] += in[5] >> 16;
552 output[2] += (in[5] & 0xffff) << 40;
555 output[2] += output[4] >> 16;
556 output[1] += (output[4] & 0xffff) << 40;
557 output[0] -= output[4];
559 /* Carry 2 -> 3 -> 4 */
560 output[3] += output[2] >> 56;
561 output[2] &= 0x00ffffffffffffff;
563 output[4] = output[3] >> 56;
564 output[3] &= 0x00ffffffffffffff;
566 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
568 /* Eliminate output[4] */
569 output[2] += output[4] >> 16;
570 /* output[2] < 2^56 + 2^56 = 2^57 */
571 output[1] += (output[4] & 0xffff) << 40;
572 output[0] -= output[4];
574 /* Carry 0 -> 1 -> 2 -> 3 */
575 output[1] += output[0] >> 56;
576 out[0] = output[0] & 0x00ffffffffffffff;
578 output[2] += output[1] >> 56;
579 /* output[2] < 2^57 + 2^72 */
580 out[1] = output[1] & 0x00ffffffffffffff;
581 output[3] += output[2] >> 56;
582 /* output[3] <= 2^56 + 2^16 */
583 out[2] = output[2] & 0x00ffffffffffffff;
586 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
587 * out[3] <= 2^56 + 2^16 (due to final carry),
593 static void felem_square_reduce(felem out, const felem in)
596 felem_square(tmp, in);
597 felem_reduce(out, tmp);
600 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
603 felem_mul(tmp, in1, in2);
604 felem_reduce(out, tmp);
607 /* Reduce to unique minimal representation.
608 * Requires 0 <= in < 2*p (always call felem_reduce first) */
609 static void felem_contract(felem out, const felem in)
611 static const int64_t two56 = ((limb) 1) << 56;
612 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
613 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
619 /* Case 1: a = 1 iff in >= 2^224 */
623 tmp[3] &= 0x00ffffffffffffff;
624 /* Case 2: a = 0 iff p <= in < 2^224, i.e.,
625 * the high 128 bits are all 1 and the lower part is non-zero */
626 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
627 (((int64_t)(in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
628 a &= 0x00ffffffffffffff;
629 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
631 /* subtract 2^224 - 2^96 + 1 if a is all-one*/
632 tmp[3] &= a ^ 0xffffffffffffffff;
633 tmp[2] &= a ^ 0xffffffffffffffff;
634 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
637 /* eliminate negative coefficients: if tmp[0] is negative, tmp[1] must
638 * be non-zero, so we only need one step */
643 /* carry 1 -> 2 -> 3 */
644 tmp[2] += tmp[1] >> 56;
645 tmp[1] &= 0x00ffffffffffffff;
647 tmp[3] += tmp[2] >> 56;
648 tmp[2] &= 0x00ffffffffffffff;
650 /* Now 0 <= out < p */
657 /* Zero-check: returns 1 if input is 0, and 0 otherwise.
658 * We know that field elements are reduced to in < 2^225,
659 * so we only need to check three cases: 0, 2^224 - 2^96 + 1,
660 * and 2^225 - 2^97 + 2 */
661 static limb felem_is_zero(const felem in)
663 limb zero, two224m96p1, two225m97p2;
665 zero = in[0] | in[1] | in[2] | in[3];
666 zero = (((int64_t)(zero) - 1) >> 63) & 1;
667 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
668 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
669 two224m96p1 = (((int64_t)(two224m96p1) - 1) >> 63) & 1;
670 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
671 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
672 two225m97p2 = (((int64_t)(two225m97p2) - 1) >> 63) & 1;
673 return (zero | two224m96p1 | two225m97p2);
676 static limb felem_is_zero_int(const felem in)
678 return (int) (felem_is_zero(in) & ((limb)1));
681 /* Invert a field element */
682 /* Computation chain copied from djb's code */
683 static void felem_inv(felem out, const felem in)
685 felem ftmp, ftmp2, ftmp3, ftmp4;
689 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2 */
690 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 1 */
691 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2 */
692 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 1 */
693 felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
694 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
695 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
696 felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 1 */
697 felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
698 for (i = 0; i < 5; ++i) /* 2^12 - 2^6 */
700 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
702 felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
703 felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
704 for (i = 0; i < 11; ++i) /* 2^24 - 2^12 */
706 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
708 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
709 felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
710 for (i = 0; i < 23; ++i) /* 2^48 - 2^24 */
712 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
714 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
715 felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
716 for (i = 0; i < 47; ++i) /* 2^96 - 2^48 */
718 felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);
720 felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
721 felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
722 for (i = 0; i < 23; ++i) /* 2^120 - 2^24 */
724 felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);
726 felem_mul(tmp, ftmp2, ftmp4); felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
727 for (i = 0; i < 6; ++i) /* 2^126 - 2^6 */
729 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
731 felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp); /* 2^126 - 1 */
732 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^127 - 2 */
733 felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^127 - 1 */
734 for (i = 0; i < 97; ++i) /* 2^224 - 2^97 */
736 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
738 felem_mul(tmp, ftmp, ftmp3); felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
741 /* Copy in constant time:
742 * if icopy == 1, copy in to out,
743 * if icopy == 0, copy out to itself. */
745 copy_conditional(felem out, const felem in, limb icopy)
748 /* icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one */
749 const limb copy = -icopy;
750 for (i = 0; i < 4; ++i)
752 const limb tmp = copy & (in[i] ^ out[i]);
757 /******************************************************************************/
759 * ELLIPTIC CURVE POINT OPERATIONS
761 * Points are represented in Jacobian projective coordinates:
762 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
763 * or to the point at infinity if Z == 0.
768 * Double an elliptic curve point:
769 * (X', Y', Z') = 2 * (X, Y, Z), where
770 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
771 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
772 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
773 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
774 * while x_out == y_in is not (maybe this works, but it's not tested).
777 point_double(felem x_out, felem y_out, felem z_out,
778 const felem x_in, const felem y_in, const felem z_in)
781 felem delta, gamma, beta, alpha, ftmp, ftmp2;
783 felem_assign(ftmp, x_in);
784 felem_assign(ftmp2, x_in);
787 felem_square(tmp, z_in);
788 felem_reduce(delta, tmp);
791 felem_square(tmp, y_in);
792 felem_reduce(gamma, tmp);
795 felem_mul(tmp, x_in, gamma);
796 felem_reduce(beta, tmp);
798 /* alpha = 3*(x-delta)*(x+delta) */
799 felem_diff(ftmp, delta);
800 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
801 felem_sum(ftmp2, delta);
802 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
803 felem_scalar(ftmp2, 3);
804 /* ftmp2[i] < 3 * 2^58 < 2^60 */
805 felem_mul(tmp, ftmp, ftmp2);
806 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
807 felem_reduce(alpha, tmp);
809 /* x' = alpha^2 - 8*beta */
810 felem_square(tmp, alpha);
811 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
812 felem_assign(ftmp, beta);
813 felem_scalar(ftmp, 8);
814 /* ftmp[i] < 8 * 2^57 = 2^60 */
815 felem_diff_128_64(tmp, ftmp);
816 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
817 felem_reduce(x_out, tmp);
819 /* z' = (y + z)^2 - gamma - delta */
820 felem_sum(delta, gamma);
821 /* delta[i] < 2^57 + 2^57 = 2^58 */
822 felem_assign(ftmp, y_in);
823 felem_sum(ftmp, z_in);
824 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
825 felem_square(tmp, ftmp);
826 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
827 felem_diff_128_64(tmp, delta);
828 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
829 felem_reduce(z_out, tmp);
831 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
832 felem_scalar(beta, 4);
833 /* beta[i] < 4 * 2^57 = 2^59 */
834 felem_diff(beta, x_out);
835 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
836 felem_mul(tmp, alpha, beta);
837 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
838 felem_square(tmp2, gamma);
839 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
840 widefelem_scalar(tmp2, 8);
841 /* tmp2[i] < 8 * 2^116 = 2^119 */
842 widefelem_diff(tmp, tmp2);
843 /* tmp[i] < 2^119 + 2^120 < 2^121 */
844 felem_reduce(y_out, tmp);
848 * Add two elliptic curve points:
849 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
850 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
851 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
852 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
853 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
854 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
856 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
859 /* This function is not entirely constant-time:
860 * it includes a branch for checking whether the two input points are equal,
861 * (while not equal to the point at infinity).
862 * This case never happens during single point multiplication,
863 * so there is no timing leak for ECDH or ECDSA signing. */
864 static void point_add(felem x3, felem y3, felem z3,
865 const felem x1, const felem y1, const felem z1,
866 const int mixed, const felem x2, const felem y2, const felem z2)
868 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
870 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
875 felem_square(tmp, z2);
876 felem_reduce(ftmp2, tmp);
879 felem_mul(tmp, ftmp2, z2);
880 felem_reduce(ftmp4, tmp);
882 /* ftmp4 = z2^3*y1 */
883 felem_mul(tmp2, ftmp4, y1);
884 felem_reduce(ftmp4, tmp2);
886 /* ftmp2 = z2^2*x1 */
887 felem_mul(tmp2, ftmp2, x1);
888 felem_reduce(ftmp2, tmp2);
892 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
894 /* ftmp4 = z2^3*y1 */
895 felem_assign(ftmp4, y1);
897 /* ftmp2 = z2^2*x1 */
898 felem_assign(ftmp2, x1);
902 felem_square(tmp, z1);
903 felem_reduce(ftmp, tmp);
906 felem_mul(tmp, ftmp, z1);
907 felem_reduce(ftmp3, tmp);
910 felem_mul(tmp, ftmp3, y2);
911 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
913 /* ftmp3 = z1^3*y2 - z2^3*y1 */
914 felem_diff_128_64(tmp, ftmp4);
915 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
916 felem_reduce(ftmp3, tmp);
919 felem_mul(tmp, ftmp, x2);
920 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
922 /* ftmp = z1^2*x2 - z2^2*x1 */
923 felem_diff_128_64(tmp, ftmp2);
924 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
925 felem_reduce(ftmp, tmp);
927 /* the formulae are incorrect if the points are equal
928 * so we check for this and do doubling if this happens */
929 x_equal = felem_is_zero(ftmp);
930 y_equal = felem_is_zero(ftmp3);
931 z1_is_zero = felem_is_zero(z1);
932 z2_is_zero = felem_is_zero(z2);
933 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
934 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
936 point_double(x3, y3, z3, x1, y1, z1);
943 felem_mul(tmp, z1, z2);
944 felem_reduce(ftmp5, tmp);
948 /* special case z2 = 0 is handled later */
949 felem_assign(ftmp5, z1);
952 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
953 felem_mul(tmp, ftmp, ftmp5);
954 felem_reduce(z_out, tmp);
956 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
957 felem_assign(ftmp5, ftmp);
958 felem_square(tmp, ftmp);
959 felem_reduce(ftmp, tmp);
961 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
962 felem_mul(tmp, ftmp, ftmp5);
963 felem_reduce(ftmp5, tmp);
965 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
966 felem_mul(tmp, ftmp2, ftmp);
967 felem_reduce(ftmp2, tmp);
969 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
970 felem_mul(tmp, ftmp4, ftmp5);
971 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
973 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
974 felem_square(tmp2, ftmp3);
975 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
977 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
978 felem_diff_128_64(tmp2, ftmp5);
979 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
981 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
982 felem_assign(ftmp5, ftmp2);
983 felem_scalar(ftmp5, 2);
984 /* ftmp5[i] < 2 * 2^57 = 2^58 */
987 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
988 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
990 felem_diff_128_64(tmp2, ftmp5);
991 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
992 felem_reduce(x_out, tmp2);
994 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
995 felem_diff(ftmp2, x_out);
996 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
998 /* tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) */
999 felem_mul(tmp2, ftmp3, ftmp2);
1000 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1003 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1004 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1006 widefelem_diff(tmp2, tmp);
1007 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1008 felem_reduce(y_out, tmp2);
1010 /* the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1011 * the point at infinity, so we need to check for this separately */
1013 /* if point 1 is at infinity, copy point 2 to output, and vice versa */
1014 copy_conditional(x_out, x2, z1_is_zero);
1015 copy_conditional(x_out, x1, z2_is_zero);
1016 copy_conditional(y_out, y2, z1_is_zero);
1017 copy_conditional(y_out, y1, z2_is_zero);
1018 copy_conditional(z_out, z2, z1_is_zero);
1019 copy_conditional(z_out, z1, z2_is_zero);
1020 felem_assign(x3, x_out);
1021 felem_assign(y3, y_out);
1022 felem_assign(z3, z_out);
1026 * select_point selects the |idx|th point from a precomputation table and
1028 * The pre_comp array argument should be size of |size| argument
1030 static void select_point(const u64 idx, unsigned int size, const felem pre_comp[][3], felem out[3])
1033 limb *outlimbs = &out[0][0];
1034 memset(outlimbs, 0, 3 * sizeof(felem));
1036 for (i = 0; i < size; i++)
1038 const limb *inlimbs = &pre_comp[i][0][0];
1045 for (j = 0; j < 4 * 3; j++)
1046 outlimbs[j] |= inlimbs[j] & mask;
1050 /* get_bit returns the |i|th bit in |in| */
1051 static char get_bit(const felem_bytearray in, unsigned i)
1055 return (in[i >> 3] >> (i & 7)) & 1;
1058 /* Interleaved point multiplication using precomputed point multiples:
1059 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1060 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1061 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1062 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1063 static void batch_mul(felem x_out, felem y_out, felem z_out,
1064 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1065 const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[2][16][3])
1069 unsigned gen_mul = (g_scalar != NULL);
1070 felem nq[3], tmp[4];
1074 /* set nq to the point at infinity */
1075 memset(nq, 0, 3 * sizeof(felem));
1077 /* Loop over all scalars msb-to-lsb, interleaving additions
1078 * of multiples of the generator (two in each of the last 28 rounds)
1079 * and additions of other points multiples (every 5th round).
1081 skip = 1; /* save two point operations in the first round */
1082 for (i = (num_points ? 220 : 27); i >= 0; --i)
1086 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1088 /* add multiples of the generator */
1089 if (gen_mul && (i <= 27))
1091 /* first, look 28 bits upwards */
1092 bits = get_bit(g_scalar, i + 196) << 3;
1093 bits |= get_bit(g_scalar, i + 140) << 2;
1094 bits |= get_bit(g_scalar, i + 84) << 1;
1095 bits |= get_bit(g_scalar, i + 28);
1096 /* select the point to add, in constant time */
1097 select_point(bits, 16, g_pre_comp[1], tmp);
1101 /* value 1 below is argument for "mixed" */
1102 point_add(nq[0], nq[1], nq[2],
1103 nq[0], nq[1], nq[2],
1104 1, tmp[0], tmp[1], tmp[2]);
1108 memcpy(nq, tmp, 3 * sizeof(felem));
1112 /* second, look at the current position */
1113 bits = get_bit(g_scalar, i + 168) << 3;
1114 bits |= get_bit(g_scalar, i + 112) << 2;
1115 bits |= get_bit(g_scalar, i + 56) << 1;
1116 bits |= get_bit(g_scalar, i);
1117 /* select the point to add, in constant time */
1118 select_point(bits, 16, g_pre_comp[0], tmp);
1119 point_add(nq[0], nq[1], nq[2],
1120 nq[0], nq[1], nq[2],
1121 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1124 /* do other additions every 5 doublings */
1125 if (num_points && (i % 5 == 0))
1127 /* loop over all scalars */
1128 for (num = 0; num < num_points; ++num)
1130 bits = get_bit(scalars[num], i + 4) << 5;
1131 bits |= get_bit(scalars[num], i + 3) << 4;
1132 bits |= get_bit(scalars[num], i + 2) << 3;
1133 bits |= get_bit(scalars[num], i + 1) << 2;
1134 bits |= get_bit(scalars[num], i) << 1;
1135 bits |= get_bit(scalars[num], i - 1);
1136 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1138 /* select the point to add or subtract */
1139 select_point(digit, 17, pre_comp[num], tmp);
1140 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative point */
1141 copy_conditional(tmp[1], tmp[3], sign);
1145 point_add(nq[0], nq[1], nq[2],
1146 nq[0], nq[1], nq[2],
1147 mixed, tmp[0], tmp[1], tmp[2]);
1151 memcpy(nq, tmp, 3 * sizeof(felem));
1157 felem_assign(x_out, nq[0]);
1158 felem_assign(y_out, nq[1]);
1159 felem_assign(z_out, nq[2]);
1162 /******************************************************************************/
1163 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1166 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1168 NISTP224_PRE_COMP *ret = NULL;
1169 ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1172 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1175 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1176 ret->references = 1;
1180 static void *nistp224_pre_comp_dup(void *src_)
1182 NISTP224_PRE_COMP *src = src_;
1184 /* no need to actually copy, these objects never change! */
1185 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1190 static void nistp224_pre_comp_free(void *pre_)
1193 NISTP224_PRE_COMP *pre = pre_;
1198 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1205 static void nistp224_pre_comp_clear_free(void *pre_)
1208 NISTP224_PRE_COMP *pre = pre_;
1213 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1217 OPENSSL_cleanse(pre, sizeof *pre);
1221 /******************************************************************************/
1222 /* OPENSSL EC_METHOD FUNCTIONS
1225 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1228 ret = ec_GFp_simple_group_init(group);
1229 group->a_is_minus3 = 1;
1233 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1234 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1237 BN_CTX *new_ctx = NULL;
1238 BIGNUM *curve_p, *curve_a, *curve_b;
1241 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1243 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1244 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1245 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1246 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1247 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1248 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1249 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1250 (BN_cmp(curve_b, b)))
1252 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1253 EC_R_WRONG_CURVE_PARAMETERS);
1256 group->field_mod_func = BN_nist_mod_224;
1257 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1260 if (new_ctx != NULL)
1261 BN_CTX_free(new_ctx);
1265 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1266 * (X', Y') = (X/Z^2, Y/Z^3) */
1267 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1268 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1270 felem z1, z2, x_in, y_in, x_out, y_out;
1273 if (EC_POINT_is_at_infinity(group, point))
1275 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1276 EC_R_POINT_AT_INFINITY);
1279 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1280 (!BN_to_felem(z1, &point->Z))) return 0;
1282 felem_square(tmp, z2); felem_reduce(z1, tmp);
1283 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1284 felem_contract(x_out, x_in);
1287 if (!felem_to_BN(x, x_out)) {
1288 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1293 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1294 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1295 felem_contract(y_out, y_in);
1298 if (!felem_to_BN(y, y_out)) {
1299 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1307 static void make_points_affine(size_t num, felem points[/*num*/][3], felem tmp_felems[/*num+1*/])
1309 /* Runs in constant time, unless an input is the point at infinity
1310 * (which normally shouldn't happen). */
1311 ec_GFp_nistp_points_make_affine_internal(
1316 (void (*)(void *)) felem_one,
1317 (int (*)(const void *)) felem_is_zero_int,
1318 (void (*)(void *, const void *)) felem_assign,
1319 (void (*)(void *, const void *)) felem_square_reduce,
1320 (void (*)(void *, const void *, const void *)) felem_mul_reduce,
1321 (void (*)(void *, const void *)) felem_inv,
1322 (void (*)(void *, const void *)) felem_contract);
1325 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1326 * Result is stored in r (r can equal one of the inputs). */
1327 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1328 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1329 const BIGNUM *scalars[], BN_CTX *ctx)
1335 BN_CTX *new_ctx = NULL;
1336 BIGNUM *x, *y, *z, *tmp_scalar;
1337 felem_bytearray g_secret;
1338 felem_bytearray *secrets = NULL;
1339 felem (*pre_comp)[17][3] = NULL;
1340 felem *tmp_felems = NULL;
1341 felem_bytearray tmp;
1343 int have_pre_comp = 0;
1344 size_t num_points = num;
1345 felem x_in, y_in, z_in, x_out, y_out, z_out;
1346 NISTP224_PRE_COMP *pre = NULL;
1347 const felem (*g_pre_comp)[16][3] = NULL;
1348 EC_POINT *generator = NULL;
1349 const EC_POINT *p = NULL;
1350 const BIGNUM *p_scalar = NULL;
1353 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1355 if (((x = BN_CTX_get(ctx)) == NULL) ||
1356 ((y = BN_CTX_get(ctx)) == NULL) ||
1357 ((z = BN_CTX_get(ctx)) == NULL) ||
1358 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1363 pre = EC_EX_DATA_get_data(group->extra_data,
1364 nistp224_pre_comp_dup, nistp224_pre_comp_free,
1365 nistp224_pre_comp_clear_free);
1367 /* we have precomputation, try to use it */
1368 g_pre_comp = (const felem (*)[16][3]) pre->g_pre_comp;
1370 /* try to use the standard precomputation */
1371 g_pre_comp = &gmul[0];
1372 generator = EC_POINT_new(group);
1373 if (generator == NULL)
1375 /* get the generator from precomputation */
1376 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1377 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1378 !felem_to_BN(z, g_pre_comp[0][1][2]))
1380 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1383 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1384 generator, x, y, z, ctx))
1386 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1387 /* precomputation matches generator */
1390 /* we don't have valid precomputation:
1391 * treat the generator as a random point */
1392 num_points = num_points + 1;
1397 if (num_points >= 3)
1399 /* unless we precompute multiples for just one or two points,
1400 * converting those into affine form is time well spent */
1403 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1404 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1406 tmp_felems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1407 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL)))
1409 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1413 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1414 * i.e., they contribute nothing to the linear combination */
1415 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1416 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1417 for (i = 0; i < num_points; ++i)
1422 p = EC_GROUP_get0_generator(group);
1426 /* the i^th point */
1429 p_scalar = scalars[i];
1431 if ((p_scalar != NULL) && (p != NULL))
1433 /* reduce scalar to 0 <= scalar < 2^224 */
1434 if ((BN_num_bits(p_scalar) > 224) || (BN_is_negative(p_scalar)))
1436 /* this is an unusual input, and we don't guarantee
1437 * constant-timeness */
1438 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1440 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1443 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1446 num_bytes = BN_bn2bin(p_scalar, tmp);
1447 flip_endian(secrets[i], tmp, num_bytes);
1448 /* precompute multiples */
1449 if ((!BN_to_felem(x_out, &p->X)) ||
1450 (!BN_to_felem(y_out, &p->Y)) ||
1451 (!BN_to_felem(z_out, &p->Z))) goto err;
1452 felem_assign(pre_comp[i][1][0], x_out);
1453 felem_assign(pre_comp[i][1][1], y_out);
1454 felem_assign(pre_comp[i][1][2], z_out);
1455 for (j = 2; j <= 16; ++j)
1460 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1461 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1462 0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1467 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1468 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1474 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1477 /* the scalar for the generator */
1478 if ((scalar != NULL) && (have_pre_comp))
1480 memset(g_secret, 0, sizeof g_secret);
1481 /* reduce scalar to 0 <= scalar < 2^224 */
1482 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar)))
1484 /* this is an unusual input, and we don't guarantee
1485 * constant-timeness */
1486 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
1488 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1491 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1494 num_bytes = BN_bn2bin(scalar, tmp);
1495 flip_endian(g_secret, tmp, num_bytes);
1496 /* do the multiplication with generator precomputation*/
1497 batch_mul(x_out, y_out, z_out,
1498 (const felem_bytearray (*)) secrets, num_points,
1500 mixed, (const felem (*)[17][3]) pre_comp,
1504 /* do the multiplication without generator precomputation */
1505 batch_mul(x_out, y_out, z_out,
1506 (const felem_bytearray (*)) secrets, num_points,
1507 NULL, mixed, (const felem (*)[17][3]) pre_comp, NULL);
1508 /* reduce the output to its unique minimal representation */
1509 felem_contract(x_in, x_out);
1510 felem_contract(y_in, y_out);
1511 felem_contract(z_in, z_out);
1512 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1513 (!felem_to_BN(z, z_in)))
1515 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1518 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1522 if (generator != NULL)
1523 EC_POINT_free(generator);
1524 if (new_ctx != NULL)
1525 BN_CTX_free(new_ctx);
1526 if (secrets != NULL)
1527 OPENSSL_free(secrets);
1528 if (pre_comp != NULL)
1529 OPENSSL_free(pre_comp);
1530 if (tmp_felems != NULL)
1531 OPENSSL_free(tmp_felems);
1535 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1538 NISTP224_PRE_COMP *pre = NULL;
1540 BN_CTX *new_ctx = NULL;
1542 EC_POINT *generator = NULL;
1543 felem tmp_felems[32];
1545 /* throw away old precomputation */
1546 EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1547 nistp224_pre_comp_free, nistp224_pre_comp_clear_free);
1549 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1551 if (((x = BN_CTX_get(ctx)) == NULL) ||
1552 ((y = BN_CTX_get(ctx)) == NULL))
1554 /* get the generator */
1555 if (group->generator == NULL) goto err;
1556 generator = EC_POINT_new(group);
1557 if (generator == NULL)
1559 BN_bin2bn(nistp224_curve_params[3], sizeof (felem_bytearray), x);
1560 BN_bin2bn(nistp224_curve_params[4], sizeof (felem_bytearray), y);
1561 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1563 if ((pre = nistp224_pre_comp_new()) == NULL)
1565 /* if the generator is the standard one, use built-in precomputation */
1566 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1568 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1572 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1573 (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1574 (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1576 /* compute 2^56*G, 2^112*G, 2^168*G for the first table,
1577 * 2^28*G, 2^84*G, 2^140*G, 2^196*G for the second one
1579 for (i = 1; i <= 8; i <<= 1)
1582 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
1583 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1584 for (j = 0; j < 27; ++j)
1587 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
1588 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1593 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
1594 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1595 for (j = 0; j < 27; ++j)
1598 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
1599 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
1602 for (i = 0; i < 2; i++)
1604 /* g_pre_comp[i][0] is the point at infinity */
1605 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1606 /* the remaining multiples */
1607 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1609 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1610 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1611 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1612 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1613 pre->g_pre_comp[i][2][2]);
1614 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1616 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1617 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1618 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1619 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1620 pre->g_pre_comp[i][2][2]);
1621 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1623 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1624 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1625 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1626 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1627 pre->g_pre_comp[i][4][2]);
1628 /* 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G */
1630 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1631 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1632 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1633 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1634 pre->g_pre_comp[i][2][2]);
1635 for (j = 1; j < 8; ++j)
1637 /* odd multiples: add G resp. 2^28*G */
1639 pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1],
1640 pre->g_pre_comp[i][2*j+1][2], pre->g_pre_comp[i][2*j][0],
1641 pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
1642 0, pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1643 pre->g_pre_comp[i][1][2]);
1646 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1648 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1649 nistp224_pre_comp_free, nistp224_pre_comp_clear_free))
1655 if (generator != NULL)
1656 EC_POINT_free(generator);
1657 if (new_ctx != NULL)
1658 BN_CTX_free(new_ctx);
1660 nistp224_pre_comp_free(pre);
1664 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1666 if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1667 nistp224_pre_comp_free, nistp224_pre_comp_clear_free)
1675 static void *dummy=&dummy;