crypto/ec/ecp_nistp224.c

   1 /*
   2  * Copyright 2010-2023 The OpenSSL Project Authors. All Rights Reserved.
   3  *
   4  * Licensed under the Apache License 2.0 (the "License").  You may not use
   5  * this file except in compliance with the License.  You can obtain a copy
   6  * in the file LICENSE in the source distribution or at
   7  * https://www.openssl.org/source/license.html
   8  */
   9
  10 /* Copyright 2011 Google Inc.
  11  *
  12  * Licensed under the Apache License, Version 2.0 (the "License");
  13  *
  14  * you may not use this file except in compliance with the License.
  15  * You may obtain a copy of the License at
  16  *
  17  *     http://www.apache.org/licenses/LICENSE-2.0
  18  *
  19  *  Unless required by applicable law or agreed to in writing, software
  20  *  distributed under the License is distributed on an "AS IS" BASIS,
  21  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  22  *  See the License for the specific language governing permissions and
  23  *  limitations under the License.
  24  */
  25
  26 /*
  27  * ECDSA low level APIs are deprecated for public use, but still ok for
  28  * internal use.
  29  */
  30 #include "internal/deprecated.h"
  31
  32 /*
  33  * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
  34  *
  35  * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
  36  * and Adam Langley's public domain 64-bit C implementation of curve25519
  37  */
  38
  39 #include <openssl/opensslconf.h>
  40
  41 #include <stdint.h>
  42 #include <string.h>
  43 #include <openssl/err.h>
  44 #include "ec_local.h"
  45
  46 #include "internal/numbers.h"
  47
  48 #ifndef INT128_MAX
  49 # error "Your compiler doesn't appear to support 128-bit integer types"
  50 #endif
  51
  52 typedef uint8_t u8;
  53 typedef uint64_t u64;
  54
  55 /******************************************************************************/
  56 /*-
  57  * INTERNAL REPRESENTATION OF FIELD ELEMENTS
  58  *
  59  * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
  60  * using 64-bit coefficients called 'limbs',
  61  * and sometimes (for multiplication results) as
  62  * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
  63  * using 128-bit coefficients called 'widelimbs'.
  64  * A 4-limb representation is an 'felem';
  65  * a 7-widelimb representation is a 'widefelem'.
  66  * Even within felems, bits of adjacent limbs overlap, and we don't always
  67  * reduce the representations: we ensure that inputs to each felem
  68  * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
  69  * and fit into a 128-bit word without overflow. The coefficients are then
  70  * again partially reduced to obtain an felem satisfying a_i < 2^57.
  71  * We only reduce to the unique minimal representation at the end of the
  72  * computation.
  73  */
  74
  75 typedef uint64_t limb;
  76 typedef uint64_t limb_aX __attribute((__aligned__(1)));
  77 typedef uint128_t widelimb;
  78
  79 typedef limb felem[4];
  80 typedef widelimb widefelem[7];
  81
  82 /*
  83  * Field element represented as a byte array. 28*8 = 224 bits is also the
  84  * group order size for the elliptic curve, and we also use this type for
  85  * scalars for point multiplication.
  86  */
  87 typedef u8 felem_bytearray[28];
  88
  89 static const felem_bytearray nistp224_curve_params[5] = {
  90     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
  91      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
  92      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
  93     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
  94      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
  95      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
  96     {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
  97      0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
  98      0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
  99     {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
 100      0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
 101      0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
 102     {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
 103      0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
 104      0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
 105 };
 106
 107 /*-
 108  * Precomputed multiples of the standard generator
 109  * Points are given in coordinates (X, Y, Z) where Z normally is 1
 110  * (0 for the point at infinity).
 111  * For each field element, slice a_0 is word 0, etc.
 112  *
 113  * The table has 2 * 16 elements, starting with the following:
 114  * index | bits    | point
 115  * ------+---------+------------------------------
 116  *     0 | 0 0 0 0 | 0G
 117  *     1 | 0 0 0 1 | 1G
 118  *     2 | 0 0 1 0 | 2^56G
 119  *     3 | 0 0 1 1 | (2^56 + 1)G
 120  *     4 | 0 1 0 0 | 2^112G
 121  *     5 | 0 1 0 1 | (2^112 + 1)G
 122  *     6 | 0 1 1 0 | (2^112 + 2^56)G
 123  *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
 124  *     8 | 1 0 0 0 | 2^168G
 125  *     9 | 1 0 0 1 | (2^168 + 1)G
 126  *    10 | 1 0 1 0 | (2^168 + 2^56)G
 127  *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
 128  *    12 | 1 1 0 0 | (2^168 + 2^112)G
 129  *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
 130  *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
 131  *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
 132  * followed by a copy of this with each element multiplied by 2^28.
 133  *
 134  * The reason for this is so that we can clock bits into four different
 135  * locations when doing simple scalar multiplies against the base point,
 136  * and then another four locations using the second 16 elements.
 137  */
 138 static const felem gmul[2][16][3] = {
 139 {{{0, 0, 0, 0},
 140   {0, 0, 0, 0},
 141   {0, 0, 0, 0}},
 142  {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
 143   {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
 144   {1, 0, 0, 0}},
 145  {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
 146   {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
 147   {1, 0, 0, 0}},
 148  {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
 149   {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
 150   {1, 0, 0, 0}},
 151  {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
 152   {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
 153   {1, 0, 0, 0}},
 154  {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
 155   {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
 156   {1, 0, 0, 0}},
 157  {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
 158   {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
 159   {1, 0, 0, 0}},
 160  {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
 161   {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
 162   {1, 0, 0, 0}},
 163  {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
 164   {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
 165   {1, 0, 0, 0}},
 166  {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
 167   {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
 168   {1, 0, 0, 0}},
 169  {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
 170   {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
 171   {1, 0, 0, 0}},
 172  {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
 173   {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
 174   {1, 0, 0, 0}},
 175  {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
 176   {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
 177   {1, 0, 0, 0}},
 178  {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
 179   {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
 180   {1, 0, 0, 0}},
 181  {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
 182   {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
 183   {1, 0, 0, 0}},
 184  {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
 185   {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
 186   {1, 0, 0, 0}}},
 187 {{{0, 0, 0, 0},
 188   {0, 0, 0, 0},
 189   {0, 0, 0, 0}},
 190  {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
 191   {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
 192   {1, 0, 0, 0}},
 193  {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
 194   {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
 195   {1, 0, 0, 0}},
 196  {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
 197   {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
 198   {1, 0, 0, 0}},
 199  {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
 200   {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
 201   {1, 0, 0, 0}},
 202  {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
 203   {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
 204   {1, 0, 0, 0}},
 205  {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
 206   {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
 207   {1, 0, 0, 0}},
 208  {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
 209   {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
 210   {1, 0, 0, 0}},
 211  {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
 212   {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
 213   {1, 0, 0, 0}},
 214  {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
 215   {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
 216   {1, 0, 0, 0}},
 217  {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
 218   {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
 219   {1, 0, 0, 0}},
 220  {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
 221   {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
 222   {1, 0, 0, 0}},
 223  {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
 224   {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
 225   {1, 0, 0, 0}},
 226  {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
 227   {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
 228   {1, 0, 0, 0}},
 229  {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
 230   {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
 231   {1, 0, 0, 0}},
 232  {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
 233   {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
 234   {1, 0, 0, 0}}}
 235 };
 236
 237 /* Precomputation for the group generator. */
 238 struct nistp224_pre_comp_st {
 239     felem g_pre_comp[2][16][3];
 240     CRYPTO_REF_COUNT references;
 241 };
 242
 243 const EC_METHOD *EC_GFp_nistp224_method(void)
 244 {
 245     static const EC_METHOD ret = {
 246         EC_FLAGS_DEFAULT_OCT,
 247         NID_X9_62_prime_field,
 248         ossl_ec_GFp_nistp224_group_init,
 249         ossl_ec_GFp_simple_group_finish,
 250         ossl_ec_GFp_simple_group_clear_finish,
 251         ossl_ec_GFp_nist_group_copy,
 252         ossl_ec_GFp_nistp224_group_set_curve,
 253         ossl_ec_GFp_simple_group_get_curve,
 254         ossl_ec_GFp_simple_group_get_degree,
 255         ossl_ec_group_simple_order_bits,
 256         ossl_ec_GFp_simple_group_check_discriminant,
 257         ossl_ec_GFp_simple_point_init,
 258         ossl_ec_GFp_simple_point_finish,
 259         ossl_ec_GFp_simple_point_clear_finish,
 260         ossl_ec_GFp_simple_point_copy,
 261         ossl_ec_GFp_simple_point_set_to_infinity,
 262         ossl_ec_GFp_simple_point_set_affine_coordinates,
 263         ossl_ec_GFp_nistp224_point_get_affine_coordinates,
 264         0 /* point_set_compressed_coordinates */ ,
 265         0 /* point2oct */ ,
 266         0 /* oct2point */ ,
 267         ossl_ec_GFp_simple_add,
 268         ossl_ec_GFp_simple_dbl,
 269         ossl_ec_GFp_simple_invert,
 270         ossl_ec_GFp_simple_is_at_infinity,
 271         ossl_ec_GFp_simple_is_on_curve,
 272         ossl_ec_GFp_simple_cmp,
 273         ossl_ec_GFp_simple_make_affine,
 274         ossl_ec_GFp_simple_points_make_affine,
 275         ossl_ec_GFp_nistp224_points_mul,
 276         ossl_ec_GFp_nistp224_precompute_mult,
 277         ossl_ec_GFp_nistp224_have_precompute_mult,
 278         ossl_ec_GFp_nist_field_mul,
 279         ossl_ec_GFp_nist_field_sqr,
 280         0 /* field_div */ ,
 281         ossl_ec_GFp_simple_field_inv,
 282         0 /* field_encode */ ,
 283         0 /* field_decode */ ,
 284         0,                      /* field_set_to_one */
 285         ossl_ec_key_simple_priv2oct,
 286         ossl_ec_key_simple_oct2priv,
 287         0, /* set private */
 288         ossl_ec_key_simple_generate_key,
 289         ossl_ec_key_simple_check_key,
 290         ossl_ec_key_simple_generate_public_key,
 291         0, /* keycopy */
 292         0, /* keyfinish */
 293         ossl_ecdh_simple_compute_key,
 294         ossl_ecdsa_simple_sign_setup,
 295         ossl_ecdsa_simple_sign_sig,
 296         ossl_ecdsa_simple_verify_sig,
 297         0, /* field_inverse_mod_ord */
 298         0, /* blind_coordinates */
 299         0, /* ladder_pre */
 300         0, /* ladder_step */
 301         0  /* ladder_post */
 302     };
 303
 304     return &ret;
 305 }
 306
 307 /*
 308  * Helper functions to convert field elements to/from internal representation
 309  */
 310 static void bin28_to_felem(felem out, const u8 in[28])
 311 {
 312     out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
 313     out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
 314     out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
 315     out[3] = (*((const limb_aX *)(in + 20))) >> 8;
 316 }
 317
 318 static void felem_to_bin28(u8 out[28], const felem in)
 319 {
 320     unsigned i;
 321     for (i = 0; i < 7; ++i) {
 322         out[i] = in[0] >> (8 * i);
 323         out[i + 7] = in[1] >> (8 * i);
 324         out[i + 14] = in[2] >> (8 * i);
 325         out[i + 21] = in[3] >> (8 * i);
 326     }
 327 }
 328
 329 /* From OpenSSL BIGNUM to internal representation */
 330 static int BN_to_felem(felem out, const BIGNUM *bn)
 331 {
 332     felem_bytearray b_out;
 333     int num_bytes;
 334
 335     if (BN_is_negative(bn)) {
 336         ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
 337         return 0;
 338     }
 339     num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
 340     if (num_bytes < 0) {
 341         ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
 342         return 0;
 343     }
 344     bin28_to_felem(out, b_out);
 345     return 1;
 346 }
 347
 348 /* From internal representation to OpenSSL BIGNUM */
 349 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
 350 {
 351     felem_bytearray b_out;
 352     felem_to_bin28(b_out, in);
 353     return BN_lebin2bn(b_out, sizeof(b_out), out);
 354 }
 355
 356 /******************************************************************************/
 357 /*-
 358  *                              FIELD OPERATIONS
 359  *
 360  * Field operations, using the internal representation of field elements.
 361  * NB! These operations are specific to our point multiplication and cannot be
 362  * expected to be correct in general - e.g., multiplication with a large scalar
 363  * will cause an overflow.
 364  *
 365  */
 366
 367 static void felem_one(felem out)
 368 {
 369     out[0] = 1;
 370     out[1] = 0;
 371     out[2] = 0;
 372     out[3] = 0;
 373 }
 374
 375 static void felem_assign(felem out, const felem in)
 376 {
 377     out[0] = in[0];
 378     out[1] = in[1];
 379     out[2] = in[2];
 380     out[3] = in[3];
 381 }
 382
 383 /* Sum two field elements: out += in */
 384 static void felem_sum(felem out, const felem in)
 385 {
 386     out[0] += in[0];
 387     out[1] += in[1];
 388     out[2] += in[2];
 389     out[3] += in[3];
 390 }
 391
 392 /* Subtract field elements: out -= in */
 393 /* Assumes in[i] < 2^57 */
 394 static void felem_diff(felem out, const felem in)
 395 {
 396     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
 397     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
 398     static const limb two58m42m2 = (((limb) 1) << 58) -
 399         (((limb) 1) << 42) - (((limb) 1) << 2);
 400
 401     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 402     out[0] += two58p2;
 403     out[1] += two58m42m2;
 404     out[2] += two58m2;
 405     out[3] += two58m2;
 406
 407     out[0] -= in[0];
 408     out[1] -= in[1];
 409     out[2] -= in[2];
 410     out[3] -= in[3];
 411 }
 412
 413 /* Subtract in unreduced 128-bit mode: out -= in */
 414 /* Assumes in[i] < 2^119 */
 415 static void widefelem_diff(widefelem out, const widefelem in)
 416 {
 417     static const widelimb two120 = ((widelimb) 1) << 120;
 418     static const widelimb two120m64 = (((widelimb) 1) << 120) -
 419         (((widelimb) 1) << 64);
 420     static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
 421         (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
 422
 423     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 424     out[0] += two120;
 425     out[1] += two120m64;
 426     out[2] += two120m64;
 427     out[3] += two120;
 428     out[4] += two120m104m64;
 429     out[5] += two120m64;
 430     out[6] += two120m64;
 431
 432     out[0] -= in[0];
 433     out[1] -= in[1];
 434     out[2] -= in[2];
 435     out[3] -= in[3];
 436     out[4] -= in[4];
 437     out[5] -= in[5];
 438     out[6] -= in[6];
 439 }
 440
 441 /* Subtract in mixed mode: out128 -= in64 */
 442 /* in[i] < 2^63 */
 443 static void felem_diff_128_64(widefelem out, const felem in)
 444 {
 445     static const widelimb two64p8 = (((widelimb) 1) << 64) +
 446         (((widelimb) 1) << 8);
 447     static const widelimb two64m8 = (((widelimb) 1) << 64) -
 448         (((widelimb) 1) << 8);
 449     static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
 450         (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
 451
 452     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 453     out[0] += two64p8;
 454     out[1] += two64m48m8;
 455     out[2] += two64m8;
 456     out[3] += two64m8;
 457
 458     out[0] -= in[0];
 459     out[1] -= in[1];
 460     out[2] -= in[2];
 461     out[3] -= in[3];
 462 }
 463
 464 /*
 465  * Multiply a field element by a scalar: out = out * scalar The scalars we
 466  * actually use are small, so results fit without overflow
 467  */
 468 static void felem_scalar(felem out, const limb scalar)
 469 {
 470     out[0] *= scalar;
 471     out[1] *= scalar;
 472     out[2] *= scalar;
 473     out[3] *= scalar;
 474 }
 475
 476 /*
 477  * Multiply an unreduced field element by a scalar: out = out * scalar The
 478  * scalars we actually use are small, so results fit without overflow
 479  */
 480 static void widefelem_scalar(widefelem out, const widelimb scalar)
 481 {
 482     out[0] *= scalar;
 483     out[1] *= scalar;
 484     out[2] *= scalar;
 485     out[3] *= scalar;
 486     out[4] *= scalar;
 487     out[5] *= scalar;
 488     out[6] *= scalar;
 489 }
 490
 491 /* Square a field element: out = in^2 */
 492 static void felem_square(widefelem out, const felem in)
 493 {
 494     limb tmp0, tmp1, tmp2;
 495     tmp0 = 2 * in[0];
 496     tmp1 = 2 * in[1];
 497     tmp2 = 2 * in[2];
 498     out[0] = ((widelimb) in[0]) * in[0];
 499     out[1] = ((widelimb) in[0]) * tmp1;
 500     out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
 501     out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
 502     out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
 503     out[5] = ((widelimb) in[3]) * tmp2;
 504     out[6] = ((widelimb) in[3]) * in[3];
 505 }
 506
 507 /* Multiply two field elements: out = in1 * in2 */
 508 static void felem_mul(widefelem out, const felem in1, const felem in2)
 509 {
 510     out[0] = ((widelimb) in1[0]) * in2[0];
 511     out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
 512     out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
 513              ((widelimb) in1[2]) * in2[0];
 514     out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
 515              ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
 516     out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
 517              ((widelimb) in1[3]) * in2[1];
 518     out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
 519     out[6] = ((widelimb) in1[3]) * in2[3];
 520 }
 521
 522 /*-
 523  * Reduce seven 128-bit coefficients to four 64-bit coefficients.
 524  * Requires in[i] < 2^126,
 525  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
 526 static void felem_reduce(felem out, const widefelem in)
 527 {
 528     static const widelimb two127p15 = (((widelimb) 1) << 127) +
 529         (((widelimb) 1) << 15);
 530     static const widelimb two127m71 = (((widelimb) 1) << 127) -
 531         (((widelimb) 1) << 71);
 532     static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
 533         (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
 534     widelimb output[5];
 535
 536     /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
 537     output[0] = in[0] + two127p15;
 538     output[1] = in[1] + two127m71m55;
 539     output[2] = in[2] + two127m71;
 540     output[3] = in[3];
 541     output[4] = in[4];
 542
 543     /* Eliminate in[4], in[5], in[6] */
 544     output[4] += in[6] >> 16;
 545     output[3] += (in[6] & 0xffff) << 40;
 546     output[2] -= in[6];
 547
 548     output[3] += in[5] >> 16;
 549     output[2] += (in[5] & 0xffff) << 40;
 550     output[1] -= in[5];
 551
 552     output[2] += output[4] >> 16;
 553     output[1] += (output[4] & 0xffff) << 40;
 554     output[0] -= output[4];
 555
 556     /* Carry 2 -> 3 -> 4 */
 557     output[3] += output[2] >> 56;
 558     output[2] &= 0x00ffffffffffffff;
 559
 560     output[4] = output[3] >> 56;
 561     output[3] &= 0x00ffffffffffffff;
 562
 563     /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
 564
 565     /* Eliminate output[4] */
 566     output[2] += output[4] >> 16;
 567     /* output[2] < 2^56 + 2^56 = 2^57 */
 568     output[1] += (output[4] & 0xffff) << 40;
 569     output[0] -= output[4];
 570
 571     /* Carry 0 -> 1 -> 2 -> 3 */
 572     output[1] += output[0] >> 56;
 573     out[0] = output[0] & 0x00ffffffffffffff;
 574
 575     output[2] += output[1] >> 56;
 576     /* output[2] < 2^57 + 2^72 */
 577     out[1] = output[1] & 0x00ffffffffffffff;
 578     output[3] += output[2] >> 56;
 579     /* output[3] <= 2^56 + 2^16 */
 580     out[2] = output[2] & 0x00ffffffffffffff;
 581
 582     /*-
 583      * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
 584      * out[3] <= 2^56 + 2^16 (due to final carry),
 585      * so out < 2*p
 586      */
 587     out[3] = output[3];
 588 }
 589
 590 static void felem_square_reduce(felem out, const felem in)
 591 {
 592     widefelem tmp;
 593     felem_square(tmp, in);
 594     felem_reduce(out, tmp);
 595 }
 596
 597 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
 598 {
 599     widefelem tmp;
 600     felem_mul(tmp, in1, in2);
 601     felem_reduce(out, tmp);
 602 }
 603
 604 /*
 605  * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
 606  * call felem_reduce first)
 607  */
 608 static void felem_contract(felem out, const felem in)
 609 {
 610     static const int64_t two56 = ((limb) 1) << 56;
 611     /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
 612     /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
 613     int64_t tmp[4], a;
 614     tmp[0] = in[0];
 615     tmp[1] = in[1];
 616     tmp[2] = in[2];
 617     tmp[3] = in[3];
 618     /* Case 1: a = 1 iff in >= 2^224 */
 619     a = (in[3] >> 56);
 620     tmp[0] -= a;
 621     tmp[1] += a << 40;
 622     tmp[3] &= 0x00ffffffffffffff;
 623     /*
 624      * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
 625      * and the lower part is non-zero
 626      */
 627     a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
 628         (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
 629     a &= 0x00ffffffffffffff;
 630     /* turn a into an all-one mask (if a = 0) or an all-zero mask */
 631     a = (a - 1) >> 63;
 632     /* subtract 2^224 - 2^96 + 1 if a is all-one */
 633     tmp[3] &= a ^ 0xffffffffffffffff;
 634     tmp[2] &= a ^ 0xffffffffffffffff;
 635     tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
 636     tmp[0] -= 1 & a;
 637
 638     /*
 639      * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
 640      * non-zero, so we only need one step
 641      */
 642     a = tmp[0] >> 63;
 643     tmp[0] += two56 & a;
 644     tmp[1] -= 1 & a;
 645
 646     /* carry 1 -> 2 -> 3 */
 647     tmp[2] += tmp[1] >> 56;
 648     tmp[1] &= 0x00ffffffffffffff;
 649
 650     tmp[3] += tmp[2] >> 56;
 651     tmp[2] &= 0x00ffffffffffffff;
 652
 653     /* Now 0 <= out < p */
 654     out[0] = tmp[0];
 655     out[1] = tmp[1];
 656     out[2] = tmp[2];
 657     out[3] = tmp[3];
 658 }
 659
 660 /*
 661  * Get negative value: out = -in
 662  * Requires in[i] < 2^63,
 663  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
 664  */
 665 static void felem_neg(felem out, const felem in)
 666 {
 667     widefelem tmp;
 668
 669     memset(tmp, 0, sizeof(tmp));
 670     felem_diff_128_64(tmp, in);
 671     felem_reduce(out, tmp);
 672 }
 673
 674 /*
 675  * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
 676  * elements are reduced to in < 2^225, so we only need to check three cases:
 677  * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
 678  */
 679 static limb felem_is_zero(const felem in)
 680 {
 681     limb zero, two224m96p1, two225m97p2;
 682
 683     zero = in[0] | in[1] | in[2] | in[3];
 684     zero = (((int64_t) (zero) - 1) >> 63) & 1;
 685     two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
 686         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
 687     two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
 688     two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
 689         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
 690     two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
 691     return (zero | two224m96p1 | two225m97p2);
 692 }
 693
 694 static int felem_is_zero_int(const void *in)
 695 {
 696     return (int)(felem_is_zero(in) & ((limb) 1));
 697 }
 698
 699 /* Invert a field element */
 700 /* Computation chain copied from djb's code */
 701 static void felem_inv(felem out, const felem in)
 702 {
 703     felem ftmp, ftmp2, ftmp3, ftmp4;
 704     widefelem tmp;
 705     unsigned i;
 706
 707     felem_square(tmp, in);
 708     felem_reduce(ftmp, tmp);    /* 2 */
 709     felem_mul(tmp, in, ftmp);
 710     felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
 711     felem_square(tmp, ftmp);
 712     felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
 713     felem_mul(tmp, in, ftmp);
 714     felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
 715     felem_square(tmp, ftmp);
 716     felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
 717     felem_square(tmp, ftmp2);
 718     felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
 719     felem_square(tmp, ftmp2);
 720     felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
 721     felem_mul(tmp, ftmp2, ftmp);
 722     felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
 723     felem_square(tmp, ftmp);
 724     felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
 725     for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
 726         felem_square(tmp, ftmp2);
 727         felem_reduce(ftmp2, tmp);
 728     }
 729     felem_mul(tmp, ftmp2, ftmp);
 730     felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
 731     felem_square(tmp, ftmp2);
 732     felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
 733     for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
 734         felem_square(tmp, ftmp3);
 735         felem_reduce(ftmp3, tmp);
 736     }
 737     felem_mul(tmp, ftmp3, ftmp2);
 738     felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
 739     felem_square(tmp, ftmp2);
 740     felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
 741     for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
 742         felem_square(tmp, ftmp3);
 743         felem_reduce(ftmp3, tmp);
 744     }
 745     felem_mul(tmp, ftmp3, ftmp2);
 746     felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
 747     felem_square(tmp, ftmp3);
 748     felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
 749     for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
 750         felem_square(tmp, ftmp4);
 751         felem_reduce(ftmp4, tmp);
 752     }
 753     felem_mul(tmp, ftmp3, ftmp4);
 754     felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
 755     felem_square(tmp, ftmp3);
 756     felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
 757     for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
 758         felem_square(tmp, ftmp4);
 759         felem_reduce(ftmp4, tmp);
 760     }
 761     felem_mul(tmp, ftmp2, ftmp4);
 762     felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
 763     for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
 764         felem_square(tmp, ftmp2);
 765         felem_reduce(ftmp2, tmp);
 766     }
 767     felem_mul(tmp, ftmp2, ftmp);
 768     felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
 769     felem_square(tmp, ftmp);
 770     felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
 771     felem_mul(tmp, ftmp, in);
 772     felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
 773     for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
 774         felem_square(tmp, ftmp);
 775         felem_reduce(ftmp, tmp);
 776     }
 777     felem_mul(tmp, ftmp, ftmp3);
 778     felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
 779 }
 780
 781 /*
 782  * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
 783  * out to itself.
 784  */
 785 static void copy_conditional(felem out, const felem in, limb icopy)
 786 {
 787     unsigned i;
 788     /*
 789      * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
 790      */
 791     const limb copy = -icopy;
 792     for (i = 0; i < 4; ++i) {
 793         const limb tmp = copy & (in[i] ^ out[i]);
 794         out[i] ^= tmp;
 795     }
 796 }
 797
 798 /******************************************************************************/
 799 /*-
 800  *                       ELLIPTIC CURVE POINT OPERATIONS
 801  *
 802  * Points are represented in Jacobian projective coordinates:
 803  * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
 804  * or to the point at infinity if Z == 0.
 805  *
 806  */
 807
 808 /*-
 809  * Double an elliptic curve point:
 810  * (X', Y', Z') = 2 * (X, Y, Z), where
 811  * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 812  * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
 813  * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
 814  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
 815  * while x_out == y_in is not (maybe this works, but it's not tested).
 816  */
 817 static void
 818 point_double(felem x_out, felem y_out, felem z_out,
 819              const felem x_in, const felem y_in, const felem z_in)
 820 {
 821     widefelem tmp, tmp2;
 822     felem delta, gamma, beta, alpha, ftmp, ftmp2;
 823
 824     felem_assign(ftmp, x_in);
 825     felem_assign(ftmp2, x_in);
 826
 827     /* delta = z^2 */
 828     felem_square(tmp, z_in);
 829     felem_reduce(delta, tmp);
 830
 831     /* gamma = y^2 */
 832     felem_square(tmp, y_in);
 833     felem_reduce(gamma, tmp);
 834
 835     /* beta = x*gamma */
 836     felem_mul(tmp, x_in, gamma);
 837     felem_reduce(beta, tmp);
 838
 839     /* alpha = 3*(x-delta)*(x+delta) */
 840     felem_diff(ftmp, delta);
 841     /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
 842     felem_sum(ftmp2, delta);
 843     /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
 844     felem_scalar(ftmp2, 3);
 845     /* ftmp2[i] < 3 * 2^58 < 2^60 */
 846     felem_mul(tmp, ftmp, ftmp2);
 847     /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
 848     felem_reduce(alpha, tmp);
 849
 850     /* x' = alpha^2 - 8*beta */
 851     felem_square(tmp, alpha);
 852     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 853     felem_assign(ftmp, beta);
 854     felem_scalar(ftmp, 8);
 855     /* ftmp[i] < 8 * 2^57 = 2^60 */
 856     felem_diff_128_64(tmp, ftmp);
 857     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 858     felem_reduce(x_out, tmp);
 859
 860     /* z' = (y + z)^2 - gamma - delta */
 861     felem_sum(delta, gamma);
 862     /* delta[i] < 2^57 + 2^57 = 2^58 */
 863     felem_assign(ftmp, y_in);
 864     felem_sum(ftmp, z_in);
 865     /* ftmp[i] < 2^57 + 2^57 = 2^58 */
 866     felem_square(tmp, ftmp);
 867     /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
 868     felem_diff_128_64(tmp, delta);
 869     /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
 870     felem_reduce(z_out, tmp);
 871
 872     /* y' = alpha*(4*beta - x') - 8*gamma^2 */
 873     felem_scalar(beta, 4);
 874     /* beta[i] < 4 * 2^57 = 2^59 */
 875     felem_diff(beta, x_out);
 876     /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
 877     felem_mul(tmp, alpha, beta);
 878     /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
 879     felem_square(tmp2, gamma);
 880     /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
 881     widefelem_scalar(tmp2, 8);
 882     /* tmp2[i] < 8 * 2^116 = 2^119 */
 883     widefelem_diff(tmp, tmp2);
 884     /* tmp[i] < 2^119 + 2^120 < 2^121 */
 885     felem_reduce(y_out, tmp);
 886 }
 887
 888 /*-
 889  * Add two elliptic curve points:
 890  * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
 891  * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
 892  * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
 893  * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
 894  *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
 895  * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
 896  *
 897  * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
 898  */
 899
 900 /*
 901  * This function is not entirely constant-time: it includes a branch for
 902  * checking whether the two input points are equal, (while not equal to the
 903  * point at infinity). This case never happens during single point
 904  * multiplication, so there is no timing leak for ECDH or ECDSA signing.
 905  */
 906 static void point_add(felem x3, felem y3, felem z3,
 907                       const felem x1, const felem y1, const felem z1,
 908                       const int mixed, const felem x2, const felem y2,
 909                       const felem z2)
 910 {
 911     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
 912     widefelem tmp, tmp2;
 913     limb z1_is_zero, z2_is_zero, x_equal, y_equal;
 914     limb points_equal;
 915
 916     if (!mixed) {
 917         /* ftmp2 = z2^2 */
 918         felem_square(tmp, z2);
 919         felem_reduce(ftmp2, tmp);
 920
 921         /* ftmp4 = z2^3 */
 922         felem_mul(tmp, ftmp2, z2);
 923         felem_reduce(ftmp4, tmp);
 924
 925         /* ftmp4 = z2^3*y1 */
 926         felem_mul(tmp2, ftmp4, y1);
 927         felem_reduce(ftmp4, tmp2);
 928
 929         /* ftmp2 = z2^2*x1 */
 930         felem_mul(tmp2, ftmp2, x1);
 931         felem_reduce(ftmp2, tmp2);
 932     } else {
 933         /*
 934          * We'll assume z2 = 1 (special case z2 = 0 is handled later)
 935          */
 936
 937         /* ftmp4 = z2^3*y1 */
 938         felem_assign(ftmp4, y1);
 939
 940         /* ftmp2 = z2^2*x1 */
 941         felem_assign(ftmp2, x1);
 942     }
 943
 944     /* ftmp = z1^2 */
 945     felem_square(tmp, z1);
 946     felem_reduce(ftmp, tmp);
 947
 948     /* ftmp3 = z1^3 */
 949     felem_mul(tmp, ftmp, z1);
 950     felem_reduce(ftmp3, tmp);
 951
 952     /* tmp = z1^3*y2 */
 953     felem_mul(tmp, ftmp3, y2);
 954     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 955
 956     /* ftmp3 = z1^3*y2 - z2^3*y1 */
 957     felem_diff_128_64(tmp, ftmp4);
 958     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 959     felem_reduce(ftmp3, tmp);
 960
 961     /* tmp = z1^2*x2 */
 962     felem_mul(tmp, ftmp, x2);
 963     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 964
 965     /* ftmp = z1^2*x2 - z2^2*x1 */
 966     felem_diff_128_64(tmp, ftmp2);
 967     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 968     felem_reduce(ftmp, tmp);
 969
 970     /*
 971      * The formulae are incorrect if the points are equal, in affine coordinates
 972      * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
 973      * happens.
 974      *
 975      * We use bitwise operations to avoid potential side-channels introduced by
 976      * the short-circuiting behaviour of boolean operators.
 977      */
 978     x_equal = felem_is_zero(ftmp);
 979     y_equal = felem_is_zero(ftmp3);
 980     /*
 981      * The special case of either point being the point at infinity (z1 and/or
 982      * z2 are zero), is handled separately later on in this function, so we
 983      * avoid jumping to point_double here in those special cases.
 984      */
 985     z1_is_zero = felem_is_zero(z1);
 986     z2_is_zero = felem_is_zero(z2);
 987
 988     /*
 989      * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
 990      * specific implementation `felem_is_zero()` returns truth as `0x1`
 991      * (rather than `0xff..ff`).
 992      *
 993      * This implies that `~true` in this implementation becomes
 994      * `0xff..fe` (rather than `0x0`): for this reason, to be used in
 995      * the if expression, we mask out only the last bit in the next
 996      * line.
 997      */
 998     points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
 999
1000     if (points_equal) {
1001         /*
1002          * This is obviously not constant-time but, as mentioned before, this
1003          * case never happens during single point multiplication, so there is no
1004          * timing leak for ECDH or ECDSA signing.
1005          */
1006         point_double(x3, y3, z3, x1, y1, z1);
1007         return;
1008     }
1009
1010     /* ftmp5 = z1*z2 */
1011     if (!mixed) {
1012         felem_mul(tmp, z1, z2);
1013         felem_reduce(ftmp5, tmp);
1014     } else {
1015         /* special case z2 = 0 is handled later */
1016         felem_assign(ftmp5, z1);
1017     }
1018
1019     /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1020     felem_mul(tmp, ftmp, ftmp5);
1021     felem_reduce(z_out, tmp);
1022
1023     /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1024     felem_assign(ftmp5, ftmp);
1025     felem_square(tmp, ftmp);
1026     felem_reduce(ftmp, tmp);
1027
1028     /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1029     felem_mul(tmp, ftmp, ftmp5);
1030     felem_reduce(ftmp5, tmp);
1031
1032     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1033     felem_mul(tmp, ftmp2, ftmp);
1034     felem_reduce(ftmp2, tmp);
1035
1036     /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1037     felem_mul(tmp, ftmp4, ftmp5);
1038     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1039
1040     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1041     felem_square(tmp2, ftmp3);
1042     /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1043
1044     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1045     felem_diff_128_64(tmp2, ftmp5);
1046     /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1047
1048     /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1049     felem_assign(ftmp5, ftmp2);
1050     felem_scalar(ftmp5, 2);
1051     /* ftmp5[i] < 2 * 2^57 = 2^58 */
1052
1053     /*-
1054      * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1055      *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1056      */
1057     felem_diff_128_64(tmp2, ftmp5);
1058     /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1059     felem_reduce(x_out, tmp2);
1060
1061     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1062     felem_diff(ftmp2, x_out);
1063     /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1064
1065     /*
1066      * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1067      */
1068     felem_mul(tmp2, ftmp3, ftmp2);
1069     /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1070
1071     /*-
1072      * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1073      *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1074      */
1075     widefelem_diff(tmp2, tmp);
1076     /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1077     felem_reduce(y_out, tmp2);
1078
1079     /*
1080      * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1081      * the point at infinity, so we need to check for this separately
1082      */
1083
1084     /*
1085      * if point 1 is at infinity, copy point 2 to output, and vice versa
1086      */
1087     copy_conditional(x_out, x2, z1_is_zero);
1088     copy_conditional(x_out, x1, z2_is_zero);
1089     copy_conditional(y_out, y2, z1_is_zero);
1090     copy_conditional(y_out, y1, z2_is_zero);
1091     copy_conditional(z_out, z2, z1_is_zero);
1092     copy_conditional(z_out, z1, z2_is_zero);
1093     felem_assign(x3, x_out);
1094     felem_assign(y3, y_out);
1095     felem_assign(z3, z_out);
1096 }
1097
1098 /*
1099  * select_point selects the |idx|th point from a precomputation table and
1100  * copies it to out.
1101  * The pre_comp array argument should be size of |size| argument
1102  */
1103 static void select_point(const u64 idx, unsigned int size,
1104                          const felem pre_comp[][3], felem out[3])
1105 {
1106     unsigned i, j;
1107     limb *outlimbs = &out[0][0];
1108
1109     memset(out, 0, sizeof(*out) * 3);
1110     for (i = 0; i < size; i++) {
1111         const limb *inlimbs = &pre_comp[i][0][0];
1112         u64 mask = i ^ idx;
1113         mask |= mask >> 4;
1114         mask |= mask >> 2;
1115         mask |= mask >> 1;
1116         mask &= 1;
1117         mask--;
1118         for (j = 0; j < 4 * 3; j++)
1119             outlimbs[j] |= inlimbs[j] & mask;
1120     }
1121 }
1122
1123 /* get_bit returns the |i|th bit in |in| */
1124 static char get_bit(const felem_bytearray in, unsigned i)
1125 {
1126     if (i >= 224)
1127         return 0;
1128     return (in[i >> 3] >> (i & 7)) & 1;
1129 }
1130
1131 /*
1132  * Interleaved point multiplication using precomputed point multiples: The
1133  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1134  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1135  * generator, using certain (large) precomputed multiples in g_pre_comp.
1136  * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1137  */
1138 static void batch_mul(felem x_out, felem y_out, felem z_out,
1139                       const felem_bytearray scalars[],
1140                       const unsigned num_points, const u8 *g_scalar,
1141                       const int mixed, const felem pre_comp[][17][3],
1142                       const felem g_pre_comp[2][16][3])
1143 {
1144     int i, skip;
1145     unsigned num;
1146     unsigned gen_mul = (g_scalar != NULL);
1147     felem nq[3], tmp[4];
1148     u64 bits;
1149     u8 sign, digit;
1150
1151     /* set nq to the point at infinity */
1152     memset(nq, 0, sizeof(nq));
1153
1154     /*
1155      * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1156      * of the generator (two in each of the last 28 rounds) and additions of
1157      * other points multiples (every 5th round).
1158      */
1159     skip = 1;                   /* save two point operations in the first
1160                                  * round */
1161     for (i = (num_points ? 220 : 27); i >= 0; --i) {
1162         /* double */
1163         if (!skip)
1164             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1165
1166         /* add multiples of the generator */
1167         if (gen_mul && (i <= 27)) {
1168             /* first, look 28 bits upwards */
1169             bits = get_bit(g_scalar, i + 196) << 3;
1170             bits |= get_bit(g_scalar, i + 140) << 2;
1171             bits |= get_bit(g_scalar, i + 84) << 1;
1172             bits |= get_bit(g_scalar, i + 28);
1173             /* select the point to add, in constant time */
1174             select_point(bits, 16, g_pre_comp[1], tmp);
1175
1176             if (!skip) {
1177                 /* value 1 below is argument for "mixed" */
1178                 point_add(nq[0], nq[1], nq[2],
1179                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1180             } else {
1181                 memcpy(nq, tmp, 3 * sizeof(felem));
1182                 skip = 0;
1183             }
1184
1185             /* second, look at the current position */
1186             bits = get_bit(g_scalar, i + 168) << 3;
1187             bits |= get_bit(g_scalar, i + 112) << 2;
1188             bits |= get_bit(g_scalar, i + 56) << 1;
1189             bits |= get_bit(g_scalar, i);
1190             /* select the point to add, in constant time */
1191             select_point(bits, 16, g_pre_comp[0], tmp);
1192             point_add(nq[0], nq[1], nq[2],
1193                       nq[0], nq[1], nq[2],
1194                       1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1195         }
1196
1197         /* do other additions every 5 doublings */
1198         if (num_points && (i % 5 == 0)) {
1199             /* loop over all scalars */
1200             for (num = 0; num < num_points; ++num) {
1201                 bits = get_bit(scalars[num], i + 4) << 5;
1202                 bits |= get_bit(scalars[num], i + 3) << 4;
1203                 bits |= get_bit(scalars[num], i + 2) << 3;
1204                 bits |= get_bit(scalars[num], i + 1) << 2;
1205                 bits |= get_bit(scalars[num], i) << 1;
1206                 bits |= get_bit(scalars[num], i - 1);
1207                 ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1208
1209                 /* select the point to add or subtract */
1210                 select_point(digit, 17, pre_comp[num], tmp);
1211                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1212                                             * point */
1213                 copy_conditional(tmp[1], tmp[3], sign);
1214
1215                 if (!skip) {
1216                     point_add(nq[0], nq[1], nq[2],
1217                               nq[0], nq[1], nq[2],
1218                               mixed, tmp[0], tmp[1], tmp[2]);
1219                 } else {
1220                     memcpy(nq, tmp, 3 * sizeof(felem));
1221                     skip = 0;
1222                 }
1223             }
1224         }
1225     }
1226     felem_assign(x_out, nq[0]);
1227     felem_assign(y_out, nq[1]);
1228     felem_assign(z_out, nq[2]);
1229 }
1230
1231 /******************************************************************************/
1232 /*
1233  * FUNCTIONS TO MANAGE PRECOMPUTATION
1234  */
1235
1236 static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1237 {
1238     NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1239
1240     if (ret == NULL)
1241         return ret;
1242
1243
1244     if (!CRYPTO_NEW_REF(&ret->references, 1)) {
1245         OPENSSL_free(ret);
1246         return NULL;
1247     }
1248     return ret;
1249 }
1250
1251 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1252 {
1253     int i;
1254     if (p != NULL)
1255         CRYPTO_UP_REF(&p->references, &i);
1256     return p;
1257 }
1258
1259 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1260 {
1261     int i;
1262
1263     if (p == NULL)
1264         return;
1265
1266     CRYPTO_DOWN_REF(&p->references, &i);
1267     REF_PRINT_COUNT("EC_nistp224", p);
1268     if (i > 0)
1269         return;
1270     REF_ASSERT_ISNT(i < 0);
1271
1272     CRYPTO_FREE_REF(&p->references);
1273     OPENSSL_free(p);
1274 }
1275
1276 /******************************************************************************/
1277 /*
1278  * OPENSSL EC_METHOD FUNCTIONS
1279  */
1280
1281 int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
1282 {
1283     int ret;
1284     ret = ossl_ec_GFp_simple_group_init(group);
1285     group->a_is_minus3 = 1;
1286     return ret;
1287 }
1288
1289 int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1290                                          const BIGNUM *a, const BIGNUM *b,
1291                                          BN_CTX *ctx)
1292 {
1293     int ret = 0;
1294     BIGNUM *curve_p, *curve_a, *curve_b;
1295 #ifndef FIPS_MODULE
1296     BN_CTX *new_ctx = NULL;
1297
1298     if (ctx == NULL)
1299         ctx = new_ctx = BN_CTX_new();
1300 #endif
1301     if (ctx == NULL)
1302         return 0;
1303
1304     BN_CTX_start(ctx);
1305     curve_p = BN_CTX_get(ctx);
1306     curve_a = BN_CTX_get(ctx);
1307     curve_b = BN_CTX_get(ctx);
1308     if (curve_b == NULL)
1309         goto err;
1310     BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1311     BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1312     BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1313     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1314         ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1315         goto err;
1316     }
1317     group->field_mod_func = BN_nist_mod_224;
1318     ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1319  err:
1320     BN_CTX_end(ctx);
1321 #ifndef FIPS_MODULE
1322     BN_CTX_free(new_ctx);
1323 #endif
1324     return ret;
1325 }
1326
1327 /*
1328  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1329  * (X/Z^2, Y/Z^3)
1330  */
1331 int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1332                                                       const EC_POINT *point,
1333                                                       BIGNUM *x, BIGNUM *y,
1334                                                       BN_CTX *ctx)
1335 {
1336     felem z1, z2, x_in, y_in, x_out, y_out;
1337     widefelem tmp;
1338
1339     if (EC_POINT_is_at_infinity(group, point)) {
1340         ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1341         return 0;
1342     }
1343     if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1344         (!BN_to_felem(z1, point->Z)))
1345         return 0;
1346     felem_inv(z2, z1);
1347     felem_square(tmp, z2);
1348     felem_reduce(z1, tmp);
1349     felem_mul(tmp, x_in, z1);
1350     felem_reduce(x_in, tmp);
1351     felem_contract(x_out, x_in);
1352     if (x != NULL) {
1353         if (!felem_to_BN(x, x_out)) {
1354             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1355             return 0;
1356         }
1357     }
1358     felem_mul(tmp, z1, z2);
1359     felem_reduce(z1, tmp);
1360     felem_mul(tmp, y_in, z1);
1361     felem_reduce(y_in, tmp);
1362     felem_contract(y_out, y_in);
1363     if (y != NULL) {
1364         if (!felem_to_BN(y, y_out)) {
1365             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1366             return 0;
1367         }
1368     }
1369     return 1;
1370 }
1371
1372 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1373                                felem tmp_felems[ /* num+1 */ ])
1374 {
1375     /*
1376      * Runs in constant time, unless an input is the point at infinity (which
1377      * normally shouldn't happen).
1378      */
1379     ossl_ec_GFp_nistp_points_make_affine_internal(num,
1380                                                   points,
1381                                                   sizeof(felem),
1382                                                   tmp_felems,
1383                                                   (void (*)(void *))felem_one,
1384                                                   felem_is_zero_int,
1385                                                   (void (*)(void *, const void *))
1386                                                   felem_assign,
1387                                                   (void (*)(void *, const void *))
1388                                                   felem_square_reduce, (void (*)
1389                                                                         (void *,
1390                                                                          const void
1391                                                                          *,
1392                                                                          const void
1393                                                                          *))
1394                                                   felem_mul_reduce,
1395                                                   (void (*)(void *, const void *))
1396                                                   felem_inv,
1397                                                   (void (*)(void *, const void *))
1398                                                   felem_contract);
1399 }
1400
1401 /*
1402  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1403  * values Result is stored in r (r can equal one of the inputs).
1404  */
1405 int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1406                                     const BIGNUM *scalar, size_t num,
1407                                     const EC_POINT *points[],
1408                                     const BIGNUM *scalars[], BN_CTX *ctx)
1409 {
1410     int ret = 0;
1411     int j;
1412     unsigned i;
1413     int mixed = 0;
1414     BIGNUM *x, *y, *z, *tmp_scalar;
1415     felem_bytearray g_secret;
1416     felem_bytearray *secrets = NULL;
1417     felem (*pre_comp)[17][3] = NULL;
1418     felem *tmp_felems = NULL;
1419     int num_bytes;
1420     int have_pre_comp = 0;
1421     size_t num_points = num;
1422     felem x_in, y_in, z_in, x_out, y_out, z_out;
1423     NISTP224_PRE_COMP *pre = NULL;
1424     const felem(*g_pre_comp)[16][3] = NULL;
1425     EC_POINT *generator = NULL;
1426     const EC_POINT *p = NULL;
1427     const BIGNUM *p_scalar = NULL;
1428
1429     BN_CTX_start(ctx);
1430     x = BN_CTX_get(ctx);
1431     y = BN_CTX_get(ctx);
1432     z = BN_CTX_get(ctx);
1433     tmp_scalar = BN_CTX_get(ctx);
1434     if (tmp_scalar == NULL)
1435         goto err;
1436
1437     if (scalar != NULL) {
1438         pre = group->pre_comp.nistp224;
1439         if (pre)
1440             /* we have precomputation, try to use it */
1441             g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1442         else
1443             /* try to use the standard precomputation */
1444             g_pre_comp = &gmul[0];
1445         generator = EC_POINT_new(group);
1446         if (generator == NULL)
1447             goto err;
1448         /* get the generator from precomputation */
1449         if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1450             !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1451             !felem_to_BN(z, g_pre_comp[0][1][2])) {
1452             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1453             goto err;
1454         }
1455         if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1456                                                                 generator,
1457                                                                 x, y, z, ctx))
1458             goto err;
1459         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1460             /* precomputation matches generator */
1461             have_pre_comp = 1;
1462         else
1463             /*
1464              * we don't have valid precomputation: treat the generator as a
1465              * random point
1466              */
1467             num_points = num_points + 1;
1468     }
1469
1470     if (num_points > 0) {
1471         if (num_points >= 3) {
1472             /*
1473              * unless we precompute multiples for just one or two points,
1474              * converting those into affine form is time well spent
1475              */
1476             mixed = 1;
1477         }
1478         secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1479         pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1480         if (mixed)
1481             tmp_felems =
1482                 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1483         if ((secrets == NULL) || (pre_comp == NULL)
1484             || (mixed && (tmp_felems == NULL)))
1485             goto err;
1486
1487         /*
1488          * we treat NULL scalars as 0, and NULL points as points at infinity,
1489          * i.e., they contribute nothing to the linear combination
1490          */
1491         for (i = 0; i < num_points; ++i) {
1492             if (i == num) {
1493                 /* the generator */
1494                 p = EC_GROUP_get0_generator(group);
1495                 p_scalar = scalar;
1496             } else {
1497                 /* the i^th point */
1498                 p = points[i];
1499                 p_scalar = scalars[i];
1500             }
1501             if ((p_scalar != NULL) && (p != NULL)) {
1502                 /* reduce scalar to 0 <= scalar < 2^224 */
1503                 if ((BN_num_bits(p_scalar) > 224)
1504                     || (BN_is_negative(p_scalar))) {
1505                     /*
1506                      * this is an unusual input, and we don't guarantee
1507                      * constant-timeness
1508                      */
1509                     if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1510                         ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1511                         goto err;
1512                     }
1513                     num_bytes = BN_bn2lebinpad(tmp_scalar,
1514                                                secrets[i], sizeof(secrets[i]));
1515                 } else {
1516                     num_bytes = BN_bn2lebinpad(p_scalar,
1517                                                secrets[i], sizeof(secrets[i]));
1518                 }
1519                 if (num_bytes < 0) {
1520                     ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1521                     goto err;
1522                 }
1523                 /* precompute multiples */
1524                 if ((!BN_to_felem(x_out, p->X)) ||
1525                     (!BN_to_felem(y_out, p->Y)) ||
1526                     (!BN_to_felem(z_out, p->Z)))
1527                     goto err;
1528                 felem_assign(pre_comp[i][1][0], x_out);
1529                 felem_assign(pre_comp[i][1][1], y_out);
1530                 felem_assign(pre_comp[i][1][2], z_out);
1531                 for (j = 2; j <= 16; ++j) {
1532                     if (j & 1) {
1533                         point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1534                                   pre_comp[i][j][2], pre_comp[i][1][0],
1535                                   pre_comp[i][1][1], pre_comp[i][1][2], 0,
1536                                   pre_comp[i][j - 1][0],
1537                                   pre_comp[i][j - 1][1],
1538                                   pre_comp[i][j - 1][2]);
1539                     } else {
1540                         point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1541                                      pre_comp[i][j][2], pre_comp[i][j / 2][0],
1542                                      pre_comp[i][j / 2][1],
1543                                      pre_comp[i][j / 2][2]);
1544                     }
1545                 }
1546             }
1547         }
1548         if (mixed)
1549             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1550     }
1551
1552     /* the scalar for the generator */
1553     if ((scalar != NULL) && (have_pre_comp)) {
1554         memset(g_secret, 0, sizeof(g_secret));
1555         /* reduce scalar to 0 <= scalar < 2^224 */
1556         if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1557             /*
1558              * this is an unusual input, and we don't guarantee
1559              * constant-timeness
1560              */
1561             if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1562                 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1563                 goto err;
1564             }
1565             num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1566         } else {
1567             num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1568         }
1569         /* do the multiplication with generator precomputation */
1570         batch_mul(x_out, y_out, z_out,
1571                   (const felem_bytearray(*))secrets, num_points,
1572                   g_secret,
1573                   mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1574     } else {
1575         /* do the multiplication without generator precomputation */
1576         batch_mul(x_out, y_out, z_out,
1577                   (const felem_bytearray(*))secrets, num_points,
1578                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1579     }
1580     /* reduce the output to its unique minimal representation */
1581     felem_contract(x_in, x_out);
1582     felem_contract(y_in, y_out);
1583     felem_contract(z_in, z_out);
1584     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1585         (!felem_to_BN(z, z_in))) {
1586         ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1587         goto err;
1588     }
1589     ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1590                                                              ctx);
1591
1592  err:
1593     BN_CTX_end(ctx);
1594     EC_POINT_free(generator);
1595     OPENSSL_free(secrets);
1596     OPENSSL_free(pre_comp);
1597     OPENSSL_free(tmp_felems);
1598     return ret;
1599 }
1600
1601 int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1602 {
1603     int ret = 0;
1604     NISTP224_PRE_COMP *pre = NULL;
1605     int i, j;
1606     BIGNUM *x, *y;
1607     EC_POINT *generator = NULL;
1608     felem tmp_felems[32];
1609 #ifndef FIPS_MODULE
1610     BN_CTX *new_ctx = NULL;
1611 #endif
1612
1613     /* throw away old precomputation */
1614     EC_pre_comp_free(group);
1615
1616 #ifndef FIPS_MODULE
1617     if (ctx == NULL)
1618         ctx = new_ctx = BN_CTX_new();
1619 #endif
1620     if (ctx == NULL)
1621         return 0;
1622
1623     BN_CTX_start(ctx);
1624     x = BN_CTX_get(ctx);
1625     y = BN_CTX_get(ctx);
1626     if (y == NULL)
1627         goto err;
1628     /* get the generator */
1629     if (group->generator == NULL)
1630         goto err;
1631     generator = EC_POINT_new(group);
1632     if (generator == NULL)
1633         goto err;
1634     BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1635     BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1636     if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1637         goto err;
1638     if ((pre = nistp224_pre_comp_new()) == NULL)
1639         goto err;
1640     /*
1641      * if the generator is the standard one, use built-in precomputation
1642      */
1643     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1644         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1645         goto done;
1646     }
1647     if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1648         (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1649         (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1650         goto err;
1651     /*
1652      * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1653      * 2^140*G, 2^196*G for the second one
1654      */
1655     for (i = 1; i <= 8; i <<= 1) {
1656         point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1657                      pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1658                      pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1659         for (j = 0; j < 27; ++j) {
1660             point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1661                          pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1662                          pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1663         }
1664         if (i == 8)
1665             break;
1666         point_double(pre->g_pre_comp[0][2 * i][0],
1667                      pre->g_pre_comp[0][2 * i][1],
1668                      pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1669                      pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1670         for (j = 0; j < 27; ++j) {
1671             point_double(pre->g_pre_comp[0][2 * i][0],
1672                          pre->g_pre_comp[0][2 * i][1],
1673                          pre->g_pre_comp[0][2 * i][2],
1674                          pre->g_pre_comp[0][2 * i][0],
1675                          pre->g_pre_comp[0][2 * i][1],
1676                          pre->g_pre_comp[0][2 * i][2]);
1677         }
1678     }
1679     for (i = 0; i < 2; i++) {
1680         /* g_pre_comp[i][0] is the point at infinity */
1681         memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1682         /* the remaining multiples */
1683         /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1684         point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1685                   pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1686                   pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1687                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1688                   pre->g_pre_comp[i][2][2]);
1689         /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1690         point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1691                   pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1692                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1693                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1694                   pre->g_pre_comp[i][2][2]);
1695         /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1696         point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1697                   pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1698                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1699                   0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1700                   pre->g_pre_comp[i][4][2]);
1701         /*
1702          * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1703          */
1704         point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1705                   pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1706                   pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1707                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1708                   pre->g_pre_comp[i][2][2]);
1709         for (j = 1; j < 8; ++j) {
1710             /* odd multiples: add G resp. 2^28*G */
1711             point_add(pre->g_pre_comp[i][2 * j + 1][0],
1712                       pre->g_pre_comp[i][2 * j + 1][1],
1713                       pre->g_pre_comp[i][2 * j + 1][2],
1714                       pre->g_pre_comp[i][2 * j][0],
1715                       pre->g_pre_comp[i][2 * j][1],
1716                       pre->g_pre_comp[i][2 * j][2], 0,
1717                       pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1718                       pre->g_pre_comp[i][1][2]);
1719         }
1720     }
1721     make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1722
1723  done:
1724     SETPRECOMP(group, nistp224, pre);
1725     pre = NULL;
1726     ret = 1;
1727  err:
1728     BN_CTX_end(ctx);
1729     EC_POINT_free(generator);
1730 #ifndef FIPS_MODULE
1731     BN_CTX_free(new_ctx);
1732 #endif
1733     EC_nistp224_pre_comp_free(pre);
1734     return ret;
1735 }
1736
1737 int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1738 {
1739     return HAVEPRECOMP(group, nistp224);
1740 }