crypto/ec/ecp_nistp224.c

   1 /*
   2  * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
   3  *
   4  * Licensed under the Apache License 2.0 (the "License").  You may not use
   5  * this file except in compliance with the License.  You can obtain a copy
   6  * in the file LICENSE in the source distribution or at
   7  * https://www.openssl.org/source/license.html
   8  */
   9
  10 /* Copyright 2011 Google Inc.
  11  *
  12  * Licensed under the Apache License, Version 2.0 (the "License");
  13  *
  14  * you may not use this file except in compliance with the License.
  15  * You may obtain a copy of the License at
  16  *
  17  *     http://www.apache.org/licenses/LICENSE-2.0
  18  *
  19  *  Unless required by applicable law or agreed to in writing, software
  20  *  distributed under the License is distributed on an "AS IS" BASIS,
  21  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  22  *  See the License for the specific language governing permissions and
  23  *  limitations under the License.
  24  */
  25
  26 /*
  27  * ECDSA low level APIs are deprecated for public use, but still ok for
  28  * internal use.
  29  */
  30 #include "internal/deprecated.h"
  31
  32 /*
  33  * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
  34  *
  35  * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
  36  * and Adam Langley's public domain 64-bit C implementation of curve25519
  37  */
  38
  39 #include <openssl/opensslconf.h>
  40
  41 #include <stdint.h>
  42 #include <string.h>
  43 #include <openssl/err.h>
  44 #include "ec_local.h"
  45
  46 #include "internal/numbers.h"
  47
  48 #ifndef INT128_MAX
  49 # error "Your compiler doesn't appear to support 128-bit integer types"
  50 #endif
  51
  52 typedef uint8_t u8;
  53 typedef uint64_t u64;
  54
  55 /******************************************************************************/
  56 /*-
  57  * INTERNAL REPRESENTATION OF FIELD ELEMENTS
  58  *
  59  * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
  60  * using 64-bit coefficients called 'limbs',
  61  * and sometimes (for multiplication results) as
  62  * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
  63  * using 128-bit coefficients called 'widelimbs'.
  64  * A 4-limb representation is an 'felem';
  65  * a 7-widelimb representation is a 'widefelem'.
  66  * Even within felems, bits of adjacent limbs overlap, and we don't always
  67  * reduce the representations: we ensure that inputs to each felem
  68  * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
  69  * and fit into a 128-bit word without overflow. The coefficients are then
  70  * again partially reduced to obtain an felem satisfying a_i < 2^57.
  71  * We only reduce to the unique minimal representation at the end of the
  72  * computation.
  73  */
  74
  75 typedef uint64_t limb;
  76 typedef uint64_t limb_aX __attribute((__aligned__(1)));
  77 typedef uint128_t widelimb;
  78
  79 typedef limb felem[4];
  80 typedef widelimb widefelem[7];
  81
  82 /*
  83  * Field element represented as a byte array. 28*8 = 224 bits is also the
  84  * group order size for the elliptic curve, and we also use this type for
  85  * scalars for point multiplication.
  86  */
  87 typedef u8 felem_bytearray[28];
  88
  89 static const felem_bytearray nistp224_curve_params[5] = {
  90     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
  91      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
  92      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
  93     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
  94      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
  95      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
  96     {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
  97      0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
  98      0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
  99     {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
 100      0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
 101      0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
 102     {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
 103      0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
 104      0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
 105 };
 106
 107 /*-
 108  * Precomputed multiples of the standard generator
 109  * Points are given in coordinates (X, Y, Z) where Z normally is 1
 110  * (0 for the point at infinity).
 111  * For each field element, slice a_0 is word 0, etc.
 112  *
 113  * The table has 2 * 16 elements, starting with the following:
 114  * index | bits    | point
 115  * ------+---------+------------------------------
 116  *     0 | 0 0 0 0 | 0G
 117  *     1 | 0 0 0 1 | 1G
 118  *     2 | 0 0 1 0 | 2^56G
 119  *     3 | 0 0 1 1 | (2^56 + 1)G
 120  *     4 | 0 1 0 0 | 2^112G
 121  *     5 | 0 1 0 1 | (2^112 + 1)G
 122  *     6 | 0 1 1 0 | (2^112 + 2^56)G
 123  *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
 124  *     8 | 1 0 0 0 | 2^168G
 125  *     9 | 1 0 0 1 | (2^168 + 1)G
 126  *    10 | 1 0 1 0 | (2^168 + 2^56)G
 127  *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
 128  *    12 | 1 1 0 0 | (2^168 + 2^112)G
 129  *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
 130  *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
 131  *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
 132  * followed by a copy of this with each element multiplied by 2^28.
 133  *
 134  * The reason for this is so that we can clock bits into four different
 135  * locations when doing simple scalar multiplies against the base point,
 136  * and then another four locations using the second 16 elements.
 137  */
 138 static const felem gmul[2][16][3] = {
 139 {{{0, 0, 0, 0},
 140   {0, 0, 0, 0},
 141   {0, 0, 0, 0}},
 142  {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
 143   {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
 144   {1, 0, 0, 0}},
 145  {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
 146   {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
 147   {1, 0, 0, 0}},
 148  {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
 149   {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
 150   {1, 0, 0, 0}},
 151  {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
 152   {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
 153   {1, 0, 0, 0}},
 154  {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
 155   {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
 156   {1, 0, 0, 0}},
 157  {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
 158   {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
 159   {1, 0, 0, 0}},
 160  {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
 161   {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
 162   {1, 0, 0, 0}},
 163  {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
 164   {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
 165   {1, 0, 0, 0}},
 166  {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
 167   {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
 168   {1, 0, 0, 0}},
 169  {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
 170   {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
 171   {1, 0, 0, 0}},
 172  {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
 173   {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
 174   {1, 0, 0, 0}},
 175  {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
 176   {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
 177   {1, 0, 0, 0}},
 178  {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
 179   {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
 180   {1, 0, 0, 0}},
 181  {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
 182   {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
 183   {1, 0, 0, 0}},
 184  {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
 185   {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
 186   {1, 0, 0, 0}}},
 187 {{{0, 0, 0, 0},
 188   {0, 0, 0, 0},
 189   {0, 0, 0, 0}},
 190  {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
 191   {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
 192   {1, 0, 0, 0}},
 193  {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
 194   {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
 195   {1, 0, 0, 0}},
 196  {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
 197   {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
 198   {1, 0, 0, 0}},
 199  {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
 200   {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
 201   {1, 0, 0, 0}},
 202  {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
 203   {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
 204   {1, 0, 0, 0}},
 205  {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
 206   {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
 207   {1, 0, 0, 0}},
 208  {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
 209   {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
 210   {1, 0, 0, 0}},
 211  {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
 212   {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
 213   {1, 0, 0, 0}},
 214  {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
 215   {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
 216   {1, 0, 0, 0}},
 217  {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
 218   {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
 219   {1, 0, 0, 0}},
 220  {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
 221   {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
 222   {1, 0, 0, 0}},
 223  {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
 224   {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
 225   {1, 0, 0, 0}},
 226  {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
 227   {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
 228   {1, 0, 0, 0}},
 229  {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
 230   {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
 231   {1, 0, 0, 0}},
 232  {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
 233   {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
 234   {1, 0, 0, 0}}}
 235 };
 236
 237 /* Precomputation for the group generator. */
 238 struct nistp224_pre_comp_st {
 239     felem g_pre_comp[2][16][3];
 240     CRYPTO_REF_COUNT references;
 241     CRYPTO_RWLOCK *lock;
 242 };
 243
 244 const EC_METHOD *EC_GFp_nistp224_method(void)
 245 {
 246     static const EC_METHOD ret = {
 247         EC_FLAGS_DEFAULT_OCT,
 248         NID_X9_62_prime_field,
 249         ossl_ec_GFp_nistp224_group_init,
 250         ossl_ec_GFp_simple_group_finish,
 251         ossl_ec_GFp_simple_group_clear_finish,
 252         ossl_ec_GFp_nist_group_copy,
 253         ossl_ec_GFp_nistp224_group_set_curve,
 254         ossl_ec_GFp_simple_group_get_curve,
 255         ossl_ec_GFp_simple_group_get_degree,
 256         ossl_ec_group_simple_order_bits,
 257         ossl_ec_GFp_simple_group_check_discriminant,
 258         ossl_ec_GFp_simple_point_init,
 259         ossl_ec_GFp_simple_point_finish,
 260         ossl_ec_GFp_simple_point_clear_finish,
 261         ossl_ec_GFp_simple_point_copy,
 262         ossl_ec_GFp_simple_point_set_to_infinity,
 263         ossl_ec_GFp_simple_point_set_affine_coordinates,
 264         ossl_ec_GFp_nistp224_point_get_affine_coordinates,
 265         0 /* point_set_compressed_coordinates */ ,
 266         0 /* point2oct */ ,
 267         0 /* oct2point */ ,
 268         ossl_ec_GFp_simple_add,
 269         ossl_ec_GFp_simple_dbl,
 270         ossl_ec_GFp_simple_invert,
 271         ossl_ec_GFp_simple_is_at_infinity,
 272         ossl_ec_GFp_simple_is_on_curve,
 273         ossl_ec_GFp_simple_cmp,
 274         ossl_ec_GFp_simple_make_affine,
 275         ossl_ec_GFp_simple_points_make_affine,
 276         ossl_ec_GFp_nistp224_points_mul,
 277         ossl_ec_GFp_nistp224_precompute_mult,
 278         ossl_ec_GFp_nistp224_have_precompute_mult,
 279         ossl_ec_GFp_nist_field_mul,
 280         ossl_ec_GFp_nist_field_sqr,
 281         0 /* field_div */ ,
 282         ossl_ec_GFp_simple_field_inv,
 283         0 /* field_encode */ ,
 284         0 /* field_decode */ ,
 285         0,                      /* field_set_to_one */
 286         ossl_ec_key_simple_priv2oct,
 287         ossl_ec_key_simple_oct2priv,
 288         0, /* set private */
 289         ossl_ec_key_simple_generate_key,
 290         ossl_ec_key_simple_check_key,
 291         ossl_ec_key_simple_generate_public_key,
 292         0, /* keycopy */
 293         0, /* keyfinish */
 294         ossl_ecdh_simple_compute_key,
 295         ossl_ecdsa_simple_sign_setup,
 296         ossl_ecdsa_simple_sign_sig,
 297         ossl_ecdsa_simple_verify_sig,
 298         0, /* field_inverse_mod_ord */
 299         0, /* blind_coordinates */
 300         0, /* ladder_pre */
 301         0, /* ladder_step */
 302         0  /* ladder_post */
 303     };
 304
 305     return &ret;
 306 }
 307
 308 /*
 309  * Helper functions to convert field elements to/from internal representation
 310  */
 311 static void bin28_to_felem(felem out, const u8 in[28])
 312 {
 313     out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
 314     out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
 315     out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
 316     out[3] = (*((const limb_aX *)(in + 20))) >> 8;
 317 }
 318
 319 static void felem_to_bin28(u8 out[28], const felem in)
 320 {
 321     unsigned i;
 322     for (i = 0; i < 7; ++i) {
 323         out[i] = in[0] >> (8 * i);
 324         out[i + 7] = in[1] >> (8 * i);
 325         out[i + 14] = in[2] >> (8 * i);
 326         out[i + 21] = in[3] >> (8 * i);
 327     }
 328 }
 329
 330 /* From OpenSSL BIGNUM to internal representation */
 331 static int BN_to_felem(felem out, const BIGNUM *bn)
 332 {
 333     felem_bytearray b_out;
 334     int num_bytes;
 335
 336     if (BN_is_negative(bn)) {
 337         ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
 338         return 0;
 339     }
 340     num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
 341     if (num_bytes < 0) {
 342         ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
 343         return 0;
 344     }
 345     bin28_to_felem(out, b_out);
 346     return 1;
 347 }
 348
 349 /* From internal representation to OpenSSL BIGNUM */
 350 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
 351 {
 352     felem_bytearray b_out;
 353     felem_to_bin28(b_out, in);
 354     return BN_lebin2bn(b_out, sizeof(b_out), out);
 355 }
 356
 357 /******************************************************************************/
 358 /*-
 359  *                              FIELD OPERATIONS
 360  *
 361  * Field operations, using the internal representation of field elements.
 362  * NB! These operations are specific to our point multiplication and cannot be
 363  * expected to be correct in general - e.g., multiplication with a large scalar
 364  * will cause an overflow.
 365  *
 366  */
 367
 368 static void felem_one(felem out)
 369 {
 370     out[0] = 1;
 371     out[1] = 0;
 372     out[2] = 0;
 373     out[3] = 0;
 374 }
 375
 376 static void felem_assign(felem out, const felem in)
 377 {
 378     out[0] = in[0];
 379     out[1] = in[1];
 380     out[2] = in[2];
 381     out[3] = in[3];
 382 }
 383
 384 /* Sum two field elements: out += in */
 385 static void felem_sum(felem out, const felem in)
 386 {
 387     out[0] += in[0];
 388     out[1] += in[1];
 389     out[2] += in[2];
 390     out[3] += in[3];
 391 }
 392
 393 /* Subtract field elements: out -= in */
 394 /* Assumes in[i] < 2^57 */
 395 static void felem_diff(felem out, const felem in)
 396 {
 397     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
 398     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
 399     static const limb two58m42m2 = (((limb) 1) << 58) -
 400         (((limb) 1) << 42) - (((limb) 1) << 2);
 401
 402     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 403     out[0] += two58p2;
 404     out[1] += two58m42m2;
 405     out[2] += two58m2;
 406     out[3] += two58m2;
 407
 408     out[0] -= in[0];
 409     out[1] -= in[1];
 410     out[2] -= in[2];
 411     out[3] -= in[3];
 412 }
 413
 414 /* Subtract in unreduced 128-bit mode: out -= in */
 415 /* Assumes in[i] < 2^119 */
 416 static void widefelem_diff(widefelem out, const widefelem in)
 417 {
 418     static const widelimb two120 = ((widelimb) 1) << 120;
 419     static const widelimb two120m64 = (((widelimb) 1) << 120) -
 420         (((widelimb) 1) << 64);
 421     static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
 422         (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
 423
 424     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 425     out[0] += two120;
 426     out[1] += two120m64;
 427     out[2] += two120m64;
 428     out[3] += two120;
 429     out[4] += two120m104m64;
 430     out[5] += two120m64;
 431     out[6] += two120m64;
 432
 433     out[0] -= in[0];
 434     out[1] -= in[1];
 435     out[2] -= in[2];
 436     out[3] -= in[3];
 437     out[4] -= in[4];
 438     out[5] -= in[5];
 439     out[6] -= in[6];
 440 }
 441
 442 /* Subtract in mixed mode: out128 -= in64 */
 443 /* in[i] < 2^63 */
 444 static void felem_diff_128_64(widefelem out, const felem in)
 445 {
 446     static const widelimb two64p8 = (((widelimb) 1) << 64) +
 447         (((widelimb) 1) << 8);
 448     static const widelimb two64m8 = (((widelimb) 1) << 64) -
 449         (((widelimb) 1) << 8);
 450     static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
 451         (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
 452
 453     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 454     out[0] += two64p8;
 455     out[1] += two64m48m8;
 456     out[2] += two64m8;
 457     out[3] += two64m8;
 458
 459     out[0] -= in[0];
 460     out[1] -= in[1];
 461     out[2] -= in[2];
 462     out[3] -= in[3];
 463 }
 464
 465 /*
 466  * Multiply a field element by a scalar: out = out * scalar The scalars we
 467  * actually use are small, so results fit without overflow
 468  */
 469 static void felem_scalar(felem out, const limb scalar)
 470 {
 471     out[0] *= scalar;
 472     out[1] *= scalar;
 473     out[2] *= scalar;
 474     out[3] *= scalar;
 475 }
 476
 477 /*
 478  * Multiply an unreduced field element by a scalar: out = out * scalar The
 479  * scalars we actually use are small, so results fit without overflow
 480  */
 481 static void widefelem_scalar(widefelem out, const widelimb scalar)
 482 {
 483     out[0] *= scalar;
 484     out[1] *= scalar;
 485     out[2] *= scalar;
 486     out[3] *= scalar;
 487     out[4] *= scalar;
 488     out[5] *= scalar;
 489     out[6] *= scalar;
 490 }
 491
 492 /* Square a field element: out = in^2 */
 493 static void felem_square(widefelem out, const felem in)
 494 {
 495     limb tmp0, tmp1, tmp2;
 496     tmp0 = 2 * in[0];
 497     tmp1 = 2 * in[1];
 498     tmp2 = 2 * in[2];
 499     out[0] = ((widelimb) in[0]) * in[0];
 500     out[1] = ((widelimb) in[0]) * tmp1;
 501     out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
 502     out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
 503     out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
 504     out[5] = ((widelimb) in[3]) * tmp2;
 505     out[6] = ((widelimb) in[3]) * in[3];
 506 }
 507
 508 /* Multiply two field elements: out = in1 * in2 */
 509 static void felem_mul(widefelem out, const felem in1, const felem in2)
 510 {
 511     out[0] = ((widelimb) in1[0]) * in2[0];
 512     out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
 513     out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
 514              ((widelimb) in1[2]) * in2[0];
 515     out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
 516              ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
 517     out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
 518              ((widelimb) in1[3]) * in2[1];
 519     out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
 520     out[6] = ((widelimb) in1[3]) * in2[3];
 521 }
 522
 523 /*-
 524  * Reduce seven 128-bit coefficients to four 64-bit coefficients.
 525  * Requires in[i] < 2^126,
 526  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
 527 static void felem_reduce(felem out, const widefelem in)
 528 {
 529     static const widelimb two127p15 = (((widelimb) 1) << 127) +
 530         (((widelimb) 1) << 15);
 531     static const widelimb two127m71 = (((widelimb) 1) << 127) -
 532         (((widelimb) 1) << 71);
 533     static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
 534         (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
 535     widelimb output[5];
 536
 537     /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
 538     output[0] = in[0] + two127p15;
 539     output[1] = in[1] + two127m71m55;
 540     output[2] = in[2] + two127m71;
 541     output[3] = in[3];
 542     output[4] = in[4];
 543
 544     /* Eliminate in[4], in[5], in[6] */
 545     output[4] += in[6] >> 16;
 546     output[3] += (in[6] & 0xffff) << 40;
 547     output[2] -= in[6];
 548
 549     output[3] += in[5] >> 16;
 550     output[2] += (in[5] & 0xffff) << 40;
 551     output[1] -= in[5];
 552
 553     output[2] += output[4] >> 16;
 554     output[1] += (output[4] & 0xffff) << 40;
 555     output[0] -= output[4];
 556
 557     /* Carry 2 -> 3 -> 4 */
 558     output[3] += output[2] >> 56;
 559     output[2] &= 0x00ffffffffffffff;
 560
 561     output[4] = output[3] >> 56;
 562     output[3] &= 0x00ffffffffffffff;
 563
 564     /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
 565
 566     /* Eliminate output[4] */
 567     output[2] += output[4] >> 16;
 568     /* output[2] < 2^56 + 2^56 = 2^57 */
 569     output[1] += (output[4] & 0xffff) << 40;
 570     output[0] -= output[4];
 571
 572     /* Carry 0 -> 1 -> 2 -> 3 */
 573     output[1] += output[0] >> 56;
 574     out[0] = output[0] & 0x00ffffffffffffff;
 575
 576     output[2] += output[1] >> 56;
 577     /* output[2] < 2^57 + 2^72 */
 578     out[1] = output[1] & 0x00ffffffffffffff;
 579     output[3] += output[2] >> 56;
 580     /* output[3] <= 2^56 + 2^16 */
 581     out[2] = output[2] & 0x00ffffffffffffff;
 582
 583     /*-
 584      * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
 585      * out[3] <= 2^56 + 2^16 (due to final carry),
 586      * so out < 2*p
 587      */
 588     out[3] = output[3];
 589 }
 590
 591 static void felem_square_reduce(felem out, const felem in)
 592 {
 593     widefelem tmp;
 594     felem_square(tmp, in);
 595     felem_reduce(out, tmp);
 596 }
 597
 598 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
 599 {
 600     widefelem tmp;
 601     felem_mul(tmp, in1, in2);
 602     felem_reduce(out, tmp);
 603 }
 604
 605 /*
 606  * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
 607  * call felem_reduce first)
 608  */
 609 static void felem_contract(felem out, const felem in)
 610 {
 611     static const int64_t two56 = ((limb) 1) << 56;
 612     /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
 613     /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
 614     int64_t tmp[4], a;
 615     tmp[0] = in[0];
 616     tmp[1] = in[1];
 617     tmp[2] = in[2];
 618     tmp[3] = in[3];
 619     /* Case 1: a = 1 iff in >= 2^224 */
 620     a = (in[3] >> 56);
 621     tmp[0] -= a;
 622     tmp[1] += a << 40;
 623     tmp[3] &= 0x00ffffffffffffff;
 624     /*
 625      * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
 626      * and the lower part is non-zero
 627      */
 628     a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
 629         (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
 630     a &= 0x00ffffffffffffff;
 631     /* turn a into an all-one mask (if a = 0) or an all-zero mask */
 632     a = (a - 1) >> 63;
 633     /* subtract 2^224 - 2^96 + 1 if a is all-one */
 634     tmp[3] &= a ^ 0xffffffffffffffff;
 635     tmp[2] &= a ^ 0xffffffffffffffff;
 636     tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
 637     tmp[0] -= 1 & a;
 638
 639     /*
 640      * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
 641      * non-zero, so we only need one step
 642      */
 643     a = tmp[0] >> 63;
 644     tmp[0] += two56 & a;
 645     tmp[1] -= 1 & a;
 646
 647     /* carry 1 -> 2 -> 3 */
 648     tmp[2] += tmp[1] >> 56;
 649     tmp[1] &= 0x00ffffffffffffff;
 650
 651     tmp[3] += tmp[2] >> 56;
 652     tmp[2] &= 0x00ffffffffffffff;
 653
 654     /* Now 0 <= out < p */
 655     out[0] = tmp[0];
 656     out[1] = tmp[1];
 657     out[2] = tmp[2];
 658     out[3] = tmp[3];
 659 }
 660
 661 /*
 662  * Get negative value: out = -in
 663  * Requires in[i] < 2^63,
 664  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
 665  */
 666 static void felem_neg(felem out, const felem in)
 667 {
 668     widefelem tmp;
 669
 670     memset(tmp, 0, sizeof(tmp));
 671     felem_diff_128_64(tmp, in);
 672     felem_reduce(out, tmp);
 673 }
 674
 675 /*
 676  * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
 677  * elements are reduced to in < 2^225, so we only need to check three cases:
 678  * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
 679  */
 680 static limb felem_is_zero(const felem in)
 681 {
 682     limb zero, two224m96p1, two225m97p2;
 683
 684     zero = in[0] | in[1] | in[2] | in[3];
 685     zero = (((int64_t) (zero) - 1) >> 63) & 1;
 686     two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
 687         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
 688     two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
 689     two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
 690         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
 691     two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
 692     return (zero | two224m96p1 | two225m97p2);
 693 }
 694
 695 static int felem_is_zero_int(const void *in)
 696 {
 697     return (int)(felem_is_zero(in) & ((limb) 1));
 698 }
 699
 700 /* Invert a field element */
 701 /* Computation chain copied from djb's code */
 702 static void felem_inv(felem out, const felem in)
 703 {
 704     felem ftmp, ftmp2, ftmp3, ftmp4;
 705     widefelem tmp;
 706     unsigned i;
 707
 708     felem_square(tmp, in);
 709     felem_reduce(ftmp, tmp);    /* 2 */
 710     felem_mul(tmp, in, ftmp);
 711     felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
 712     felem_square(tmp, ftmp);
 713     felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
 714     felem_mul(tmp, in, ftmp);
 715     felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
 716     felem_square(tmp, ftmp);
 717     felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
 718     felem_square(tmp, ftmp2);
 719     felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
 720     felem_square(tmp, ftmp2);
 721     felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
 722     felem_mul(tmp, ftmp2, ftmp);
 723     felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
 724     felem_square(tmp, ftmp);
 725     felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
 726     for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
 727         felem_square(tmp, ftmp2);
 728         felem_reduce(ftmp2, tmp);
 729     }
 730     felem_mul(tmp, ftmp2, ftmp);
 731     felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
 732     felem_square(tmp, ftmp2);
 733     felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
 734     for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
 735         felem_square(tmp, ftmp3);
 736         felem_reduce(ftmp3, tmp);
 737     }
 738     felem_mul(tmp, ftmp3, ftmp2);
 739     felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
 740     felem_square(tmp, ftmp2);
 741     felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
 742     for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
 743         felem_square(tmp, ftmp3);
 744         felem_reduce(ftmp3, tmp);
 745     }
 746     felem_mul(tmp, ftmp3, ftmp2);
 747     felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
 748     felem_square(tmp, ftmp3);
 749     felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
 750     for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
 751         felem_square(tmp, ftmp4);
 752         felem_reduce(ftmp4, tmp);
 753     }
 754     felem_mul(tmp, ftmp3, ftmp4);
 755     felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
 756     felem_square(tmp, ftmp3);
 757     felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
 758     for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
 759         felem_square(tmp, ftmp4);
 760         felem_reduce(ftmp4, tmp);
 761     }
 762     felem_mul(tmp, ftmp2, ftmp4);
 763     felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
 764     for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
 765         felem_square(tmp, ftmp2);
 766         felem_reduce(ftmp2, tmp);
 767     }
 768     felem_mul(tmp, ftmp2, ftmp);
 769     felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
 770     felem_square(tmp, ftmp);
 771     felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
 772     felem_mul(tmp, ftmp, in);
 773     felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
 774     for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
 775         felem_square(tmp, ftmp);
 776         felem_reduce(ftmp, tmp);
 777     }
 778     felem_mul(tmp, ftmp, ftmp3);
 779     felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
 780 }
 781
 782 /*
 783  * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
 784  * out to itself.
 785  */
 786 static void copy_conditional(felem out, const felem in, limb icopy)
 787 {
 788     unsigned i;
 789     /*
 790      * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
 791      */
 792     const limb copy = -icopy;
 793     for (i = 0; i < 4; ++i) {
 794         const limb tmp = copy & (in[i] ^ out[i]);
 795         out[i] ^= tmp;
 796     }
 797 }
 798
 799 /******************************************************************************/
 800 /*-
 801  *                       ELLIPTIC CURVE POINT OPERATIONS
 802  *
 803  * Points are represented in Jacobian projective coordinates:
 804  * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
 805  * or to the point at infinity if Z == 0.
 806  *
 807  */
 808
 809 /*-
 810  * Double an elliptic curve point:
 811  * (X', Y', Z') = 2 * (X, Y, Z), where
 812  * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 813  * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
 814  * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
 815  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
 816  * while x_out == y_in is not (maybe this works, but it's not tested).
 817  */
 818 static void
 819 point_double(felem x_out, felem y_out, felem z_out,
 820              const felem x_in, const felem y_in, const felem z_in)
 821 {
 822     widefelem tmp, tmp2;
 823     felem delta, gamma, beta, alpha, ftmp, ftmp2;
 824
 825     felem_assign(ftmp, x_in);
 826     felem_assign(ftmp2, x_in);
 827
 828     /* delta = z^2 */
 829     felem_square(tmp, z_in);
 830     felem_reduce(delta, tmp);
 831
 832     /* gamma = y^2 */
 833     felem_square(tmp, y_in);
 834     felem_reduce(gamma, tmp);
 835
 836     /* beta = x*gamma */
 837     felem_mul(tmp, x_in, gamma);
 838     felem_reduce(beta, tmp);
 839
 840     /* alpha = 3*(x-delta)*(x+delta) */
 841     felem_diff(ftmp, delta);
 842     /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
 843     felem_sum(ftmp2, delta);
 844     /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
 845     felem_scalar(ftmp2, 3);
 846     /* ftmp2[i] < 3 * 2^58 < 2^60 */
 847     felem_mul(tmp, ftmp, ftmp2);
 848     /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
 849     felem_reduce(alpha, tmp);
 850
 851     /* x' = alpha^2 - 8*beta */
 852     felem_square(tmp, alpha);
 853     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 854     felem_assign(ftmp, beta);
 855     felem_scalar(ftmp, 8);
 856     /* ftmp[i] < 8 * 2^57 = 2^60 */
 857     felem_diff_128_64(tmp, ftmp);
 858     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 859     felem_reduce(x_out, tmp);
 860
 861     /* z' = (y + z)^2 - gamma - delta */
 862     felem_sum(delta, gamma);
 863     /* delta[i] < 2^57 + 2^57 = 2^58 */
 864     felem_assign(ftmp, y_in);
 865     felem_sum(ftmp, z_in);
 866     /* ftmp[i] < 2^57 + 2^57 = 2^58 */
 867     felem_square(tmp, ftmp);
 868     /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
 869     felem_diff_128_64(tmp, delta);
 870     /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
 871     felem_reduce(z_out, tmp);
 872
 873     /* y' = alpha*(4*beta - x') - 8*gamma^2 */
 874     felem_scalar(beta, 4);
 875     /* beta[i] < 4 * 2^57 = 2^59 */
 876     felem_diff(beta, x_out);
 877     /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
 878     felem_mul(tmp, alpha, beta);
 879     /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
 880     felem_square(tmp2, gamma);
 881     /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
 882     widefelem_scalar(tmp2, 8);
 883     /* tmp2[i] < 8 * 2^116 = 2^119 */
 884     widefelem_diff(tmp, tmp2);
 885     /* tmp[i] < 2^119 + 2^120 < 2^121 */
 886     felem_reduce(y_out, tmp);
 887 }
 888
 889 /*-
 890  * Add two elliptic curve points:
 891  * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
 892  * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
 893  * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
 894  * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
 895  *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
 896  * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
 897  *
 898  * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
 899  */
 900
 901 /*
 902  * This function is not entirely constant-time: it includes a branch for
 903  * checking whether the two input points are equal, (while not equal to the
 904  * point at infinity). This case never happens during single point
 905  * multiplication, so there is no timing leak for ECDH or ECDSA signing.
 906  */
 907 static void point_add(felem x3, felem y3, felem z3,
 908                       const felem x1, const felem y1, const felem z1,
 909                       const int mixed, const felem x2, const felem y2,
 910                       const felem z2)
 911 {
 912     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
 913     widefelem tmp, tmp2;
 914     limb z1_is_zero, z2_is_zero, x_equal, y_equal;
 915     limb points_equal;
 916
 917     if (!mixed) {
 918         /* ftmp2 = z2^2 */
 919         felem_square(tmp, z2);
 920         felem_reduce(ftmp2, tmp);
 921
 922         /* ftmp4 = z2^3 */
 923         felem_mul(tmp, ftmp2, z2);
 924         felem_reduce(ftmp4, tmp);
 925
 926         /* ftmp4 = z2^3*y1 */
 927         felem_mul(tmp2, ftmp4, y1);
 928         felem_reduce(ftmp4, tmp2);
 929
 930         /* ftmp2 = z2^2*x1 */
 931         felem_mul(tmp2, ftmp2, x1);
 932         felem_reduce(ftmp2, tmp2);
 933     } else {
 934         /*
 935          * We'll assume z2 = 1 (special case z2 = 0 is handled later)
 936          */
 937
 938         /* ftmp4 = z2^3*y1 */
 939         felem_assign(ftmp4, y1);
 940
 941         /* ftmp2 = z2^2*x1 */
 942         felem_assign(ftmp2, x1);
 943     }
 944
 945     /* ftmp = z1^2 */
 946     felem_square(tmp, z1);
 947     felem_reduce(ftmp, tmp);
 948
 949     /* ftmp3 = z1^3 */
 950     felem_mul(tmp, ftmp, z1);
 951     felem_reduce(ftmp3, tmp);
 952
 953     /* tmp = z1^3*y2 */
 954     felem_mul(tmp, ftmp3, y2);
 955     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 956
 957     /* ftmp3 = z1^3*y2 - z2^3*y1 */
 958     felem_diff_128_64(tmp, ftmp4);
 959     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 960     felem_reduce(ftmp3, tmp);
 961
 962     /* tmp = z1^2*x2 */
 963     felem_mul(tmp, ftmp, x2);
 964     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 965
 966     /* ftmp = z1^2*x2 - z2^2*x1 */
 967     felem_diff_128_64(tmp, ftmp2);
 968     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 969     felem_reduce(ftmp, tmp);
 970
 971     /*
 972      * The formulae are incorrect if the points are equal, in affine coordinates
 973      * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
 974      * happens.
 975      *
 976      * We use bitwise operations to avoid potential side-channels introduced by
 977      * the short-circuiting behaviour of boolean operators.
 978      */
 979     x_equal = felem_is_zero(ftmp);
 980     y_equal = felem_is_zero(ftmp3);
 981     /*
 982      * The special case of either point being the point at infinity (z1 and/or
 983      * z2 are zero), is handled separately later on in this function, so we
 984      * avoid jumping to point_double here in those special cases.
 985      */
 986     z1_is_zero = felem_is_zero(z1);
 987     z2_is_zero = felem_is_zero(z2);
 988
 989     /*
 990      * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
 991      * specific implementation `felem_is_zero()` returns truth as `0x1`
 992      * (rather than `0xff..ff`).
 993      *
 994      * This implies that `~true` in this implementation becomes
 995      * `0xff..fe` (rather than `0x0`): for this reason, to be used in
 996      * the if expression, we mask out only the last bit in the next
 997      * line.
 998      */
 999     points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
1000
1001     if (points_equal) {
1002         /*
1003          * This is obviously not constant-time but, as mentioned before, this
1004          * case never happens during single point multiplication, so there is no
1005          * timing leak for ECDH or ECDSA signing.
1006          */
1007         point_double(x3, y3, z3, x1, y1, z1);
1008         return;
1009     }
1010
1011     /* ftmp5 = z1*z2 */
1012     if (!mixed) {
1013         felem_mul(tmp, z1, z2);
1014         felem_reduce(ftmp5, tmp);
1015     } else {
1016         /* special case z2 = 0 is handled later */
1017         felem_assign(ftmp5, z1);
1018     }
1019
1020     /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1021     felem_mul(tmp, ftmp, ftmp5);
1022     felem_reduce(z_out, tmp);
1023
1024     /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1025     felem_assign(ftmp5, ftmp);
1026     felem_square(tmp, ftmp);
1027     felem_reduce(ftmp, tmp);
1028
1029     /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1030     felem_mul(tmp, ftmp, ftmp5);
1031     felem_reduce(ftmp5, tmp);
1032
1033     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1034     felem_mul(tmp, ftmp2, ftmp);
1035     felem_reduce(ftmp2, tmp);
1036
1037     /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1038     felem_mul(tmp, ftmp4, ftmp5);
1039     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1040
1041     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1042     felem_square(tmp2, ftmp3);
1043     /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1044
1045     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1046     felem_diff_128_64(tmp2, ftmp5);
1047     /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1048
1049     /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1050     felem_assign(ftmp5, ftmp2);
1051     felem_scalar(ftmp5, 2);
1052     /* ftmp5[i] < 2 * 2^57 = 2^58 */
1053
1054     /*-
1055      * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1056      *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1057      */
1058     felem_diff_128_64(tmp2, ftmp5);
1059     /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1060     felem_reduce(x_out, tmp2);
1061
1062     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1063     felem_diff(ftmp2, x_out);
1064     /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1065
1066     /*
1067      * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1068      */
1069     felem_mul(tmp2, ftmp3, ftmp2);
1070     /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1071
1072     /*-
1073      * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1074      *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1075      */
1076     widefelem_diff(tmp2, tmp);
1077     /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1078     felem_reduce(y_out, tmp2);
1079
1080     /*
1081      * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1082      * the point at infinity, so we need to check for this separately
1083      */
1084
1085     /*
1086      * if point 1 is at infinity, copy point 2 to output, and vice versa
1087      */
1088     copy_conditional(x_out, x2, z1_is_zero);
1089     copy_conditional(x_out, x1, z2_is_zero);
1090     copy_conditional(y_out, y2, z1_is_zero);
1091     copy_conditional(y_out, y1, z2_is_zero);
1092     copy_conditional(z_out, z2, z1_is_zero);
1093     copy_conditional(z_out, z1, z2_is_zero);
1094     felem_assign(x3, x_out);
1095     felem_assign(y3, y_out);
1096     felem_assign(z3, z_out);
1097 }
1098
1099 /*
1100  * select_point selects the |idx|th point from a precomputation table and
1101  * copies it to out.
1102  * The pre_comp array argument should be size of |size| argument
1103  */
1104 static void select_point(const u64 idx, unsigned int size,
1105                          const felem pre_comp[][3], felem out[3])
1106 {
1107     unsigned i, j;
1108     limb *outlimbs = &out[0][0];
1109
1110     memset(out, 0, sizeof(*out) * 3);
1111     for (i = 0; i < size; i++) {
1112         const limb *inlimbs = &pre_comp[i][0][0];
1113         u64 mask = i ^ idx;
1114         mask |= mask >> 4;
1115         mask |= mask >> 2;
1116         mask |= mask >> 1;
1117         mask &= 1;
1118         mask--;
1119         for (j = 0; j < 4 * 3; j++)
1120             outlimbs[j] |= inlimbs[j] & mask;
1121     }
1122 }
1123
1124 /* get_bit returns the |i|th bit in |in| */
1125 static char get_bit(const felem_bytearray in, unsigned i)
1126 {
1127     if (i >= 224)
1128         return 0;
1129     return (in[i >> 3] >> (i & 7)) & 1;
1130 }
1131
1132 /*
1133  * Interleaved point multiplication using precomputed point multiples: The
1134  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1135  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1136  * generator, using certain (large) precomputed multiples in g_pre_comp.
1137  * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1138  */
1139 static void batch_mul(felem x_out, felem y_out, felem z_out,
1140                       const felem_bytearray scalars[],
1141                       const unsigned num_points, const u8 *g_scalar,
1142                       const int mixed, const felem pre_comp[][17][3],
1143                       const felem g_pre_comp[2][16][3])
1144 {
1145     int i, skip;
1146     unsigned num;
1147     unsigned gen_mul = (g_scalar != NULL);
1148     felem nq[3], tmp[4];
1149     u64 bits;
1150     u8 sign, digit;
1151
1152     /* set nq to the point at infinity */
1153     memset(nq, 0, sizeof(nq));
1154
1155     /*
1156      * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1157      * of the generator (two in each of the last 28 rounds) and additions of
1158      * other points multiples (every 5th round).
1159      */
1160     skip = 1;                   /* save two point operations in the first
1161                                  * round */
1162     for (i = (num_points ? 220 : 27); i >= 0; --i) {
1163         /* double */
1164         if (!skip)
1165             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1166
1167         /* add multiples of the generator */
1168         if (gen_mul && (i <= 27)) {
1169             /* first, look 28 bits upwards */
1170             bits = get_bit(g_scalar, i + 196) << 3;
1171             bits |= get_bit(g_scalar, i + 140) << 2;
1172             bits |= get_bit(g_scalar, i + 84) << 1;
1173             bits |= get_bit(g_scalar, i + 28);
1174             /* select the point to add, in constant time */
1175             select_point(bits, 16, g_pre_comp[1], tmp);
1176
1177             if (!skip) {
1178                 /* value 1 below is argument for "mixed" */
1179                 point_add(nq[0], nq[1], nq[2],
1180                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1181             } else {
1182                 memcpy(nq, tmp, 3 * sizeof(felem));
1183                 skip = 0;
1184             }
1185
1186             /* second, look at the current position */
1187             bits = get_bit(g_scalar, i + 168) << 3;
1188             bits |= get_bit(g_scalar, i + 112) << 2;
1189             bits |= get_bit(g_scalar, i + 56) << 1;
1190             bits |= get_bit(g_scalar, i);
1191             /* select the point to add, in constant time */
1192             select_point(bits, 16, g_pre_comp[0], tmp);
1193             point_add(nq[0], nq[1], nq[2],
1194                       nq[0], nq[1], nq[2],
1195                       1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1196         }
1197
1198         /* do other additions every 5 doublings */
1199         if (num_points && (i % 5 == 0)) {
1200             /* loop over all scalars */
1201             for (num = 0; num < num_points; ++num) {
1202                 bits = get_bit(scalars[num], i + 4) << 5;
1203                 bits |= get_bit(scalars[num], i + 3) << 4;
1204                 bits |= get_bit(scalars[num], i + 2) << 3;
1205                 bits |= get_bit(scalars[num], i + 1) << 2;
1206                 bits |= get_bit(scalars[num], i) << 1;
1207                 bits |= get_bit(scalars[num], i - 1);
1208                 ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1209
1210                 /* select the point to add or subtract */
1211                 select_point(digit, 17, pre_comp[num], tmp);
1212                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1213                                             * point */
1214                 copy_conditional(tmp[1], tmp[3], sign);
1215
1216                 if (!skip) {
1217                     point_add(nq[0], nq[1], nq[2],
1218                               nq[0], nq[1], nq[2],
1219                               mixed, tmp[0], tmp[1], tmp[2]);
1220                 } else {
1221                     memcpy(nq, tmp, 3 * sizeof(felem));
1222                     skip = 0;
1223                 }
1224             }
1225         }
1226     }
1227     felem_assign(x_out, nq[0]);
1228     felem_assign(y_out, nq[1]);
1229     felem_assign(z_out, nq[2]);
1230 }
1231
1232 /******************************************************************************/
1233 /*
1234  * FUNCTIONS TO MANAGE PRECOMPUTATION
1235  */
1236
1237 static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1238 {
1239     NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1240
1241     if (!ret) {
1242         ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1243         return ret;
1244     }
1245
1246     ret->references = 1;
1247
1248     ret->lock = CRYPTO_THREAD_lock_new();
1249     if (ret->lock == NULL) {
1250         ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1251         OPENSSL_free(ret);
1252         return NULL;
1253     }
1254     return ret;
1255 }
1256
1257 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1258 {
1259     int i;
1260     if (p != NULL)
1261         CRYPTO_UP_REF(&p->references, &i, p->lock);
1262     return p;
1263 }
1264
1265 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1266 {
1267     int i;
1268
1269     if (p == NULL)
1270         return;
1271
1272     CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1273     REF_PRINT_COUNT("EC_nistp224", p);
1274     if (i > 0)
1275         return;
1276     REF_ASSERT_ISNT(i < 0);
1277
1278     CRYPTO_THREAD_lock_free(p->lock);
1279     OPENSSL_free(p);
1280 }
1281
1282 /******************************************************************************/
1283 /*
1284  * OPENSSL EC_METHOD FUNCTIONS
1285  */
1286
1287 int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
1288 {
1289     int ret;
1290     ret = ossl_ec_GFp_simple_group_init(group);
1291     group->a_is_minus3 = 1;
1292     return ret;
1293 }
1294
1295 int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1296                                          const BIGNUM *a, const BIGNUM *b,
1297                                          BN_CTX *ctx)
1298 {
1299     int ret = 0;
1300     BIGNUM *curve_p, *curve_a, *curve_b;
1301 #ifndef FIPS_MODULE
1302     BN_CTX *new_ctx = NULL;
1303
1304     if (ctx == NULL)
1305         ctx = new_ctx = BN_CTX_new();
1306 #endif
1307     if (ctx == NULL)
1308         return 0;
1309
1310     BN_CTX_start(ctx);
1311     curve_p = BN_CTX_get(ctx);
1312     curve_a = BN_CTX_get(ctx);
1313     curve_b = BN_CTX_get(ctx);
1314     if (curve_b == NULL)
1315         goto err;
1316     BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1317     BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1318     BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1319     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1320         ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1321         goto err;
1322     }
1323     group->field_mod_func = BN_nist_mod_224;
1324     ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1325  err:
1326     BN_CTX_end(ctx);
1327 #ifndef FIPS_MODULE
1328     BN_CTX_free(new_ctx);
1329 #endif
1330     return ret;
1331 }
1332
1333 /*
1334  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1335  * (X/Z^2, Y/Z^3)
1336  */
1337 int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1338                                                       const EC_POINT *point,
1339                                                       BIGNUM *x, BIGNUM *y,
1340                                                       BN_CTX *ctx)
1341 {
1342     felem z1, z2, x_in, y_in, x_out, y_out;
1343     widefelem tmp;
1344
1345     if (EC_POINT_is_at_infinity(group, point)) {
1346         ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1347         return 0;
1348     }
1349     if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1350         (!BN_to_felem(z1, point->Z)))
1351         return 0;
1352     felem_inv(z2, z1);
1353     felem_square(tmp, z2);
1354     felem_reduce(z1, tmp);
1355     felem_mul(tmp, x_in, z1);
1356     felem_reduce(x_in, tmp);
1357     felem_contract(x_out, x_in);
1358     if (x != NULL) {
1359         if (!felem_to_BN(x, x_out)) {
1360             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1361             return 0;
1362         }
1363     }
1364     felem_mul(tmp, z1, z2);
1365     felem_reduce(z1, tmp);
1366     felem_mul(tmp, y_in, z1);
1367     felem_reduce(y_in, tmp);
1368     felem_contract(y_out, y_in);
1369     if (y != NULL) {
1370         if (!felem_to_BN(y, y_out)) {
1371             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1372             return 0;
1373         }
1374     }
1375     return 1;
1376 }
1377
1378 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1379                                felem tmp_felems[ /* num+1 */ ])
1380 {
1381     /*
1382      * Runs in constant time, unless an input is the point at infinity (which
1383      * normally shouldn't happen).
1384      */
1385     ossl_ec_GFp_nistp_points_make_affine_internal(num,
1386                                                   points,
1387                                                   sizeof(felem),
1388                                                   tmp_felems,
1389                                                   (void (*)(void *))felem_one,
1390                                                   felem_is_zero_int,
1391                                                   (void (*)(void *, const void *))
1392                                                   felem_assign,
1393                                                   (void (*)(void *, const void *))
1394                                                   felem_square_reduce, (void (*)
1395                                                                         (void *,
1396                                                                          const void
1397                                                                          *,
1398                                                                          const void
1399                                                                          *))
1400                                                   felem_mul_reduce,
1401                                                   (void (*)(void *, const void *))
1402                                                   felem_inv,
1403                                                   (void (*)(void *, const void *))
1404                                                   felem_contract);
1405 }
1406
1407 /*
1408  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1409  * values Result is stored in r (r can equal one of the inputs).
1410  */
1411 int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1412                                     const BIGNUM *scalar, size_t num,
1413                                     const EC_POINT *points[],
1414                                     const BIGNUM *scalars[], BN_CTX *ctx)
1415 {
1416     int ret = 0;
1417     int j;
1418     unsigned i;
1419     int mixed = 0;
1420     BIGNUM *x, *y, *z, *tmp_scalar;
1421     felem_bytearray g_secret;
1422     felem_bytearray *secrets = NULL;
1423     felem (*pre_comp)[17][3] = NULL;
1424     felem *tmp_felems = NULL;
1425     int num_bytes;
1426     int have_pre_comp = 0;
1427     size_t num_points = num;
1428     felem x_in, y_in, z_in, x_out, y_out, z_out;
1429     NISTP224_PRE_COMP *pre = NULL;
1430     const felem(*g_pre_comp)[16][3] = NULL;
1431     EC_POINT *generator = NULL;
1432     const EC_POINT *p = NULL;
1433     const BIGNUM *p_scalar = NULL;
1434
1435     BN_CTX_start(ctx);
1436     x = BN_CTX_get(ctx);
1437     y = BN_CTX_get(ctx);
1438     z = BN_CTX_get(ctx);
1439     tmp_scalar = BN_CTX_get(ctx);
1440     if (tmp_scalar == NULL)
1441         goto err;
1442
1443     if (scalar != NULL) {
1444         pre = group->pre_comp.nistp224;
1445         if (pre)
1446             /* we have precomputation, try to use it */
1447             g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1448         else
1449             /* try to use the standard precomputation */
1450             g_pre_comp = &gmul[0];
1451         generator = EC_POINT_new(group);
1452         if (generator == NULL)
1453             goto err;
1454         /* get the generator from precomputation */
1455         if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1456             !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1457             !felem_to_BN(z, g_pre_comp[0][1][2])) {
1458             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1459             goto err;
1460         }
1461         if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1462                                                                 generator,
1463                                                                 x, y, z, ctx))
1464             goto err;
1465         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1466             /* precomputation matches generator */
1467             have_pre_comp = 1;
1468         else
1469             /*
1470              * we don't have valid precomputation: treat the generator as a
1471              * random point
1472              */
1473             num_points = num_points + 1;
1474     }
1475
1476     if (num_points > 0) {
1477         if (num_points >= 3) {
1478             /*
1479              * unless we precompute multiples for just one or two points,
1480              * converting those into affine form is time well spent
1481              */
1482             mixed = 1;
1483         }
1484         secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1485         pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1486         if (mixed)
1487             tmp_felems =
1488                 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1489         if ((secrets == NULL) || (pre_comp == NULL)
1490             || (mixed && (tmp_felems == NULL))) {
1491             ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1492             goto err;
1493         }
1494
1495         /*
1496          * we treat NULL scalars as 0, and NULL points as points at infinity,
1497          * i.e., they contribute nothing to the linear combination
1498          */
1499         for (i = 0; i < num_points; ++i) {
1500             if (i == num) {
1501                 /* the generator */
1502                 p = EC_GROUP_get0_generator(group);
1503                 p_scalar = scalar;
1504             } else {
1505                 /* the i^th point */
1506                 p = points[i];
1507                 p_scalar = scalars[i];
1508             }
1509             if ((p_scalar != NULL) && (p != NULL)) {
1510                 /* reduce scalar to 0 <= scalar < 2^224 */
1511                 if ((BN_num_bits(p_scalar) > 224)
1512                     || (BN_is_negative(p_scalar))) {
1513                     /*
1514                      * this is an unusual input, and we don't guarantee
1515                      * constant-timeness
1516                      */
1517                     if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1518                         ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1519                         goto err;
1520                     }
1521                     num_bytes = BN_bn2lebinpad(tmp_scalar,
1522                                                secrets[i], sizeof(secrets[i]));
1523                 } else {
1524                     num_bytes = BN_bn2lebinpad(p_scalar,
1525                                                secrets[i], sizeof(secrets[i]));
1526                 }
1527                 if (num_bytes < 0) {
1528                     ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1529                     goto err;
1530                 }
1531                 /* precompute multiples */
1532                 if ((!BN_to_felem(x_out, p->X)) ||
1533                     (!BN_to_felem(y_out, p->Y)) ||
1534                     (!BN_to_felem(z_out, p->Z)))
1535                     goto err;
1536                 felem_assign(pre_comp[i][1][0], x_out);
1537                 felem_assign(pre_comp[i][1][1], y_out);
1538                 felem_assign(pre_comp[i][1][2], z_out);
1539                 for (j = 2; j <= 16; ++j) {
1540                     if (j & 1) {
1541                         point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1542                                   pre_comp[i][j][2], pre_comp[i][1][0],
1543                                   pre_comp[i][1][1], pre_comp[i][1][2], 0,
1544                                   pre_comp[i][j - 1][0],
1545                                   pre_comp[i][j - 1][1],
1546                                   pre_comp[i][j - 1][2]);
1547                     } else {
1548                         point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1549                                      pre_comp[i][j][2], pre_comp[i][j / 2][0],
1550                                      pre_comp[i][j / 2][1],
1551                                      pre_comp[i][j / 2][2]);
1552                     }
1553                 }
1554             }
1555         }
1556         if (mixed)
1557             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1558     }
1559
1560     /* the scalar for the generator */
1561     if ((scalar != NULL) && (have_pre_comp)) {
1562         memset(g_secret, 0, sizeof(g_secret));
1563         /* reduce scalar to 0 <= scalar < 2^224 */
1564         if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1565             /*
1566              * this is an unusual input, and we don't guarantee
1567              * constant-timeness
1568              */
1569             if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1570                 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1571                 goto err;
1572             }
1573             num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1574         } else {
1575             num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1576         }
1577         /* do the multiplication with generator precomputation */
1578         batch_mul(x_out, y_out, z_out,
1579                   (const felem_bytearray(*))secrets, num_points,
1580                   g_secret,
1581                   mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1582     } else {
1583         /* do the multiplication without generator precomputation */
1584         batch_mul(x_out, y_out, z_out,
1585                   (const felem_bytearray(*))secrets, num_points,
1586                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1587     }
1588     /* reduce the output to its unique minimal representation */
1589     felem_contract(x_in, x_out);
1590     felem_contract(y_in, y_out);
1591     felem_contract(z_in, z_out);
1592     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1593         (!felem_to_BN(z, z_in))) {
1594         ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1595         goto err;
1596     }
1597     ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1598                                                              ctx);
1599
1600  err:
1601     BN_CTX_end(ctx);
1602     EC_POINT_free(generator);
1603     OPENSSL_free(secrets);
1604     OPENSSL_free(pre_comp);
1605     OPENSSL_free(tmp_felems);
1606     return ret;
1607 }
1608
1609 int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1610 {
1611     int ret = 0;
1612     NISTP224_PRE_COMP *pre = NULL;
1613     int i, j;
1614     BIGNUM *x, *y;
1615     EC_POINT *generator = NULL;
1616     felem tmp_felems[32];
1617 #ifndef FIPS_MODULE
1618     BN_CTX *new_ctx = NULL;
1619 #endif
1620
1621     /* throw away old precomputation */
1622     EC_pre_comp_free(group);
1623
1624 #ifndef FIPS_MODULE
1625     if (ctx == NULL)
1626         ctx = new_ctx = BN_CTX_new();
1627 #endif
1628     if (ctx == NULL)
1629         return 0;
1630
1631     BN_CTX_start(ctx);
1632     x = BN_CTX_get(ctx);
1633     y = BN_CTX_get(ctx);
1634     if (y == NULL)
1635         goto err;
1636     /* get the generator */
1637     if (group->generator == NULL)
1638         goto err;
1639     generator = EC_POINT_new(group);
1640     if (generator == NULL)
1641         goto err;
1642     BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1643     BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1644     if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1645         goto err;
1646     if ((pre = nistp224_pre_comp_new()) == NULL)
1647         goto err;
1648     /*
1649      * if the generator is the standard one, use built-in precomputation
1650      */
1651     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1652         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1653         goto done;
1654     }
1655     if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1656         (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1657         (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1658         goto err;
1659     /*
1660      * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1661      * 2^140*G, 2^196*G for the second one
1662      */
1663     for (i = 1; i <= 8; i <<= 1) {
1664         point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1665                      pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1666                      pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1667         for (j = 0; j < 27; ++j) {
1668             point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1669                          pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1670                          pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1671         }
1672         if (i == 8)
1673             break;
1674         point_double(pre->g_pre_comp[0][2 * i][0],
1675                      pre->g_pre_comp[0][2 * i][1],
1676                      pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1677                      pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1678         for (j = 0; j < 27; ++j) {
1679             point_double(pre->g_pre_comp[0][2 * i][0],
1680                          pre->g_pre_comp[0][2 * i][1],
1681                          pre->g_pre_comp[0][2 * i][2],
1682                          pre->g_pre_comp[0][2 * i][0],
1683                          pre->g_pre_comp[0][2 * i][1],
1684                          pre->g_pre_comp[0][2 * i][2]);
1685         }
1686     }
1687     for (i = 0; i < 2; i++) {
1688         /* g_pre_comp[i][0] is the point at infinity */
1689         memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1690         /* the remaining multiples */
1691         /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1692         point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1693                   pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1694                   pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1695                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1696                   pre->g_pre_comp[i][2][2]);
1697         /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1698         point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1699                   pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1700                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1701                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1702                   pre->g_pre_comp[i][2][2]);
1703         /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1704         point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1705                   pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1706                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1707                   0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1708                   pre->g_pre_comp[i][4][2]);
1709         /*
1710          * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1711          */
1712         point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1713                   pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1714                   pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1715                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1716                   pre->g_pre_comp[i][2][2]);
1717         for (j = 1; j < 8; ++j) {
1718             /* odd multiples: add G resp. 2^28*G */
1719             point_add(pre->g_pre_comp[i][2 * j + 1][0],
1720                       pre->g_pre_comp[i][2 * j + 1][1],
1721                       pre->g_pre_comp[i][2 * j + 1][2],
1722                       pre->g_pre_comp[i][2 * j][0],
1723                       pre->g_pre_comp[i][2 * j][1],
1724                       pre->g_pre_comp[i][2 * j][2], 0,
1725                       pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1726                       pre->g_pre_comp[i][1][2]);
1727         }
1728     }
1729     make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1730
1731  done:
1732     SETPRECOMP(group, nistp224, pre);
1733     pre = NULL;
1734     ret = 1;
1735  err:
1736     BN_CTX_end(ctx);
1737     EC_POINT_free(generator);
1738 #ifndef FIPS_MODULE
1739     BN_CTX_free(new_ctx);
1740 #endif
1741     EC_nistp224_pre_comp_free(pre);
1742     return ret;
1743 }
1744
1745 int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1746 {
1747     return HAVEPRECOMP(group, nistp224);
1748 }