crypto/ec/ecp_nistp224.c

   1 /*
   2  * Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
   3  *
   4  * Licensed under the Apache License 2.0 (the "License").  You may not use
   5  * this file except in compliance with the License.  You can obtain a copy
   6  * in the file LICENSE in the source distribution or at
   7  * https://www.openssl.org/source/license.html
   8  */
   9
  10 /* Copyright 2011 Google Inc.
  11  *
  12  * Licensed under the Apache License, Version 2.0 (the "License");
  13  *
  14  * you may not use this file except in compliance with the License.
  15  * You may obtain a copy of the License at
  16  *
  17  *     http://www.apache.org/licenses/LICENSE-2.0
  18  *
  19  *  Unless required by applicable law or agreed to in writing, software
  20  *  distributed under the License is distributed on an "AS IS" BASIS,
  21  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  22  *  See the License for the specific language governing permissions and
  23  *  limitations under the License.
  24  */
  25
  26 /*
  27  * ECDSA low level APIs are deprecated for public use, but still ok for
  28  * internal use.
  29  */
  30 #include "internal/deprecated.h"
  31
  32 /*
  33  * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
  34  *
  35  * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
  36  * and Adam Langley's public domain 64-bit C implementation of curve25519
  37  */
  38
  39 #include <openssl/opensslconf.h>
  40
  41 #include <stdint.h>
  42 #include <string.h>
  43 #include <openssl/err.h>
  44 #include "ec_local.h"
  45
  46 #if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
  47   /* even with gcc, the typedef won't work for 32-bit platforms */
  48 typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
  49                                  * platforms */
  50 #else
  51 # error "Your compiler doesn't appear to support 128-bit integer types"
  52 #endif
  53
  54 typedef uint8_t u8;
  55 typedef uint64_t u64;
  56
  57 /******************************************************************************/
  58 /*-
  59  * INTERNAL REPRESENTATION OF FIELD ELEMENTS
  60  *
  61  * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
  62  * using 64-bit coefficients called 'limbs',
  63  * and sometimes (for multiplication results) as
  64  * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
  65  * using 128-bit coefficients called 'widelimbs'.
  66  * A 4-limb representation is an 'felem';
  67  * a 7-widelimb representation is a 'widefelem'.
  68  * Even within felems, bits of adjacent limbs overlap, and we don't always
  69  * reduce the representations: we ensure that inputs to each felem
  70  * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
  71  * and fit into a 128-bit word without overflow. The coefficients are then
  72  * again partially reduced to obtain an felem satisfying a_i < 2^57.
  73  * We only reduce to the unique minimal representation at the end of the
  74  * computation.
  75  */
  76
  77 typedef uint64_t limb;
  78 typedef uint64_t limb_aX __attribute((__aligned__(1)));
  79 typedef uint128_t widelimb;
  80
  81 typedef limb felem[4];
  82 typedef widelimb widefelem[7];
  83
  84 /*
  85  * Field element represented as a byte array. 28*8 = 224 bits is also the
  86  * group order size for the elliptic curve, and we also use this type for
  87  * scalars for point multiplication.
  88  */
  89 typedef u8 felem_bytearray[28];
  90
  91 static const felem_bytearray nistp224_curve_params[5] = {
  92     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
  93      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
  94      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
  95     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
  96      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
  97      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
  98     {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
  99      0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
 100      0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
 101     {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
 102      0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
 103      0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
 104     {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
 105      0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
 106      0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
 107 };
 108
 109 /*-
 110  * Precomputed multiples of the standard generator
 111  * Points are given in coordinates (X, Y, Z) where Z normally is 1
 112  * (0 for the point at infinity).
 113  * For each field element, slice a_0 is word 0, etc.
 114  *
 115  * The table has 2 * 16 elements, starting with the following:
 116  * index | bits    | point
 117  * ------+---------+------------------------------
 118  *     0 | 0 0 0 0 | 0G
 119  *     1 | 0 0 0 1 | 1G
 120  *     2 | 0 0 1 0 | 2^56G
 121  *     3 | 0 0 1 1 | (2^56 + 1)G
 122  *     4 | 0 1 0 0 | 2^112G
 123  *     5 | 0 1 0 1 | (2^112 + 1)G
 124  *     6 | 0 1 1 0 | (2^112 + 2^56)G
 125  *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
 126  *     8 | 1 0 0 0 | 2^168G
 127  *     9 | 1 0 0 1 | (2^168 + 1)G
 128  *    10 | 1 0 1 0 | (2^168 + 2^56)G
 129  *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
 130  *    12 | 1 1 0 0 | (2^168 + 2^112)G
 131  *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
 132  *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
 133  *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
 134  * followed by a copy of this with each element multiplied by 2^28.
 135  *
 136  * The reason for this is so that we can clock bits into four different
 137  * locations when doing simple scalar multiplies against the base point,
 138  * and then another four locations using the second 16 elements.
 139  */
 140 static const felem gmul[2][16][3] = {
 141 {{{0, 0, 0, 0},
 142   {0, 0, 0, 0},
 143   {0, 0, 0, 0}},
 144  {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
 145   {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
 146   {1, 0, 0, 0}},
 147  {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
 148   {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
 149   {1, 0, 0, 0}},
 150  {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
 151   {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
 152   {1, 0, 0, 0}},
 153  {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
 154   {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
 155   {1, 0, 0, 0}},
 156  {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
 157   {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
 158   {1, 0, 0, 0}},
 159  {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
 160   {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
 161   {1, 0, 0, 0}},
 162  {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
 163   {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
 164   {1, 0, 0, 0}},
 165  {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
 166   {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
 167   {1, 0, 0, 0}},
 168  {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
 169   {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
 170   {1, 0, 0, 0}},
 171  {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
 172   {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
 173   {1, 0, 0, 0}},
 174  {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
 175   {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
 176   {1, 0, 0, 0}},
 177  {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
 178   {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
 179   {1, 0, 0, 0}},
 180  {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
 181   {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
 182   {1, 0, 0, 0}},
 183  {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
 184   {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
 185   {1, 0, 0, 0}},
 186  {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
 187   {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
 188   {1, 0, 0, 0}}},
 189 {{{0, 0, 0, 0},
 190   {0, 0, 0, 0},
 191   {0, 0, 0, 0}},
 192  {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
 193   {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
 194   {1, 0, 0, 0}},
 195  {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
 196   {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
 197   {1, 0, 0, 0}},
 198  {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
 199   {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
 200   {1, 0, 0, 0}},
 201  {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
 202   {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
 203   {1, 0, 0, 0}},
 204  {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
 205   {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
 206   {1, 0, 0, 0}},
 207  {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
 208   {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
 209   {1, 0, 0, 0}},
 210  {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
 211   {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
 212   {1, 0, 0, 0}},
 213  {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
 214   {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
 215   {1, 0, 0, 0}},
 216  {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
 217   {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
 218   {1, 0, 0, 0}},
 219  {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
 220   {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
 221   {1, 0, 0, 0}},
 222  {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
 223   {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
 224   {1, 0, 0, 0}},
 225  {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
 226   {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
 227   {1, 0, 0, 0}},
 228  {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
 229   {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
 230   {1, 0, 0, 0}},
 231  {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
 232   {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
 233   {1, 0, 0, 0}},
 234  {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
 235   {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
 236   {1, 0, 0, 0}}}
 237 };
 238
 239 /* Precomputation for the group generator. */
 240 struct nistp224_pre_comp_st {
 241     felem g_pre_comp[2][16][3];
 242     CRYPTO_REF_COUNT references;
 243     CRYPTO_RWLOCK *lock;
 244 };
 245
 246 const EC_METHOD *EC_GFp_nistp224_method(void)
 247 {
 248     static const EC_METHOD ret = {
 249         EC_FLAGS_DEFAULT_OCT,
 250         NID_X9_62_prime_field,
 251         ec_GFp_nistp224_group_init,
 252         ec_GFp_simple_group_finish,
 253         ec_GFp_simple_group_clear_finish,
 254         ec_GFp_nist_group_copy,
 255         ec_GFp_nistp224_group_set_curve,
 256         ec_GFp_simple_group_get_curve,
 257         ec_GFp_simple_group_get_degree,
 258         ec_group_simple_order_bits,
 259         ec_GFp_simple_group_check_discriminant,
 260         ec_GFp_simple_point_init,
 261         ec_GFp_simple_point_finish,
 262         ec_GFp_simple_point_clear_finish,
 263         ec_GFp_simple_point_copy,
 264         ec_GFp_simple_point_set_to_infinity,
 265         ec_GFp_simple_point_set_affine_coordinates,
 266         ec_GFp_nistp224_point_get_affine_coordinates,
 267         0 /* point_set_compressed_coordinates */ ,
 268         0 /* point2oct */ ,
 269         0 /* oct2point */ ,
 270         ec_GFp_simple_add,
 271         ec_GFp_simple_dbl,
 272         ec_GFp_simple_invert,
 273         ec_GFp_simple_is_at_infinity,
 274         ec_GFp_simple_is_on_curve,
 275         ec_GFp_simple_cmp,
 276         ec_GFp_simple_make_affine,
 277         ec_GFp_simple_points_make_affine,
 278         ec_GFp_nistp224_points_mul,
 279         ec_GFp_nistp224_precompute_mult,
 280         ec_GFp_nistp224_have_precompute_mult,
 281         ec_GFp_nist_field_mul,
 282         ec_GFp_nist_field_sqr,
 283         0 /* field_div */ ,
 284         ec_GFp_simple_field_inv,
 285         0 /* field_encode */ ,
 286         0 /* field_decode */ ,
 287         0,                      /* field_set_to_one */
 288         ec_key_simple_priv2oct,
 289         ec_key_simple_oct2priv,
 290         0, /* set private */
 291         ec_key_simple_generate_key,
 292         ec_key_simple_check_key,
 293         ec_key_simple_generate_public_key,
 294         0, /* keycopy */
 295         0, /* keyfinish */
 296         ecdh_simple_compute_key,
 297         ecdsa_simple_sign_setup,
 298         ecdsa_simple_sign_sig,
 299         ecdsa_simple_verify_sig,
 300         0, /* field_inverse_mod_ord */
 301         0, /* blind_coordinates */
 302         0, /* ladder_pre */
 303         0, /* ladder_step */
 304         0  /* ladder_post */
 305     };
 306
 307     return &ret;
 308 }
 309
 310 /*
 311  * Helper functions to convert field elements to/from internal representation
 312  */
 313 static void bin28_to_felem(felem out, const u8 in[28])
 314 {
 315     out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
 316     out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
 317     out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
 318     out[3] = (*((const limb_aX *)(in + 20))) >> 8;
 319 }
 320
 321 static void felem_to_bin28(u8 out[28], const felem in)
 322 {
 323     unsigned i;
 324     for (i = 0; i < 7; ++i) {
 325         out[i] = in[0] >> (8 * i);
 326         out[i + 7] = in[1] >> (8 * i);
 327         out[i + 14] = in[2] >> (8 * i);
 328         out[i + 21] = in[3] >> (8 * i);
 329     }
 330 }
 331
 332 /* From OpenSSL BIGNUM to internal representation */
 333 static int BN_to_felem(felem out, const BIGNUM *bn)
 334 {
 335     felem_bytearray b_out;
 336     int num_bytes;
 337
 338     if (BN_is_negative(bn)) {
 339         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 340         return 0;
 341     }
 342     num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
 343     if (num_bytes < 0) {
 344         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 345         return 0;
 346     }
 347     bin28_to_felem(out, b_out);
 348     return 1;
 349 }
 350
 351 /* From internal representation to OpenSSL BIGNUM */
 352 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
 353 {
 354     felem_bytearray b_out;
 355     felem_to_bin28(b_out, in);
 356     return BN_lebin2bn(b_out, sizeof(b_out), out);
 357 }
 358
 359 /******************************************************************************/
 360 /*-
 361  *                              FIELD OPERATIONS
 362  *
 363  * Field operations, using the internal representation of field elements.
 364  * NB! These operations are specific to our point multiplication and cannot be
 365  * expected to be correct in general - e.g., multiplication with a large scalar
 366  * will cause an overflow.
 367  *
 368  */
 369
 370 static void felem_one(felem out)
 371 {
 372     out[0] = 1;
 373     out[1] = 0;
 374     out[2] = 0;
 375     out[3] = 0;
 376 }
 377
 378 static void felem_assign(felem out, const felem in)
 379 {
 380     out[0] = in[0];
 381     out[1] = in[1];
 382     out[2] = in[2];
 383     out[3] = in[3];
 384 }
 385
 386 /* Sum two field elements: out += in */
 387 static void felem_sum(felem out, const felem in)
 388 {
 389     out[0] += in[0];
 390     out[1] += in[1];
 391     out[2] += in[2];
 392     out[3] += in[3];
 393 }
 394
 395 /* Subtract field elements: out -= in */
 396 /* Assumes in[i] < 2^57 */
 397 static void felem_diff(felem out, const felem in)
 398 {
 399     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
 400     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
 401     static const limb two58m42m2 = (((limb) 1) << 58) -
 402         (((limb) 1) << 42) - (((limb) 1) << 2);
 403
 404     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 405     out[0] += two58p2;
 406     out[1] += two58m42m2;
 407     out[2] += two58m2;
 408     out[3] += two58m2;
 409
 410     out[0] -= in[0];
 411     out[1] -= in[1];
 412     out[2] -= in[2];
 413     out[3] -= in[3];
 414 }
 415
 416 /* Subtract in unreduced 128-bit mode: out -= in */
 417 /* Assumes in[i] < 2^119 */
 418 static void widefelem_diff(widefelem out, const widefelem in)
 419 {
 420     static const widelimb two120 = ((widelimb) 1) << 120;
 421     static const widelimb two120m64 = (((widelimb) 1) << 120) -
 422         (((widelimb) 1) << 64);
 423     static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
 424         (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
 425
 426     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 427     out[0] += two120;
 428     out[1] += two120m64;
 429     out[2] += two120m64;
 430     out[3] += two120;
 431     out[4] += two120m104m64;
 432     out[5] += two120m64;
 433     out[6] += two120m64;
 434
 435     out[0] -= in[0];
 436     out[1] -= in[1];
 437     out[2] -= in[2];
 438     out[3] -= in[3];
 439     out[4] -= in[4];
 440     out[5] -= in[5];
 441     out[6] -= in[6];
 442 }
 443
 444 /* Subtract in mixed mode: out128 -= in64 */
 445 /* in[i] < 2^63 */
 446 static void felem_diff_128_64(widefelem out, const felem in)
 447 {
 448     static const widelimb two64p8 = (((widelimb) 1) << 64) +
 449         (((widelimb) 1) << 8);
 450     static const widelimb two64m8 = (((widelimb) 1) << 64) -
 451         (((widelimb) 1) << 8);
 452     static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
 453         (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
 454
 455     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 456     out[0] += two64p8;
 457     out[1] += two64m48m8;
 458     out[2] += two64m8;
 459     out[3] += two64m8;
 460
 461     out[0] -= in[0];
 462     out[1] -= in[1];
 463     out[2] -= in[2];
 464     out[3] -= in[3];
 465 }
 466
 467 /*
 468  * Multiply a field element by a scalar: out = out * scalar The scalars we
 469  * actually use are small, so results fit without overflow
 470  */
 471 static void felem_scalar(felem out, const limb scalar)
 472 {
 473     out[0] *= scalar;
 474     out[1] *= scalar;
 475     out[2] *= scalar;
 476     out[3] *= scalar;
 477 }
 478
 479 /*
 480  * Multiply an unreduced field element by a scalar: out = out * scalar The
 481  * scalars we actually use are small, so results fit without overflow
 482  */
 483 static void widefelem_scalar(widefelem out, const widelimb scalar)
 484 {
 485     out[0] *= scalar;
 486     out[1] *= scalar;
 487     out[2] *= scalar;
 488     out[3] *= scalar;
 489     out[4] *= scalar;
 490     out[5] *= scalar;
 491     out[6] *= scalar;
 492 }
 493
 494 /* Square a field element: out = in^2 */
 495 static void felem_square(widefelem out, const felem in)
 496 {
 497     limb tmp0, tmp1, tmp2;
 498     tmp0 = 2 * in[0];
 499     tmp1 = 2 * in[1];
 500     tmp2 = 2 * in[2];
 501     out[0] = ((widelimb) in[0]) * in[0];
 502     out[1] = ((widelimb) in[0]) * tmp1;
 503     out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
 504     out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
 505     out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
 506     out[5] = ((widelimb) in[3]) * tmp2;
 507     out[6] = ((widelimb) in[3]) * in[3];
 508 }
 509
 510 /* Multiply two field elements: out = in1 * in2 */
 511 static void felem_mul(widefelem out, const felem in1, const felem in2)
 512 {
 513     out[0] = ((widelimb) in1[0]) * in2[0];
 514     out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
 515     out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
 516              ((widelimb) in1[2]) * in2[0];
 517     out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
 518              ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
 519     out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
 520              ((widelimb) in1[3]) * in2[1];
 521     out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
 522     out[6] = ((widelimb) in1[3]) * in2[3];
 523 }
 524
 525 /*-
 526  * Reduce seven 128-bit coefficients to four 64-bit coefficients.
 527  * Requires in[i] < 2^126,
 528  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
 529 static void felem_reduce(felem out, const widefelem in)
 530 {
 531     static const widelimb two127p15 = (((widelimb) 1) << 127) +
 532         (((widelimb) 1) << 15);
 533     static const widelimb two127m71 = (((widelimb) 1) << 127) -
 534         (((widelimb) 1) << 71);
 535     static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
 536         (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
 537     widelimb output[5];
 538
 539     /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
 540     output[0] = in[0] + two127p15;
 541     output[1] = in[1] + two127m71m55;
 542     output[2] = in[2] + two127m71;
 543     output[3] = in[3];
 544     output[4] = in[4];
 545
 546     /* Eliminate in[4], in[5], in[6] */
 547     output[4] += in[6] >> 16;
 548     output[3] += (in[6] & 0xffff) << 40;
 549     output[2] -= in[6];
 550
 551     output[3] += in[5] >> 16;
 552     output[2] += (in[5] & 0xffff) << 40;
 553     output[1] -= in[5];
 554
 555     output[2] += output[4] >> 16;
 556     output[1] += (output[4] & 0xffff) << 40;
 557     output[0] -= output[4];
 558
 559     /* Carry 2 -> 3 -> 4 */
 560     output[3] += output[2] >> 56;
 561     output[2] &= 0x00ffffffffffffff;
 562
 563     output[4] = output[3] >> 56;
 564     output[3] &= 0x00ffffffffffffff;
 565
 566     /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
 567
 568     /* Eliminate output[4] */
 569     output[2] += output[4] >> 16;
 570     /* output[2] < 2^56 + 2^56 = 2^57 */
 571     output[1] += (output[4] & 0xffff) << 40;
 572     output[0] -= output[4];
 573
 574     /* Carry 0 -> 1 -> 2 -> 3 */
 575     output[1] += output[0] >> 56;
 576     out[0] = output[0] & 0x00ffffffffffffff;
 577
 578     output[2] += output[1] >> 56;
 579     /* output[2] < 2^57 + 2^72 */
 580     out[1] = output[1] & 0x00ffffffffffffff;
 581     output[3] += output[2] >> 56;
 582     /* output[3] <= 2^56 + 2^16 */
 583     out[2] = output[2] & 0x00ffffffffffffff;
 584
 585     /*-
 586      * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
 587      * out[3] <= 2^56 + 2^16 (due to final carry),
 588      * so out < 2*p
 589      */
 590     out[3] = output[3];
 591 }
 592
 593 static void felem_square_reduce(felem out, const felem in)
 594 {
 595     widefelem tmp;
 596     felem_square(tmp, in);
 597     felem_reduce(out, tmp);
 598 }
 599
 600 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
 601 {
 602     widefelem tmp;
 603     felem_mul(tmp, in1, in2);
 604     felem_reduce(out, tmp);
 605 }
 606
 607 /*
 608  * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
 609  * call felem_reduce first)
 610  */
 611 static void felem_contract(felem out, const felem in)
 612 {
 613     static const int64_t two56 = ((limb) 1) << 56;
 614     /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
 615     /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
 616     int64_t tmp[4], a;
 617     tmp[0] = in[0];
 618     tmp[1] = in[1];
 619     tmp[2] = in[2];
 620     tmp[3] = in[3];
 621     /* Case 1: a = 1 iff in >= 2^224 */
 622     a = (in[3] >> 56);
 623     tmp[0] -= a;
 624     tmp[1] += a << 40;
 625     tmp[3] &= 0x00ffffffffffffff;
 626     /*
 627      * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
 628      * and the lower part is non-zero
 629      */
 630     a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
 631         (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
 632     a &= 0x00ffffffffffffff;
 633     /* turn a into an all-one mask (if a = 0) or an all-zero mask */
 634     a = (a - 1) >> 63;
 635     /* subtract 2^224 - 2^96 + 1 if a is all-one */
 636     tmp[3] &= a ^ 0xffffffffffffffff;
 637     tmp[2] &= a ^ 0xffffffffffffffff;
 638     tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
 639     tmp[0] -= 1 & a;
 640
 641     /*
 642      * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
 643      * non-zero, so we only need one step
 644      */
 645     a = tmp[0] >> 63;
 646     tmp[0] += two56 & a;
 647     tmp[1] -= 1 & a;
 648
 649     /* carry 1 -> 2 -> 3 */
 650     tmp[2] += tmp[1] >> 56;
 651     tmp[1] &= 0x00ffffffffffffff;
 652
 653     tmp[3] += tmp[2] >> 56;
 654     tmp[2] &= 0x00ffffffffffffff;
 655
 656     /* Now 0 <= out < p */
 657     out[0] = tmp[0];
 658     out[1] = tmp[1];
 659     out[2] = tmp[2];
 660     out[3] = tmp[3];
 661 }
 662
 663 /*
 664  * Get negative value: out = -in
 665  * Requires in[i] < 2^63,
 666  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
 667  */
 668 static void felem_neg(felem out, const felem in)
 669 {
 670     widefelem tmp;
 671
 672     memset(tmp, 0, sizeof(tmp));
 673     felem_diff_128_64(tmp, in);
 674     felem_reduce(out, tmp);
 675 }
 676
 677 /*
 678  * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
 679  * elements are reduced to in < 2^225, so we only need to check three cases:
 680  * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
 681  */
 682 static limb felem_is_zero(const felem in)
 683 {
 684     limb zero, two224m96p1, two225m97p2;
 685
 686     zero = in[0] | in[1] | in[2] | in[3];
 687     zero = (((int64_t) (zero) - 1) >> 63) & 1;
 688     two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
 689         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
 690     two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
 691     two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
 692         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
 693     two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
 694     return (zero | two224m96p1 | two225m97p2);
 695 }
 696
 697 static int felem_is_zero_int(const void *in)
 698 {
 699     return (int)(felem_is_zero(in) & ((limb) 1));
 700 }
 701
 702 /* Invert a field element */
 703 /* Computation chain copied from djb's code */
 704 static void felem_inv(felem out, const felem in)
 705 {
 706     felem ftmp, ftmp2, ftmp3, ftmp4;
 707     widefelem tmp;
 708     unsigned i;
 709
 710     felem_square(tmp, in);
 711     felem_reduce(ftmp, tmp);    /* 2 */
 712     felem_mul(tmp, in, ftmp);
 713     felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
 714     felem_square(tmp, ftmp);
 715     felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
 716     felem_mul(tmp, in, ftmp);
 717     felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
 718     felem_square(tmp, ftmp);
 719     felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
 720     felem_square(tmp, ftmp2);
 721     felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
 722     felem_square(tmp, ftmp2);
 723     felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
 724     felem_mul(tmp, ftmp2, ftmp);
 725     felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
 726     felem_square(tmp, ftmp);
 727     felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
 728     for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
 729         felem_square(tmp, ftmp2);
 730         felem_reduce(ftmp2, tmp);
 731     }
 732     felem_mul(tmp, ftmp2, ftmp);
 733     felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
 734     felem_square(tmp, ftmp2);
 735     felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
 736     for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
 737         felem_square(tmp, ftmp3);
 738         felem_reduce(ftmp3, tmp);
 739     }
 740     felem_mul(tmp, ftmp3, ftmp2);
 741     felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
 742     felem_square(tmp, ftmp2);
 743     felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
 744     for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
 745         felem_square(tmp, ftmp3);
 746         felem_reduce(ftmp3, tmp);
 747     }
 748     felem_mul(tmp, ftmp3, ftmp2);
 749     felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
 750     felem_square(tmp, ftmp3);
 751     felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
 752     for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
 753         felem_square(tmp, ftmp4);
 754         felem_reduce(ftmp4, tmp);
 755     }
 756     felem_mul(tmp, ftmp3, ftmp4);
 757     felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
 758     felem_square(tmp, ftmp3);
 759     felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
 760     for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
 761         felem_square(tmp, ftmp4);
 762         felem_reduce(ftmp4, tmp);
 763     }
 764     felem_mul(tmp, ftmp2, ftmp4);
 765     felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
 766     for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
 767         felem_square(tmp, ftmp2);
 768         felem_reduce(ftmp2, tmp);
 769     }
 770     felem_mul(tmp, ftmp2, ftmp);
 771     felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
 772     felem_square(tmp, ftmp);
 773     felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
 774     felem_mul(tmp, ftmp, in);
 775     felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
 776     for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
 777         felem_square(tmp, ftmp);
 778         felem_reduce(ftmp, tmp);
 779     }
 780     felem_mul(tmp, ftmp, ftmp3);
 781     felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
 782 }
 783
 784 /*
 785  * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
 786  * out to itself.
 787  */
 788 static void copy_conditional(felem out, const felem in, limb icopy)
 789 {
 790     unsigned i;
 791     /*
 792      * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
 793      */
 794     const limb copy = -icopy;
 795     for (i = 0; i < 4; ++i) {
 796         const limb tmp = copy & (in[i] ^ out[i]);
 797         out[i] ^= tmp;
 798     }
 799 }
 800
 801 /******************************************************************************/
 802 /*-
 803  *                       ELLIPTIC CURVE POINT OPERATIONS
 804  *
 805  * Points are represented in Jacobian projective coordinates:
 806  * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
 807  * or to the point at infinity if Z == 0.
 808  *
 809  */
 810
 811 /*-
 812  * Double an elliptic curve point:
 813  * (X', Y', Z') = 2 * (X, Y, Z), where
 814  * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 815  * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
 816  * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
 817  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
 818  * while x_out == y_in is not (maybe this works, but it's not tested).
 819  */
 820 static void
 821 point_double(felem x_out, felem y_out, felem z_out,
 822              const felem x_in, const felem y_in, const felem z_in)
 823 {
 824     widefelem tmp, tmp2;
 825     felem delta, gamma, beta, alpha, ftmp, ftmp2;
 826
 827     felem_assign(ftmp, x_in);
 828     felem_assign(ftmp2, x_in);
 829
 830     /* delta = z^2 */
 831     felem_square(tmp, z_in);
 832     felem_reduce(delta, tmp);
 833
 834     /* gamma = y^2 */
 835     felem_square(tmp, y_in);
 836     felem_reduce(gamma, tmp);
 837
 838     /* beta = x*gamma */
 839     felem_mul(tmp, x_in, gamma);
 840     felem_reduce(beta, tmp);
 841
 842     /* alpha = 3*(x-delta)*(x+delta) */
 843     felem_diff(ftmp, delta);
 844     /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
 845     felem_sum(ftmp2, delta);
 846     /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
 847     felem_scalar(ftmp2, 3);
 848     /* ftmp2[i] < 3 * 2^58 < 2^60 */
 849     felem_mul(tmp, ftmp, ftmp2);
 850     /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
 851     felem_reduce(alpha, tmp);
 852
 853     /* x' = alpha^2 - 8*beta */
 854     felem_square(tmp, alpha);
 855     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 856     felem_assign(ftmp, beta);
 857     felem_scalar(ftmp, 8);
 858     /* ftmp[i] < 8 * 2^57 = 2^60 */
 859     felem_diff_128_64(tmp, ftmp);
 860     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 861     felem_reduce(x_out, tmp);
 862
 863     /* z' = (y + z)^2 - gamma - delta */
 864     felem_sum(delta, gamma);
 865     /* delta[i] < 2^57 + 2^57 = 2^58 */
 866     felem_assign(ftmp, y_in);
 867     felem_sum(ftmp, z_in);
 868     /* ftmp[i] < 2^57 + 2^57 = 2^58 */
 869     felem_square(tmp, ftmp);
 870     /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
 871     felem_diff_128_64(tmp, delta);
 872     /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
 873     felem_reduce(z_out, tmp);
 874
 875     /* y' = alpha*(4*beta - x') - 8*gamma^2 */
 876     felem_scalar(beta, 4);
 877     /* beta[i] < 4 * 2^57 = 2^59 */
 878     felem_diff(beta, x_out);
 879     /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
 880     felem_mul(tmp, alpha, beta);
 881     /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
 882     felem_square(tmp2, gamma);
 883     /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
 884     widefelem_scalar(tmp2, 8);
 885     /* tmp2[i] < 8 * 2^116 = 2^119 */
 886     widefelem_diff(tmp, tmp2);
 887     /* tmp[i] < 2^119 + 2^120 < 2^121 */
 888     felem_reduce(y_out, tmp);
 889 }
 890
 891 /*-
 892  * Add two elliptic curve points:
 893  * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
 894  * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
 895  * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
 896  * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
 897  *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
 898  * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
 899  *
 900  * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
 901  */
 902
 903 /*
 904  * This function is not entirely constant-time: it includes a branch for
 905  * checking whether the two input points are equal, (while not equal to the
 906  * point at infinity). This case never happens during single point
 907  * multiplication, so there is no timing leak for ECDH or ECDSA signing.
 908  */
 909 static void point_add(felem x3, felem y3, felem z3,
 910                       const felem x1, const felem y1, const felem z1,
 911                       const int mixed, const felem x2, const felem y2,
 912                       const felem z2)
 913 {
 914     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
 915     widefelem tmp, tmp2;
 916     limb z1_is_zero, z2_is_zero, x_equal, y_equal;
 917     limb points_equal;
 918
 919     if (!mixed) {
 920         /* ftmp2 = z2^2 */
 921         felem_square(tmp, z2);
 922         felem_reduce(ftmp2, tmp);
 923
 924         /* ftmp4 = z2^3 */
 925         felem_mul(tmp, ftmp2, z2);
 926         felem_reduce(ftmp4, tmp);
 927
 928         /* ftmp4 = z2^3*y1 */
 929         felem_mul(tmp2, ftmp4, y1);
 930         felem_reduce(ftmp4, tmp2);
 931
 932         /* ftmp2 = z2^2*x1 */
 933         felem_mul(tmp2, ftmp2, x1);
 934         felem_reduce(ftmp2, tmp2);
 935     } else {
 936         /*
 937          * We'll assume z2 = 1 (special case z2 = 0 is handled later)
 938          */
 939
 940         /* ftmp4 = z2^3*y1 */
 941         felem_assign(ftmp4, y1);
 942
 943         /* ftmp2 = z2^2*x1 */
 944         felem_assign(ftmp2, x1);
 945     }
 946
 947     /* ftmp = z1^2 */
 948     felem_square(tmp, z1);
 949     felem_reduce(ftmp, tmp);
 950
 951     /* ftmp3 = z1^3 */
 952     felem_mul(tmp, ftmp, z1);
 953     felem_reduce(ftmp3, tmp);
 954
 955     /* tmp = z1^3*y2 */
 956     felem_mul(tmp, ftmp3, y2);
 957     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 958
 959     /* ftmp3 = z1^3*y2 - z2^3*y1 */
 960     felem_diff_128_64(tmp, ftmp4);
 961     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 962     felem_reduce(ftmp3, tmp);
 963
 964     /* tmp = z1^2*x2 */
 965     felem_mul(tmp, ftmp, x2);
 966     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 967
 968     /* ftmp = z1^2*x2 - z2^2*x1 */
 969     felem_diff_128_64(tmp, ftmp2);
 970     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 971     felem_reduce(ftmp, tmp);
 972
 973     /*
 974      * The formulae are incorrect if the points are equal, in affine coordinates
 975      * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
 976      * happens.
 977      *
 978      * We use bitwise operations to avoid potential side-channels introduced by
 979      * the short-circuiting behaviour of boolean operators.
 980      */
 981     x_equal = felem_is_zero(ftmp);
 982     y_equal = felem_is_zero(ftmp3);
 983     /*
 984      * The special case of either point being the point at infinity (z1 and/or
 985      * z2 are zero), is handled separately later on in this function, so we
 986      * avoid jumping to point_double here in those special cases.
 987      */
 988     z1_is_zero = felem_is_zero(z1);
 989     z2_is_zero = felem_is_zero(z2);
 990
 991     /*
 992      * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
 993      * specific implementation `felem_is_zero()` returns truth as `0x1`
 994      * (rather than `0xff..ff`).
 995      *
 996      * This implies that `~true` in this implementation becomes
 997      * `0xff..fe` (rather than `0x0`): for this reason, to be used in
 998      * the if expression, we mask out only the last bit in the next
 999      * line.
1000      */
1001     points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
1002
1003     if (points_equal) {
1004         /*
1005          * This is obviously not constant-time but, as mentioned before, this
1006          * case never happens during single point multiplication, so there is no
1007          * timing leak for ECDH or ECDSA signing.
1008          */
1009         point_double(x3, y3, z3, x1, y1, z1);
1010         return;
1011     }
1012
1013     /* ftmp5 = z1*z2 */
1014     if (!mixed) {
1015         felem_mul(tmp, z1, z2);
1016         felem_reduce(ftmp5, tmp);
1017     } else {
1018         /* special case z2 = 0 is handled later */
1019         felem_assign(ftmp5, z1);
1020     }
1021
1022     /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1023     felem_mul(tmp, ftmp, ftmp5);
1024     felem_reduce(z_out, tmp);
1025
1026     /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1027     felem_assign(ftmp5, ftmp);
1028     felem_square(tmp, ftmp);
1029     felem_reduce(ftmp, tmp);
1030
1031     /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1032     felem_mul(tmp, ftmp, ftmp5);
1033     felem_reduce(ftmp5, tmp);
1034
1035     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1036     felem_mul(tmp, ftmp2, ftmp);
1037     felem_reduce(ftmp2, tmp);
1038
1039     /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1040     felem_mul(tmp, ftmp4, ftmp5);
1041     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1042
1043     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1044     felem_square(tmp2, ftmp3);
1045     /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1046
1047     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1048     felem_diff_128_64(tmp2, ftmp5);
1049     /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1050
1051     /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1052     felem_assign(ftmp5, ftmp2);
1053     felem_scalar(ftmp5, 2);
1054     /* ftmp5[i] < 2 * 2^57 = 2^58 */
1055
1056     /*-
1057      * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1058      *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1059      */
1060     felem_diff_128_64(tmp2, ftmp5);
1061     /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1062     felem_reduce(x_out, tmp2);
1063
1064     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1065     felem_diff(ftmp2, x_out);
1066     /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1067
1068     /*
1069      * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1070      */
1071     felem_mul(tmp2, ftmp3, ftmp2);
1072     /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1073
1074     /*-
1075      * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1076      *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1077      */
1078     widefelem_diff(tmp2, tmp);
1079     /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1080     felem_reduce(y_out, tmp2);
1081
1082     /*
1083      * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1084      * the point at infinity, so we need to check for this separately
1085      */
1086
1087     /*
1088      * if point 1 is at infinity, copy point 2 to output, and vice versa
1089      */
1090     copy_conditional(x_out, x2, z1_is_zero);
1091     copy_conditional(x_out, x1, z2_is_zero);
1092     copy_conditional(y_out, y2, z1_is_zero);
1093     copy_conditional(y_out, y1, z2_is_zero);
1094     copy_conditional(z_out, z2, z1_is_zero);
1095     copy_conditional(z_out, z1, z2_is_zero);
1096     felem_assign(x3, x_out);
1097     felem_assign(y3, y_out);
1098     felem_assign(z3, z_out);
1099 }
1100
1101 /*
1102  * select_point selects the |idx|th point from a precomputation table and
1103  * copies it to out.
1104  * The pre_comp array argument should be size of |size| argument
1105  */
1106 static void select_point(const u64 idx, unsigned int size,
1107                          const felem pre_comp[][3], felem out[3])
1108 {
1109     unsigned i, j;
1110     limb *outlimbs = &out[0][0];
1111
1112     memset(out, 0, sizeof(*out) * 3);
1113     for (i = 0; i < size; i++) {
1114         const limb *inlimbs = &pre_comp[i][0][0];
1115         u64 mask = i ^ idx;
1116         mask |= mask >> 4;
1117         mask |= mask >> 2;
1118         mask |= mask >> 1;
1119         mask &= 1;
1120         mask--;
1121         for (j = 0; j < 4 * 3; j++)
1122             outlimbs[j] |= inlimbs[j] & mask;
1123     }
1124 }
1125
1126 /* get_bit returns the |i|th bit in |in| */
1127 static char get_bit(const felem_bytearray in, unsigned i)
1128 {
1129     if (i >= 224)
1130         return 0;
1131     return (in[i >> 3] >> (i & 7)) & 1;
1132 }
1133
1134 /*
1135  * Interleaved point multiplication using precomputed point multiples: The
1136  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1137  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1138  * generator, using certain (large) precomputed multiples in g_pre_comp.
1139  * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1140  */
1141 static void batch_mul(felem x_out, felem y_out, felem z_out,
1142                       const felem_bytearray scalars[],
1143                       const unsigned num_points, const u8 *g_scalar,
1144                       const int mixed, const felem pre_comp[][17][3],
1145                       const felem g_pre_comp[2][16][3])
1146 {
1147     int i, skip;
1148     unsigned num;
1149     unsigned gen_mul = (g_scalar != NULL);
1150     felem nq[3], tmp[4];
1151     u64 bits;
1152     u8 sign, digit;
1153
1154     /* set nq to the point at infinity */
1155     memset(nq, 0, sizeof(nq));
1156
1157     /*
1158      * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1159      * of the generator (two in each of the last 28 rounds) and additions of
1160      * other points multiples (every 5th round).
1161      */
1162     skip = 1;                   /* save two point operations in the first
1163                                  * round */
1164     for (i = (num_points ? 220 : 27); i >= 0; --i) {
1165         /* double */
1166         if (!skip)
1167             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1168
1169         /* add multiples of the generator */
1170         if (gen_mul && (i <= 27)) {
1171             /* first, look 28 bits upwards */
1172             bits = get_bit(g_scalar, i + 196) << 3;
1173             bits |= get_bit(g_scalar, i + 140) << 2;
1174             bits |= get_bit(g_scalar, i + 84) << 1;
1175             bits |= get_bit(g_scalar, i + 28);
1176             /* select the point to add, in constant time */
1177             select_point(bits, 16, g_pre_comp[1], tmp);
1178
1179             if (!skip) {
1180                 /* value 1 below is argument for "mixed" */
1181                 point_add(nq[0], nq[1], nq[2],
1182                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1183             } else {
1184                 memcpy(nq, tmp, 3 * sizeof(felem));
1185                 skip = 0;
1186             }
1187
1188             /* second, look at the current position */
1189             bits = get_bit(g_scalar, i + 168) << 3;
1190             bits |= get_bit(g_scalar, i + 112) << 2;
1191             bits |= get_bit(g_scalar, i + 56) << 1;
1192             bits |= get_bit(g_scalar, i);
1193             /* select the point to add, in constant time */
1194             select_point(bits, 16, g_pre_comp[0], tmp);
1195             point_add(nq[0], nq[1], nq[2],
1196                       nq[0], nq[1], nq[2],
1197                       1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1198         }
1199
1200         /* do other additions every 5 doublings */
1201         if (num_points && (i % 5 == 0)) {
1202             /* loop over all scalars */
1203             for (num = 0; num < num_points; ++num) {
1204                 bits = get_bit(scalars[num], i + 4) << 5;
1205                 bits |= get_bit(scalars[num], i + 3) << 4;
1206                 bits |= get_bit(scalars[num], i + 2) << 3;
1207                 bits |= get_bit(scalars[num], i + 1) << 2;
1208                 bits |= get_bit(scalars[num], i) << 1;
1209                 bits |= get_bit(scalars[num], i - 1);
1210                 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1211
1212                 /* select the point to add or subtract */
1213                 select_point(digit, 17, pre_comp[num], tmp);
1214                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1215                                             * point */
1216                 copy_conditional(tmp[1], tmp[3], sign);
1217
1218                 if (!skip) {
1219                     point_add(nq[0], nq[1], nq[2],
1220                               nq[0], nq[1], nq[2],
1221                               mixed, tmp[0], tmp[1], tmp[2]);
1222                 } else {
1223                     memcpy(nq, tmp, 3 * sizeof(felem));
1224                     skip = 0;
1225                 }
1226             }
1227         }
1228     }
1229     felem_assign(x_out, nq[0]);
1230     felem_assign(y_out, nq[1]);
1231     felem_assign(z_out, nq[2]);
1232 }
1233
1234 /******************************************************************************/
1235 /*
1236  * FUNCTIONS TO MANAGE PRECOMPUTATION
1237  */
1238
1239 static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1240 {
1241     NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1242
1243     if (!ret) {
1244         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1245         return ret;
1246     }
1247
1248     ret->references = 1;
1249
1250     ret->lock = CRYPTO_THREAD_lock_new();
1251     if (ret->lock == NULL) {
1252         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1253         OPENSSL_free(ret);
1254         return NULL;
1255     }
1256     return ret;
1257 }
1258
1259 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1260 {
1261     int i;
1262     if (p != NULL)
1263         CRYPTO_UP_REF(&p->references, &i, p->lock);
1264     return p;
1265 }
1266
1267 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1268 {
1269     int i;
1270
1271     if (p == NULL)
1272         return;
1273
1274     CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1275     REF_PRINT_COUNT("EC_nistp224", x);
1276     if (i > 0)
1277         return;
1278     REF_ASSERT_ISNT(i < 0);
1279
1280     CRYPTO_THREAD_lock_free(p->lock);
1281     OPENSSL_free(p);
1282 }
1283
1284 /******************************************************************************/
1285 /*
1286  * OPENSSL EC_METHOD FUNCTIONS
1287  */
1288
1289 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1290 {
1291     int ret;
1292     ret = ec_GFp_simple_group_init(group);
1293     group->a_is_minus3 = 1;
1294     return ret;
1295 }
1296
1297 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1298                                     const BIGNUM *a, const BIGNUM *b,
1299                                     BN_CTX *ctx)
1300 {
1301     int ret = 0;
1302     BIGNUM *curve_p, *curve_a, *curve_b;
1303 #ifndef FIPS_MODULE
1304     BN_CTX *new_ctx = NULL;
1305
1306     if (ctx == NULL)
1307         ctx = new_ctx = BN_CTX_new();
1308 #endif
1309     if (ctx == NULL)
1310         return 0;
1311
1312     BN_CTX_start(ctx);
1313     curve_p = BN_CTX_get(ctx);
1314     curve_a = BN_CTX_get(ctx);
1315     curve_b = BN_CTX_get(ctx);
1316     if (curve_b == NULL)
1317         goto err;
1318     BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1319     BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1320     BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1321     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1322         ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1323               EC_R_WRONG_CURVE_PARAMETERS);
1324         goto err;
1325     }
1326     group->field_mod_func = BN_nist_mod_224;
1327     ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1328  err:
1329     BN_CTX_end(ctx);
1330 #ifndef FIPS_MODULE
1331     BN_CTX_free(new_ctx);
1332 #endif
1333     return ret;
1334 }
1335
1336 /*
1337  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1338  * (X/Z^2, Y/Z^3)
1339  */
1340 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1341                                                  const EC_POINT *point,
1342                                                  BIGNUM *x, BIGNUM *y,
1343                                                  BN_CTX *ctx)
1344 {
1345     felem z1, z2, x_in, y_in, x_out, y_out;
1346     widefelem tmp;
1347
1348     if (EC_POINT_is_at_infinity(group, point)) {
1349         ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1350               EC_R_POINT_AT_INFINITY);
1351         return 0;
1352     }
1353     if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1354         (!BN_to_felem(z1, point->Z)))
1355         return 0;
1356     felem_inv(z2, z1);
1357     felem_square(tmp, z2);
1358     felem_reduce(z1, tmp);
1359     felem_mul(tmp, x_in, z1);
1360     felem_reduce(x_in, tmp);
1361     felem_contract(x_out, x_in);
1362     if (x != NULL) {
1363         if (!felem_to_BN(x, x_out)) {
1364             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1365                   ERR_R_BN_LIB);
1366             return 0;
1367         }
1368     }
1369     felem_mul(tmp, z1, z2);
1370     felem_reduce(z1, tmp);
1371     felem_mul(tmp, y_in, z1);
1372     felem_reduce(y_in, tmp);
1373     felem_contract(y_out, y_in);
1374     if (y != NULL) {
1375         if (!felem_to_BN(y, y_out)) {
1376             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1377                   ERR_R_BN_LIB);
1378             return 0;
1379         }
1380     }
1381     return 1;
1382 }
1383
1384 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1385                                felem tmp_felems[ /* num+1 */ ])
1386 {
1387     /*
1388      * Runs in constant time, unless an input is the point at infinity (which
1389      * normally shouldn't happen).
1390      */
1391     ec_GFp_nistp_points_make_affine_internal(num,
1392                                              points,
1393                                              sizeof(felem),
1394                                              tmp_felems,
1395                                              (void (*)(void *))felem_one,
1396                                              felem_is_zero_int,
1397                                              (void (*)(void *, const void *))
1398                                              felem_assign,
1399                                              (void (*)(void *, const void *))
1400                                              felem_square_reduce, (void (*)
1401                                                                    (void *,
1402                                                                     const void
1403                                                                     *,
1404                                                                     const void
1405                                                                     *))
1406                                              felem_mul_reduce,
1407                                              (void (*)(void *, const void *))
1408                                              felem_inv,
1409                                              (void (*)(void *, const void *))
1410                                              felem_contract);
1411 }
1412
1413 /*
1414  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1415  * values Result is stored in r (r can equal one of the inputs).
1416  */
1417 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1418                                const BIGNUM *scalar, size_t num,
1419                                const EC_POINT *points[],
1420                                const BIGNUM *scalars[], BN_CTX *ctx)
1421 {
1422     int ret = 0;
1423     int j;
1424     unsigned i;
1425     int mixed = 0;
1426     BIGNUM *x, *y, *z, *tmp_scalar;
1427     felem_bytearray g_secret;
1428     felem_bytearray *secrets = NULL;
1429     felem (*pre_comp)[17][3] = NULL;
1430     felem *tmp_felems = NULL;
1431     int num_bytes;
1432     int have_pre_comp = 0;
1433     size_t num_points = num;
1434     felem x_in, y_in, z_in, x_out, y_out, z_out;
1435     NISTP224_PRE_COMP *pre = NULL;
1436     const felem(*g_pre_comp)[16][3] = NULL;
1437     EC_POINT *generator = NULL;
1438     const EC_POINT *p = NULL;
1439     const BIGNUM *p_scalar = NULL;
1440
1441     BN_CTX_start(ctx);
1442     x = BN_CTX_get(ctx);
1443     y = BN_CTX_get(ctx);
1444     z = BN_CTX_get(ctx);
1445     tmp_scalar = BN_CTX_get(ctx);
1446     if (tmp_scalar == NULL)
1447         goto err;
1448
1449     if (scalar != NULL) {
1450         pre = group->pre_comp.nistp224;
1451         if (pre)
1452             /* we have precomputation, try to use it */
1453             g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1454         else
1455             /* try to use the standard precomputation */
1456             g_pre_comp = &gmul[0];
1457         generator = EC_POINT_new(group);
1458         if (generator == NULL)
1459             goto err;
1460         /* get the generator from precomputation */
1461         if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1462             !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1463             !felem_to_BN(z, g_pre_comp[0][1][2])) {
1464             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1465             goto err;
1466         }
1467         if (!ec_GFp_simple_set_Jprojective_coordinates_GFp(group, generator, x,
1468                                                            y, z, ctx))
1469             goto err;
1470         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1471             /* precomputation matches generator */
1472             have_pre_comp = 1;
1473         else
1474             /*
1475              * we don't have valid precomputation: treat the generator as a
1476              * random point
1477              */
1478             num_points = num_points + 1;
1479     }
1480
1481     if (num_points > 0) {
1482         if (num_points >= 3) {
1483             /*
1484              * unless we precompute multiples for just one or two points,
1485              * converting those into affine form is time well spent
1486              */
1487             mixed = 1;
1488         }
1489         secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1490         pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1491         if (mixed)
1492             tmp_felems =
1493                 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1494         if ((secrets == NULL) || (pre_comp == NULL)
1495             || (mixed && (tmp_felems == NULL))) {
1496             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1497             goto err;
1498         }
1499
1500         /*
1501          * we treat NULL scalars as 0, and NULL points as points at infinity,
1502          * i.e., they contribute nothing to the linear combination
1503          */
1504         for (i = 0; i < num_points; ++i) {
1505             if (i == num) {
1506                 /* the generator */
1507                 p = EC_GROUP_get0_generator(group);
1508                 p_scalar = scalar;
1509             } else {
1510                 /* the i^th point */
1511                 p = points[i];
1512                 p_scalar = scalars[i];
1513             }
1514             if ((p_scalar != NULL) && (p != NULL)) {
1515                 /* reduce scalar to 0 <= scalar < 2^224 */
1516                 if ((BN_num_bits(p_scalar) > 224)
1517                     || (BN_is_negative(p_scalar))) {
1518                     /*
1519                      * this is an unusual input, and we don't guarantee
1520                      * constant-timeness
1521                      */
1522                     if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1523                         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1524                         goto err;
1525                     }
1526                     num_bytes = BN_bn2lebinpad(tmp_scalar,
1527                                                secrets[i], sizeof(secrets[i]));
1528                 } else {
1529                     num_bytes = BN_bn2lebinpad(p_scalar,
1530                                                secrets[i], sizeof(secrets[i]));
1531                 }
1532                 if (num_bytes < 0) {
1533                     ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1534                     goto err;
1535                 }
1536                 /* precompute multiples */
1537                 if ((!BN_to_felem(x_out, p->X)) ||
1538                     (!BN_to_felem(y_out, p->Y)) ||
1539                     (!BN_to_felem(z_out, p->Z)))
1540                     goto err;
1541                 felem_assign(pre_comp[i][1][0], x_out);
1542                 felem_assign(pre_comp[i][1][1], y_out);
1543                 felem_assign(pre_comp[i][1][2], z_out);
1544                 for (j = 2; j <= 16; ++j) {
1545                     if (j & 1) {
1546                         point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1547                                   pre_comp[i][j][2], pre_comp[i][1][0],
1548                                   pre_comp[i][1][1], pre_comp[i][1][2], 0,
1549                                   pre_comp[i][j - 1][0],
1550                                   pre_comp[i][j - 1][1],
1551                                   pre_comp[i][j - 1][2]);
1552                     } else {
1553                         point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1554                                      pre_comp[i][j][2], pre_comp[i][j / 2][0],
1555                                      pre_comp[i][j / 2][1],
1556                                      pre_comp[i][j / 2][2]);
1557                     }
1558                 }
1559             }
1560         }
1561         if (mixed)
1562             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1563     }
1564
1565     /* the scalar for the generator */
1566     if ((scalar != NULL) && (have_pre_comp)) {
1567         memset(g_secret, 0, sizeof(g_secret));
1568         /* reduce scalar to 0 <= scalar < 2^224 */
1569         if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1570             /*
1571              * this is an unusual input, and we don't guarantee
1572              * constant-timeness
1573              */
1574             if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1575                 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1576                 goto err;
1577             }
1578             num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1579         } else {
1580             num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1581         }
1582         /* do the multiplication with generator precomputation */
1583         batch_mul(x_out, y_out, z_out,
1584                   (const felem_bytearray(*))secrets, num_points,
1585                   g_secret,
1586                   mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1587     } else {
1588         /* do the multiplication without generator precomputation */
1589         batch_mul(x_out, y_out, z_out,
1590                   (const felem_bytearray(*))secrets, num_points,
1591                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1592     }
1593     /* reduce the output to its unique minimal representation */
1594     felem_contract(x_in, x_out);
1595     felem_contract(y_in, y_out);
1596     felem_contract(z_in, z_out);
1597     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1598         (!felem_to_BN(z, z_in))) {
1599         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1600         goto err;
1601     }
1602     ret = ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1603
1604  err:
1605     BN_CTX_end(ctx);
1606     EC_POINT_free(generator);
1607     OPENSSL_free(secrets);
1608     OPENSSL_free(pre_comp);
1609     OPENSSL_free(tmp_felems);
1610     return ret;
1611 }
1612
1613 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1614 {
1615     int ret = 0;
1616     NISTP224_PRE_COMP *pre = NULL;
1617     int i, j;
1618     BIGNUM *x, *y;
1619     EC_POINT *generator = NULL;
1620     felem tmp_felems[32];
1621 #ifndef FIPS_MODULE
1622     BN_CTX *new_ctx = NULL;
1623 #endif
1624
1625     /* throw away old precomputation */
1626     EC_pre_comp_free(group);
1627
1628 #ifndef FIPS_MODULE
1629     if (ctx == NULL)
1630         ctx = new_ctx = BN_CTX_new();
1631 #endif
1632     if (ctx == NULL)
1633         return 0;
1634
1635     BN_CTX_start(ctx);
1636     x = BN_CTX_get(ctx);
1637     y = BN_CTX_get(ctx);
1638     if (y == NULL)
1639         goto err;
1640     /* get the generator */
1641     if (group->generator == NULL)
1642         goto err;
1643     generator = EC_POINT_new(group);
1644     if (generator == NULL)
1645         goto err;
1646     BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1647     BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1648     if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1649         goto err;
1650     if ((pre = nistp224_pre_comp_new()) == NULL)
1651         goto err;
1652     /*
1653      * if the generator is the standard one, use built-in precomputation
1654      */
1655     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1656         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1657         goto done;
1658     }
1659     if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1660         (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1661         (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1662         goto err;
1663     /*
1664      * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1665      * 2^140*G, 2^196*G for the second one
1666      */
1667     for (i = 1; i <= 8; i <<= 1) {
1668         point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1669                      pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1670                      pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1671         for (j = 0; j < 27; ++j) {
1672             point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1673                          pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1674                          pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1675         }
1676         if (i == 8)
1677             break;
1678         point_double(pre->g_pre_comp[0][2 * i][0],
1679                      pre->g_pre_comp[0][2 * i][1],
1680                      pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1681                      pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1682         for (j = 0; j < 27; ++j) {
1683             point_double(pre->g_pre_comp[0][2 * i][0],
1684                          pre->g_pre_comp[0][2 * i][1],
1685                          pre->g_pre_comp[0][2 * i][2],
1686                          pre->g_pre_comp[0][2 * i][0],
1687                          pre->g_pre_comp[0][2 * i][1],
1688                          pre->g_pre_comp[0][2 * i][2]);
1689         }
1690     }
1691     for (i = 0; i < 2; i++) {
1692         /* g_pre_comp[i][0] is the point at infinity */
1693         memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1694         /* the remaining multiples */
1695         /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1696         point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1697                   pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1698                   pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1699                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1700                   pre->g_pre_comp[i][2][2]);
1701         /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1702         point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1703                   pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1704                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1705                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1706                   pre->g_pre_comp[i][2][2]);
1707         /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1708         point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1709                   pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1710                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1711                   0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1712                   pre->g_pre_comp[i][4][2]);
1713         /*
1714          * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1715          */
1716         point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1717                   pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1718                   pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1719                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1720                   pre->g_pre_comp[i][2][2]);
1721         for (j = 1; j < 8; ++j) {
1722             /* odd multiples: add G resp. 2^28*G */
1723             point_add(pre->g_pre_comp[i][2 * j + 1][0],
1724                       pre->g_pre_comp[i][2 * j + 1][1],
1725                       pre->g_pre_comp[i][2 * j + 1][2],
1726                       pre->g_pre_comp[i][2 * j][0],
1727                       pre->g_pre_comp[i][2 * j][1],
1728                       pre->g_pre_comp[i][2 * j][2], 0,
1729                       pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1730                       pre->g_pre_comp[i][1][2]);
1731         }
1732     }
1733     make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1734
1735  done:
1736     SETPRECOMP(group, nistp224, pre);
1737     pre = NULL;
1738     ret = 1;
1739  err:
1740     BN_CTX_end(ctx);
1741     EC_POINT_free(generator);
1742 #ifndef FIPS_MODULE
1743     BN_CTX_free(new_ctx);
1744 #endif
1745     EC_nistp224_pre_comp_free(pre);
1746     return ret;
1747 }
1748
1749 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1750 {
1751     return HAVEPRECOMP(group, nistp224);
1752 }