crypto/ec/ecp_nistp224.c

   1 /* crypto/ec/ecp_nistp224.c */
   2 /*
   3  * Written by Emilia Kasper (Google) for the OpenSSL project.
   4  */
   5 /* Copyright 2011 Google Inc.
   6  *
   7  * Licensed under the Apache License, Version 2.0 (the "License");
   8  *
   9  * you may not use this file except in compliance with the License.
  10  * You may obtain a copy of the License at
  11  *
  12  *     http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  *  Unless required by applicable law or agreed to in writing, software
  15  *  distributed under the License is distributed on an "AS IS" BASIS,
  16  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  *  See the License for the specific language governing permissions and
  18  *  limitations under the License.
  19  */
  20
  21 /*
  22  * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
  23  *
  24  * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
  25  * and Adam Langley's public domain 64-bit C implementation of curve25519
  26  */
  27
  28 #include <openssl/opensslconf.h>
  29 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
  30
  31 # ifndef OPENSSL_SYS_VMS
  32 #  include <stdint.h>
  33 # else
  34 #  include <inttypes.h>
  35 # endif
  36
  37 # include <string.h>
  38 # include <openssl/err.h>
  39 # include "ec_lcl.h"
  40
  41 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
  42   /* even with gcc, the typedef won't work for 32-bit platforms */
  43 typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
  44                                  * platforms */
  45 # else
  46 #  error "Need GCC 3.1 or later to define type uint128_t"
  47 # endif
  48
  49 typedef uint8_t u8;
  50 typedef uint64_t u64;
  51 typedef int64_t s64;
  52
  53 /******************************************************************************/
  54 /*-
  55  * INTERNAL REPRESENTATION OF FIELD ELEMENTS
  56  *
  57  * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
  58  * using 64-bit coefficients called 'limbs',
  59  * and sometimes (for multiplication results) as
  60  * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
  61  * using 128-bit coefficients called 'widelimbs'.
  62  * A 4-limb representation is an 'felem';
  63  * a 7-widelimb representation is a 'widefelem'.
  64  * Even within felems, bits of adjacent limbs overlap, and we don't always
  65  * reduce the representations: we ensure that inputs to each felem
  66  * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
  67  * and fit into a 128-bit word without overflow. The coefficients are then
  68  * again partially reduced to obtain an felem satisfying a_i < 2^57.
  69  * We only reduce to the unique minimal representation at the end of the
  70  * computation.
  71  */
  72
  73 typedef uint64_t limb;
  74 typedef uint128_t widelimb;
  75
  76 typedef limb felem[4];
  77 typedef widelimb widefelem[7];
  78
  79 /*
  80  * Field element represented as a byte arrary. 28*8 = 224 bits is also the
  81  * group order size for the elliptic curve, and we also use this type for
  82  * scalars for point multiplication.
  83  */
  84 typedef u8 felem_bytearray[28];
  85
  86 static const felem_bytearray nistp224_curve_params[5] = {
  87     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
  88      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
  89      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
  90     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
  91      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
  92      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
  93     {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
  94      0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
  95      0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
  96     {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
  97      0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
  98      0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
  99     {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
 100      0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
 101      0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
 102 };
 103
 104 /*-
 105  * Precomputed multiples of the standard generator
 106  * Points are given in coordinates (X, Y, Z) where Z normally is 1
 107  * (0 for the point at infinity).
 108  * For each field element, slice a_0 is word 0, etc.
 109  *
 110  * The table has 2 * 16 elements, starting with the following:
 111  * index | bits    | point
 112  * ------+---------+------------------------------
 113  *     0 | 0 0 0 0 | 0G
 114  *     1 | 0 0 0 1 | 1G
 115  *     2 | 0 0 1 0 | 2^56G
 116  *     3 | 0 0 1 1 | (2^56 + 1)G
 117  *     4 | 0 1 0 0 | 2^112G
 118  *     5 | 0 1 0 1 | (2^112 + 1)G
 119  *     6 | 0 1 1 0 | (2^112 + 2^56)G
 120  *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
 121  *     8 | 1 0 0 0 | 2^168G
 122  *     9 | 1 0 0 1 | (2^168 + 1)G
 123  *    10 | 1 0 1 0 | (2^168 + 2^56)G
 124  *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
 125  *    12 | 1 1 0 0 | (2^168 + 2^112)G
 126  *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
 127  *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
 128  *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
 129  * followed by a copy of this with each element multiplied by 2^28.
 130  *
 131  * The reason for this is so that we can clock bits into four different
 132  * locations when doing simple scalar multiplies against the base point,
 133  * and then another four locations using the second 16 elements.
 134  */
 135 static const felem gmul[2][16][3] = { {{{0, 0, 0, 0},
 136                                         {0, 0, 0, 0},
 137                                         {0, 0, 0, 0}},
 138                                        {{0x3280d6115c1d21, 0xc1d356c2112234,
 139                                          0x7f321390b94a03, 0xb70e0cbd6bb4bf},
 140                                         {0xd5819985007e34, 0x75a05a07476444,
 141                                          0xfb4c22dfe6cd43, 0xbd376388b5f723},
 142                                         {1, 0, 0, 0}},
 143                                        {{0xfd9675666ebbe9, 0xbca7664d40ce5e,
 144                                          0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
 145                                         {0x29e0b892dc9c43, 0xece8608436e662,
 146                                          0xdc858f185310d0, 0x9812dd4eb8d321},
 147                                         {1, 0, 0, 0}},
 148                                        {{0x6d3e678d5d8eb8, 0x559eed1cb362f1,
 149                                          0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
 150                                         {0xf19f90ed50266d, 0xabf2b4bf65f9df,
 151                                          0x313865468fafec, 0x5cb379ba910a17},
 152                                         {1, 0, 0, 0}},
 153                                        {{0x0641966cab26e3, 0x91fb2991fab0a0,
 154                                          0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
 155                                         {0x7510407766af5d, 0x84d929610d5450,
 156                                          0x81d77aae82f706, 0x6916f6d4338c5b},
 157                                         {1, 0, 0, 0}},
 158                                        {{0xea95ac3b1f15c6, 0x086000905e82d4,
 159                                          0xdd323ae4d1c8b1, 0x932b56be7685a3},
 160                                         {0x9ef93dea25dbbf, 0x41665960f390f0,
 161                                          0xfdec76dbe2a8a7, 0x523e80f019062a},
 162                                         {1, 0, 0, 0}},
 163                                        {{0x822fdd26732c73, 0xa01c83531b5d0f,
 164                                          0x363f37347c1ba4, 0xc391b45c84725c},
 165                                         {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec,
 166                                          0xc393da7e222a7f, 0x1efb7890ede244},
 167                                         {1, 0, 0, 0}},
 168                                        {{0x4c9e90ca217da1, 0xd11beca79159bb,
 169                                          0xff8d33c2c98b7c, 0x2610b39409f849},
 170                                         {0x44d1352ac64da0, 0xcdbb7b2c46b4fb,
 171                                          0x966c079b753c89, 0xfe67e4e820b112},
 172                                         {1, 0, 0, 0}},
 173                                        {{0xe28cae2df5312d, 0xc71b61d16f5c6e,
 174                                          0x79b7619a3e7c4c, 0x05c73240899b47},
 175                                         {0x9f7f6382c73e3a, 0x18615165c56bda,
 176                                          0x641fab2116fd56, 0x72855882b08394},
 177                                         {1, 0, 0, 0}},
 178                                        {{0x0469182f161c09, 0x74a98ca8d00fb5,
 179                                          0xb89da93489a3e0, 0x41c98768fb0c1d},
 180                                         {0xe5ea05fb32da81, 0x3dce9ffbca6855,
 181                                          0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
 182                                         {1, 0, 0, 0}},
 183                                        {{0xdab22b2333e87f, 0x4430137a5dd2f6,
 184                                          0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
 185                                         {0x764a7df0c8fda5, 0x185ba5c3fa2044,
 186                                          0x9281d688bcbe50, 0xc40331df893881},
 187                                         {1, 0, 0, 0}},
 188                                        {{0xb89530796f0f60, 0xade92bd26909a3,
 189                                          0x1a0c83fb4884da, 0x1765bf22a5a984},
 190                                         {0x772a9ee75db09e, 0x23bc6c67cec16f,
 191                                          0x4c1edba8b14e2f, 0xe2a215d9611369},
 192                                         {1, 0, 0, 0}},
 193                                        {{0x571e509fb5efb3, 0xade88696410552,
 194                                          0xc8ae85fada74fe, 0x6c7e4be83bbde3},
 195                                         {0xff9f51160f4652, 0xb47ce2495a6539,
 196                                          0xa2946c53b582f4, 0x286d2db3ee9a60},
 197                                         {1, 0, 0, 0}},
 198                                        {{0x40bbd5081a44af, 0x0995183b13926c,
 199                                          0xbcefba6f47f6d0, 0x215619e9cc0057},
 200                                         {0x8bc94d3b0df45e, 0xf11c54a3694f6f,
 201                                          0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
 202                                         {1, 0, 0, 0}},
 203                                        {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8,
 204                                          0x1c29819435d2c6, 0xc813132f4c07e9},
 205                                         {0x2891425503b11f, 0x08781030579fea,
 206                                          0xf5426ba5cc9674, 0x1e28ebf18562bc},
 207                                         {1, 0, 0, 0}},
 208                                        {{0x9f31997cc864eb, 0x06cd91d28b5e4c,
 209                                          0xff17036691a973, 0xf1aef351497c58},
 210                                         {0xdd1f2d600564ff, 0xdead073b1402db,
 211                                          0x74a684435bd693, 0xeea7471f962558},
 212                                         {1, 0, 0, 0}}},
 213 {{{0, 0, 0, 0},
 214   {0, 0, 0, 0},
 215   {0, 0, 0, 0}},
 216  {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
 217   {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
 218   {1, 0, 0, 0}},
 219  {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
 220   {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
 221   {1, 0, 0, 0}},
 222  {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
 223   {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
 224   {1, 0, 0, 0}},
 225  {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
 226   {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
 227   {1, 0, 0, 0}},
 228  {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
 229   {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
 230   {1, 0, 0, 0}},
 231  {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
 232   {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
 233   {1, 0, 0, 0}},
 234  {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
 235   {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
 236   {1, 0, 0, 0}},
 237  {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
 238   {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
 239   {1, 0, 0, 0}},
 240  {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
 241   {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
 242   {1, 0, 0, 0}},
 243  {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
 244   {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
 245   {1, 0, 0, 0}},
 246  {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
 247   {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
 248   {1, 0, 0, 0}},
 249  {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
 250   {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
 251   {1, 0, 0, 0}},
 252  {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
 253   {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
 254   {1, 0, 0, 0}},
 255  {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
 256   {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
 257   {1, 0, 0, 0}},
 258  {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
 259   {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
 260   {1, 0, 0, 0}}}
 261 };
 262
 263 /* Precomputation for the group generator. */
 264 typedef struct {
 265     felem g_pre_comp[2][16][3];
 266     int references;
 267 } NISTP224_PRE_COMP;
 268
 269 const EC_METHOD *EC_GFp_nistp224_method(void)
 270 {
 271     static const EC_METHOD ret = {
 272         EC_FLAGS_DEFAULT_OCT,
 273         NID_X9_62_prime_field,
 274         ec_GFp_nistp224_group_init,
 275         ec_GFp_simple_group_finish,
 276         ec_GFp_simple_group_clear_finish,
 277         ec_GFp_nist_group_copy,
 278         ec_GFp_nistp224_group_set_curve,
 279         ec_GFp_simple_group_get_curve,
 280         ec_GFp_simple_group_get_degree,
 281         ec_GFp_simple_group_check_discriminant,
 282         ec_GFp_simple_point_init,
 283         ec_GFp_simple_point_finish,
 284         ec_GFp_simple_point_clear_finish,
 285         ec_GFp_simple_point_copy,
 286         ec_GFp_simple_point_set_to_infinity,
 287         ec_GFp_simple_set_Jprojective_coordinates_GFp,
 288         ec_GFp_simple_get_Jprojective_coordinates_GFp,
 289         ec_GFp_simple_point_set_affine_coordinates,
 290         ec_GFp_nistp224_point_get_affine_coordinates,
 291         0 /* point_set_compressed_coordinates */ ,
 292         0 /* point2oct */ ,
 293         0 /* oct2point */ ,
 294         ec_GFp_simple_add,
 295         ec_GFp_simple_dbl,
 296         ec_GFp_simple_invert,
 297         ec_GFp_simple_is_at_infinity,
 298         ec_GFp_simple_is_on_curve,
 299         ec_GFp_simple_cmp,
 300         ec_GFp_simple_make_affine,
 301         ec_GFp_simple_points_make_affine,
 302         ec_GFp_nistp224_points_mul,
 303         ec_GFp_nistp224_precompute_mult,
 304         ec_GFp_nistp224_have_precompute_mult,
 305         ec_GFp_nist_field_mul,
 306         ec_GFp_nist_field_sqr,
 307         0 /* field_div */ ,
 308         0 /* field_encode */ ,
 309         0 /* field_decode */ ,
 310         0                       /* field_set_to_one */
 311     };
 312
 313     return &ret;
 314 }
 315
 316 /*
 317  * Helper functions to convert field elements to/from internal representation
 318  */
 319 static void bin28_to_felem(felem out, const u8 in[28])
 320 {
 321     out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
 322     out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
 323     out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
 324     out[3] = (*((const uint64_t *)(in + 21))) & 0x00ffffffffffffff;
 325 }
 326
 327 static void felem_to_bin28(u8 out[28], const felem in)
 328 {
 329     unsigned i;
 330     for (i = 0; i < 7; ++i) {
 331         out[i] = in[0] >> (8 * i);
 332         out[i + 7] = in[1] >> (8 * i);
 333         out[i + 14] = in[2] >> (8 * i);
 334         out[i + 21] = in[3] >> (8 * i);
 335     }
 336 }
 337
 338 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
 339 static void flip_endian(u8 *out, const u8 *in, unsigned len)
 340 {
 341     unsigned i;
 342     for (i = 0; i < len; ++i)
 343         out[i] = in[len - 1 - i];
 344 }
 345
 346 /* From OpenSSL BIGNUM to internal representation */
 347 static int BN_to_felem(felem out, const BIGNUM *bn)
 348 {
 349     felem_bytearray b_in;
 350     felem_bytearray b_out;
 351     unsigned num_bytes;
 352
 353     /* BN_bn2bin eats leading zeroes */
 354     memset(b_out, 0, sizeof b_out);
 355     num_bytes = BN_num_bytes(bn);
 356     if (num_bytes > sizeof b_out) {
 357         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 358         return 0;
 359     }
 360     if (BN_is_negative(bn)) {
 361         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 362         return 0;
 363     }
 364     num_bytes = BN_bn2bin(bn, b_in);
 365     flip_endian(b_out, b_in, num_bytes);
 366     bin28_to_felem(out, b_out);
 367     return 1;
 368 }
 369
 370 /* From internal representation to OpenSSL BIGNUM */
 371 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
 372 {
 373     felem_bytearray b_in, b_out;
 374     felem_to_bin28(b_in, in);
 375     flip_endian(b_out, b_in, sizeof b_out);
 376     return BN_bin2bn(b_out, sizeof b_out, out);
 377 }
 378
 379 /******************************************************************************/
 380 /*-
 381  *                              FIELD OPERATIONS
 382  *
 383  * Field operations, using the internal representation of field elements.
 384  * NB! These operations are specific to our point multiplication and cannot be
 385  * expected to be correct in general - e.g., multiplication with a large scalar
 386  * will cause an overflow.
 387  *
 388  */
 389
 390 static void felem_one(felem out)
 391 {
 392     out[0] = 1;
 393     out[1] = 0;
 394     out[2] = 0;
 395     out[3] = 0;
 396 }
 397
 398 static void felem_assign(felem out, const felem in)
 399 {
 400     out[0] = in[0];
 401     out[1] = in[1];
 402     out[2] = in[2];
 403     out[3] = in[3];
 404 }
 405
 406 /* Sum two field elements: out += in */
 407 static void felem_sum(felem out, const felem in)
 408 {
 409     out[0] += in[0];
 410     out[1] += in[1];
 411     out[2] += in[2];
 412     out[3] += in[3];
 413 }
 414
 415 /* Get negative value: out = -in */
 416 /* Assumes in[i] < 2^57 */
 417 static void felem_neg(felem out, const felem in)
 418 {
 419     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
 420     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
 421     static const limb two58m42m2 = (((limb) 1) << 58) -
 422         (((limb) 1) << 42) - (((limb) 1) << 2);
 423
 424     /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
 425     out[0] = two58p2 - in[0];
 426     out[1] = two58m42m2 - in[1];
 427     out[2] = two58m2 - in[2];
 428     out[3] = two58m2 - in[3];
 429 }
 430
 431 /* Subtract field elements: out -= in */
 432 /* Assumes in[i] < 2^57 */
 433 static void felem_diff(felem out, const felem in)
 434 {
 435     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
 436     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
 437     static const limb two58m42m2 = (((limb) 1) << 58) -
 438         (((limb) 1) << 42) - (((limb) 1) << 2);
 439
 440     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 441     out[0] += two58p2;
 442     out[1] += two58m42m2;
 443     out[2] += two58m2;
 444     out[3] += two58m2;
 445
 446     out[0] -= in[0];
 447     out[1] -= in[1];
 448     out[2] -= in[2];
 449     out[3] -= in[3];
 450 }
 451
 452 /* Subtract in unreduced 128-bit mode: out -= in */
 453 /* Assumes in[i] < 2^119 */
 454 static void widefelem_diff(widefelem out, const widefelem in)
 455 {
 456     static const widelimb two120 = ((widelimb) 1) << 120;
 457     static const widelimb two120m64 = (((widelimb) 1) << 120) -
 458         (((widelimb) 1) << 64);
 459     static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
 460         (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
 461
 462     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 463     out[0] += two120;
 464     out[1] += two120m64;
 465     out[2] += two120m64;
 466     out[3] += two120;
 467     out[4] += two120m104m64;
 468     out[5] += two120m64;
 469     out[6] += two120m64;
 470
 471     out[0] -= in[0];
 472     out[1] -= in[1];
 473     out[2] -= in[2];
 474     out[3] -= in[3];
 475     out[4] -= in[4];
 476     out[5] -= in[5];
 477     out[6] -= in[6];
 478 }
 479
 480 /* Subtract in mixed mode: out128 -= in64 */
 481 /* in[i] < 2^63 */
 482 static void felem_diff_128_64(widefelem out, const felem in)
 483 {
 484     static const widelimb two64p8 = (((widelimb) 1) << 64) +
 485         (((widelimb) 1) << 8);
 486     static const widelimb two64m8 = (((widelimb) 1) << 64) -
 487         (((widelimb) 1) << 8);
 488     static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
 489         (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
 490
 491     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 492     out[0] += two64p8;
 493     out[1] += two64m48m8;
 494     out[2] += two64m8;
 495     out[3] += two64m8;
 496
 497     out[0] -= in[0];
 498     out[1] -= in[1];
 499     out[2] -= in[2];
 500     out[3] -= in[3];
 501 }
 502
 503 /*
 504  * Multiply a field element by a scalar: out = out * scalar The scalars we
 505  * actually use are small, so results fit without overflow
 506  */
 507 static void felem_scalar(felem out, const limb scalar)
 508 {
 509     out[0] *= scalar;
 510     out[1] *= scalar;
 511     out[2] *= scalar;
 512     out[3] *= scalar;
 513 }
 514
 515 /*
 516  * Multiply an unreduced field element by a scalar: out = out * scalar The
 517  * scalars we actually use are small, so results fit without overflow
 518  */
 519 static void widefelem_scalar(widefelem out, const widelimb scalar)
 520 {
 521     out[0] *= scalar;
 522     out[1] *= scalar;
 523     out[2] *= scalar;
 524     out[3] *= scalar;
 525     out[4] *= scalar;
 526     out[5] *= scalar;
 527     out[6] *= scalar;
 528 }
 529
 530 /* Square a field element: out = in^2 */
 531 static void felem_square(widefelem out, const felem in)
 532 {
 533     limb tmp0, tmp1, tmp2;
 534     tmp0 = 2 * in[0];
 535     tmp1 = 2 * in[1];
 536     tmp2 = 2 * in[2];
 537     out[0] = ((widelimb) in[0]) * in[0];
 538     out[1] = ((widelimb) in[0]) * tmp1;
 539     out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
 540     out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
 541     out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
 542     out[5] = ((widelimb) in[3]) * tmp2;
 543     out[6] = ((widelimb) in[3]) * in[3];
 544 }
 545
 546 /* Multiply two field elements: out = in1 * in2 */
 547 static void felem_mul(widefelem out, const felem in1, const felem in2)
 548 {
 549     out[0] = ((widelimb) in1[0]) * in2[0];
 550     out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
 551     out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
 552         ((widelimb) in1[2]) * in2[0];
 553     out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
 554         ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
 555     out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
 556         ((widelimb) in1[3]) * in2[1];
 557     out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
 558     out[6] = ((widelimb) in1[3]) * in2[3];
 559 }
 560
 561 /*-
 562  * Reduce seven 128-bit coefficients to four 64-bit coefficients.
 563  * Requires in[i] < 2^126,
 564  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
 565 static void felem_reduce(felem out, const widefelem in)
 566 {
 567     static const widelimb two127p15 = (((widelimb) 1) << 127) +
 568         (((widelimb) 1) << 15);
 569     static const widelimb two127m71 = (((widelimb) 1) << 127) -
 570         (((widelimb) 1) << 71);
 571     static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
 572         (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
 573     widelimb output[5];
 574
 575     /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
 576     output[0] = in[0] + two127p15;
 577     output[1] = in[1] + two127m71m55;
 578     output[2] = in[2] + two127m71;
 579     output[3] = in[3];
 580     output[4] = in[4];
 581
 582     /* Eliminate in[4], in[5], in[6] */
 583     output[4] += in[6] >> 16;
 584     output[3] += (in[6] & 0xffff) << 40;
 585     output[2] -= in[6];
 586
 587     output[3] += in[5] >> 16;
 588     output[2] += (in[5] & 0xffff) << 40;
 589     output[1] -= in[5];
 590
 591     output[2] += output[4] >> 16;
 592     output[1] += (output[4] & 0xffff) << 40;
 593     output[0] -= output[4];
 594
 595     /* Carry 2 -> 3 -> 4 */
 596     output[3] += output[2] >> 56;
 597     output[2] &= 0x00ffffffffffffff;
 598
 599     output[4] = output[3] >> 56;
 600     output[3] &= 0x00ffffffffffffff;
 601
 602     /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
 603
 604     /* Eliminate output[4] */
 605     output[2] += output[4] >> 16;
 606     /* output[2] < 2^56 + 2^56 = 2^57 */
 607     output[1] += (output[4] & 0xffff) << 40;
 608     output[0] -= output[4];
 609
 610     /* Carry 0 -> 1 -> 2 -> 3 */
 611     output[1] += output[0] >> 56;
 612     out[0] = output[0] & 0x00ffffffffffffff;
 613
 614     output[2] += output[1] >> 56;
 615     /* output[2] < 2^57 + 2^72 */
 616     out[1] = output[1] & 0x00ffffffffffffff;
 617     output[3] += output[2] >> 56;
 618     /* output[3] <= 2^56 + 2^16 */
 619     out[2] = output[2] & 0x00ffffffffffffff;
 620
 621         /*-
 622          * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
 623          * out[3] <= 2^56 + 2^16 (due to final carry),
 624          * so out < 2*p
 625          */
 626     out[3] = output[3];
 627 }
 628
 629 static void felem_square_reduce(felem out, const felem in)
 630 {
 631     widefelem tmp;
 632     felem_square(tmp, in);
 633     felem_reduce(out, tmp);
 634 }
 635
 636 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
 637 {
 638     widefelem tmp;
 639     felem_mul(tmp, in1, in2);
 640     felem_reduce(out, tmp);
 641 }
 642
 643 /*
 644  * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
 645  * call felem_reduce first)
 646  */
 647 static void felem_contract(felem out, const felem in)
 648 {
 649     static const int64_t two56 = ((limb) 1) << 56;
 650     /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
 651     /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
 652     int64_t tmp[4], a;
 653     tmp[0] = in[0];
 654     tmp[1] = in[1];
 655     tmp[2] = in[2];
 656     tmp[3] = in[3];
 657     /* Case 1: a = 1 iff in >= 2^224 */
 658     a = (in[3] >> 56);
 659     tmp[0] -= a;
 660     tmp[1] += a << 40;
 661     tmp[3] &= 0x00ffffffffffffff;
 662     /*
 663      * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
 664      * and the lower part is non-zero
 665      */
 666     a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
 667         (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
 668     a &= 0x00ffffffffffffff;
 669     /* turn a into an all-one mask (if a = 0) or an all-zero mask */
 670     a = (a - 1) >> 63;
 671     /* subtract 2^224 - 2^96 + 1 if a is all-one */
 672     tmp[3] &= a ^ 0xffffffffffffffff;
 673     tmp[2] &= a ^ 0xffffffffffffffff;
 674     tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
 675     tmp[0] -= 1 & a;
 676
 677     /*
 678      * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
 679      * non-zero, so we only need one step
 680      */
 681     a = tmp[0] >> 63;
 682     tmp[0] += two56 & a;
 683     tmp[1] -= 1 & a;
 684
 685     /* carry 1 -> 2 -> 3 */
 686     tmp[2] += tmp[1] >> 56;
 687     tmp[1] &= 0x00ffffffffffffff;
 688
 689     tmp[3] += tmp[2] >> 56;
 690     tmp[2] &= 0x00ffffffffffffff;
 691
 692     /* Now 0 <= out < p */
 693     out[0] = tmp[0];
 694     out[1] = tmp[1];
 695     out[2] = tmp[2];
 696     out[3] = tmp[3];
 697 }
 698
 699 /*
 700  * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
 701  * elements are reduced to in < 2^225, so we only need to check three cases:
 702  * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
 703  */
 704 static limb felem_is_zero(const felem in)
 705 {
 706     limb zero, two224m96p1, two225m97p2;
 707
 708     zero = in[0] | in[1] | in[2] | in[3];
 709     zero = (((int64_t) (zero) - 1) >> 63) & 1;
 710     two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
 711         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
 712     two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
 713     two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
 714         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
 715     two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
 716     return (zero | two224m96p1 | two225m97p2);
 717 }
 718
 719 static limb felem_is_zero_int(const felem in)
 720 {
 721     return (int)(felem_is_zero(in) & ((limb) 1));
 722 }
 723
 724 /* Invert a field element */
 725 /* Computation chain copied from djb's code */
 726 static void felem_inv(felem out, const felem in)
 727 {
 728     felem ftmp, ftmp2, ftmp3, ftmp4;
 729     widefelem tmp;
 730     unsigned i;
 731
 732     felem_square(tmp, in);
 733     felem_reduce(ftmp, tmp);    /* 2 */
 734     felem_mul(tmp, in, ftmp);
 735     felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
 736     felem_square(tmp, ftmp);
 737     felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
 738     felem_mul(tmp, in, ftmp);
 739     felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
 740     felem_square(tmp, ftmp);
 741     felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
 742     felem_square(tmp, ftmp2);
 743     felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
 744     felem_square(tmp, ftmp2);
 745     felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
 746     felem_mul(tmp, ftmp2, ftmp);
 747     felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
 748     felem_square(tmp, ftmp);
 749     felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
 750     for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
 751         felem_square(tmp, ftmp2);
 752         felem_reduce(ftmp2, tmp);
 753     }
 754     felem_mul(tmp, ftmp2, ftmp);
 755     felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
 756     felem_square(tmp, ftmp2);
 757     felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
 758     for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
 759         felem_square(tmp, ftmp3);
 760         felem_reduce(ftmp3, tmp);
 761     }
 762     felem_mul(tmp, ftmp3, ftmp2);
 763     felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
 764     felem_square(tmp, ftmp2);
 765     felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
 766     for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
 767         felem_square(tmp, ftmp3);
 768         felem_reduce(ftmp3, tmp);
 769     }
 770     felem_mul(tmp, ftmp3, ftmp2);
 771     felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
 772     felem_square(tmp, ftmp3);
 773     felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
 774     for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
 775         felem_square(tmp, ftmp4);
 776         felem_reduce(ftmp4, tmp);
 777     }
 778     felem_mul(tmp, ftmp3, ftmp4);
 779     felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
 780     felem_square(tmp, ftmp3);
 781     felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
 782     for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
 783         felem_square(tmp, ftmp4);
 784         felem_reduce(ftmp4, tmp);
 785     }
 786     felem_mul(tmp, ftmp2, ftmp4);
 787     felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
 788     for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
 789         felem_square(tmp, ftmp2);
 790         felem_reduce(ftmp2, tmp);
 791     }
 792     felem_mul(tmp, ftmp2, ftmp);
 793     felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
 794     felem_square(tmp, ftmp);
 795     felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
 796     felem_mul(tmp, ftmp, in);
 797     felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
 798     for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
 799         felem_square(tmp, ftmp);
 800         felem_reduce(ftmp, tmp);
 801     }
 802     felem_mul(tmp, ftmp, ftmp3);
 803     felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
 804 }
 805
 806 /*
 807  * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
 808  * out to itself.
 809  */
 810 static void copy_conditional(felem out, const felem in, limb icopy)
 811 {
 812     unsigned i;
 813     /*
 814      * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
 815      */
 816     const limb copy = -icopy;
 817     for (i = 0; i < 4; ++i) {
 818         const limb tmp = copy & (in[i] ^ out[i]);
 819         out[i] ^= tmp;
 820     }
 821 }
 822
 823 /******************************************************************************/
 824 /*-
 825  *                       ELLIPTIC CURVE POINT OPERATIONS
 826  *
 827  * Points are represented in Jacobian projective coordinates:
 828  * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
 829  * or to the point at infinity if Z == 0.
 830  *
 831  */
 832
 833 /*-
 834  * Double an elliptic curve point:
 835  * (X', Y', Z') = 2 * (X, Y, Z), where
 836  * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 837  * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
 838  * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
 839  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
 840  * while x_out == y_in is not (maybe this works, but it's not tested).
 841  */
 842 static void
 843 point_double(felem x_out, felem y_out, felem z_out,
 844              const felem x_in, const felem y_in, const felem z_in)
 845 {
 846     widefelem tmp, tmp2;
 847     felem delta, gamma, beta, alpha, ftmp, ftmp2;
 848
 849     felem_assign(ftmp, x_in);
 850     felem_assign(ftmp2, x_in);
 851
 852     /* delta = z^2 */
 853     felem_square(tmp, z_in);
 854     felem_reduce(delta, tmp);
 855
 856     /* gamma = y^2 */
 857     felem_square(tmp, y_in);
 858     felem_reduce(gamma, tmp);
 859
 860     /* beta = x*gamma */
 861     felem_mul(tmp, x_in, gamma);
 862     felem_reduce(beta, tmp);
 863
 864     /* alpha = 3*(x-delta)*(x+delta) */
 865     felem_diff(ftmp, delta);
 866     /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
 867     felem_sum(ftmp2, delta);
 868     /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
 869     felem_scalar(ftmp2, 3);
 870     /* ftmp2[i] < 3 * 2^58 < 2^60 */
 871     felem_mul(tmp, ftmp, ftmp2);
 872     /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
 873     felem_reduce(alpha, tmp);
 874
 875     /* x' = alpha^2 - 8*beta */
 876     felem_square(tmp, alpha);
 877     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 878     felem_assign(ftmp, beta);
 879     felem_scalar(ftmp, 8);
 880     /* ftmp[i] < 8 * 2^57 = 2^60 */
 881     felem_diff_128_64(tmp, ftmp);
 882     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 883     felem_reduce(x_out, tmp);
 884
 885     /* z' = (y + z)^2 - gamma - delta */
 886     felem_sum(delta, gamma);
 887     /* delta[i] < 2^57 + 2^57 = 2^58 */
 888     felem_assign(ftmp, y_in);
 889     felem_sum(ftmp, z_in);
 890     /* ftmp[i] < 2^57 + 2^57 = 2^58 */
 891     felem_square(tmp, ftmp);
 892     /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
 893     felem_diff_128_64(tmp, delta);
 894     /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
 895     felem_reduce(z_out, tmp);
 896
 897     /* y' = alpha*(4*beta - x') - 8*gamma^2 */
 898     felem_scalar(beta, 4);
 899     /* beta[i] < 4 * 2^57 = 2^59 */
 900     felem_diff(beta, x_out);
 901     /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
 902     felem_mul(tmp, alpha, beta);
 903     /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
 904     felem_square(tmp2, gamma);
 905     /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
 906     widefelem_scalar(tmp2, 8);
 907     /* tmp2[i] < 8 * 2^116 = 2^119 */
 908     widefelem_diff(tmp, tmp2);
 909     /* tmp[i] < 2^119 + 2^120 < 2^121 */
 910     felem_reduce(y_out, tmp);
 911 }
 912
 913 /*-
 914  * Add two elliptic curve points:
 915  * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
 916  * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
 917  * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
 918  * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
 919  *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
 920  * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
 921  *
 922  * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
 923  */
 924
 925 /*
 926  * This function is not entirely constant-time: it includes a branch for
 927  * checking whether the two input points are equal, (while not equal to the
 928  * point at infinity). This case never happens during single point
 929  * multiplication, so there is no timing leak for ECDH or ECDSA signing.
 930  */
 931 static void point_add(felem x3, felem y3, felem z3,
 932                       const felem x1, const felem y1, const felem z1,
 933                       const int mixed, const felem x2, const felem y2,
 934                       const felem z2)
 935 {
 936     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
 937     widefelem tmp, tmp2;
 938     limb z1_is_zero, z2_is_zero, x_equal, y_equal;
 939
 940     if (!mixed) {
 941         /* ftmp2 = z2^2 */
 942         felem_square(tmp, z2);
 943         felem_reduce(ftmp2, tmp);
 944
 945         /* ftmp4 = z2^3 */
 946         felem_mul(tmp, ftmp2, z2);
 947         felem_reduce(ftmp4, tmp);
 948
 949         /* ftmp4 = z2^3*y1 */
 950         felem_mul(tmp2, ftmp4, y1);
 951         felem_reduce(ftmp4, tmp2);
 952
 953         /* ftmp2 = z2^2*x1 */
 954         felem_mul(tmp2, ftmp2, x1);
 955         felem_reduce(ftmp2, tmp2);
 956     } else {
 957         /*
 958          * We'll assume z2 = 1 (special case z2 = 0 is handled later)
 959          */
 960
 961         /* ftmp4 = z2^3*y1 */
 962         felem_assign(ftmp4, y1);
 963
 964         /* ftmp2 = z2^2*x1 */
 965         felem_assign(ftmp2, x1);
 966     }
 967
 968     /* ftmp = z1^2 */
 969     felem_square(tmp, z1);
 970     felem_reduce(ftmp, tmp);
 971
 972     /* ftmp3 = z1^3 */
 973     felem_mul(tmp, ftmp, z1);
 974     felem_reduce(ftmp3, tmp);
 975
 976     /* tmp = z1^3*y2 */
 977     felem_mul(tmp, ftmp3, y2);
 978     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 979
 980     /* ftmp3 = z1^3*y2 - z2^3*y1 */
 981     felem_diff_128_64(tmp, ftmp4);
 982     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 983     felem_reduce(ftmp3, tmp);
 984
 985     /* tmp = z1^2*x2 */
 986     felem_mul(tmp, ftmp, x2);
 987     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 988
 989     /* ftmp = z1^2*x2 - z2^2*x1 */
 990     felem_diff_128_64(tmp, ftmp2);
 991     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 992     felem_reduce(ftmp, tmp);
 993
 994     /*
 995      * the formulae are incorrect if the points are equal so we check for
 996      * this and do doubling if this happens
 997      */
 998     x_equal = felem_is_zero(ftmp);
 999     y_equal = felem_is_zero(ftmp3);
1000     z1_is_zero = felem_is_zero(z1);
1001     z2_is_zero = felem_is_zero(z2);
1002     /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
1003     if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1004         point_double(x3, y3, z3, x1, y1, z1);
1005         return;
1006     }
1007
1008     /* ftmp5 = z1*z2 */
1009     if (!mixed) {
1010         felem_mul(tmp, z1, z2);
1011         felem_reduce(ftmp5, tmp);
1012     } else {
1013         /* special case z2 = 0 is handled later */
1014         felem_assign(ftmp5, z1);
1015     }
1016
1017     /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1018     felem_mul(tmp, ftmp, ftmp5);
1019     felem_reduce(z_out, tmp);
1020
1021     /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1022     felem_assign(ftmp5, ftmp);
1023     felem_square(tmp, ftmp);
1024     felem_reduce(ftmp, tmp);
1025
1026     /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1027     felem_mul(tmp, ftmp, ftmp5);
1028     felem_reduce(ftmp5, tmp);
1029
1030     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1031     felem_mul(tmp, ftmp2, ftmp);
1032     felem_reduce(ftmp2, tmp);
1033
1034     /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1035     felem_mul(tmp, ftmp4, ftmp5);
1036     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1037
1038     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1039     felem_square(tmp2, ftmp3);
1040     /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1041
1042     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1043     felem_diff_128_64(tmp2, ftmp5);
1044     /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1045
1046     /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1047     felem_assign(ftmp5, ftmp2);
1048     felem_scalar(ftmp5, 2);
1049     /* ftmp5[i] < 2 * 2^57 = 2^58 */
1050
1051         /*-
1052          * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1053          *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1054          */
1055     felem_diff_128_64(tmp2, ftmp5);
1056     /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1057     felem_reduce(x_out, tmp2);
1058
1059     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1060     felem_diff(ftmp2, x_out);
1061     /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1062
1063     /*
1064      * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1065      */
1066     felem_mul(tmp2, ftmp3, ftmp2);
1067     /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1068
1069         /*-
1070          * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1071          *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1072          */
1073     widefelem_diff(tmp2, tmp);
1074     /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1075     felem_reduce(y_out, tmp2);
1076
1077     /*
1078      * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1079      * the point at infinity, so we need to check for this separately
1080      */
1081
1082     /*
1083      * if point 1 is at infinity, copy point 2 to output, and vice versa
1084      */
1085     copy_conditional(x_out, x2, z1_is_zero);
1086     copy_conditional(x_out, x1, z2_is_zero);
1087     copy_conditional(y_out, y2, z1_is_zero);
1088     copy_conditional(y_out, y1, z2_is_zero);
1089     copy_conditional(z_out, z2, z1_is_zero);
1090     copy_conditional(z_out, z1, z2_is_zero);
1091     felem_assign(x3, x_out);
1092     felem_assign(y3, y_out);
1093     felem_assign(z3, z_out);
1094 }
1095
1096 /*
1097  * select_point selects the |idx|th point from a precomputation table and
1098  * copies it to out.
1099  * The pre_comp array argument should be size of |size| argument
1100  */
1101 static void select_point(const u64 idx, unsigned int size,
1102                          const felem pre_comp[][3], felem out[3])
1103 {
1104     unsigned i, j;
1105     limb *outlimbs = &out[0][0];
1106     memset(outlimbs, 0, 3 * sizeof(felem));
1107
1108     for (i = 0; i < size; i++) {
1109         const limb *inlimbs = &pre_comp[i][0][0];
1110         u64 mask = i ^ idx;
1111         mask |= mask >> 4;
1112         mask |= mask >> 2;
1113         mask |= mask >> 1;
1114         mask &= 1;
1115         mask--;
1116         for (j = 0; j < 4 * 3; j++)
1117             outlimbs[j] |= inlimbs[j] & mask;
1118     }
1119 }
1120
1121 /* get_bit returns the |i|th bit in |in| */
1122 static char get_bit(const felem_bytearray in, unsigned i)
1123 {
1124     if (i >= 224)
1125         return 0;
1126     return (in[i >> 3] >> (i & 7)) & 1;
1127 }
1128
1129 /*
1130  * Interleaved point multiplication using precomputed point multiples: The
1131  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1132  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1133  * generator, using certain (large) precomputed multiples in g_pre_comp.
1134  * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1135  */
1136 static void batch_mul(felem x_out, felem y_out, felem z_out,
1137                       const felem_bytearray scalars[],
1138                       const unsigned num_points, const u8 *g_scalar,
1139                       const int mixed, const felem pre_comp[][17][3],
1140                       const felem g_pre_comp[2][16][3])
1141 {
1142     int i, skip;
1143     unsigned num;
1144     unsigned gen_mul = (g_scalar != NULL);
1145     felem nq[3], tmp[4];
1146     u64 bits;
1147     u8 sign, digit;
1148
1149     /* set nq to the point at infinity */
1150     memset(nq, 0, 3 * sizeof(felem));
1151
1152     /*
1153      * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1154      * of the generator (two in each of the last 28 rounds) and additions of
1155      * other points multiples (every 5th round).
1156      */
1157     skip = 1;                   /* save two point operations in the first
1158                                  * round */
1159     for (i = (num_points ? 220 : 27); i >= 0; --i) {
1160         /* double */
1161         if (!skip)
1162             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1163
1164         /* add multiples of the generator */
1165         if (gen_mul && (i <= 27)) {
1166             /* first, look 28 bits upwards */
1167             bits = get_bit(g_scalar, i + 196) << 3;
1168             bits |= get_bit(g_scalar, i + 140) << 2;
1169             bits |= get_bit(g_scalar, i + 84) << 1;
1170             bits |= get_bit(g_scalar, i + 28);
1171             /* select the point to add, in constant time */
1172             select_point(bits, 16, g_pre_comp[1], tmp);
1173
1174             if (!skip) {
1175                 /* value 1 below is argument for "mixed" */
1176                 point_add(nq[0], nq[1], nq[2],
1177                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1178             } else {
1179                 memcpy(nq, tmp, 3 * sizeof(felem));
1180                 skip = 0;
1181             }
1182
1183             /* second, look at the current position */
1184             bits = get_bit(g_scalar, i + 168) << 3;
1185             bits |= get_bit(g_scalar, i + 112) << 2;
1186             bits |= get_bit(g_scalar, i + 56) << 1;
1187             bits |= get_bit(g_scalar, i);
1188             /* select the point to add, in constant time */
1189             select_point(bits, 16, g_pre_comp[0], tmp);
1190             point_add(nq[0], nq[1], nq[2],
1191                       nq[0], nq[1], nq[2],
1192                       1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1193         }
1194
1195         /* do other additions every 5 doublings */
1196         if (num_points && (i % 5 == 0)) {
1197             /* loop over all scalars */
1198             for (num = 0; num < num_points; ++num) {
1199                 bits = get_bit(scalars[num], i + 4) << 5;
1200                 bits |= get_bit(scalars[num], i + 3) << 4;
1201                 bits |= get_bit(scalars[num], i + 2) << 3;
1202                 bits |= get_bit(scalars[num], i + 1) << 2;
1203                 bits |= get_bit(scalars[num], i) << 1;
1204                 bits |= get_bit(scalars[num], i - 1);
1205                 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1206
1207                 /* select the point to add or subtract */
1208                 select_point(digit, 17, pre_comp[num], tmp);
1209                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1210                                             * point */
1211                 copy_conditional(tmp[1], tmp[3], sign);
1212
1213                 if (!skip) {
1214                     point_add(nq[0], nq[1], nq[2],
1215                               nq[0], nq[1], nq[2],
1216                               mixed, tmp[0], tmp[1], tmp[2]);
1217                 } else {
1218                     memcpy(nq, tmp, 3 * sizeof(felem));
1219                     skip = 0;
1220                 }
1221             }
1222         }
1223     }
1224     felem_assign(x_out, nq[0]);
1225     felem_assign(y_out, nq[1]);
1226     felem_assign(z_out, nq[2]);
1227 }
1228
1229 /******************************************************************************/
1230 /*
1231  * FUNCTIONS TO MANAGE PRECOMPUTATION
1232  */
1233
1234 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1235 {
1236     NISTP224_PRE_COMP *ret = NULL;
1237     ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1238     if (!ret) {
1239         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1240         return ret;
1241     }
1242     memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1243     ret->references = 1;
1244     return ret;
1245 }
1246
1247 static void *nistp224_pre_comp_dup(void *src_)
1248 {
1249     NISTP224_PRE_COMP *src = src_;
1250
1251     /* no need to actually copy, these objects never change! */
1252     CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1253
1254     return src_;
1255 }
1256
1257 static void nistp224_pre_comp_free(void *pre_)
1258 {
1259     int i;
1260     NISTP224_PRE_COMP *pre = pre_;
1261
1262     if (!pre)
1263         return;
1264
1265     i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1266     if (i > 0)
1267         return;
1268
1269     OPENSSL_free(pre);
1270 }
1271
1272 static void nistp224_pre_comp_clear_free(void *pre_)
1273 {
1274     int i;
1275     NISTP224_PRE_COMP *pre = pre_;
1276
1277     if (!pre)
1278         return;
1279
1280     i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1281     if (i > 0)
1282         return;
1283
1284     OPENSSL_cleanse(pre, sizeof *pre);
1285     OPENSSL_free(pre);
1286 }
1287
1288 /******************************************************************************/
1289 /*
1290  * OPENSSL EC_METHOD FUNCTIONS
1291  */
1292
1293 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1294 {
1295     int ret;
1296     ret = ec_GFp_simple_group_init(group);
1297     group->a_is_minus3 = 1;
1298     return ret;
1299 }
1300
1301 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1302                                     const BIGNUM *a, const BIGNUM *b,
1303                                     BN_CTX *ctx)
1304 {
1305     int ret = 0;
1306     BN_CTX *new_ctx = NULL;
1307     BIGNUM *curve_p, *curve_a, *curve_b;
1308
1309     if (ctx == NULL)
1310         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1311             return 0;
1312     BN_CTX_start(ctx);
1313     if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1314         ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1315         ((curve_b = BN_CTX_get(ctx)) == NULL))
1316         goto err;
1317     BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1318     BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1319     BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1320     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1321         ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1322               EC_R_WRONG_CURVE_PARAMETERS);
1323         goto err;
1324     }
1325     group->field_mod_func = BN_nist_mod_224;
1326     ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1327  err:
1328     BN_CTX_end(ctx);
1329     if (new_ctx != NULL)
1330         BN_CTX_free(new_ctx);
1331     return ret;
1332 }
1333
1334 /*
1335  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1336  * (X/Z^2, Y/Z^3)
1337  */
1338 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1339                                                  const EC_POINT *point,
1340                                                  BIGNUM *x, BIGNUM *y,
1341                                                  BN_CTX *ctx)
1342 {
1343     felem z1, z2, x_in, y_in, x_out, y_out;
1344     widefelem tmp;
1345
1346     if (EC_POINT_is_at_infinity(group, point)) {
1347         ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1348               EC_R_POINT_AT_INFINITY);
1349         return 0;
1350     }
1351     if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1352         (!BN_to_felem(z1, &point->Z)))
1353         return 0;
1354     felem_inv(z2, z1);
1355     felem_square(tmp, z2);
1356     felem_reduce(z1, tmp);
1357     felem_mul(tmp, x_in, z1);
1358     felem_reduce(x_in, tmp);
1359     felem_contract(x_out, x_in);
1360     if (x != NULL) {
1361         if (!felem_to_BN(x, x_out)) {
1362             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1363                   ERR_R_BN_LIB);
1364             return 0;
1365         }
1366     }
1367     felem_mul(tmp, z1, z2);
1368     felem_reduce(z1, tmp);
1369     felem_mul(tmp, y_in, z1);
1370     felem_reduce(y_in, tmp);
1371     felem_contract(y_out, y_in);
1372     if (y != NULL) {
1373         if (!felem_to_BN(y, y_out)) {
1374             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1375                   ERR_R_BN_LIB);
1376             return 0;
1377         }
1378     }
1379     return 1;
1380 }
1381
1382 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1383                                felem tmp_felems[ /* num+1 */ ])
1384 {
1385     /*
1386      * Runs in constant time, unless an input is the point at infinity (which
1387      * normally shouldn't happen).
1388      */
1389     ec_GFp_nistp_points_make_affine_internal(num,
1390                                              points,
1391                                              sizeof(felem),
1392                                              tmp_felems,
1393                                              (void (*)(void *))felem_one,
1394                                              (int (*)(const void *))
1395                                              felem_is_zero_int,
1396                                              (void (*)(void *, const void *))
1397                                              felem_assign,
1398                                              (void (*)(void *, const void *))
1399                                              felem_square_reduce, (void (*)
1400                                                                    (void *,
1401                                                                     const void
1402                                                                     *,
1403                                                                     const void
1404                                                                     *))
1405                                              felem_mul_reduce,
1406                                              (void (*)(void *, const void *))
1407                                              felem_inv,
1408                                              (void (*)(void *, const void *))
1409                                              felem_contract);
1410 }
1411
1412 /*
1413  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1414  * values Result is stored in r (r can equal one of the inputs).
1415  */
1416 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1417                                const BIGNUM *scalar, size_t num,
1418                                const EC_POINT *points[],
1419                                const BIGNUM *scalars[], BN_CTX *ctx)
1420 {
1421     int ret = 0;
1422     int j;
1423     unsigned i;
1424     int mixed = 0;
1425     BN_CTX *new_ctx = NULL;
1426     BIGNUM *x, *y, *z, *tmp_scalar;
1427     felem_bytearray g_secret;
1428     felem_bytearray *secrets = NULL;
1429     felem(*pre_comp)[17][3] = NULL;
1430     felem *tmp_felems = NULL;
1431     felem_bytearray tmp;
1432     unsigned num_bytes;
1433     int have_pre_comp = 0;
1434     size_t num_points = num;
1435     felem x_in, y_in, z_in, x_out, y_out, z_out;
1436     NISTP224_PRE_COMP *pre = NULL;
1437     const felem(*g_pre_comp)[16][3] = NULL;
1438     EC_POINT *generator = NULL;
1439     const EC_POINT *p = NULL;
1440     const BIGNUM *p_scalar = NULL;
1441
1442     if (ctx == NULL)
1443         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1444             return 0;
1445     BN_CTX_start(ctx);
1446     if (((x = BN_CTX_get(ctx)) == NULL) ||
1447         ((y = BN_CTX_get(ctx)) == NULL) ||
1448         ((z = BN_CTX_get(ctx)) == NULL) ||
1449         ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1450         goto err;
1451
1452     if (scalar != NULL) {
1453         pre = EC_EX_DATA_get_data(group->extra_data,
1454                                   nistp224_pre_comp_dup,
1455                                   nistp224_pre_comp_free,
1456                                   nistp224_pre_comp_clear_free);
1457         if (pre)
1458             /* we have precomputation, try to use it */
1459             g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1460         else
1461             /* try to use the standard precomputation */
1462             g_pre_comp = &gmul[0];
1463         generator = EC_POINT_new(group);
1464         if (generator == NULL)
1465             goto err;
1466         /* get the generator from precomputation */
1467         if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1468             !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1469             !felem_to_BN(z, g_pre_comp[0][1][2])) {
1470             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1471             goto err;
1472         }
1473         if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1474                                                       generator, x, y, z,
1475                                                       ctx))
1476             goto err;
1477         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1478             /* precomputation matches generator */
1479             have_pre_comp = 1;
1480         else
1481             /*
1482              * we don't have valid precomputation: treat the generator as a
1483              * random point
1484              */
1485             num_points = num_points + 1;
1486     }
1487
1488     if (num_points > 0) {
1489         if (num_points >= 3) {
1490             /*
1491              * unless we precompute multiples for just one or two points,
1492              * converting those into affine form is time well spent
1493              */
1494             mixed = 1;
1495         }
1496         secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1497         pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1498         if (mixed)
1499             tmp_felems =
1500                 OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1501         if ((secrets == NULL) || (pre_comp == NULL)
1502             || (mixed && (tmp_felems == NULL))) {
1503             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1504             goto err;
1505         }
1506
1507         /*
1508          * we treat NULL scalars as 0, and NULL points as points at infinity,
1509          * i.e., they contribute nothing to the linear combination
1510          */
1511         memset(secrets, 0, num_points * sizeof(felem_bytearray));
1512         memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1513         for (i = 0; i < num_points; ++i) {
1514             if (i == num)
1515                 /* the generator */
1516             {
1517                 p = EC_GROUP_get0_generator(group);
1518                 p_scalar = scalar;
1519             } else
1520                 /* the i^th point */
1521             {
1522                 p = points[i];
1523                 p_scalar = scalars[i];
1524             }
1525             if ((p_scalar != NULL) && (p != NULL)) {
1526                 /* reduce scalar to 0 <= scalar < 2^224 */
1527                 if ((BN_num_bits(p_scalar) > 224)
1528                     || (BN_is_negative(p_scalar))) {
1529                     /*
1530                      * this is an unusual input, and we don't guarantee
1531                      * constant-timeness
1532                      */
1533                     if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1534                         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1535                         goto err;
1536                     }
1537                     num_bytes = BN_bn2bin(tmp_scalar, tmp);
1538                 } else
1539                     num_bytes = BN_bn2bin(p_scalar, tmp);
1540                 flip_endian(secrets[i], tmp, num_bytes);
1541                 /* precompute multiples */
1542                 if ((!BN_to_felem(x_out, &p->X)) ||
1543                     (!BN_to_felem(y_out, &p->Y)) ||
1544                     (!BN_to_felem(z_out, &p->Z)))
1545                     goto err;
1546                 felem_assign(pre_comp[i][1][0], x_out);
1547                 felem_assign(pre_comp[i][1][1], y_out);
1548                 felem_assign(pre_comp[i][1][2], z_out);
1549                 for (j = 2; j <= 16; ++j) {
1550                     if (j & 1) {
1551                         point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1552                                   pre_comp[i][j][2], pre_comp[i][1][0],
1553                                   pre_comp[i][1][1], pre_comp[i][1][2], 0,
1554                                   pre_comp[i][j - 1][0],
1555                                   pre_comp[i][j - 1][1],
1556                                   pre_comp[i][j - 1][2]);
1557                     } else {
1558                         point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1559                                      pre_comp[i][j][2], pre_comp[i][j / 2][0],
1560                                      pre_comp[i][j / 2][1],
1561                                      pre_comp[i][j / 2][2]);
1562                     }
1563                 }
1564             }
1565         }
1566         if (mixed)
1567             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1568     }
1569
1570     /* the scalar for the generator */
1571     if ((scalar != NULL) && (have_pre_comp)) {
1572         memset(g_secret, 0, sizeof g_secret);
1573         /* reduce scalar to 0 <= scalar < 2^224 */
1574         if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1575             /*
1576              * this is an unusual input, and we don't guarantee
1577              * constant-timeness
1578              */
1579             if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1580                 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1581                 goto err;
1582             }
1583             num_bytes = BN_bn2bin(tmp_scalar, tmp);
1584         } else
1585             num_bytes = BN_bn2bin(scalar, tmp);
1586         flip_endian(g_secret, tmp, num_bytes);
1587         /* do the multiplication with generator precomputation */
1588         batch_mul(x_out, y_out, z_out,
1589                   (const felem_bytearray(*))secrets, num_points,
1590                   g_secret,
1591                   mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1592     } else
1593         /* do the multiplication without generator precomputation */
1594         batch_mul(x_out, y_out, z_out,
1595                   (const felem_bytearray(*))secrets, num_points,
1596                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1597     /* reduce the output to its unique minimal representation */
1598     felem_contract(x_in, x_out);
1599     felem_contract(y_in, y_out);
1600     felem_contract(z_in, z_out);
1601     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1602         (!felem_to_BN(z, z_in))) {
1603         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1604         goto err;
1605     }
1606     ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1607
1608  err:
1609     BN_CTX_end(ctx);
1610     if (generator != NULL)
1611         EC_POINT_free(generator);
1612     if (new_ctx != NULL)
1613         BN_CTX_free(new_ctx);
1614     if (secrets != NULL)
1615         OPENSSL_free(secrets);
1616     if (pre_comp != NULL)
1617         OPENSSL_free(pre_comp);
1618     if (tmp_felems != NULL)
1619         OPENSSL_free(tmp_felems);
1620     return ret;
1621 }
1622
1623 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1624 {
1625     int ret = 0;
1626     NISTP224_PRE_COMP *pre = NULL;
1627     int i, j;
1628     BN_CTX *new_ctx = NULL;
1629     BIGNUM *x, *y;
1630     EC_POINT *generator = NULL;
1631     felem tmp_felems[32];
1632
1633     /* throw away old precomputation */
1634     EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1635                          nistp224_pre_comp_free,
1636                          nistp224_pre_comp_clear_free);
1637     if (ctx == NULL)
1638         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1639             return 0;
1640     BN_CTX_start(ctx);
1641     if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1642         goto err;
1643     /* get the generator */
1644     if (group->generator == NULL)
1645         goto err;
1646     generator = EC_POINT_new(group);
1647     if (generator == NULL)
1648         goto err;
1649     BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1650     BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1651     if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1652         goto err;
1653     if ((pre = nistp224_pre_comp_new()) == NULL)
1654         goto err;
1655     /*
1656      * if the generator is the standard one, use built-in precomputation
1657      */
1658     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1659         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1660         ret = 1;
1661         goto err;
1662     }
1663     if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1664         (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1665         (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1666         goto err;
1667     /*
1668      * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1669      * 2^140*G, 2^196*G for the second one
1670      */
1671     for (i = 1; i <= 8; i <<= 1) {
1672         point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1673                      pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1674                      pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1675         for (j = 0; j < 27; ++j) {
1676             point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1677                          pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1678                          pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1679         }
1680         if (i == 8)
1681             break;
1682         point_double(pre->g_pre_comp[0][2 * i][0],
1683                      pre->g_pre_comp[0][2 * i][1],
1684                      pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1685                      pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1686         for (j = 0; j < 27; ++j) {
1687             point_double(pre->g_pre_comp[0][2 * i][0],
1688                          pre->g_pre_comp[0][2 * i][1],
1689                          pre->g_pre_comp[0][2 * i][2],
1690                          pre->g_pre_comp[0][2 * i][0],
1691                          pre->g_pre_comp[0][2 * i][1],
1692                          pre->g_pre_comp[0][2 * i][2]);
1693         }
1694     }
1695     for (i = 0; i < 2; i++) {
1696         /* g_pre_comp[i][0] is the point at infinity */
1697         memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1698         /* the remaining multiples */
1699         /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1700         point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1701                   pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1702                   pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1703                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1704                   pre->g_pre_comp[i][2][2]);
1705         /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1706         point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1707                   pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1708                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1709                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1710                   pre->g_pre_comp[i][2][2]);
1711         /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1712         point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1713                   pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1714                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1715                   0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1716                   pre->g_pre_comp[i][4][2]);
1717         /*
1718          * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1719          */
1720         point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1721                   pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1722                   pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1723                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1724                   pre->g_pre_comp[i][2][2]);
1725         for (j = 1; j < 8; ++j) {
1726             /* odd multiples: add G resp. 2^28*G */
1727             point_add(pre->g_pre_comp[i][2 * j + 1][0],
1728                       pre->g_pre_comp[i][2 * j + 1][1],
1729                       pre->g_pre_comp[i][2 * j + 1][2],
1730                       pre->g_pre_comp[i][2 * j][0],
1731                       pre->g_pre_comp[i][2 * j][1],
1732                       pre->g_pre_comp[i][2 * j][2], 0,
1733                       pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1734                       pre->g_pre_comp[i][1][2]);
1735         }
1736     }
1737     make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1738
1739     if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1740                              nistp224_pre_comp_free,
1741                              nistp224_pre_comp_clear_free))
1742         goto err;
1743     ret = 1;
1744     pre = NULL;
1745  err:
1746     BN_CTX_end(ctx);
1747     if (generator != NULL)
1748         EC_POINT_free(generator);
1749     if (new_ctx != NULL)
1750         BN_CTX_free(new_ctx);
1751     if (pre)
1752         nistp224_pre_comp_free(pre);
1753     return ret;
1754 }
1755
1756 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1757 {
1758     if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1759                             nistp224_pre_comp_free,
1760                             nistp224_pre_comp_clear_free)
1761         != NULL)
1762         return 1;
1763     else
1764         return 0;
1765 }
1766
1767 #else
1768 static void *dummy = &dummy;
1769 #endif