1 /* crypto/ec/ecp_nistp256.c */
3 * Written by Adam Langley (Google) for the OpenSSL project
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
29 #include <openssl/opensslconf.h>
30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
32 #ifndef OPENSSL_SYS_VMS
39 #include <openssl/err.h>
42 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
43 /* even with gcc, the typedef won't work for 32-bit platforms */
44 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
45 typedef __int128_t int128_t;
47 #error "Need GCC 3.1 or later to define type uint128_t"
55 /* The underlying field.
57 * P256 operates over GF(2^256-2^224+2^192+2^96-1). We can serialise an element
58 * of this field into 32 bytes. We call this an felem_bytearray. */
60 typedef u8 felem_bytearray[32];
62 /* These are the parameters of P256, taken from FIPS 186-3, page 86. These
63 * values are big-endian. */
64 static const felem_bytearray nistp256_curve_params[5] = {
65 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
66 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
67 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
69 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
70 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
71 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
73 {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
74 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
75 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
76 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
77 {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
78 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
79 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
80 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
81 {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
82 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
83 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
84 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
88 * The representation of field elements.
89 * ------------------------------------
91 * We represent field elements with either four 128-bit values, eight 128-bit
92 * values, or four 64-bit values. The field element represented is:
93 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
95 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512 (mod p)
97 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
98 * apart, but are 128-bits wide, the most significant bits of each limb overlap
99 * with the least significant bits of the next.
101 * A field element with four limbs is an 'felem'. One with eight limbs is a
104 * A field element with four, 64-bit values is called a 'smallfelem'. Small
105 * values are used as intermediate values before multiplication.
110 typedef uint128_t limb;
111 typedef limb felem[NLIMBS];
112 typedef limb longfelem[NLIMBS * 2];
113 typedef u64 smallfelem[NLIMBS];
115 /* This is the value of the prime as four 64-bit words, little-endian. */
116 static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
117 static const u64 bottom63bits = 0x7ffffffffffffffful;
119 /* bin32_to_felem takes a little-endian byte array and converts it into felem
120 * form. This assumes that the CPU is little-endian. */
121 static void bin32_to_felem(felem out, const u8 in[32])
123 out[0] = *((u64*) &in[0]);
124 out[1] = *((u64*) &in[8]);
125 out[2] = *((u64*) &in[16]);
126 out[3] = *((u64*) &in[24]);
129 /* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian,
130 * 32 byte array. This assumes that the CPU is little-endian. */
131 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
133 *((u64*) &out[0]) = in[0];
134 *((u64*) &out[8]) = in[1];
135 *((u64*) &out[16]) = in[2];
136 *((u64*) &out[24]) = in[3];
139 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
140 static void flip_endian(u8 *out, const u8 *in, unsigned len)
143 for (i = 0; i < len; ++i)
144 out[i] = in[len-1-i];
147 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
148 static int BN_to_felem(felem out, const BIGNUM *bn)
150 felem_bytearray b_in;
151 felem_bytearray b_out;
154 /* BN_bn2bin eats leading zeroes */
155 memset(b_out, 0, sizeof b_out);
156 num_bytes = BN_num_bytes(bn);
157 if (num_bytes > sizeof b_out)
159 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
162 if (BN_is_negative(bn))
164 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
167 num_bytes = BN_bn2bin(bn, b_in);
168 flip_endian(b_out, b_in, num_bytes);
169 bin32_to_felem(out, b_out);
173 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
174 static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
176 felem_bytearray b_in, b_out;
177 smallfelem_to_bin32(b_in, in);
178 flip_endian(b_out, b_in, sizeof b_out);
179 return BN_bin2bn(b_out, sizeof b_out, out);
184 * ---------------- */
186 static void smallfelem_one(smallfelem out)
194 static void smallfelem_assign(smallfelem out, const smallfelem in)
202 static void felem_assign(felem out, const felem in)
210 /* felem_sum sets out = out + in. */
211 static void felem_sum(felem out, const felem in)
219 /* felem_small_sum sets out = out + in. */
220 static void felem_small_sum(felem out, const smallfelem in)
228 /* felem_scalar sets out = out * scalar */
229 static void felem_scalar(felem out, const u64 scalar)
237 /* longfelem_scalar sets out = out * scalar */
238 static void longfelem_scalar(longfelem out, const u64 scalar)
250 #define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
251 #define two105 (((limb)1) << 105)
252 #define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
254 /* zero105 is 0 mod p */
255 static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
258 * smallfelem_neg sets |out| to |-small|
260 * out[i] < out[i] + 2^105
262 static void smallfelem_neg(felem out, const smallfelem small)
264 /* In order to prevent underflow, we subtract from 0 mod p. */
265 out[0] = zero105[0] - small[0];
266 out[1] = zero105[1] - small[1];
267 out[2] = zero105[2] - small[2];
268 out[3] = zero105[3] - small[3];
272 * felem_diff subtracts |in| from |out|
276 * out[i] < out[i] + 2^105
278 static void felem_diff(felem out, const felem in)
280 /* In order to prevent underflow, we add 0 mod p before subtracting. */
281 out[0] += zero105[0];
282 out[1] += zero105[1];
283 out[2] += zero105[2];
284 out[3] += zero105[3];
292 #define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
293 #define two107 (((limb)1) << 107)
294 #define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
296 /* zero107 is 0 mod p */
297 static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 };
300 * An alternative felem_diff for larger inputs |in|
301 * felem_diff_zero107 subtracts |in| from |out|
305 * out[i] < out[i] + 2^107
307 static void felem_diff_zero107(felem out, const felem in)
309 /* In order to prevent underflow, we add 0 mod p before subtracting. */
310 out[0] += zero107[0];
311 out[1] += zero107[1];
312 out[2] += zero107[2];
313 out[3] += zero107[3];
322 * longfelem_diff subtracts |in| from |out|
326 * out[i] < out[i] + 2^70 + 2^40
328 static void longfelem_diff(longfelem out, const longfelem in)
330 static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6);
331 static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40);
332 static const limb two70 = (((limb)1) << 70);
333 static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6);
334 static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6);
336 /* add 0 mod p to avoid underflow */
340 out[3] += two70m40m38p6;
346 /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
357 #define two64m0 (((limb)1) << 64) - 1
358 #define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
359 #define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
360 #define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
362 /* zero110 is 0 mod p */
363 static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
366 * felem_shrink converts an felem into a smallfelem. The result isn't quite
367 * minimal as the value may be greater than p.
374 static void felem_shrink(smallfelem out, const felem in)
379 static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
382 tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64));
385 tmp[2] = zero110[2] + (u64) in[2];
386 tmp[0] = zero110[0] + in[0];
387 tmp[1] = zero110[1] + in[1];
388 /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
390 /* We perform two partial reductions where we eliminate the
391 * high-word of tmp[3]. We don't update the other words till the end.
393 a = tmp[3] >> 64; /* a < 2^46 */
394 tmp[3] = (u64) tmp[3];
396 tmp[3] += ((limb)a) << 32;
400 a = tmp[3] >> 64; /* a < 2^15 */
401 b += a; /* b < 2^46 + 2^15 < 2^47 */
402 tmp[3] = (u64) tmp[3];
404 tmp[3] += ((limb)a) << 32;
405 /* tmp[3] < 2^64 + 2^47 */
407 /* This adjusts the other two words to complete the two partial
410 tmp[1] -= (((limb)b) << 32);
412 /* In order to make space in tmp[3] for the carry from 2 -> 3, we
413 * conditionally subtract kPrime if tmp[3] is large enough. */
415 /* As tmp[3] < 2^65, high is either 1 or 0 */
420 * all ones if the high word of tmp[3] is 1
421 * all zeros if the high word of tmp[3] if 0 */
426 * all ones if the MSB of low is 1
427 * all zeros if the MSB of low if 0 */
430 /* if low was greater than kPrime3Test then the MSB is zero */
435 * all ones if low was > kPrime3Test
436 * all zeros if low was <= kPrime3Test */
437 mask = (mask & low) | high;
438 tmp[0] -= mask & kPrime[0];
439 tmp[1] -= mask & kPrime[1];
440 /* kPrime[2] is zero, so omitted */
441 tmp[3] -= mask & kPrime[3];
442 /* tmp[3] < 2**64 - 2**32 + 1 */
444 tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0];
445 tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1];
446 tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2];
455 /* smallfelem_expand converts a smallfelem to an felem */
456 static void smallfelem_expand(felem out, const smallfelem in)
465 * smallfelem_square sets |out| = |small|^2
469 * out[i] < 7 * 2^64 < 2^67
471 static void smallfelem_square(longfelem out, const smallfelem small)
476 a = ((uint128_t) small[0]) * small[0];
482 a = ((uint128_t) small[0]) * small[1];
489 a = ((uint128_t) small[0]) * small[2];
496 a = ((uint128_t) small[0]) * small[3];
502 a = ((uint128_t) small[1]) * small[2];
509 a = ((uint128_t) small[1]) * small[1];
515 a = ((uint128_t) small[1]) * small[3];
522 a = ((uint128_t) small[2]) * small[3];
530 a = ((uint128_t) small[2]) * small[2];
536 a = ((uint128_t) small[3]) * small[3];
544 * felem_square sets |out| = |in|^2
548 * out[i] < 7 * 2^64 < 2^67
550 static void felem_square(longfelem out, const felem in)
553 felem_shrink(small, in);
554 smallfelem_square(out, small);
558 * smallfelem_mul sets |out| = |small1| * |small2|
563 * out[i] < 7 * 2^64 < 2^67
565 static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2)
570 a = ((uint128_t) small1[0]) * small2[0];
577 a = ((uint128_t) small1[0]) * small2[1];
583 a = ((uint128_t) small1[1]) * small2[0];
590 a = ((uint128_t) small1[0]) * small2[2];
596 a = ((uint128_t) small1[1]) * small2[1];
602 a = ((uint128_t) small1[2]) * small2[0];
609 a = ((uint128_t) small1[0]) * small2[3];
615 a = ((uint128_t) small1[1]) * small2[2];
621 a = ((uint128_t) small1[2]) * small2[1];
627 a = ((uint128_t) small1[3]) * small2[0];
634 a = ((uint128_t) small1[1]) * small2[3];
640 a = ((uint128_t) small1[2]) * small2[2];
646 a = ((uint128_t) small1[3]) * small2[1];
653 a = ((uint128_t) small1[2]) * small2[3];
659 a = ((uint128_t) small1[3]) * small2[2];
666 a = ((uint128_t) small1[3]) * small2[3];
674 * felem_mul sets |out| = |in1| * |in2|
679 * out[i] < 7 * 2^64 < 2^67
681 static void felem_mul(longfelem out, const felem in1, const felem in2)
683 smallfelem small1, small2;
684 felem_shrink(small1, in1);
685 felem_shrink(small2, in2);
686 smallfelem_mul(out, small1, small2);
690 * felem_small_mul sets |out| = |small1| * |in2|
695 * out[i] < 7 * 2^64 < 2^67
697 static void felem_small_mul(longfelem out, const smallfelem small1, const felem in2)
700 felem_shrink(small2, in2);
701 smallfelem_mul(out, small1, small2);
704 #define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
705 #define two100 (((limb)1) << 100)
706 #define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
707 /* zero100 is 0 mod p */
708 static const felem zero100 = { two100m36m4, two100, two100m36p4, two100m36p4 };
711 * Internal function for the different flavours of felem_reduce.
712 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
714 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
715 * out[1] >= in[7] + 2^32*in[4]
716 * out[2] >= in[5] + 2^32*in[5]
717 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
719 * out[0] <= out[0] + in[4] + 2^32*in[5]
720 * out[1] <= out[1] + in[5] + 2^33*in[6]
721 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
722 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
724 static void felem_reduce_(felem out, const longfelem in)
727 /* combine common terms from below */
728 c = in[4] + (in[5] << 32);
736 /* the remaining terms */
737 /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
738 out[1] -= (in[4] << 32);
739 out[3] += (in[4] << 32);
741 /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
742 out[2] -= (in[5] << 32);
744 /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
746 out[0] -= (in[6] << 32);
747 out[1] += (in[6] << 33);
748 out[2] += (in[6] * 2);
749 out[3] -= (in[6] << 32);
751 /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
753 out[0] -= (in[7] << 32);
754 out[2] += (in[7] << 33);
755 out[3] += (in[7] * 3);
759 * felem_reduce converts a longfelem into an felem.
760 * To be called directly after felem_square or felem_mul.
762 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
763 * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2*64
767 static void felem_reduce(felem out, const longfelem in)
769 out[0] = zero100[0] + in[0];
770 out[1] = zero100[1] + in[1];
771 out[2] = zero100[2] + in[2];
772 out[3] = zero100[3] + in[3];
774 felem_reduce_(out, in);
777 * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
778 * out[1] > 2^100 - 2^64 - 7*2^96 > 0
779 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
780 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
782 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
783 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
784 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
785 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
790 * felem_reduce_zero105 converts a larger longfelem into an felem.
796 static void felem_reduce_zero105(felem out, const longfelem in)
798 out[0] = zero105[0] + in[0];
799 out[1] = zero105[1] + in[1];
800 out[2] = zero105[2] + in[2];
801 out[3] = zero105[3] + in[3];
803 felem_reduce_(out, in);
806 * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
807 * out[1] > 2^105 - 2^71 - 2^103 > 0
808 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
809 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
811 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
812 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
813 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
814 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
818 /* subtract_u64 sets *result = *result - v and *carry to one if the subtraction
820 static void subtract_u64(u64* result, u64* carry, u64 v)
822 uint128_t r = *result;
824 *carry = (r >> 64) & 1;
828 /* felem_contract converts |in| to its unique, minimal representation.
832 static void felem_contract(smallfelem out, const felem in)
835 u64 all_equal_so_far = 0, result = 0, carry;
837 felem_shrink(out, in);
838 /* small is minimal except that the value might be > p */
841 /* We are doing a constant time test if out >= kPrime. We need to
842 * compare each u64, from most-significant to least significant. For
843 * each one, if all words so far have been equal (m is all ones) then a
844 * non-equal result is the answer. Otherwise we continue. */
845 for (i = 3; i < 4; i--)
848 uint128_t a = ((uint128_t) kPrime[i]) - out[i];
849 /* if out[i] > kPrime[i] then a will underflow and the high
850 * 64-bits will all be set. */
851 result |= all_equal_so_far & ((u64) (a >> 64));
853 /* if kPrime[i] == out[i] then |equal| will be all zeros and
854 * the decrement will make it all ones. */
855 equal = kPrime[i] ^ out[i];
857 equal &= equal << 32;
858 equal &= equal << 16;
863 equal = ((s64) equal) >> 63;
865 all_equal_so_far &= equal;
868 /* if all_equal_so_far is still all ones then the two values are equal
869 * and so out >= kPrime is true. */
870 result |= all_equal_so_far;
872 /* if out >= kPrime then we subtract kPrime. */
873 subtract_u64(&out[0], &carry, result & kPrime[0]);
874 subtract_u64(&out[1], &carry, carry);
875 subtract_u64(&out[2], &carry, carry);
876 subtract_u64(&out[3], &carry, carry);
878 subtract_u64(&out[1], &carry, result & kPrime[1]);
879 subtract_u64(&out[2], &carry, carry);
880 subtract_u64(&out[3], &carry, carry);
882 subtract_u64(&out[2], &carry, result & kPrime[2]);
883 subtract_u64(&out[3], &carry, carry);
885 subtract_u64(&out[3], &carry, result & kPrime[3]);
888 static void smallfelem_square_contract(smallfelem out, const smallfelem in)
893 smallfelem_square(longtmp, in);
894 felem_reduce(tmp, longtmp);
895 felem_contract(out, tmp);
898 static void smallfelem_mul_contract(smallfelem out, const smallfelem in1, const smallfelem in2)
903 smallfelem_mul(longtmp, in1, in2);
904 felem_reduce(tmp, longtmp);
905 felem_contract(out, tmp);
909 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
914 static limb smallfelem_is_zero(const smallfelem small)
919 u64 is_zero = small[0] | small[1] | small[2] | small[3];
921 is_zero &= is_zero << 32;
922 is_zero &= is_zero << 16;
923 is_zero &= is_zero << 8;
924 is_zero &= is_zero << 4;
925 is_zero &= is_zero << 2;
926 is_zero &= is_zero << 1;
927 is_zero = ((s64) is_zero) >> 63;
929 is_p = (small[0] ^ kPrime[0]) |
930 (small[1] ^ kPrime[1]) |
931 (small[2] ^ kPrime[2]) |
932 (small[3] ^ kPrime[3]);
940 is_p = ((s64) is_p) >> 63;
945 result |= ((limb) is_zero) << 64;
949 static int smallfelem_is_zero_int(const smallfelem small)
951 return (int) (smallfelem_is_zero(small) & ((limb)1));
955 * felem_inv calculates |out| = |in|^{-1}
957 * Based on Fermat's Little Theorem:
959 * a^{p-1} = 1 (mod p)
960 * a^{p-2} = a^{-1} (mod p)
962 static void felem_inv(felem out, const felem in)
965 /* each e_I will hold |in|^{2^I - 1} */
966 felem e2, e4, e8, e16, e32, e64;
970 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
971 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
972 felem_assign(e2, ftmp);
973 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
974 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
975 felem_mul(tmp, ftmp, e2); felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
976 felem_assign(e4, ftmp);
977 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
978 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
979 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
980 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
981 felem_mul(tmp, ftmp, e4); felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
982 felem_assign(e8, ftmp);
983 for (i = 0; i < 8; i++) {
984 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
986 felem_mul(tmp, ftmp, e8); felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
987 felem_assign(e16, ftmp);
988 for (i = 0; i < 16; i++) {
989 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
991 felem_mul(tmp, ftmp, e16); felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
992 felem_assign(e32, ftmp);
993 for (i = 0; i < 32; i++) {
994 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
996 felem_assign(e64, ftmp);
997 felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
998 for (i = 0; i < 192; i++) {
999 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
1000 } /* 2^256 - 2^224 + 2^192 */
1002 felem_mul(tmp, e64, e32); felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
1003 for (i = 0; i < 16; i++) {
1004 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
1006 felem_mul(tmp, ftmp2, e16); felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
1007 for (i = 0; i < 8; i++) {
1008 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
1010 felem_mul(tmp, ftmp2, e8); felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
1011 for (i = 0; i < 4; i++) {
1012 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
1014 felem_mul(tmp, ftmp2, e4); felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
1015 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
1016 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
1017 felem_mul(tmp, ftmp2, e2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
1018 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
1019 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
1020 felem_mul(tmp, ftmp2, in); felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
1022 felem_mul(tmp, ftmp2, ftmp); felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1025 static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
1029 smallfelem_expand(tmp, in);
1030 felem_inv(tmp, tmp);
1031 felem_contract(out, tmp);
1038 * Building on top of the field operations we have the operations on the
1039 * elliptic curve group itself. Points on the curve are represented in Jacobian
1043 * point_double calculates 2*(x_in, y_in, z_in)
1045 * The method is taken from:
1046 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1048 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1049 * while x_out == y_in is not (maybe this works, but it's not tested). */
1051 point_double(felem x_out, felem y_out, felem z_out,
1052 const felem x_in, const felem y_in, const felem z_in)
1054 longfelem tmp, tmp2;
1055 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1056 smallfelem small1, small2;
1058 felem_assign(ftmp, x_in);
1059 /* ftmp[i] < 2^106 */
1060 felem_assign(ftmp2, x_in);
1061 /* ftmp2[i] < 2^106 */
1064 felem_square(tmp, z_in);
1065 felem_reduce(delta, tmp);
1066 /* delta[i] < 2^101 */
1069 felem_square(tmp, y_in);
1070 felem_reduce(gamma, tmp);
1071 /* gamma[i] < 2^101 */
1072 felem_shrink(small1, gamma);
1074 /* beta = x*gamma */
1075 felem_small_mul(tmp, small1, x_in);
1076 felem_reduce(beta, tmp);
1077 /* beta[i] < 2^101 */
1079 /* alpha = 3*(x-delta)*(x+delta) */
1080 felem_diff(ftmp, delta);
1081 /* ftmp[i] < 2^105 + 2^106 < 2^107 */
1082 felem_sum(ftmp2, delta);
1083 /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
1084 felem_scalar(ftmp2, 3);
1085 /* ftmp2[i] < 3 * 2^107 < 2^109 */
1086 felem_mul(tmp, ftmp, ftmp2);
1087 felem_reduce(alpha, tmp);
1088 /* alpha[i] < 2^101 */
1089 felem_shrink(small2, alpha);
1091 /* x' = alpha^2 - 8*beta */
1092 smallfelem_square(tmp, small2);
1093 felem_reduce(x_out, tmp);
1094 felem_assign(ftmp, beta);
1095 felem_scalar(ftmp, 8);
1096 /* ftmp[i] < 8 * 2^101 = 2^104 */
1097 felem_diff(x_out, ftmp);
1098 /* x_out[i] < 2^105 + 2^101 < 2^106 */
1100 /* z' = (y + z)^2 - gamma - delta */
1101 felem_sum(delta, gamma);
1102 /* delta[i] < 2^101 + 2^101 = 2^102 */
1103 felem_assign(ftmp, y_in);
1104 felem_sum(ftmp, z_in);
1105 /* ftmp[i] < 2^106 + 2^106 = 2^107 */
1106 felem_square(tmp, ftmp);
1107 felem_reduce(z_out, tmp);
1108 felem_diff(z_out, delta);
1109 /* z_out[i] < 2^105 + 2^101 < 2^106 */
1111 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1112 felem_scalar(beta, 4);
1113 /* beta[i] < 4 * 2^101 = 2^103 */
1114 felem_diff_zero107(beta, x_out);
1115 /* beta[i] < 2^107 + 2^103 < 2^108 */
1116 felem_small_mul(tmp, small2, beta);
1117 /* tmp[i] < 7 * 2^64 < 2^67 */
1118 smallfelem_square(tmp2, small1);
1119 /* tmp2[i] < 7 * 2^64 */
1120 longfelem_scalar(tmp2, 8);
1121 /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
1122 longfelem_diff(tmp, tmp2);
1123 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1124 felem_reduce_zero105(y_out, tmp);
1125 /* y_out[i] < 2^106 */
1128 /* point_double_small is the same as point_double, except that it operates on
1131 point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1132 const smallfelem x_in, const smallfelem y_in, const smallfelem z_in)
1134 felem felem_x_out, felem_y_out, felem_z_out;
1135 felem felem_x_in, felem_y_in, felem_z_in;
1137 smallfelem_expand(felem_x_in, x_in);
1138 smallfelem_expand(felem_y_in, y_in);
1139 smallfelem_expand(felem_z_in, z_in);
1140 point_double(felem_x_out, felem_y_out, felem_z_out,
1141 felem_x_in, felem_y_in, felem_z_in);
1142 felem_shrink(x_out, felem_x_out);
1143 felem_shrink(y_out, felem_y_out);
1144 felem_shrink(z_out, felem_z_out);
1147 /* copy_conditional copies in to out iff mask is all ones. */
1149 copy_conditional(felem out, const felem in, limb mask)
1152 for (i = 0; i < NLIMBS; ++i)
1154 const limb tmp = mask & (in[i] ^ out[i]);
1159 /* copy_small_conditional copies in to out iff mask is all ones. */
1161 copy_small_conditional(felem out, const smallfelem in, limb mask)
1164 const u64 mask64 = mask;
1165 for (i = 0; i < NLIMBS; ++i)
1167 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1172 * point_add calcuates (x1, y1, z1) + (x2, y2, z2)
1174 * The method is taken from:
1175 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1176 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1178 * This function includes a branch for checking whether the two input points
1179 * are equal, (while not equal to the point at infinity). This case never
1180 * happens during single point multiplication, so there is no timing leak for
1181 * ECDH or ECDSA signing. */
1182 static void point_add(felem x3, felem y3, felem z3,
1183 const felem x1, const felem y1, const felem z1,
1184 const int mixed, const smallfelem x2, const smallfelem y2, const smallfelem z2)
1186 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1187 longfelem tmp, tmp2;
1188 smallfelem small1, small2, small3, small4, small5;
1189 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1191 felem_shrink(small3, z1);
1193 z1_is_zero = smallfelem_is_zero(small3);
1194 z2_is_zero = smallfelem_is_zero(z2);
1196 /* ftmp = z1z1 = z1**2 */
1197 smallfelem_square(tmp, small3);
1198 felem_reduce(ftmp, tmp);
1199 /* ftmp[i] < 2^101 */
1200 felem_shrink(small1, ftmp);
1204 /* ftmp2 = z2z2 = z2**2 */
1205 smallfelem_square(tmp, z2);
1206 felem_reduce(ftmp2, tmp);
1207 /* ftmp2[i] < 2^101 */
1208 felem_shrink(small2, ftmp2);
1210 felem_shrink(small5, x1);
1212 /* u1 = ftmp3 = x1*z2z2 */
1213 smallfelem_mul(tmp, small5, small2);
1214 felem_reduce(ftmp3, tmp);
1215 /* ftmp3[i] < 2^101 */
1217 /* ftmp5 = z1 + z2 */
1218 felem_assign(ftmp5, z1);
1219 felem_small_sum(ftmp5, z2);
1220 /* ftmp5[i] < 2^107 */
1222 /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
1223 felem_square(tmp, ftmp5);
1224 felem_reduce(ftmp5, tmp);
1225 /* ftmp2 = z2z2 + z1z1 */
1226 felem_sum(ftmp2, ftmp);
1227 /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
1228 felem_diff(ftmp5, ftmp2);
1229 /* ftmp5[i] < 2^105 + 2^101 < 2^106 */
1231 /* ftmp2 = z2 * z2z2 */
1232 smallfelem_mul(tmp, small2, z2);
1233 felem_reduce(ftmp2, tmp);
1235 /* s1 = ftmp2 = y1 * z2**3 */
1236 felem_mul(tmp, y1, ftmp2);
1237 felem_reduce(ftmp6, tmp);
1238 /* ftmp6[i] < 2^101 */
1242 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1244 /* u1 = ftmp3 = x1*z2z2 */
1245 felem_assign(ftmp3, x1);
1246 /* ftmp3[i] < 2^106 */
1249 felem_assign(ftmp5, z1);
1250 felem_scalar(ftmp5, 2);
1251 /* ftmp5[i] < 2*2^106 = 2^107 */
1253 /* s1 = ftmp2 = y1 * z2**3 */
1254 felem_assign(ftmp6, y1);
1255 /* ftmp6[i] < 2^106 */
1259 smallfelem_mul(tmp, x2, small1);
1260 felem_reduce(ftmp4, tmp);
1262 /* h = ftmp4 = u2 - u1 */
1263 felem_diff_zero107(ftmp4, ftmp3);
1264 /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
1265 felem_shrink(small4, ftmp4);
1267 x_equal = smallfelem_is_zero(small4);
1269 /* z_out = ftmp5 * h */
1270 felem_small_mul(tmp, small4, ftmp5);
1271 felem_reduce(z_out, tmp);
1272 /* z_out[i] < 2^101 */
1274 /* ftmp = z1 * z1z1 */
1275 smallfelem_mul(tmp, small1, small3);
1276 felem_reduce(ftmp, tmp);
1278 /* s2 = tmp = y2 * z1**3 */
1279 felem_small_mul(tmp, y2, ftmp);
1280 felem_reduce(ftmp5, tmp);
1282 /* r = ftmp5 = (s2 - s1)*2 */
1283 felem_diff_zero107(ftmp5, ftmp6);
1284 /* ftmp5[i] < 2^107 + 2^107 = 2^108*/
1285 felem_scalar(ftmp5, 2);
1286 /* ftmp5[i] < 2^109 */
1287 felem_shrink(small1, ftmp5);
1288 y_equal = smallfelem_is_zero(small1);
1290 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
1292 point_double(x3, y3, z3, x1, y1, z1);
1296 /* I = ftmp = (2h)**2 */
1297 felem_assign(ftmp, ftmp4);
1298 felem_scalar(ftmp, 2);
1299 /* ftmp[i] < 2*2^108 = 2^109 */
1300 felem_square(tmp, ftmp);
1301 felem_reduce(ftmp, tmp);
1303 /* J = ftmp2 = h * I */
1304 felem_mul(tmp, ftmp4, ftmp);
1305 felem_reduce(ftmp2, tmp);
1307 /* V = ftmp4 = U1 * I */
1308 felem_mul(tmp, ftmp3, ftmp);
1309 felem_reduce(ftmp4, tmp);
1311 /* x_out = r**2 - J - 2V */
1312 smallfelem_square(tmp, small1);
1313 felem_reduce(x_out, tmp);
1314 felem_assign(ftmp3, ftmp4);
1315 felem_scalar(ftmp4, 2);
1316 felem_sum(ftmp4, ftmp2);
1317 /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
1318 felem_diff(x_out, ftmp4);
1319 /* x_out[i] < 2^105 + 2^101 */
1321 /* y_out = r(V-x_out) - 2 * s1 * J */
1322 felem_diff_zero107(ftmp3, x_out);
1323 /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
1324 felem_small_mul(tmp, small1, ftmp3);
1325 felem_mul(tmp2, ftmp6, ftmp2);
1326 longfelem_scalar(tmp2, 2);
1327 /* tmp2[i] < 2*2^67 = 2^68 */
1328 longfelem_diff(tmp, tmp2);
1329 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1330 felem_reduce_zero105(y_out, tmp);
1331 /* y_out[i] < 2^106 */
1333 copy_small_conditional(x_out, x2, z1_is_zero);
1334 copy_conditional(x_out, x1, z2_is_zero);
1335 copy_small_conditional(y_out, y2, z1_is_zero);
1336 copy_conditional(y_out, y1, z2_is_zero);
1337 copy_small_conditional(z_out, z2, z1_is_zero);
1338 copy_conditional(z_out, z1, z2_is_zero);
1339 felem_assign(x3, x_out);
1340 felem_assign(y3, y_out);
1341 felem_assign(z3, z_out);
1344 /* point_add_small is the same as point_add, except that it operates on
1346 static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1347 smallfelem x1, smallfelem y1, smallfelem z1,
1348 smallfelem x2, smallfelem y2, smallfelem z2)
1350 felem felem_x3, felem_y3, felem_z3;
1351 felem felem_x1, felem_y1, felem_z1;
1352 smallfelem_expand(felem_x1, x1);
1353 smallfelem_expand(felem_y1, y1);
1354 smallfelem_expand(felem_z1, z1);
1355 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0, x2, y2, z2);
1356 felem_shrink(x3, felem_x3);
1357 felem_shrink(y3, felem_y3);
1358 felem_shrink(z3, felem_z3);
1362 * Base point pre computation
1363 * --------------------------
1365 * Two different sorts of precomputed tables are used in the following code.
1366 * Each contain various points on the curve, where each point is three field
1367 * elements (x, y, z).
1369 * For the base point table, z is usually 1 (0 for the point at infinity).
1370 * This table has 2 * 16 elements, starting with the following:
1371 * index | bits | point
1372 * ------+---------+------------------------------
1375 * 2 | 0 0 1 0 | 2^64G
1376 * 3 | 0 0 1 1 | (2^64 + 1)G
1377 * 4 | 0 1 0 0 | 2^128G
1378 * 5 | 0 1 0 1 | (2^128 + 1)G
1379 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1380 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1381 * 8 | 1 0 0 0 | 2^192G
1382 * 9 | 1 0 0 1 | (2^192 + 1)G
1383 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1384 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1385 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1386 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1387 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1388 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1389 * followed by a copy of this with each element multiplied by 2^32.
1391 * The reason for this is so that we can clock bits into four different
1392 * locations when doing simple scalar multiplies against the base point,
1393 * and then another four locations using the second 16 elements.
1395 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1397 /* gmul is the table of precomputed base points */
1398 static const smallfelem gmul[2][16][3] =
1402 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247},
1403 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b},
1405 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5},
1406 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d},
1408 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f},
1409 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644},
1411 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67},
1412 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee},
1414 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff},
1415 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b},
1417 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8},
1418 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851},
1420 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea},
1421 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b},
1423 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276},
1424 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816},
1426 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad},
1427 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663},
1429 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d},
1430 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321},
1432 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287},
1433 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6},
1435 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466},
1436 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20},
1438 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9},
1439 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61},
1441 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a},
1442 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc},
1444 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c},
1445 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab},
1450 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89},
1451 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624},
1453 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6},
1454 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1},
1456 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a},
1457 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593},
1459 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617},
1460 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7},
1462 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276},
1463 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a},
1465 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908},
1466 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e},
1468 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7},
1469 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec},
1471 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee},
1472 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6},
1474 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109},
1475 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5},
1477 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba},
1478 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44},
1480 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b},
1481 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc},
1483 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107},
1484 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387},
1486 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503},
1487 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be},
1489 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9},
1490 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a},
1492 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6},
1493 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81},
1496 /* select_point selects the |idx|th point from a precomputation table and
1497 * copies it to out. */
1498 static void select_point(const u64 idx, unsigned int size, const smallfelem pre_comp[16][3], smallfelem out[3])
1501 u64 *outlimbs = &out[0][0];
1502 memset(outlimbs, 0, 3 * sizeof(smallfelem));
1504 for (i = 0; i < size; i++)
1506 const u64 *inlimbs = (u64*) &pre_comp[i][0][0];
1513 for (j = 0; j < NLIMBS * 3; j++)
1514 outlimbs[j] |= inlimbs[j] & mask;
1518 /* get_bit returns the |i|th bit in |in| */
1519 static char get_bit(const felem_bytearray in, int i)
1521 if ((i < 0) || (i >= 256))
1523 return (in[i >> 3] >> (i & 7)) & 1;
1526 /* Interleaved point multiplication using precomputed point multiples:
1527 * The small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[],
1528 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1529 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1530 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1531 static void batch_mul(felem x_out, felem y_out, felem z_out,
1532 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1533 const int mixed, const smallfelem pre_comp[][17][3], const smallfelem g_pre_comp[2][16][3])
1536 unsigned num, gen_mul = (g_scalar != NULL);
1542 /* set nq to the point at infinity */
1543 memset(nq, 0, 3 * sizeof(felem));
1545 /* Loop over all scalars msb-to-lsb, interleaving additions
1546 * of multiples of the generator (two in each of the last 32 rounds)
1547 * and additions of other points multiples (every 5th round).
1549 skip = 1; /* save two point operations in the first round */
1550 for (i = (num_points ? 255 : 31); i >= 0; --i)
1554 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1556 /* add multiples of the generator */
1557 if (gen_mul && (i <= 31))
1559 /* first, look 32 bits upwards */
1560 bits = get_bit(g_scalar, i + 224) << 3;
1561 bits |= get_bit(g_scalar, i + 160) << 2;
1562 bits |= get_bit(g_scalar, i + 96) << 1;
1563 bits |= get_bit(g_scalar, i + 32);
1564 /* select the point to add, in constant time */
1565 select_point(bits, 16, g_pre_comp[1], tmp);
1569 point_add(nq[0], nq[1], nq[2],
1570 nq[0], nq[1], nq[2],
1571 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1575 smallfelem_expand(nq[0], tmp[0]);
1576 smallfelem_expand(nq[1], tmp[1]);
1577 smallfelem_expand(nq[2], tmp[2]);
1581 /* second, look at the current position */
1582 bits = get_bit(g_scalar, i + 192) << 3;
1583 bits |= get_bit(g_scalar, i + 128) << 2;
1584 bits |= get_bit(g_scalar, i + 64) << 1;
1585 bits |= get_bit(g_scalar, i);
1586 /* select the point to add, in constant time */
1587 select_point(bits, 16, g_pre_comp[0], tmp);
1588 point_add(nq[0], nq[1], nq[2],
1589 nq[0], nq[1], nq[2],
1590 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1593 /* do other additions every 5 doublings */
1594 if (num_points && (i % 5 == 0))
1596 /* loop over all scalars */
1597 for (num = 0; num < num_points; ++num)
1599 bits = get_bit(scalars[num], i + 4) << 5;
1600 bits |= get_bit(scalars[num], i + 3) << 4;
1601 bits |= get_bit(scalars[num], i + 2) << 3;
1602 bits |= get_bit(scalars[num], i + 1) << 2;
1603 bits |= get_bit(scalars[num], i) << 1;
1604 bits |= get_bit(scalars[num], i - 1);
1605 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1607 /* select the point to add or subtract, in constant time */
1608 select_point(digit, 17, pre_comp[num], tmp);
1609 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative point */
1610 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1611 felem_contract(tmp[1], ftmp);
1615 point_add(nq[0], nq[1], nq[2],
1616 nq[0], nq[1], nq[2],
1617 mixed, tmp[0], tmp[1], tmp[2]);
1621 smallfelem_expand(nq[0], tmp[0]);
1622 smallfelem_expand(nq[1], tmp[1]);
1623 smallfelem_expand(nq[2], tmp[2]);
1629 felem_assign(x_out, nq[0]);
1630 felem_assign(y_out, nq[1]);
1631 felem_assign(z_out, nq[2]);
1634 /* Precomputation for the group generator. */
1636 smallfelem g_pre_comp[2][16][3];
1638 } NISTP256_PRE_COMP;
1640 const EC_METHOD *EC_GFp_nistp256_method(void)
1642 static const EC_METHOD ret = {
1643 EC_FLAGS_DEFAULT_OCT,
1644 NID_X9_62_prime_field,
1645 ec_GFp_nistp256_group_init,
1646 ec_GFp_simple_group_finish,
1647 ec_GFp_simple_group_clear_finish,
1648 ec_GFp_nist_group_copy,
1649 ec_GFp_nistp256_group_set_curve,
1650 ec_GFp_simple_group_get_curve,
1651 ec_GFp_simple_group_get_degree,
1652 ec_GFp_simple_group_check_discriminant,
1653 ec_GFp_simple_point_init,
1654 ec_GFp_simple_point_finish,
1655 ec_GFp_simple_point_clear_finish,
1656 ec_GFp_simple_point_copy,
1657 ec_GFp_simple_point_set_to_infinity,
1658 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1659 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1660 ec_GFp_simple_point_set_affine_coordinates,
1661 ec_GFp_nistp256_point_get_affine_coordinates,
1662 0 /* point_set_compressed_coordinates */,
1667 ec_GFp_simple_invert,
1668 ec_GFp_simple_is_at_infinity,
1669 ec_GFp_simple_is_on_curve,
1671 ec_GFp_simple_make_affine,
1672 ec_GFp_simple_points_make_affine,
1673 ec_GFp_nistp256_points_mul,
1674 ec_GFp_nistp256_precompute_mult,
1675 ec_GFp_nistp256_have_precompute_mult,
1676 ec_GFp_nist_field_mul,
1677 ec_GFp_nist_field_sqr,
1679 0 /* field_encode */,
1680 0 /* field_decode */,
1681 0 /* field_set_to_one */ };
1686 /******************************************************************************/
1687 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1690 static NISTP256_PRE_COMP *nistp256_pre_comp_new()
1692 NISTP256_PRE_COMP *ret = NULL;
1693 ret = (NISTP256_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1696 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1699 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1700 ret->references = 1;
1704 static void *nistp256_pre_comp_dup(void *src_)
1706 NISTP256_PRE_COMP *src = src_;
1708 /* no need to actually copy, these objects never change! */
1709 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1714 static void nistp256_pre_comp_free(void *pre_)
1717 NISTP256_PRE_COMP *pre = pre_;
1722 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1729 static void nistp256_pre_comp_clear_free(void *pre_)
1732 NISTP256_PRE_COMP *pre = pre_;
1737 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1741 OPENSSL_cleanse(pre, sizeof *pre);
1745 /******************************************************************************/
1746 /* OPENSSL EC_METHOD FUNCTIONS
1749 int ec_GFp_nistp256_group_init(EC_GROUP *group)
1752 ret = ec_GFp_simple_group_init(group);
1753 group->a_is_minus3 = 1;
1757 int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1758 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1761 BN_CTX *new_ctx = NULL;
1762 BIGNUM *curve_p, *curve_a, *curve_b;
1765 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1767 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1768 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1769 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1770 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1771 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1772 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1773 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1774 (BN_cmp(curve_b, b)))
1776 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1777 EC_R_WRONG_CURVE_PARAMETERS);
1780 group->field_mod_func = BN_nist_mod_256;
1781 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1784 if (new_ctx != NULL)
1785 BN_CTX_free(new_ctx);
1789 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1790 * (X', Y') = (X/Z^2, Y/Z^3) */
1791 int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1792 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1794 felem z1, z2, x_in, y_in;
1795 smallfelem x_out, y_out;
1798 if (EC_POINT_is_at_infinity(group, point))
1800 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1801 EC_R_POINT_AT_INFINITY);
1804 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1805 (!BN_to_felem(z1, &point->Z))) return 0;
1807 felem_square(tmp, z2); felem_reduce(z1, tmp);
1808 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1809 felem_contract(x_out, x_in);
1812 if (!smallfelem_to_BN(x, x_out)) {
1813 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1818 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1819 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1820 felem_contract(y_out, y_in);
1823 if (!smallfelem_to_BN(y, y_out))
1825 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1833 static void make_points_affine(size_t num, smallfelem points[/* num */][3], smallfelem tmp_smallfelems[/* num+1 */])
1835 /* Runs in constant time, unless an input is the point at infinity
1836 * (which normally shouldn't happen). */
1837 ec_GFp_nistp_points_make_affine_internal(
1842 (void (*)(void *)) smallfelem_one,
1843 (int (*)(const void *)) smallfelem_is_zero_int,
1844 (void (*)(void *, const void *)) smallfelem_assign,
1845 (void (*)(void *, const void *)) smallfelem_square_contract,
1846 (void (*)(void *, const void *, const void *)) smallfelem_mul_contract,
1847 (void (*)(void *, const void *)) smallfelem_inv_contract,
1848 (void (*)(void *, const void *)) smallfelem_assign /* nothing to contract */);
1851 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1852 * Result is stored in r (r can equal one of the inputs). */
1853 int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
1854 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1855 const BIGNUM *scalars[], BN_CTX *ctx)
1860 BN_CTX *new_ctx = NULL;
1861 BIGNUM *x, *y, *z, *tmp_scalar;
1862 felem_bytearray g_secret;
1863 felem_bytearray *secrets = NULL;
1864 smallfelem (*pre_comp)[17][3] = NULL;
1865 smallfelem *tmp_smallfelems = NULL;
1866 felem_bytearray tmp;
1867 unsigned i, num_bytes;
1868 int have_pre_comp = 0;
1869 size_t num_points = num;
1870 smallfelem x_in, y_in, z_in;
1871 felem x_out, y_out, z_out;
1872 NISTP256_PRE_COMP *pre = NULL;
1873 const smallfelem (*g_pre_comp)[16][3] = NULL;
1874 EC_POINT *generator = NULL;
1875 const EC_POINT *p = NULL;
1876 const BIGNUM *p_scalar = NULL;
1879 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1881 if (((x = BN_CTX_get(ctx)) == NULL) ||
1882 ((y = BN_CTX_get(ctx)) == NULL) ||
1883 ((z = BN_CTX_get(ctx)) == NULL) ||
1884 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1889 pre = EC_EX_DATA_get_data(group->extra_data,
1890 nistp256_pre_comp_dup, nistp256_pre_comp_free,
1891 nistp256_pre_comp_clear_free);
1893 /* we have precomputation, try to use it */
1894 g_pre_comp = (const smallfelem (*)[16][3]) pre->g_pre_comp;
1896 /* try to use the standard precomputation */
1897 g_pre_comp = &gmul[0];
1898 generator = EC_POINT_new(group);
1899 if (generator == NULL)
1901 /* get the generator from precomputation */
1902 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
1903 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
1904 !smallfelem_to_BN(z, g_pre_comp[0][1][2]))
1906 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1909 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1910 generator, x, y, z, ctx))
1912 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1913 /* precomputation matches generator */
1916 /* we don't have valid precomputation:
1917 * treat the generator as a random point */
1922 if (num_points >= 3)
1924 /* unless we precompute multiples for just one or two points,
1925 * converting those into affine form is time well spent */
1928 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1929 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(smallfelem));
1931 tmp_smallfelems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(smallfelem));
1932 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_smallfelems == NULL)))
1934 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1938 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1939 * i.e., they contribute nothing to the linear combination */
1940 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1941 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(smallfelem));
1942 for (i = 0; i < num_points; ++i)
1945 /* we didn't have a valid precomputation, so we pick
1948 p = EC_GROUP_get0_generator(group);
1952 /* the i^th point */
1955 p_scalar = scalars[i];
1957 if ((p_scalar != NULL) && (p != NULL))
1959 /* reduce scalar to 0 <= scalar < 2^256 */
1960 if ((BN_num_bits(p_scalar) > 256) || (BN_is_negative(p_scalar)))
1962 /* this is an unusual input, and we don't guarantee
1963 * constant-timeness */
1964 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1966 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1969 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1972 num_bytes = BN_bn2bin(p_scalar, tmp);
1973 flip_endian(secrets[i], tmp, num_bytes);
1974 /* precompute multiples */
1975 if ((!BN_to_felem(x_out, &p->X)) ||
1976 (!BN_to_felem(y_out, &p->Y)) ||
1977 (!BN_to_felem(z_out, &p->Z))) goto err;
1978 felem_shrink(pre_comp[i][1][0], x_out);
1979 felem_shrink(pre_comp[i][1][1], y_out);
1980 felem_shrink(pre_comp[i][1][2], z_out);
1981 for (j = 2; j <= 16; ++j)
1986 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1987 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1988 pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1993 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1994 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
2000 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
2003 /* the scalar for the generator */
2004 if ((scalar != NULL) && (have_pre_comp))
2006 memset(g_secret, 0, sizeof(g_secret));
2007 /* reduce scalar to 0 <= scalar < 2^256 */
2008 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar)))
2010 /* this is an unusual input, and we don't guarantee
2011 * constant-timeness */
2012 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
2014 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2017 num_bytes = BN_bn2bin(tmp_scalar, tmp);
2020 num_bytes = BN_bn2bin(scalar, tmp);
2021 flip_endian(g_secret, tmp, num_bytes);
2022 /* do the multiplication with generator precomputation*/
2023 batch_mul(x_out, y_out, z_out,
2024 (const felem_bytearray (*)) secrets, num_points,
2026 mixed, (const smallfelem (*)[17][3]) pre_comp,
2030 /* do the multiplication without generator precomputation */
2031 batch_mul(x_out, y_out, z_out,
2032 (const felem_bytearray (*)) secrets, num_points,
2033 NULL, mixed, (const smallfelem (*)[17][3]) pre_comp, NULL);
2034 /* reduce the output to its unique minimal representation */
2035 felem_contract(x_in, x_out);
2036 felem_contract(y_in, y_out);
2037 felem_contract(z_in, z_out);
2038 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2039 (!smallfelem_to_BN(z, z_in)))
2041 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2044 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2048 if (generator != NULL)
2049 EC_POINT_free(generator);
2050 if (new_ctx != NULL)
2051 BN_CTX_free(new_ctx);
2052 if (secrets != NULL)
2053 OPENSSL_free(secrets);
2054 if (pre_comp != NULL)
2055 OPENSSL_free(pre_comp);
2056 if (tmp_smallfelems != NULL)
2057 OPENSSL_free(tmp_smallfelems);
2061 int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2064 NISTP256_PRE_COMP *pre = NULL;
2066 BN_CTX *new_ctx = NULL;
2068 EC_POINT *generator = NULL;
2069 smallfelem tmp_smallfelems[32];
2070 felem x_tmp, y_tmp, z_tmp;
2072 /* throw away old precomputation */
2073 EC_EX_DATA_free_data(&group->extra_data, nistp256_pre_comp_dup,
2074 nistp256_pre_comp_free, nistp256_pre_comp_clear_free);
2076 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
2078 if (((x = BN_CTX_get(ctx)) == NULL) ||
2079 ((y = BN_CTX_get(ctx)) == NULL))
2081 /* get the generator */
2082 if (group->generator == NULL) goto err;
2083 generator = EC_POINT_new(group);
2084 if (generator == NULL)
2086 BN_bin2bn(nistp256_curve_params[3], sizeof (felem_bytearray), x);
2087 BN_bin2bn(nistp256_curve_params[4], sizeof (felem_bytearray), y);
2088 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2090 if ((pre = nistp256_pre_comp_new()) == NULL)
2092 /* if the generator is the standard one, use built-in precomputation */
2093 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2095 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2099 if ((!BN_to_felem(x_tmp, &group->generator->X)) ||
2100 (!BN_to_felem(y_tmp, &group->generator->Y)) ||
2101 (!BN_to_felem(z_tmp, &group->generator->Z)))
2103 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2104 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2105 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2106 /* compute 2^64*G, 2^128*G, 2^192*G for the first table,
2107 * 2^32*G, 2^96*G, 2^160*G, 2^224*G for the second one
2109 for (i = 1; i <= 8; i <<= 1)
2112 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2113 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
2114 for (j = 0; j < 31; ++j)
2117 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2118 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2123 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2124 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2125 for (j = 0; j < 31; ++j)
2128 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2129 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
2132 for (i = 0; i < 2; i++)
2134 /* g_pre_comp[i][0] is the point at infinity */
2135 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2136 /* the remaining multiples */
2137 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2139 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], pre->g_pre_comp[i][6][2],
2140 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2141 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2142 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2144 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], pre->g_pre_comp[i][10][2],
2145 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2146 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2147 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2149 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2150 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2151 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2]);
2152 /* 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G */
2154 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], pre->g_pre_comp[i][14][2],
2155 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2156 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2157 for (j = 1; j < 8; ++j)
2159 /* odd multiples: add G resp. 2^32*G */
2161 pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1], pre->g_pre_comp[i][2*j+1][2],
2162 pre->g_pre_comp[i][2*j][0], pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
2163 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], pre->g_pre_comp[i][1][2]);
2166 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2168 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup,
2169 nistp256_pre_comp_free, nistp256_pre_comp_clear_free))
2175 if (generator != NULL)
2176 EC_POINT_free(generator);
2177 if (new_ctx != NULL)
2178 BN_CTX_free(new_ctx);
2180 nistp256_pre_comp_free(pre);
2184 int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2186 if (EC_EX_DATA_get_data(group->extra_data, nistp256_pre_comp_dup,
2187 nistp256_pre_comp_free, nistp256_pre_comp_clear_free)
2194 static void *dummy=&dummy;