2 * Copyright 2017-2018 The OpenSSL Project Authors. All Rights Reserved.
3 * Copyright 2014 Cryptography Research, Inc.
5 * Licensed under the OpenSSL license (the "License"). You may not use
6 * this file except in compliance with the License. You can obtain a copy
7 * in the file LICENSE in the source distribution or at
8 * https://www.openssl.org/source/license.html
10 * Originally written by Mike Hamburg
13 #ifndef __CONSTANT_TIME_H__
14 # define __CONSTANT_TIME_H__ 1
20 * Constant-time operations on hopefully-compile-time-sized memory
21 * regions. Needed for flexibility / demagication: not all fields
22 * have sizes which are multiples of the vector width, necessitating
23 * a change from the Ed448 versions.
25 * These routines would be much simpler to define at the byte level,
26 * but if not vectorized they would be a significant fraction of the
27 * runtime. E.g., on NEON-less ARM, constant_time_lookup is about 15% of
28 * signing time, vs 6% on Haswell with its fancy AVX2 vectors.
30 * If the compiler could do a good job of autovectorizing the code,
31 * we could just leave it with the byte definition. But that's unlikely
32 * on most deployed compilers, especially if you consider that pcmpeq[size]
33 * is much faster than moving a scalar to the vector unit (which is what
34 * a naive autovectorizer will do with constant_time_lookup on Intel).
36 * Instead, we're putting our trust in the loop unroller and unswitcher.
/*
 * GCC/Clang only: packed wrapper structs that the routines below use to read
 * and write big_register_t / word_t values through pointers that may not be
 * suitably aligned. HAS_UNALIGNED_STRUCTS records that the wrappers exist and
 * is tested by each routine below; RESTRICT maps to the compiler's
 * __restrict__ qualifier.
 *
 * NOTE(review): the embedded line numbers skip here (41 -> 44, 48 -> 52), so
 * the typedef openers, the member of unaligned_word_t and any fallback branch
 * of this conditional are not visible in this listing. Confirm against the
 * full header.
 */
39 # if defined(__GNUC__) || defined(__clang__)
41 * Unaligned big (vector?) register.
44 big_register_t unaligned;
/* packed: per the GCC attribute docs, gives the member the smallest alignment. */
45 } __attribute((packed)) unaligned_br_t;
48 * Unaligned word register, for architectures where that matters.
/* Same packed wrapper idea as above, named for word_t-sized accesses. */
52 } __attribute((packed)) unaligned_word_t;
/* Feature flag consumed by the #ifdef / #ifndef tests in the routines below. */
54 # define HAS_UNALIGNED_STRUCTS
/* Non-aliasing qualifier used on the pointer parameters of the routines below. */
55 # define RESTRICT __restrict__
61 * Constant-time conditional swap.
63 * If doswap is set, swap elem_bytes bytes between *a and *b.
65 * *a and *b must not alias. Also, they must be at least as aligned
66 * as their sizes, if the CPU cares about that sort of thing.
/*
 * Three-tier XOR swap of two byte buffers: big_register_t-sized chunks first,
 * then word_t-sized chunks, then single bytes. Each tier forms x = a ^ b for
 * a chunk and XORs x back into both buffers.
 *
 * NOTE(review): the embedded line numbers skip (68 -> 74, 87 -> 90, ...), so
 * the remaining parameters, the declarations of k and i, the closing braces
 * and, presumably, the statement that ANDs each x with the swap mask are not
 * visible in this listing. Confirm against the full header.
 *
 * NOTE(review): the locals below are named xor, which <iso646.h> defines as
 * a macro; a translation unit that includes that header before this one
 * would fail to compile these declarations.
 */
68 static ossl_inline void constant_time_cond_swap(void *RESTRICT a_,
/* Byte views of the operands; a void pointer cannot be indexed. */
74 unsigned char *a = (unsigned char *)a_;
75 unsigned char *b = (unsigned char *)b_;
/* doswap converted by br_set_to_mask() for use on big_register_t chunks. */
76 big_register_t br_mask = br_set_to_mask(doswap);
77 # ifndef HAS_UNALIGNED_STRUCTS
/* Low byte of doswap, for the byte-at-a-time fallback loops. */
78 unsigned char doswapc = (unsigned char)(doswap & 0xFF);
/*
 * Tier 1: big_register_t chunks.
 * NOTE(review): sizeof yields size_t, so this bound is presumably computed
 * in unsigned arithmetic and would wrap if elem_bytes were smaller than
 * sizeof(big_register_t). Confirm that every caller passes at least that
 * many bytes.
 */
81 for (k = 0; k <= elem_bytes - sizeof(big_register_t);
82 k += sizeof(big_register_t)) {
/* Size is not a multiple of the register size, so do not assume alignment. */
83 if (elem_bytes % sizeof(big_register_t)) {
85 # ifdef HAS_UNALIGNED_STRUCTS
/* Access through the packed wrapper instead of a plain big_register_t. */
86 big_register_t xor = ((unaligned_br_t *) (&a[k]))->unaligned
87 ^ ((unaligned_br_t *) (&b[k]))->unaligned;
90 ((unaligned_br_t *)(&a[k]))->unaligned ^= xor;
91 ((unaligned_br_t *)(&b[k]))->unaligned ^= xor;
/* No packed wrappers available: handle the chunk one byte at a time. */
95 for (i = 0; i < sizeof(big_register_t); i++) {
96 unsigned char xor = a[k + i] ^ b[k + i];
/*
 * Direct big_register_t access (presumably the else branch, taken when the
 * size is a multiple of the register size).
 * NOTE(review): this reads unsigned char storage through a big_register_t
 * lvalue and relies on the alignment the caller is required to provide.
 */
105 big_register_t xor = *((big_register_t *) (&a[k]))
106 ^ *((big_register_t *) (&b[k]));
108 *((big_register_t *)(&a[k])) ^= xor;
109 *((big_register_t *)(&b[k])) ^= xor;
/* Tier 2: runs only when the remainder after tier 1 holds at least one word_t. */
113 if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
/* k carries over from tier 1, so this continues where the big chunks ended. */
114 for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
/* Same size-based alignment test as tier 1, at word_t granularity. */
115 if (elem_bytes % sizeof(word_t)) {
117 # ifdef HAS_UNALIGNED_STRUCTS
118 word_t xor = ((unaligned_word_t *)(&a[k]))->unaligned
119 ^ ((unaligned_word_t *)(&b[k]))->unaligned;
122 ((unaligned_word_t *)(&a[k]))->unaligned ^= xor;
123 ((unaligned_word_t *)(&b[k]))->unaligned ^= xor;
/* Byte-wise fallback for one word_t worth of data. */
127 for (i = 0; i < sizeof(word_t); i++) {
128 unsigned char xor = a[k + i] ^ b[k + i];
/* Direct word_t access, same caveat as the direct big_register_t access. */
137 word_t xor = *((word_t *) (&a[k])) ^ *((word_t *) (&b[k]));
139 *((word_t *)(&a[k])) ^= xor;
140 *((word_t *)(&b[k])) ^= xor;
/* Tier 3: byte tail, needed only when the size is not a multiple of word_t. */
145 if (elem_bytes % sizeof(word_t)) {
146 for (; k < elem_bytes; k += 1) {
147 unsigned char xor = a[k] ^ b[k];
157 * Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
159 * The table must be at least as aligned as elem_bytes. The output must be word aligned,
160 * and if the input size is vector aligned it must also be vector aligned.
162 * The table and output must not alias.
/*
 * Zeroes the output, then walks all n_table entries and ORs each entry,
 * ANDed with a per-entry mask, into the output. The visible loop has no
 * early exit, so each entry is read whatever idx is. Entry j starts at byte
 * offset j * elem_bytes in the table. Copying uses the same three tiers as
 * the swap routine: big_register_t chunks, word_t chunks, then bytes.
 *
 * NOTE(review): the embedded line numbers skip (164 -> 170, 194 -> 196, ...),
 * so the remaining parameters, the declarations of j, k, i, mask and maskc,
 * the mask operand of several expressions and the closing braces are not
 * visible in this listing. Confirm against the full header.
 */
164 static ossl_inline void constant_time_lookup(void *RESTRICT out_,
/* big_i starts from idx and is decremented by big_one once per table entry. */
170 big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
172 /* Can't do pointer arithmetic on void * */
173 unsigned char *out = (unsigned char *)out_;
174 const unsigned char *table = (const unsigned char *)table_;
176 # ifndef HAS_UNALIGNED_STRUCTS
/* Start from all-zero output because entries are accumulated with OR. */
180 memset(out, 0, elem_bytes);
181 for (j = 0; j < n_table; j++, big_i -= big_one) {
/* big_i reaches zero on the iteration where j equals idx; presumably
 * br_is_zero() then yields an all-ones mask and zero otherwise (confirm). */
182 big_register_t br_mask = br_is_zero(big_i);
185 # ifndef HAS_UNALIGNED_STRUCTS
/* Byte-sized copy of the mask for the byte-wise fallback loop. */
186 maskc = (unsigned char)br_mask;
/*
 * Tier 1: big_register_t chunks.
 * NOTE(review): sizeof yields size_t, so this bound is presumably computed
 * in unsigned arithmetic and would wrap if elem_bytes were smaller than
 * sizeof(big_register_t). Confirm that callers never pass less.
 */
189 for (k = 0; k <= elem_bytes - sizeof(big_register_t);
190 k += sizeof(big_register_t)) {
/* Size is not a multiple of the register size, so do not assume alignment. */
191 if (elem_bytes % sizeof(big_register_t)) {
193 # ifdef HAS_UNALIGNED_STRUCTS
/* Both sides go through the packed wrapper in this branch. */
194 ((unaligned_br_t *)(out + k))->unaligned |=
196 & ((const unaligned_br_t *)
197 (&table[k + j * elem_bytes]))->unaligned;
/* No packed wrappers available: accumulate one byte at a time. */
201 for (i = 0; i < sizeof(big_register_t); i++)
203 & ((unsigned char *) table)
204 [k + (j * elem_bytes) + i];
/* Direct big_register_t access; relies on caller-provided alignment. */
208 *(big_register_t *)(out + k) |=
210 & *(const big_register_t *)(&table[k + j * elem_bytes]);
/* idx ^ j is zero exactly when j equals idx; presumably word_is_zero()
 * turns that into an all-ones word mask (confirm). */
214 mask = word_is_zero(idx ^ j);
215 # ifndef HAS_UNALIGNED_STRUCTS
216 maskc = (unsigned char)mask;
/* Tier 2: runs only when the remainder after tier 1 holds at least one word_t. */
218 if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
219 for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
220 if (elem_bytes % sizeof(word_t)) {
221 /* input unaligned, output aligned */
222 # ifdef HAS_UNALIGNED_STRUCTS
/* Only the table read uses the packed wrapper; the store to out is a plain
 * word_t access, so out must be word aligned here. */
223 *(word_t *)(out + k) |=
225 & ((const unaligned_word_t *)
226 (&table[k + j * elem_bytes]))->unaligned;
/* Byte-wise fallback for one word_t worth of data. */
230 for (i = 0; i < sizeof(word_t); i++)
232 & ((unsigned char *)table)
233 [k + (j * elem_bytes) + i];
/* Direct word_t access on both sides. */
237 *(word_t *)(out + k) |=
239 & *(const word_t *)(&table[k + j * elem_bytes]);
/* Tier 3: byte tail, needed only when the size is not a multiple of word_t. */
244 if (elem_bytes % sizeof(word_t)) {
245 for (; k < elem_bytes; k += 1) {
/* The masked value is narrowed to unsigned char when stored. */
246 out[k] |= mask & table[k + j * elem_bytes];
253 * Constant-time a = mask ? bTrue : bFalse.
255 * The input and output must be at least as aligned as alignment_bytes
256 * or their size, whichever is smaller.
258 * Note that the output is not __restrict__, but if it overlaps either
259 * input, it must be equal and not partially overlap.
/*
 * Branch-free select: each output chunk is computed as
 * (mask & bTrue) | (~mask & bFalse), using big_register_t chunks first, then
 * word_t chunks, then single bytes.
 *
 * NOTE(review): the embedded line numbers skip (261 -> 266, 285 -> 287, ...),
 * so the middle parameters, the declarations of k and i, parts of some
 * expressions and the closing braces are not visible in this listing.
 * Confirm against the full header.
 */
261 static ossl_inline void constant_time_select_c448(void *a_,
266 size_t alignment_bytes)
/* Byte views of the operands; a void pointer cannot be indexed. */
268 unsigned char *a = (unsigned char *)a_;
269 const unsigned char *bTrue = (const unsigned char *)bTrue_;
270 const unsigned char *bFalse = (const unsigned char *)bFalse_;
/* mask converted by br_set_to_mask() for use on big_register_t chunks. */
272 big_register_t br_mask = br_set_to_mask(mask);
273 # ifndef HAS_UNALIGNED_STRUCTS
/* Byte-sized copy of the mask for the byte-wise fallback loops. */
274 unsigned char maskc = (unsigned char)mask;
/* Fold the size into the alignment so the single modulo tests below become
 * nonzero if either value is not a multiple of the chunk size; this assumes
 * the chunk sizes are powers of two (confirm). */
277 alignment_bytes |= elem_bytes;
/*
 * Tier 1: big_register_t chunks.
 * NOTE(review): sizeof yields size_t, so this bound is presumably computed
 * in unsigned arithmetic and would wrap if elem_bytes were smaller than
 * sizeof(big_register_t). Confirm that callers never pass less.
 */
279 for (k = 0; k <= elem_bytes - sizeof(big_register_t);
280 k += sizeof(big_register_t)) {
/* Unlike the other routines, the test uses the combined alignment value. */
281 if (alignment_bytes % sizeof(big_register_t)) {
283 # ifdef HAS_UNALIGNED_STRUCTS
/* All three operands go through the packed wrapper in this branch. */
284 ((unaligned_br_t *)(&a[k]))->unaligned =
285 (br_mask & ((const unaligned_br_t *)(&bTrue[k]))->unaligned)
287 & ((const unaligned_br_t *)(&bFalse[k]))->unaligned);
/* No packed wrappers available: select one byte at a time. */
291 for (i = 0; i < sizeof(big_register_t); i++)
292 a[k + i] = (maskc & ((unsigned char *)bTrue)[k + i])
293 | (~maskc & ((unsigned char *)bFalse)[k + i]);
/* Direct big_register_t access; relies on caller-provided alignment. */
297 *(big_register_t *) (a + k) =
298 (br_mask & *(const big_register_t *)(&bTrue[k]))
299 | (~br_mask & *(const big_register_t *)(&bFalse[k]));
/* Tier 2: runs only when the remainder after tier 1 holds at least one word_t. */
303 if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
304 for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
305 if (alignment_bytes % sizeof(word_t)) {
307 # ifdef HAS_UNALIGNED_STRUCTS
308 ((unaligned_word_t *) (&a[k]))->unaligned =
309 (mask & ((const unaligned_word_t *)(&bTrue[k]))->unaligned)
311 ((const unaligned_word_t *)(&bFalse[k]))->unaligned);
/* Byte-wise fallback for one word_t worth of data. */
315 for (i = 0; i < sizeof(word_t); i++)
316 a[k + i] = (maskc & ((unsigned char *)bTrue)[k + i])
317 | (~maskc & ((unsigned char *)bFalse)[k + i]);
/* Direct word_t access on all three operands. */
321 *(word_t *) (a + k) = (mask & *(const word_t *)(&bTrue[k]))
322 | (~mask & *(const word_t *)(&bFalse[k]));
/* Tier 3: byte tail, needed only when the size is not a multiple of word_t. */
327 if (elem_bytes % sizeof(word_t)) {
328 for (; k < elem_bytes; k += 1) {
/* The selected value is narrowed to unsigned char when stored. */
329 a[k] = (mask & bTrue[k]) | (~mask & bFalse[k]);
335 #undef HAS_UNALIGNED_STRUCTS
337 #endif /* __CONSTANT_TIME_H__ */