2 * Copyright 2017 The OpenSSL Project Authors. All Rights Reserved.
3 * Copyright 2014 Cryptography Research, Inc.
5 * Licensed under the OpenSSL license (the "License"). You may not use
6 * this file except in compliance with the License. You can obtain a copy
7 * in the file LICENSE in the source distribution or at
8 * https://www.openssl.org/source/license.html
10 * Originally written by Mike Hamburg
13 #ifndef __CONSTANT_TIME_H__
14 # define __CONSTANT_TIME_H__ 1
20 * Constant-time operations on hopefully-compile-time-sized memory
21 * regions. Needed for flexibility / demagication: not all fields
22 * have sizes which are multiples of the vector width, necessitating
23 * a change from the Ed448 versions.
25 * These routines would be much simpler to define at the byte level,
26 * but if not vectorized they would be a significant fraction of the
27 * runtime. E.g., on NEON-less ARM, constant_time_lookup is about 15% of
28 * signing time, vs 6% on Haswell with its fancy AVX2 vectors.
30 * If the compiler could do a good job of autovectorizing the code,
31 * we could just leave it with the byte definition. But that's unlikely
32 * on most deployed compilers, especially if you consider that pcmpeq[size]
33 * is much faster than moving a scalar to the vector unit (which is what
34 * a naive autovectorizer will do with constant_time_lookup on Intel).
36 * Instead, we're putting our trust in the loop unroller and unswitcher.
40 * Unaligned big (vector?) register.
/*
 * NOTE(review): the line that opens this struct definition is not present in
 * this view. What is visible is a single member named unaligned, of type
 * big_register_t, inside a struct declared with __attribute__ ((packed)) and
 * given the name unaligned_br_t. The functions in this file read and write
 * through this type in the branches taken when a size is not a multiple of
 * sizeof(big_register_t).
 */
43 big_register_t unaligned;
44 } __attribute__ ((packed)) unaligned_br_t;
47 * Unaligned word register, for architectures where that matters.
/*
 * NOTE(review): only the closing line of this definition is present in this
 * view; the opening line and the member list are not shown. Other code in
 * this file accesses a member named unaligned through unaligned_word_t and
 * combines it with word_t values, so the member is presumably a word_t.
 * TODO confirm against the full file.
 */
51 } __attribute__ ((packed)) unaligned_word_t;
54 * Constant-time conditional swap.
56 * If doswap, then swap elem_bytes bytes between *a and *b.
58 * *a and *b must not alias. Also, they must be at least as aligned
59 * as their sizes, if the CPU cares about that sort of thing.
/*
 * Exchanges the contents of the buffers a_ and b_ by XORing a common value
 * into both, working in three tiers: big_register_t sized chunks, then
 * word_t sized chunks, then single bytes.
 *
 * NOTE(review): this view of the function is incomplete. The remaining
 * parameters (elem_bytes and doswap are used but not declared here), the
 * declaration of k, the closing braces and several statements are not shown.
 * The comments below describe only what the visible lines establish.
 *
 * NOTE(review): the local name xor is a macro in the standard C header
 * iso646.h and an alternative token in C++, so this code cannot be compiled
 * as C++ or after that header has been included. Confirm that this is
 * acceptable for every includer.
 */
61 static ossl_inline void constant_time_cond_swap(void *__restrict__ a_,
62 void *__restrict__ b_,
/* Byte views of the two buffers, so that they can be indexed by k. */
67 unsigned char *a = (unsigned char *)a_;
68 unsigned char *b = (unsigned char *)b_;
/*
 * NOTE(review): br_mask is not referenced by any other visible line.
 * Presumably it is ANDed into xor on lines that are not shown, which is what
 * would make the swap conditional. TODO confirm.
 */
70 big_register_t br_mask = br_set_to_mask(doswap);
/*
 * Tier 1: big_register_t sized chunks.
 * NOTE(review): sizeof yields an unsigned size_t, so if elem_bytes is
 * smaller than sizeof(big_register_t) the subtraction in the loop condition
 * wraps around to a very large value and the body would run past the end of
 * both buffers. The type of elem_bytes is not visible here. Confirm that
 * callers never pass such a size, or rewrite the condition as
 * k + sizeof(big_register_t) <= elem_bytes.
 */
71 for (k = 0; k <= elem_bytes - sizeof(big_register_t);
72 k += sizeof(big_register_t)) {
/* Size is not a multiple of the register size: go through the packed type. */
73 if (elem_bytes % sizeof(big_register_t)) {
75 big_register_t xor = ((unaligned_br_t *) (&a[k]))->unaligned
76 ^ ((unaligned_br_t *) (&b[k]))->unaligned;
/*
 * XORing the same value into both sides exchanges them when that value is
 * a ^ b and leaves them unchanged when it is zero.
 */
79 ((unaligned_br_t *)(&a[k]))->unaligned ^= xor;
80 ((unaligned_br_t *)(&b[k]))->unaligned ^= xor;
/*
 * NOTE(review): a second declaration of xor follows. Presumably it sits in
 * an else branch that is not shown and handles the aligned case through
 * plain big_register_t pointers.
 */
83 big_register_t xor = *((big_register_t *) (&a[k]))
84 ^ *((big_register_t *) (&b[k]));
86 *((big_register_t *)(&a[k])) ^= xor;
87 *((big_register_t *)(&b[k])) ^= xor;
/*
 * Tier 2: word_t sized chunks, entered only when the remainder left after
 * the big register tier is at least one word. k carries on from tier 1.
 */
91 if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
92 for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
93 if (elem_bytes % sizeof(word_t)) {
95 word_t xor = ((unaligned_word_t *)(&a[k]))->unaligned
96 ^ ((unaligned_word_t *)(&b[k]))->unaligned;
99 ((unaligned_word_t *)(&a[k]))->unaligned ^= xor;
100 ((unaligned_word_t *)(&b[k]))->unaligned ^= xor;
103 word_t xor = *((word_t *) (&a[k])) ^ *((word_t *) (&b[k]));
105 *((word_t *)(&a[k])) ^= xor;
106 *((word_t *)(&b[k])) ^= xor;
/* Tier 3: remaining bytes when the size is not a multiple of the word size. */
111 if (elem_bytes % sizeof(word_t)) {
112 for (; k < elem_bytes; k += 1) {
113 unsigned char xor = a[k] ^ b[k];
123 * Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
125 * The table must be at least as aligned as elem_bytes. The output must be word aligned,
126 * and if the input size is vector aligned it must also be vector aligned.
128 * The table and output must not alias.
/*
 * Copies one elem_bytes sized entry of table_ into out_ by visiting every
 * entry j in [0, n_table) and ORing a masked copy of it into an output that
 * was first zeroed. No visible line leaves the loop over j early.
 *
 * NOTE(review): this view of the function is incomplete. The remaining
 * parameters (table_, elem_bytes, n_table and idx are used but not declared
 * here), the declarations of j, k and mask, the left operands of several
 * and-expressions and the closing braces are not shown. The comments below
 * describe only what the visible lines establish.
 */
130 static ossl_inline void constant_time_lookup(void *__restrict__ out_,
/*
 * big_i is decremented by big_one once per table entry and tested with
 * br_is_zero below. Presumably it reaches zero exactly at j == idx; the
 * helpers are defined elsewhere. TODO confirm.
 */
136 big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
138 /* Can't do pointer arithmetic on void * */
139 unsigned char *out = (unsigned char *)out_;
140 const unsigned char *table = (const unsigned char *)table_;
/* Start from all zero bits so that the |= accumulation below is a copy. */
143 memset(out, 0, elem_bytes);
144 for (j = 0; j < n_table; j++, big_i -= big_one) {
145 big_register_t br_mask = br_is_zero(big_i);
/*
 * Tier 1: big_register_t sized chunks of entry j, which starts at byte
 * offset j * elem_bytes in the table.
 * NOTE(review): sizeof yields an unsigned size_t, so if elem_bytes is
 * smaller than sizeof(big_register_t) the subtraction in the loop condition
 * wraps around and the body would access memory past the end of out and of
 * the table entry. Confirm that callers never pass such a size, or rewrite
 * the condition as k + sizeof(big_register_t) <= elem_bytes.
 */
148 for (k = 0; k <= elem_bytes - sizeof(big_register_t);
149 k += sizeof(big_register_t)) {
150 if (elem_bytes % sizeof(big_register_t)) {
/*
 * NOTE(review): the left operand of the and-expression below is on a line
 * that is not shown; presumably it is br_mask.
 */
152 ((unaligned_br_t *)(out + k))->unaligned |=
154 & ((const unaligned_br_t *)
155 (&table[k + j * elem_bytes]))->unaligned;
158 *(big_register_t *)(out + k) |=
160 & *(const big_register_t *)(&table[k + j * elem_bytes]);
/*
 * Word and byte sized mask for the remaining tiers. idx ^ j is zero only
 * when j equals idx; word_is_zero is defined elsewhere and presumably maps
 * zero to an all ones mask. TODO confirm.
 */
164 mask = word_is_zero(idx ^ j);
/* Tier 2: word_t sized chunks; k carries on from tier 1. */
165 if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
166 for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
167 if (elem_bytes % sizeof(word_t)) {
168 /* input unaligned, output aligned */
169 *(word_t *)(out + k) |=
171 & ((const unaligned_word_t *)
172 (&table[k + j * elem_bytes]))->unaligned;
175 *(word_t *)(out + k) |=
177 & *(const word_t *)(&table[k + j * elem_bytes]);
/* Tier 3: remaining bytes when the size is not a multiple of the word size. */
182 if (elem_bytes % sizeof(word_t)) {
183 for (; k < elem_bytes; k += 1) {
184 out[k] |= mask & table[k + j * elem_bytes];
191 * Constant-time a = mask ? bTrue : bFalse.
193 * The input and output must be at least as aligned as alignment_bytes
194 * or their size, whichever is smaller.
196 * Note that the output is not __restrict__, but if it overlaps either
197 * input, it must be equal and not partially overlap.
/*
 * Writes to a_ a bitwise blend of bTrue_ and bFalse_: bits of bTrue_ where
 * the mask has bits set and bits of bFalse_ where it does not. The work is
 * done in three tiers: big_register_t sized chunks, then word_t sized
 * chunks, then single bytes.
 *
 * NOTE(review): this view of the function is incomplete. The remaining
 * parameters (bTrue_, bFalse_, elem_bytes and mask are used but not declared
 * here), the declaration of k, parts of two expressions and the closing
 * braces are not shown. The comments below describe only what the visible
 * lines establish.
 */
199 static ossl_inline void constant_time_select(void *a_,
204 size_t alignment_bytes)
206 unsigned char *a = (unsigned char *)a_;
207 const unsigned char *bTrue = (const unsigned char *)bTrue_;
208 const unsigned char *bFalse = (const unsigned char *)bFalse_;
210 big_register_t br_mask = br_set_to_mask(mask);
/*
 * Fold the size into the alignment so that one remainder test below is
 * non-zero when either value has low bits set. Presumably this relies on
 * the register and word sizes being powers of two. TODO confirm.
 */
212 alignment_bytes |= elem_bytes;
/*
 * Tier 1: big_register_t sized chunks.
 * NOTE(review): sizeof yields an unsigned size_t, so if elem_bytes is
 * smaller than sizeof(big_register_t) the subtraction in the loop condition
 * wraps around and the body would access memory past the end of all three
 * buffers. Confirm that callers never pass such a size, or rewrite the
 * condition as k + sizeof(big_register_t) <= elem_bytes.
 */
214 for (k = 0; k <= elem_bytes - sizeof(big_register_t);
215 k += sizeof(big_register_t)) {
/* Not register aligned: go through the packed type. */
216 if (alignment_bytes % sizeof(big_register_t)) {
218 ((unaligned_br_t *)(&a[k]))->unaligned =
219 (br_mask & ((const unaligned_br_t *)(&bTrue[k]))->unaligned)
/*
 * NOTE(review): the start of the second operand is on a line that is not
 * shown; by analogy with the aligned expression below it is presumably an
 * OR with the complement of br_mask.
 */
221 & ((const unaligned_br_t *)(&bFalse[k]))->unaligned);
224 *(big_register_t *) (a + k) =
225 (br_mask & *(const big_register_t *)(&bTrue[k]))
226 | (~br_mask & *(const big_register_t *)(&bFalse[k]));
/* Tier 2: word_t sized chunks; k carries on from tier 1. */
230 if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
231 for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
232 if (alignment_bytes % sizeof(word_t)) {
234 ((unaligned_word_t *) (&a[k]))->unaligned =
235 (mask & ((const unaligned_word_t *)(&bTrue[k]))->unaligned)
237 ((const unaligned_word_t *)(&bFalse[k]))->unaligned);
240 *(word_t *) (a + k) = (mask & *(const word_t *)(&bTrue[k]))
241 | (~mask & *(const word_t *)(&bFalse[k]));
/*
 * Tier 3: remaining bytes. The result is bTrue[k] when every bit of mask is
 * set and bFalse[k] when mask is zero.
 */
246 if (elem_bytes % sizeof(word_t)) {
247 for (; k < elem_bytes; k += 1) {
248 a[k] = (mask & bTrue[k]) | (~mask & bFalse[k]);
253 #endif /* __CONSTANT_TIME_H__ */