2 * Copyright 2017 The OpenSSL Project Authors. All Rights Reserved.
3 * Copyright 2014 Cryptography Research, Inc.
5 * Licensed under the OpenSSL license (the "License"). You may not use
6 * this file except in compliance with the License. You can obtain a copy
7 * in the file LICENSE in the source distribution or at
8 * https://www.openssl.org/source/license.html
10 * Originally written by Mike Hamburg
19 #include <openssl/e_os2.h>
20 #include "arch_intrinsics.h"
22 #include "curve448utils.h"
28 #ifndef _DEFAULT_SOURCE
29 #define _DEFAULT_SOURCE 1
34 #if defined(__ARM_NEON__)
36 #elif defined(__SSE2__)
37 #if !defined(__GNUC__) || defined(__clang__) || __GNUC__ >= 5 || (__GNUC__==4 && __GNUC_MINOR__ >= 4)
38 #include <immintrin.h>
40 #include <emmintrin.h>
44 #if (ARCH_WORD_BITS == 64)
45 typedef uint64_t word_t, mask_t;
46 typedef __uint128_t dword_t;
47 typedef int32_t hsword_t;
48 typedef int64_t sword_t;
49 typedef __int128_t dsword_t;
50 #elif (ARCH_WORD_BITS == 32)
51 typedef uint32_t word_t, mask_t;
52 typedef uint64_t dword_t;
53 typedef int16_t hsword_t;
54 typedef int32_t sword_t;
55 typedef int64_t dsword_t;
57 #error "For now, libdecaf only supports 32- and 64-bit architectures."
60 /* Scalar limbs are keyed off of the API word size instead of the arch word size. */
61 #if DECAF_WORD_BITS == 64
62 #define SC_LIMB(x) (x)
63 #elif DECAF_WORD_BITS == 32
/*
 * 32-bit API words: expand one scalar limb wider than 32 bits into two
 * comma-separated initializers, low 32 bits first, then the bits above them.
 * The parameter is parenthesized on every use so that a compound argument
 * such as (a + b) is cast and shifted as a whole rather than having the cast
 * bind to its first operand and the shift to its last.
 */
#define SC_LIMB(x) ((uint32_t)(x)),((x) >> 32)
66 #error "For now, libdecaf only supports 32- and 64-bit architectures."
70 typedef uint32x4_t vecmask_t;
71 #elif defined(__clang__)
72 typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2)));
73 typedef int64_t int64x2_t __attribute__((ext_vector_type(2)));
74 typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4)));
75 typedef int64_t int64x4_t __attribute__((ext_vector_type(4)));
76 typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4)));
77 typedef int32_t int32x4_t __attribute__((ext_vector_type(4)));
78 typedef uint32_t uint32x2_t __attribute__((ext_vector_type(2)));
79 typedef int32_t int32x2_t __attribute__((ext_vector_type(2)));
80 typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8)));
81 typedef int32_t int32x8_t __attribute__((ext_vector_type(8)));
82 typedef word_t vecmask_t __attribute__((ext_vector_type(4)));
83 #else /* GCC, hopefully? */
84 typedef uint64_t uint64x2_t __attribute__((vector_size(16)));
85 typedef int64_t int64x2_t __attribute__((vector_size(16)));
86 typedef uint64_t uint64x4_t __attribute__((vector_size(32)));
87 typedef int64_t int64x4_t __attribute__((vector_size(32)));
88 typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
89 typedef int32_t int32x4_t __attribute__((vector_size(16)));
90 typedef uint32_t uint32x2_t __attribute__((vector_size(8)));
91 typedef int32_t int32x2_t __attribute__((vector_size(8)));
92 typedef uint32_t uint32x8_t __attribute__((vector_size(32)));
93 typedef int32_t int32x8_t __attribute__((vector_size(32)));
94 typedef word_t vecmask_t __attribute__((vector_size(32)));
98 #define VECTOR_ALIGNED __attribute__((aligned(32)))
99 typedef uint32x8_t big_register_t;
100 typedef uint64x4_t uint64xn_t;
101 typedef uint32x8_t uint32xn_t;
103 static ossl_inline big_register_t
104 br_set_to_mask(mask_t x) {
105 uint32_t y = (uint32_t)x;
106 big_register_t ret = {y,y,y,y,y,y,y,y};
109 #elif defined(__SSE2__)
110 #define VECTOR_ALIGNED __attribute__((aligned(16)))
111 typedef uint32x4_t big_register_t;
112 typedef uint64x2_t uint64xn_t;
113 typedef uint32x4_t uint32xn_t;
115 static ossl_inline big_register_t
116 br_set_to_mask(mask_t x) {
118 big_register_t ret = {y,y,y,y};
121 #elif defined(__ARM_NEON__)
122 #define VECTOR_ALIGNED __attribute__((aligned(16)))
123 typedef uint32x4_t big_register_t;
124 typedef uint64x2_t uint64xn_t;
125 typedef uint32x4_t uint32xn_t;
127 static ossl_inline big_register_t
128 br_set_to_mask(mask_t x) {
129 return vdupq_n_u32(x);
131 #elif defined(_WIN64) || defined(__amd64__) || defined(__X86_64__) \
132 || defined(__aarch64__)
133 #define VECTOR_ALIGNED __attribute__((aligned(8)))
134 typedef uint64_t big_register_t, uint64xn_t;
136 typedef uint32_t uint32xn_t;
137 static ossl_inline big_register_t
138 br_set_to_mask(mask_t x) {
139 return (big_register_t)x;
142 #define VECTOR_ALIGNED __attribute__((aligned(4)))
143 typedef uint64_t uint64xn_t;
144 typedef uint32_t uint32xn_t;
145 typedef uint32_t big_register_t;
147 static ossl_inline big_register_t
148 br_set_to_mask(mask_t x) {
149 return (big_register_t)x;
153 #if defined(__AVX2__)
154 static ossl_inline big_register_t
155 br_is_zero(big_register_t x) {
156 return (big_register_t)(x == br_set_to_mask(0));
158 #elif defined(__SSE2__)
159 static ossl_inline big_register_t
160 br_is_zero(big_register_t x) {
161 return (big_register_t)_mm_cmpeq_epi32((__m128i)x, _mm_setzero_si128());
162 //return (big_register_t)(x == br_set_to_mask(0));
164 #elif defined(__ARM_NEON__)
165 static ossl_inline big_register_t
166 br_is_zero(big_register_t x) {
167 return vceqq_u32(x,x^x);
170 #define br_is_zero word_is_zero
173 /* PERF: vectorize vs unroll */
175 #if 100*__clang_major__ + __clang_minor__ > 305
176 #define UNROLL _Pragma("clang loop unroll(full)")
/*
 * The plan on booleans:
 *
 * The external interface uses decaf_bool_t, but this might be a different
 * size than our particular arch's word_t (and thus mask_t). Also, the caller
 * isn't guaranteed to pass it as nonzero. So bool_to_mask converts word sizes
 * and checks nonzero.
 *
 * On the flip side, mask_t is always -1 or 0, but it might be a different size
 * than decaf_bool_t.
 *
 * On the third hand, we have success vs boolean types, but that's handled in
 * common.h: it converts between decaf_bool_t and decaf_error_t.
 */
197 static ossl_inline decaf_bool_t mask_to_bool (mask_t m) {
198 return (decaf_sword_t)(sword_t)m;
201 static ossl_inline mask_t bool_to_mask (decaf_bool_t m) {
202 /* On most arches this will be optimized to a simple cast. */
206 unsigned int limit = sizeof(decaf_bool_t)/sizeof(mask_t);
207 if (limit < 1) limit = 1;
208 for (i=0; i<limit; i++) {
209 ret |= ~ word_is_zero(m >> (i*8*sizeof(word_t)));
214 static ossl_inline void ignore_result ( decaf_bool_t boo ) {
218 #endif /* __WORD_H__ */