crypto/ec/curve448/word.h

   1 /* Copyright (c) 2014 Cryptography Research, Inc.
   2  * Released under the MIT License.  See LICENSE.txt for license information.
   3  */
   4
   5 #ifndef __WORD_H__
   6 #define __WORD_H__
   7
   8 #include <string.h>
   9
  10 #include <assert.h>
  11 #include <stdint.h>
  12 #include "arch_intrinsics.h"
  13
  14 #include "curve448utils.h"
  15
  16 #ifndef _BSD_SOURCE
  17 #define _BSD_SOURCE 1
  18 #endif
  19
  20 #ifndef _DEFAULT_SOURCE
  21 #define _DEFAULT_SOURCE 1
  22 #endif
  23
  24 #include "portable_endian.h"
  25
  26 #include <stdlib.h>
  27 #include <sys/types.h>
  28 #include <inttypes.h>
  29
  30 #if defined(__ARM_NEON__)
  31 #include <arm_neon.h>
  32 #elif defined(__SSE2__)
  33     #if !defined(__GNUC__) || __clang__ || __GNUC__ >= 5 || (__GNUC__==4 && __GNUC_MINOR__ >= 4)
  34         #include <immintrin.h>
  35     #else
  36         #include <emmintrin.h>
  37     #endif
  38 #endif
  39
  40 #if (ARCH_WORD_BITS == 64)
  41     typedef uint64_t word_t, mask_t;
  42     typedef __uint128_t dword_t;
  43     typedef int32_t hsword_t;
  44     typedef int64_t sword_t;
  45     typedef __int128_t dsword_t;
  46 #elif (ARCH_WORD_BITS == 32)
  47     typedef uint32_t word_t, mask_t;
  48     typedef uint64_t dword_t;
  49     typedef int16_t hsword_t;
  50     typedef int32_t sword_t;
  51     typedef int64_t dsword_t;
  52 #else
  53     #error "For now, libdecaf only supports 32- and 64-bit architectures."
  54 #endif
  55
  56 /* Scalar limbs are keyed off of the API word size instead of the arch word size. */
  57 #if DECAF_WORD_BITS == 64
  58     #define SC_LIMB(x) (x##ull)
  59 #elif DECAF_WORD_BITS == 32
  60     #define SC_LIMB(x) ((uint32_t)x##ull),(x##ull>>32)
  61 #else
  62     #error "For now, libdecaf only supports 32- and 64-bit architectures."
  63 #endif
  64
  65 #ifdef __ARM_NEON__
  66     typedef uint32x4_t vecmask_t;
  67 #elif __clang__
  68     typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2)));
  69     typedef int64_t  int64x2_t __attribute__((ext_vector_type(2)));
  70     typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4)));
  71     typedef int64_t  int64x4_t __attribute__((ext_vector_type(4)));
  72     typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4)));
  73     typedef int32_t  int32x4_t __attribute__((ext_vector_type(4)));
  74     typedef uint32_t uint32x2_t __attribute__((ext_vector_type(2)));
  75     typedef int32_t  int32x2_t __attribute__((ext_vector_type(2)));
  76     typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8)));
  77     typedef int32_t  int32x8_t __attribute__((ext_vector_type(8)));
  78     typedef word_t vecmask_t __attribute__((ext_vector_type(4)));
  79 #else /* GCC, hopefully? */
  80     typedef uint64_t uint64x2_t __attribute__((vector_size(16)));
  81     typedef int64_t  int64x2_t __attribute__((vector_size(16)));
  82     typedef uint64_t uint64x4_t __attribute__((vector_size(32)));
  83     typedef int64_t  int64x4_t __attribute__((vector_size(32)));
  84     typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
  85     typedef int32_t  int32x4_t __attribute__((vector_size(16)));
  86     typedef uint32_t uint32x2_t __attribute__((vector_size(8)));
  87     typedef int32_t  int32x2_t __attribute__((vector_size(8)));
  88     typedef uint32_t uint32x8_t __attribute__((vector_size(32)));
  89     typedef int32_t  int32x8_t __attribute__((vector_size(32)));
  90     typedef word_t vecmask_t __attribute__((vector_size(32)));
  91 #endif
  92
  93 #if __AVX2__
  94     #define VECTOR_ALIGNED __attribute__((aligned(32)))
  95     typedef uint32x8_t big_register_t;
  96     typedef uint64x4_t uint64xn_t;
  97     typedef uint32x8_t uint32xn_t;
  98
  99     static DECAF_INLINE big_register_t
 100     br_set_to_mask(mask_t x) {
 101         uint32_t y = (uint32_t)x;
 102         big_register_t ret = {y,y,y,y,y,y,y,y};
 103         return ret;
 104     }
 105 #elif __SSE2__
 106     #define VECTOR_ALIGNED __attribute__((aligned(16)))
 107     typedef uint32x4_t big_register_t;
 108     typedef uint64x2_t uint64xn_t;
 109     typedef uint32x4_t uint32xn_t;
 110
 111     static DECAF_INLINE big_register_t
 112     br_set_to_mask(mask_t x) {
 113         uint32_t y = x;
 114         big_register_t ret = {y,y,y,y};
 115         return ret;
 116     }
 117 #elif __ARM_NEON__
 118     #define VECTOR_ALIGNED __attribute__((aligned(16)))
 119     typedef uint32x4_t big_register_t;
 120     typedef uint64x2_t uint64xn_t;
 121     typedef uint32x4_t uint32xn_t;
 122
 123     static DECAF_INLINE big_register_t
 124     br_set_to_mask(mask_t x) {
 125         return vdupq_n_u32(x);
 126     }
 127 #elif _WIN64 || __amd64__ || __X86_64__ || __aarch64__
 128     #define VECTOR_ALIGNED __attribute__((aligned(8)))
 129     typedef uint64_t big_register_t, uint64xn_t;
 130
 131     typedef uint32_t uint32xn_t;
 132     static DECAF_INLINE big_register_t
 133     br_set_to_mask(mask_t x) {
 134         return (big_register_t)x;
 135     }
 136 #else
 137     #define VECTOR_ALIGNED __attribute__((aligned(4)))
 138     typedef uint64_t uint64xn_t;
 139     typedef uint32_t uint32xn_t;
 140     typedef uint32_t big_register_t;
 141
 142     static DECAF_INLINE big_register_t
 143     br_set_to_mask(mask_t x) {
 144         return (big_register_t)x;
 145     }
 146 #endif
 147
 148 #if __AVX2__
 149     static DECAF_INLINE big_register_t
 150     br_is_zero(big_register_t x) {
 151         return (big_register_t)(x == br_set_to_mask(0));
 152     }
 153 #elif __SSE2__
 154     static DECAF_INLINE big_register_t
 155     br_is_zero(big_register_t x) {
 156         return (big_register_t)_mm_cmpeq_epi32((__m128i)x, _mm_setzero_si128());
 157         //return (big_register_t)(x == br_set_to_mask(0));
 158     }
 159 #elif __ARM_NEON__
 160     static DECAF_INLINE big_register_t
 161     br_is_zero(big_register_t x) {
 162         return vceqq_u32(x,x^x);
 163     }
 164 #else
 165     #define br_is_zero word_is_zero
 166 #endif
 167
 168 /* PERF: vectorize vs unroll */
 169 #ifdef __clang__
 170 #if 100*__clang_major__ + __clang_minor__ > 305
 171 #define UNROLL _Pragma("clang loop unroll(full)")
 172 #endif
 173 #endif
 174
 175 #ifndef UNROLL
 176 #define UNROLL
 177 #endif
 178
 179 /* The plan on booleans:
 180  *
 181  * The external interface uses decaf_bool_t, but this might be a different
 * size than our particular arch's word_t (and thus mask_t).  Also, the caller
 * isn't guaranteed to pass "true" as an all-ones value; any nonzero value
 * counts as true.  So bool_to_mask converts between word sizes and maps every
 * nonzero input to an all-ones mask.
 185  *
 186  * On the flip side, mask_t is always -1 or 0, but it might be a different size
 187  * than decaf_bool_t.
 188  *
 189  * On the third hand, we have success vs boolean types, but that's handled in
 190  * common.h: it converts between decaf_bool_t and decaf_error_t.
 191  */
 192 static DECAF_INLINE decaf_bool_t mask_to_bool (mask_t m) {
 193     return (decaf_sword_t)(sword_t)m;
 194 }
 195
 196 static DECAF_INLINE mask_t bool_to_mask (decaf_bool_t m) {
 197     /* On most arches this will be optimized to a simple cast. */
 198     mask_t ret = 0;
 199     unsigned int limit = sizeof(decaf_bool_t)/sizeof(mask_t);
 200     if (limit < 1) limit = 1;
 201     for (unsigned int i=0; i<limit; i++) {
 202         ret |= ~ word_is_zero(m >> (i*8*sizeof(word_t)));
 203     }
 204     return ret;
 205 }
 206
 207 static DECAF_INLINE void ignore_result ( decaf_bool_t boo ) {
 208     (void)boo;
 209 }
 210
 211 #endif /* __WORD_H__ */