crypto/ec/curve448/word.h

   1 /*
   2  * Copyright 2017-2018 The OpenSSL Project Authors. All Rights Reserved.
   3  * Copyright 2014 Cryptography Research, Inc.
   4  *
   5  * Licensed under the OpenSSL license (the "License").  You may not use
   6  * this file except in compliance with the License.  You can obtain a copy
   7  * in the file LICENSE in the source distribution or at
   8  * https://www.openssl.org/source/license.html
   9  *
  10  * Originally written by Mike Hamburg
  11  */
  12
  13 #ifndef __WORD_H__
  14 # define __WORD_H__
  15
  16 # include <string.h>
  17
  18 # include <assert.h>
  19 # include <openssl/e_os2.h>
  20 # include "arch_intrinsics.h"
  21
  22 # include "curve448utils.h"
  23 # include <stdlib.h>
  24
  25 # if defined(__ARM_NEON__)
  26 #  include <arm_neon.h>
  27 # elif defined(__SSE2__)
  28 #  if !defined(__GNUC__) || defined(__clang__) || __GNUC__ >= 5 \
  29       || (__GNUC__==4 && __GNUC_MINOR__ >= 4)
  30 #   include <immintrin.h>
  31 #  else
  32 #   include <emmintrin.h>
  33 #  endif
  34 # endif
  35
  36 # if (ARCH_WORD_BITS == 64)
  37 typedef uint64_t word_t, mask_t;
  38 typedef __uint128_t dword_t;
  39 typedef int32_t hsword_t;
  40 typedef int64_t sword_t;
  41 typedef __int128_t dsword_t;
  42 # elif (ARCH_WORD_BITS == 32)
  43 typedef uint32_t word_t, mask_t;
  44 typedef uint64_t dword_t;
  45 typedef int16_t hsword_t;
  46 typedef int32_t sword_t;
  47 typedef int64_t dsword_t;
  48 # else
  49 #  error "For now, we only support 32- and 64-bit architectures."
  50 # endif
  51
  52 /*
  53  * Scalar limbs are keyed off of the API word size instead of the arch word
  54  * size.
  55  */
  56 # if C448_WORD_BITS == 64
  57 #  define SC_LIMB(x) (x)
  58 # elif C448_WORD_BITS == 32
  59 #  define SC_LIMB(x) ((uint32_t)x),(x>>32)
  60 # else
  61 #  error "For now we only support 32- and 64-bit architectures."
  62 # endif
  63
  64 # ifdef __ARM_NEON__
  65 typedef uint32x4_t vecmask_t;
  66 # elif defined(__clang__)
  67 typedef uint64_t uint64x2_t __attribute__ ((ext_vector_type(2)));
  68 typedef int64_t int64x2_t __attribute__ ((ext_vector_type(2)));
  69 typedef uint64_t uint64x4_t __attribute__ ((ext_vector_type(4)));
  70 typedef int64_t int64x4_t __attribute__ ((ext_vector_type(4)));
  71 typedef uint32_t uint32x4_t __attribute__ ((ext_vector_type(4)));
  72 typedef int32_t int32x4_t __attribute__ ((ext_vector_type(4)));
  73 typedef uint32_t uint32x2_t __attribute__ ((ext_vector_type(2)));
  74 typedef int32_t int32x2_t __attribute__ ((ext_vector_type(2)));
  75 typedef uint32_t uint32x8_t __attribute__ ((ext_vector_type(8)));
  76 typedef int32_t int32x8_t __attribute__ ((ext_vector_type(8)));
  77 typedef word_t vecmask_t __attribute__ ((ext_vector_type(4)));
  78 # elif defined(__GNUC__) \
  79        && (__GNUC__ >= 4 || (__GNUC__== 3 && __GNUC_MINOR__ >= 1))
  80 typedef uint64_t uint64x2_t __attribute__ ((vector_size(16)));
  81 typedef int64_t int64x2_t __attribute__ ((vector_size(16)));
  82 typedef uint64_t uint64x4_t __attribute__ ((vector_size(32)));
  83 typedef int64_t int64x4_t __attribute__ ((vector_size(32)));
  84 typedef uint32_t uint32x4_t __attribute__ ((vector_size(16)));
  85 typedef int32_t int32x4_t __attribute__ ((vector_size(16)));
  86 typedef uint32_t uint32x2_t __attribute__ ((vector_size(8)));
  87 typedef int32_t int32x2_t __attribute__ ((vector_size(8)));
  88 typedef uint32_t uint32x8_t __attribute__ ((vector_size(32)));
  89 typedef int32_t int32x8_t __attribute__ ((vector_size(32)));
  90 typedef word_t vecmask_t __attribute__ ((vector_size(32)));
  91 # endif
  92
  93 # if defined(__AVX2__)
  94 #  define VECTOR_ALIGNED __attribute__((aligned(32)))
  95 typedef uint32x8_t big_register_t;
  96 typedef uint64x4_t uint64xn_t;
  97 typedef uint32x8_t uint32xn_t;
  98
  99 static ossl_inline big_register_t br_set_to_mask(mask_t x)
 100 {
 101     uint32_t y = (uint32_t)x;
 102     big_register_t ret = { y, y, y, y, y, y, y, y };
 103     return ret;
 104 }
 105 # elif defined(__SSE2__)
 106 #  define VECTOR_ALIGNED __attribute__((aligned(16)))
 107 typedef uint32x4_t big_register_t;
 108 typedef uint64x2_t uint64xn_t;
 109 typedef uint32x4_t uint32xn_t;
 110
 111 static ossl_inline big_register_t br_set_to_mask(mask_t x)
 112 {
 113     uint32_t y = x;
 114     big_register_t ret = { y, y, y, y };
 115     return ret;
 116 }
 117 # elif defined(__ARM_NEON__)
 118 #  define VECTOR_ALIGNED __attribute__((aligned(16)))
 119 typedef uint32x4_t big_register_t;
 120 typedef uint64x2_t uint64xn_t;
 121 typedef uint32x4_t uint32xn_t;
 122
 123 static ossl_inline big_register_t br_set_to_mask(mask_t x)
 124 {
 125     return vdupq_n_u32(x);
 126 }
 127 # elif !defined(_MSC_VER) \
 128        && (defined(_WIN64) || defined(__amd64__) || defined(__X86_64__) \
 129            || defined(__aarch64__))
 130 #  define VECTOR_ALIGNED __attribute__((aligned(8)))
 131 typedef uint64_t big_register_t, uint64xn_t;
 132
 133 typedef uint32_t uint32xn_t;
 134 static ossl_inline big_register_t br_set_to_mask(mask_t x)
 135 {
 136     return (big_register_t) x;
 137 }
 138 # else
 139 #  ifdef __GNUC__
 140 #   define VECTOR_ALIGNED __attribute__((aligned(4)))
 141 #  else
 142 /*
 143  * This shouldn't be a problem because a big_register_t isn't actually a vector
 144  * type anyway in this case.
 145  */
 146 #   define VECTOR_ALIGNED
 147 #  endif
 148 typedef uint64_t uint64xn_t;
 149 typedef uint32_t uint32xn_t;
 150 typedef uint32_t big_register_t;
 151
 152 static ossl_inline big_register_t br_set_to_mask(mask_t x)
 153 {
 154     return (big_register_t) x;
 155 }
 156 # endif
 157
 158 # if defined(__AVX2__)
 159 static ossl_inline big_register_t br_is_zero(big_register_t x)
 160 {
 161     return (big_register_t) (x == br_set_to_mask(0));
 162 }
 163 # elif defined(__SSE2__)
 164 static ossl_inline big_register_t br_is_zero(big_register_t x)
 165 {
 166     return (big_register_t) _mm_cmpeq_epi32((__m128i) x, _mm_setzero_si128());
 167 }
 168 # elif defined(__ARM_NEON__)
 169 static ossl_inline big_register_t br_is_zero(big_register_t x)
 170 {
 171     return vceqq_u32(x, x ^ x);
 172 }
 173 # else
/*
 * None of AVX2, SSE2 or NEON is in use, so big_register_t is a plain integer
 * type here and br_is_zero is just another name for word_is_zero (defined
 * elsewhere).
 */
#  define br_is_zero word_is_zero
 175 # endif
 176
 177 /* PERF: vectorize vs unroll */
 178 # ifdef __clang__
 179 #  if 100*__clang_major__ + __clang_minor__ > 305
 180 #   define UNROLL _Pragma("clang loop unroll(full)")
 181 #  endif
 182 # endif
 183
 184 # ifndef UNROLL
 185 #  define UNROLL
 186 # endif
 187
 188 /*
 189  * The plan on booleans: The external interface uses c448_bool_t, but this
 190  * might be a different size than our particular arch's word_t (and thus
 191  * mask_t).  Also, the caller isn't guaranteed to pass it as nonzero.  So
 192  * bool_to_mask converts word sizes and checks nonzero. On the flip side,
 193  * mask_t is always -1 or 0, but it might be a different size than
 194  * c448_bool_t. On the third hand, we have success vs boolean types, but
 195  * that's handled in common.h: it converts between c448_bool_t and
 196  * c448_error_t.
 197  */
 198 static ossl_inline c448_bool_t mask_to_bool(mask_t m)
 199 {
 200     return (c448_sword_t)(sword_t)m;
 201 }
 202
 203 static ossl_inline mask_t bool_to_mask(c448_bool_t m)
 204 {
 205     /* On most arches this will be optimized to a simple cast. */
 206     mask_t ret = 0;
 207     unsigned int i;
 208     unsigned int limit = sizeof(c448_bool_t) / sizeof(mask_t);
 209
 210     if (limit < 1)
 211         limit = 1;
 212     for (i = 0; i < limit; i++)
 213         ret |= ~word_is_zero(m >> (i * 8 * sizeof(word_t)));
 214
 215     return ret;
 216 }
 217
/*
 * Explicitly discard a c448_bool_t value.  The argument is cast to void and
 * nothing else is done with it.
 */
static ossl_inline void ignore_result(c448_bool_t boo)
{
    (void)boo;
}
 222
 223 #endif                          /* __WORD_H__ */