* Originally written by Mike Hamburg
*/
-#ifndef __WORD_H__
-# define __WORD_H__
+#ifndef HEADER_WORD_H
+# define HEADER_WORD_H
# include <string.h>
-
# include <assert.h>
+# include <stdlib.h>
# include <openssl/e_os2.h>
# include "arch_intrinsics.h"
-
# include "curve448utils.h"
-# include <stdlib.h>
# if defined(__ARM_NEON__)
# include <arm_neon.h>
# if C448_WORD_BITS == 64
# define SC_LIMB(x) (x)
# elif C448_WORD_BITS == 32
-# define SC_LIMB(x) ((uint32_t)x),(x>>32)
+# define SC_LIMB(x) ((uint32_t)(x)),((x) >> 32)
# else
# error "For now we only support 32- and 64-bit architectures."
# endif
-# ifdef __ARM_NEON__
-typedef uint32x4_t vecmask_t;
-# elif defined(__clang__)
-typedef uint64_t uint64x2_t __attribute__ ((ext_vector_type(2)));
-typedef int64_t int64x2_t __attribute__ ((ext_vector_type(2)));
-typedef uint64_t uint64x4_t __attribute__ ((ext_vector_type(4)));
-typedef int64_t int64x4_t __attribute__ ((ext_vector_type(4)));
-typedef uint32_t uint32x4_t __attribute__ ((ext_vector_type(4)));
-typedef int32_t int32x4_t __attribute__ ((ext_vector_type(4)));
-typedef uint32_t uint32x2_t __attribute__ ((ext_vector_type(2)));
-typedef int32_t int32x2_t __attribute__ ((ext_vector_type(2)));
-typedef uint32_t uint32x8_t __attribute__ ((ext_vector_type(8)));
-typedef int32_t int32x8_t __attribute__ ((ext_vector_type(8)));
-typedef word_t vecmask_t __attribute__ ((ext_vector_type(4)));
-# elif defined(__GNUC__) \
- && (__GNUC__ >= 4 || (__GNUC__== 3 && __GNUC_MINOR__ >= 1))
-typedef uint64_t uint64x2_t __attribute__ ((vector_size(16)));
-typedef int64_t int64x2_t __attribute__ ((vector_size(16)));
-typedef uint64_t uint64x4_t __attribute__ ((vector_size(32)));
-typedef int64_t int64x4_t __attribute__ ((vector_size(32)));
-typedef uint32_t uint32x4_t __attribute__ ((vector_size(16)));
-typedef int32_t int32x4_t __attribute__ ((vector_size(16)));
-typedef uint32_t uint32x2_t __attribute__ ((vector_size(8)));
-typedef int32_t int32x2_t __attribute__ ((vector_size(8)));
-typedef uint32_t uint32x8_t __attribute__ ((vector_size(32)));
-typedef int32_t int32x8_t __attribute__ ((vector_size(32)));
-typedef word_t vecmask_t __attribute__ ((vector_size(32)));
-# endif
-
-# if defined(__AVX2__)
-# define VECTOR_ALIGNED __attribute__((aligned(32)))
-typedef uint32x8_t big_register_t;
-typedef uint64x4_t uint64xn_t;
-typedef uint32x8_t uint32xn_t;
-
-static ossl_inline big_register_t br_set_to_mask(mask_t x)
-{
- uint32_t y = (uint32_t)x;
- big_register_t ret = { y, y, y, y, y, y, y, y };
- return ret;
-}
-# elif defined(__SSE2__)
-# define VECTOR_ALIGNED __attribute__((aligned(16)))
-typedef uint32x4_t big_register_t;
-typedef uint64x2_t uint64xn_t;
-typedef uint32x4_t uint32xn_t;
-
-static ossl_inline big_register_t br_set_to_mask(mask_t x)
-{
- uint32_t y = x;
- big_register_t ret = { y, y, y, y };
- return ret;
-}
-# elif defined(__ARM_NEON__)
-# define VECTOR_ALIGNED __attribute__((aligned(16)))
-typedef uint32x4_t big_register_t;
-typedef uint64x2_t uint64xn_t;
-typedef uint32x4_t uint32xn_t;
-
-static ossl_inline big_register_t br_set_to_mask(mask_t x)
-{
- return vdupq_n_u32(x);
-}
-# elif !defined(_MSC_VER) \
- && (defined(_WIN64) || defined(__amd64__) || defined(__X86_64__) \
- || defined(__aarch64__))
-# define VECTOR_ALIGNED __attribute__((aligned(8)))
-typedef uint64_t big_register_t, uint64xn_t;
-
-typedef uint32_t uint32xn_t;
-static ossl_inline big_register_t br_set_to_mask(mask_t x)
-{
- return (big_register_t) x;
-}
-# else
-# ifdef __GNUC__
-# define VECTOR_ALIGNED __attribute__((aligned(4)))
-# else
-/*
- * This shouldn't be a problem because a big_register_t isn't actually a vector
- * type anyway in this case.
- */
-# define VECTOR_ALIGNED
-# endif
-typedef uint64_t uint64xn_t;
-typedef uint32_t uint32xn_t;
-typedef uint32_t big_register_t;
-
-static ossl_inline big_register_t br_set_to_mask(mask_t x)
-{
- return (big_register_t) x;
-}
-# endif
-
-# if defined(__AVX2__)
-static ossl_inline big_register_t br_is_zero(big_register_t x)
-{
- return (big_register_t) (x == br_set_to_mask(0));
-}
-# elif defined(__SSE2__)
-static ossl_inline big_register_t br_is_zero(big_register_t x)
-{
- return (big_register_t) _mm_cmpeq_epi32((__m128i) x, _mm_setzero_si128());
-}
-# elif defined(__ARM_NEON__)
-static ossl_inline big_register_t br_is_zero(big_register_t x)
-{
- return vceqq_u32(x, x ^ x);
-}
-# else
-# define br_is_zero word_is_zero
-# endif
-
-/* PERF: vectorize vs unroll */
-# ifdef __clang__
-# if 100*__clang_major__ + __clang_minor__ > 305
-# define UNROLL _Pragma("clang loop unroll(full)")
-# endif
-# endif
-
-# ifndef UNROLL
-# define UNROLL
-# endif
-
/*
* The plan on booleans: The external interface uses c448_bool_t, but this
* might be a different size than our particular arch's word_t (and thus
return ret;
}
-static ossl_inline void ignore_result(c448_bool_t boo)
-{
- (void)boo;
-}
-
-#endif /* __WORD_H__ */
+#endif /* HEADER_WORD_H */