X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fec%2Fcurve448%2Farch_x86_64%2Farch_intrinsics.h;h=cca3f81c17ee78f87b7700aade1a722f7ca25077;hp=8fcf2c8dd4a5c5602d99c6b45755e3940eaf185c;hb=001a0934191319f4617c02eb2b2857dd80e7464a;hpb=abcd22bf621b25e5db724b0ad9bcb4bcc189b1d3 diff --git a/crypto/ec/curve448/arch_x86_64/arch_intrinsics.h b/crypto/ec/curve448/arch_x86_64/arch_intrinsics.h index 8fcf2c8dd4..cca3f81c17 100644 --- a/crypto/ec/curve448/arch_x86_64/arch_intrinsics.h +++ b/crypto/ec/curve448/arch_x86_64/arch_intrinsics.h @@ -1,17 +1,26 @@ -/* Copyright (c) 2014-2016 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. +/* + * Copyright 2017 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2014-2016 Cryptography Research, Inc. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + * + * Originally written by Mike Hamburg */ - #ifndef __ARCH_X86_64_ARCH_INTRINSICS_H__ #define __ARCH_X86_64_ARCH_INTRINSICS_H__ #define ARCH_WORD_BITS 64 -#include +#include /* FUTURE: autogenerate */ -static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b) { - uint64_t c,d; +static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b) +{ + uint64_t c, d; + #ifndef __BMI2__ __asm__ volatile ("movq %[a], %%rax;" @@ -27,11 +36,13 @@ static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b) { : [b]"m"(*b), [a]"m"(*a) : "rdx"); #endif - return (((__uint128_t)(d))<<64) | c; + return (((__uint128_t)(d)) << 64) | c; } -static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b) { - uint64_t c,d; +static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b) +{ + uint64_t c, d; + #ifndef __BMI2__ __asm__ volatile ("movq %[a], %%rax;" @@ -45,11 +56,13 @@ static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b) { : [c]"=r"(c), [d]"=r"(d) : [b]"m"(*b), [a]"d"(a)); #endif - return (((__uint128_t)(d))<<64) | c; + return (((__uint128_t)(d)) << 64) | c; } -static __inline__ __uint128_t widemul_rr(uint64_t a, uint64_t b) { - uint64_t c,d; +static __inline__ __uint128_t widemul_rr(uint64_t a, uint64_t b) +{ + uint64_t c, d; + #ifndef __BMI2__ __asm__ volatile ("mulq %[b];" @@ -62,11 +75,13 @@ static __inline__ __uint128_t widemul_rr(uint64_t a, uint64_t b) { : [c]"=r"(c), [d]"=r"(d) : [b]"r"(b), [a]"d"(a)); #endif - return (((__uint128_t)(d))<<64) | c; + return (((__uint128_t)(d)) << 64) | c; } -static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b) { - uint64_t c,d; +static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b) +{ + uint64_t c, d; + #ifndef __BMI2__ __asm__ volatile ("movq %[a], %%rax; " @@ -84,11 +99,13 @@ static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b) { : [b]"m"(*b), [a]"m"(*a) : "rdx"); #endif - return (((__uint128_t)(d))<<64) | c; + return (((__uint128_t)(d)) << 64) | c; } -static __inline__ void mac(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; +static __inline__ void mac(__uint128_t *acc, const uint64_t *a, + const uint64_t *b) +{ + uint64_t lo = *acc, hi = *acc >> 64; #ifdef __BMI2__ uint64_t c,d; @@ -111,12 +128,14 @@ static __inline__ void mac(__uint128_t *acc, const uint64_t *a, const uint64_t * : "rax", "rdx", "cc"); #endif - *acc = (((__uint128_t)(hi))<<64) | lo; + *acc = (((__uint128_t)(hi)) << 64) | lo; } -static __inline__ void macac(__uint128_t *acc, __uint128_t *acc2, const uint64_t *a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; - uint64_t lo2 = *acc2, hi2 = *acc2>>64; +static __inline__ void macac(__uint128_t *acc, __uint128_t *acc2, + const uint64_t *a, const uint64_t *b) +{ + uint64_t lo = *acc, hi = *acc >> 64; + uint64_t lo2 = *acc2, hi2 = *acc2 >> 64; #ifdef __BMI2__ uint64_t c,d; @@ -143,12 +162,13 @@ static __inline__ void macac(__uint128_t *acc, __uint128_t *acc2, const uint64_t : "rax", "rdx", "cc"); #endif - *acc = (((__uint128_t)(hi))<<64) | lo; - *acc2 = (((__uint128_t)(hi2))<<64) | lo2; + *acc = (((__uint128_t)(hi)) << 64) | lo; + *acc2 = (((__uint128_t)(hi2)) << 64) | lo2; } -static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; +static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b) +{ + uint64_t lo = *acc, hi = *acc >> 64; #ifdef __BMI2__ uint64_t c,d; @@ -170,11 +190,12 @@ static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b) { : "rax", "rdx", "cc"); #endif - *acc = (((__uint128_t)(hi))<<64) | lo; + *acc = (((__uint128_t)(hi)) << 64) | lo; } -static __inline__ void mac_rr(__uint128_t *acc, uint64_t a, const uint64_t b) { - uint64_t lo = *acc, hi = *acc>>64; +static __inline__ void mac_rr(__uint128_t *acc, uint64_t a, const uint64_t b) +{ + uint64_t lo = *acc, hi = *acc >> 64; #ifdef __BMI2__ uint64_t c,d; @@ -195,11 +216,13 @@ static __inline__ void mac_rr(__uint128_t *acc, uint64_t a, const uint64_t b) { : "rdx", "cc"); #endif - *acc = (((__uint128_t)(hi))<<64) | lo; + *acc = (((__uint128_t)(hi)) << 64) | lo; } -static __inline__ void mac2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; +static __inline__ void mac2(__uint128_t *acc, const uint64_t *a, + const uint64_t *b) +{ + uint64_t lo = *acc, hi = *acc >> 64; #ifdef __BMI2__ uint64_t c,d; @@ -224,11 +247,14 @@ static __inline__ void mac2(__uint128_t *acc, const uint64_t *a, const uint64_t : "rax", "rdx", "cc"); #endif - *acc = (((__uint128_t)(hi))<<64) | lo; + *acc = (((__uint128_t)(hi)) << 64) | lo; } -static __inline__ void msb(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; +static __inline__ void msb(__uint128_t *acc, const uint64_t *a, + const uint64_t *b) +{ + uint64_t lo = *acc, hi = *acc >> 64; + #ifdef __BMI2__ uint64_t c,d; __asm__ volatile @@ -249,11 +275,14 @@ static __inline__ void msb(__uint128_t *acc, const uint64_t *a, const uint64_t * : [b]"m"(*b), [a]"m"(*a) : "rax", "rdx", "cc"); #endif - *acc = (((__uint128_t)(hi))<<64) | lo; + *acc = (((__uint128_t)(hi)) << 64) | lo; } -static __inline__ void msb2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; +static __inline__ void msb2(__uint128_t *acc, const uint64_t *a, + const uint64_t *b) +{ + uint64_t lo = *acc, hi = *acc >> 64; + #ifdef __BMI2__ uint64_t c,d; __asm__ volatile @@ -280,8 +309,10 @@ static __inline__ void msb2(__uint128_t *acc, const uint64_t *a, const uint64_t } -static __inline__ void mrs(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { - uint64_t c,d, lo = *acc, hi = *acc>>64; +static __inline__ void mrs(__uint128_t *acc, const uint64_t *a, + const uint64_t *b) +{ + uint64_t c,d, lo = *acc, hi = *acc >> 64; __asm__ volatile ("movq %[a], %%rdx; " "mulx %[b], %[c], %[d]; " @@ -290,16 +321,18 @@ static __inline__ void mrs(__uint128_t *acc, const uint64_t *a, const uint64_t * : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) : [b]"m"(*b), [a]"m"(*a) : "rdx", "cc"); - *acc = (((__uint128_t)(d))<<64) | c; + *acc = (((__uint128_t)(d)) << 64) | c; } -static __inline__ uint64_t word_is_zero(uint64_t x) { +static __inline__ uint64_t word_is_zero(uint64_t x) +{ __asm__ volatile("neg %0; sbb %0, %0;" : "+r"(x)); return ~x; } -static inline uint64_t shrld(__uint128_t x, int n) { - return x>>n; +static inline uint64_t shrld(__uint128_t x, int n) +{ + return x >> n; } #endif /* __ARCH_X86_64_ARCH_INTRINSICS_H__ */