X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fec%2Fcurve448%2Farch_x86_64%2Ff_impl.c;h=a1c9dbd12259727680e592c831bc1e2177cd1388;hp=1e1d76d6170179bbf7b89821a0e0e3eb7e61a73b;hb=205fd6388175704bd7597dbfb571c84f868ce6da;hpb=abcd22bf621b25e5db724b0ad9bcb4bcc189b1d3 diff --git a/crypto/ec/curve448/arch_x86_64/f_impl.c b/crypto/ec/curve448/arch_x86_64/f_impl.c index 1e1d76d617..a1c9dbd122 100644 --- a/crypto/ec/curve448/arch_x86_64/f_impl.c +++ b/crypto/ec/curve448/arch_x86_64/f_impl.c @@ -1,35 +1,45 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. +/* + * Copyright 2017 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2014 Cryptography Research, Inc. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + * + * Originally written by Mike Hamburg */ #include "f_field.h" -void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { +void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs) +{ const uint64_t *a = as->limb, *b = bs->limb; uint64_t *c = cs->limb; __uint128_t accum0 = 0, accum1 = 0, accum2; - uint64_t mask = (1ull<<56) - 1; + uint64_t mask = (1ull << 56) - 1; uint64_t aa[4] VECTOR_ALIGNED, bb[4] VECTOR_ALIGNED, bbb[4] VECTOR_ALIGNED; /* For some reason clang doesn't vectorize this without prompting? */ unsigned int i; - for (i=0; i>= 56; accum1 >>= 56; - - mac(&accum0, &aa[1],&bb[3]); + + mac(&accum0, &aa[1], &bb[3]); mac(&accum1, &a[5], &b[7]); mac(&accum0, &aa[2], &bb[2]); mac(&accum1, &a[6], &b[6]); mac(&accum0, &aa[3], &bb[1]); accum1 += accum0; - accum2 = widemul(&a[0],&b[0]); + accum2 = widemul(&a[0], &b[0]); accum1 -= accum2; accum0 += accum2; - + msb(&accum0, &a[1], &b[3]); msb(&accum0, &a[2], &b[2]); mac(&accum1, &a[7], &b[5]); @@ -76,7 +86,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { accum0 >>= 56; accum1 >>= 56; - accum2 = widemul(&a[2],&b[7]); + accum2 = widemul(&a[2], &b[7]); mac(&accum0, &a[6], &bb[3]); mac(&accum1, &aa[2], &bbb[3]); @@ -84,7 +94,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { mac(&accum0, &a[7], &bb[2]); mac(&accum1, &aa[3], &bbb[2]); - mac(&accum2, &a[0],&b[1]); + mac(&accum2, &a[0], &b[1]); mac(&accum1, &aa[0], &bb[1]); mac(&accum0, &a[4], &b[5]); @@ -101,11 +111,11 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { accum0 >>= 56; accum1 >>= 56; - accum2 = widemul(&a[3],&b[7]); + accum2 = widemul(&a[3], &b[7]); mac(&accum0, &a[7], &bb[3]); mac(&accum1, &aa[3], &bbb[3]); - mac(&accum2, &a[0],&b[2]); + mac(&accum2, &a[0], &b[2]); mac(&accum1, &aa[0], &bb[2]); mac(&accum0, &a[4], &b[6]); @@ -139,37 +149,46 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { c[0] += ((uint64_t)(accum1)); } -void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) { +void gf_mulw_unsigned(gf_s * __restrict__ cs, const gf as, uint32_t b) +{ const uint64_t *a = as->limb; uint64_t *c = cs->limb; __uint128_t accum0, accum4; - uint64_t mask = (1ull<<56) - 1; + uint64_t mask = (1ull << 56) - 1; accum0 = widemul_rm(b, &a[0]); accum4 = widemul_rm(b, &a[4]); - c[0] = accum0 & mask; accum0 >>= 56; - c[4] = accum4 & mask; accum4 >>= 56; + c[0] = accum0 & mask; + accum0 >>= 56; + c[4] = accum4 & mask; + accum4 >>= 56; mac_rm(&accum0, b, &a[1]); mac_rm(&accum4, b, &a[5]); - c[1] = accum0 & mask; accum0 >>= 56; - c[5] = accum4 & mask; accum4 >>= 56; + c[1] = accum0 & mask; + accum0 >>= 56; + c[5] = accum4 & mask; + accum4 >>= 56; mac_rm(&accum0, b, &a[2]); mac_rm(&accum4, b, &a[6]); - c[2] = accum0 & mask; accum0 >>= 56; - c[6] = accum4 & mask; accum4 >>= 56; + c[2] = accum0 & mask; + accum0 >>= 56; + c[6] = accum4 & mask; + accum4 >>= 56; mac_rm(&accum0, b, &a[3]); mac_rm(&accum4, b, &a[7]); - c[3] = accum0 & mask; accum0 >>= 56; - c[7] = accum4 & mask; accum4 >>= 56; - + c[3] = accum0 & mask; + accum0 >>= 56; + c[7] = accum4 & mask; + accum4 >>= 56; + accum0 += accum4 + c[4]; c[4] = accum0 & mask; c[5] += accum0 >> 56; @@ -179,24 +198,26 @@ void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) { c[1] += accum4 >> 56; } -void gf_sqr (gf_s *__restrict__ cs, const gf as) { +void gf_sqr(gf_s * __restrict__ cs, const gf as) +{ const uint64_t *a = as->limb; uint64_t *c = cs->limb; __uint128_t accum0 = 0, accum1 = 0, accum2; - uint64_t mask = (1ull<<56) - 1; + uint64_t mask = (1ull << 56) - 1; uint64_t aa[4] VECTOR_ALIGNED; /* For some reason clang doesn't vectorize this without prompting? */ unsigned int i; - for (i=0; i>= 55; accum1 >>= 55; - mac2(&accum0, &aa[1],&aa[3]); + mac2(&accum0, &aa[1], &aa[3]); mac2(&accum1, &a[5], &a[7]); mac(&accum0, &aa[2], &aa[2]); accum1 += accum0; msb2(&accum0, &a[1], &a[3]); mac(&accum1, &a[6], &a[6]); - - accum2 = widemul(&a[0],&a[0]); + + accum2 = widemul(&a[0], &a[0]); accum1 -= accum2; accum0 += accum2; @@ -233,14 +254,14 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) { accum0 >>= 56; accum1 >>= 56; - accum2 = widemul2(&aa[2],&aa[3]); + accum2 = widemul2(&aa[2], &aa[3]); msb2(&accum0, &a[2], &a[3]); mac2(&accum1, &a[6], &a[7]); accum1 += accum2; accum0 += accum2; - accum2 = widemul2(&a[0],&a[1]); + accum2 = widemul2(&a[0], &a[1]); mac2(&accum1, &aa[0], &aa[1]); mac2(&accum0, &a[4], &a[5]); @@ -253,14 +274,14 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) { accum0 >>= 56; accum1 >>= 56; - accum2 = widemul(&aa[3],&aa[3]); + accum2 = widemul(&aa[3], &aa[3]); msb(&accum0, &a[3], &a[3]); mac(&accum1, &a[7], &a[7]); accum1 += accum2; accum0 += accum2; - accum2 = widemul2(&a[0],&a[2]); + accum2 = widemul2(&a[0], &a[2]); mac2(&accum1, &aa[0], &aa[2]); mac2(&accum0, &a[4], &a[6]);