crypto/ec/curve448/arch_neon/f_impl.h

   1 /*
   2  * Copyright 2017 The OpenSSL Project Authors. All Rights Reserved.
   3  * Copyright 2014-2016 Cryptography Research, Inc.
   4  *
   5  * Licensed under the OpenSSL license (the "License").  You may not use
   6  * this file except in compliance with the License.  You can obtain a copy
   7  * in the file LICENSE in the source distribution or at
   8  * https://www.openssl.org/source/license.html
   9  *
  10  * Originally written by Mike Hamburg
  11  */
  12
  13 #define GF_HEADROOM 2
  14 #define LIMBPERM(x) (((x)<<1 | (x)>>3) & 15)
  15 #define USE_NEON_PERM 1
  16 #define LIMBHI(x) ((x##ull)>>28)
  17 #define LIMBLO(x) ((x##ull)&((1ull<<28)-1))
  18 #  define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
  19     {{LIMBLO(a),LIMBLO(e), LIMBHI(a),LIMBHI(e), \
  20       LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \
  21       LIMBLO(c),LIMBLO(g), LIMBHI(c),LIMBHI(g), \
  22       LIMBLO(d),LIMBLO(h), LIMBHI(d),LIMBHI(h)}}
  23
  24 #define LIMB_PLACE_VALUE(i) 28
  25
  26 void gf_add_RAW (gf out, const gf a, const gf b) {
  27     for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
  28         ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i];
  29     }
  30 }
  31
  32 void gf_sub_RAW (gf out, const gf a, const gf b) {
  33     for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
  34         ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i];
  35     }
  36     /*
  37     unsigned int i;
  38     for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  39         out->limb[i] = a->limb[i] - b->limb[i];
  40     }
  41     */
  42 }
  43
  44 void gf_bias (gf a, int amt) {
  45     uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
  46     uint32x4_t lo = {co1,co2,co1,co1}, hi = {co1,co1,co1,co1};
  47     uint32x4_t *aa = (uint32x4_t*) a;
  48     aa[0] += lo;
  49     aa[1] += hi;
  50     aa[2] += hi;
  51     aa[3] += hi;
  52 }
  53
  54 void gf_weak_reduce (gf a) {
  55
  56     uint32x2_t *aa = (uint32x2_t*) a, vmask = {(1ull<<28)-1, (1ull<<28)-1}, vm2 = {0,-1},
  57        tmp = vshr_n_u32(aa[7],28);
  58
  59     for (unsigned int i=7; i>=1; i--) {
  60         aa[i] = vsra_n_u32(aa[i] & vmask, aa[i-1], 28);
  61     }
  62     aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp&vm2);
  63 }
  64