X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fec%2Fcurve448%2Farch_neon%2Ff_impl.h;h=341d8a9a19cf2b083d6b82aa9965efb26dcafc8d;hp=676ef1d4441fb5a084021940744cd586d15c4a4a;hb=8d55f844b08199e0ac6a2ddc501de39f3237c5e9;hpb=205fd6388175704bd7597dbfb571c84f868ce6da;ds=sidebyside diff --git a/crypto/ec/curve448/arch_neon/f_impl.h b/crypto/ec/curve448/arch_neon/f_impl.h index 676ef1d444..341d8a9a19 100644 --- a/crypto/ec/curve448/arch_neon/f_impl.h +++ b/crypto/ec/curve448/arch_neon/f_impl.h @@ -15,11 +15,13 @@ #define USE_NEON_PERM 1 #define LIMBHI(x) ((x##ull)>>28) #define LIMBLO(x) ((x##ull)&((1ull<<28)-1)) -#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ - {{LIMBLO(a),LIMBLO(e), LIMBHI(a),LIMBHI(e), \ - LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \ - LIMBLO(c),LIMBLO(g), LIMBHI(c),LIMBHI(g), \ - LIMBLO(d),LIMBLO(h), LIMBHI(d),LIMBHI(h)}} +#define FIELD_LITERAL(a,b,c,d,e,f,g,h) { \ + { \ + LIMBLO(a), LIMBLO(e), LIMBHI(a), LIMBHI(e), LIMBLO(b), LIMBLO(f), \ + LIMBHI(b), LIMBHI(f), LIMBLO(c), LIMBLO(g), LIMBHI(c), LIMBHI(g), \ + LIMBLO(d), LIMBLO(h), LIMBHI(d), LIMBHI(h) \ + } \ +} #define LIMB_PLACE_VALUE(i) 28 @@ -37,17 +39,13 @@ void gf_sub_RAW(gf out, const gf a, const gf b) ((uint32xn_t *) out)[i] = ((const uint32xn_t *)a)[i] - ((const uint32xn_t *)b)[i]; } - /* - * unsigned int i; for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) { - * out->limb[i] = a->limb[i] - b->limb[i]; } - */ } void gf_bias(gf a, int amt) { uint32_t co1 = ((1ull << 28) - 1) * amt, co2 = co1 - amt; - uint32x4_t lo = { co1, co2, co1, co1 }, hi = { - co1, co1, co1, co1}; + uint32x4_t lo = { co1, co2, co1, co1 }; + uint32x4_t hi = { co1, co1, co1, co1 }; uint32x4_t *aa = (uint32x4_t *) a; aa[0] += lo; aa[1] += hi; @@ -57,13 +55,11 @@ void gf_bias(gf a, int amt) void gf_weak_reduce(gf a) { + uint32x2_t *aa = (uint32x2_t *) a; + uint32x2_t vmask = { (1ull << 28) - 1, (1ull << 28) - 1}; + uint32x2_t vm2 = { 0, -1}, tmp = vshr_n_u32(aa[7], 28); - uint32x2_t *aa = (uint32x2_t *) a, vmask = { - (1ull << 
28) - 1, (1ull << 28) - 1}, vm2 = { - 0, -1}, tmp = vshr_n_u32(aa[7], 28); - - for (unsigned int i = 7; i >= 1; i--) { + for (unsigned int i = 7; i >= 1; i--) aa[i] = vsra_n_u32(aa[i] & vmask, aa[i - 1], 28); - } aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp & vm2); }