X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fec%2Fcurve448%2Farch_neon%2Ff_impl.h;h=676ef1d4441fb5a084021940744cd586d15c4a4a;hp=744c61fb154f5b5a21b58cbe6e33f8724e24c8f0;hb=205fd6388175704bd7597dbfb571c84f868ce6da;hpb=1308e022e1a62214b9e7f8ec92ca7045e70af3a2 diff --git a/crypto/ec/curve448/arch_neon/f_impl.h b/crypto/ec/curve448/arch_neon/f_impl.h index 744c61fb15..676ef1d444 100644 --- a/crypto/ec/curve448/arch_neon/f_impl.h +++ b/crypto/ec/curve448/arch_neon/f_impl.h @@ -15,50 +15,55 @@ #define USE_NEON_PERM 1 #define LIMBHI(x) ((x##ull)>>28) #define LIMBLO(x) ((x##ull)&((1ull<<28)-1)) -# define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ +#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ {{LIMBLO(a),LIMBLO(e), LIMBHI(a),LIMBHI(e), \ LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \ LIMBLO(c),LIMBLO(g), LIMBHI(c),LIMBHI(g), \ LIMBLO(d),LIMBLO(h), LIMBHI(d),LIMBHI(h)}} - + #define LIMB_PLACE_VALUE(i) 28 -void gf_add_RAW (gf out, const gf a, const gf b) { - for (unsigned int i=0; ilimb[0]); i++) { - out->limb[i] = a->limb[i] - b->limb[i]; - } - */ + * unsigned int i; for (i=0; ilimb[0]); i++) { + * out->limb[i] = a->limb[i] - b->limb[i]; } + */ } -void gf_bias (gf a, int amt) { - uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt; - uint32x4_t lo = {co1,co2,co1,co1}, hi = {co1,co1,co1,co1}; - uint32x4_t *aa = (uint32x4_t*) a; +void gf_bias(gf a, int amt) +{ + uint32_t co1 = ((1ull << 28) - 1) * amt, co2 = co1 - amt; + uint32x4_t lo = { co1, co2, co1, co1 }, hi = { + co1, co1, co1, co1}; + uint32x4_t *aa = (uint32x4_t *) a; aa[0] += lo; aa[1] += hi; aa[2] += hi; aa[3] += hi; } -void gf_weak_reduce (gf a) { +void gf_weak_reduce(gf a) +{ - uint32x2_t *aa = (uint32x2_t*) a, vmask = {(1ull<<28)-1, (1ull<<28)-1}, vm2 = {0,-1}, - tmp = vshr_n_u32(aa[7],28); - - for (unsigned int i=7; i>=1; i--) { - aa[i] = vsra_n_u32(aa[i] & vmask, aa[i-1], 28); + uint32x2_t *aa = (uint32x2_t *) a, vmask = { + (1ull << 28) - 1, (1ull << 28) - 1}, vm2 = { + 0, -1}, tmp = vshr_n_u32(aa[7], 28); + + for (unsigned int i = 7; i >= 1; i--) { + aa[i] = vsra_n_u32(aa[i] & vmask, aa[i - 1], 28); } - aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp&vm2); + aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp & vm2); } -