#include "f_field.h"
/*
 * Add the low 64-bit lane of x into its high lane and return the vector.
 *
 * In GCC ARM inline asm, %e0 and %f0 name the low and high D register of
 * the Q register that holds operand 0.  The two-operand form of vadd uses
 * the destination as its first source, so this computes
 * high = high + low and leaves the low lane unchanged.
 * The "+w" constraint makes x a read-write operand kept in a VFP/NEON
 * register.
 */
static __inline__ uint64x2_t __attribute__((gnu_inline,always_inline,unused))
-xx_vaddup_u64(uint64x2_t x) {
+xx_vaddup_u64(uint64x2_t x)
+{
__asm__ ("vadd.s64 %f0, %e0" : "+w"(x));
return x;
}
/*
 * Swap the two 64-bit lanes of a signed 128-bit vector and return it.
 *
 * %e0 / %f0 are the low / high D register of the Q register holding x;
 * vswp exchanges their contents in place ("+w" = read-write NEON operand).
 */
static __inline__ int64x2_t __attribute__((gnu_inline,always_inline,unused))
-vrev128_s64(int64x2_t x) {
+vrev128_s64(int64x2_t x)
+{
__asm__ ("vswp.s64 %e0, %f0" : "+w"(x));
return x;
}
/*
 * Swap the two 64-bit lanes of an unsigned 128-bit vector and return it.
 *
 * Same instruction as vrev128_s64: vswp exchanges the low (%e0) and high
 * (%f0) D registers of the Q register holding x.
 *
 * NOTE(review): unlike the other static helpers in this file, the attribute
 * list here does not include `unused` -- confirm whether that is intended.
 */
static __inline__ uint64x2_t __attribute__((gnu_inline,always_inline))
-vrev128_u64(uint64x2_t x) {
+vrev128_u64(uint64x2_t x)
+{
__asm__ ("vswp.s64 %e0, %f0" : "+w"(x));
return x;
}
/*
 * Signed multiply-accumulate: *acc += (signed)a * (signed)b.
 *
 * a and b arrive as uint32_t but are converted to int32_t and then widened
 * to int64_t, so the product is a signed 64-bit value.  Because *acc is
 * uint64_t, the addition is carried out in unsigned arithmetic and wraps
 * modulo 2^64.
 *
 * acc: accumulator, read and updated in place.
 * a, b: 32-bit operands, interpreted as signed.
 */
static inline void __attribute__((gnu_inline,always_inline,unused))
-smlal (
- uint64_t *acc,
- const uint32_t a,
- const uint32_t b
-) {
+smlal(uint64_t *acc, const uint32_t a, const uint32_t b)
+{
*acc += (int64_t)(int32_t)a * (int64_t)(int32_t)b;
}
/*
 * Doubled signed multiply-accumulate: *acc += 2 * (signed)a * (signed)b.
 *
 * a and b are converted to int32_t, widened to int64_t, multiplied, and the
 * product is doubled in signed 64-bit arithmetic before being added to the
 * unsigned accumulator (that final addition wraps modulo 2^64).
 *
 * NOTE(review): if both a and b convert to INT32_MIN the product is 2^62 and
 * the doubling reaches 2^63, which overflows int64_t -- confirm callers never
 * pass that pair.
 *
 * acc: accumulator, read and updated in place.
 * a, b: 32-bit operands, interpreted as signed.
 */
static inline void __attribute__((gnu_inline,always_inline,unused))
-smlal2 (
- uint64_t *acc,
- const uint32_t a,
- const uint32_t b
-) {
+smlal2(uint64_t *acc, const uint32_t a, const uint32_t b)
+{
*acc += (int64_t)(int32_t)a * (int64_t)(int32_t)b * 2;
}
/*
 * Signed multiply: *acc = (signed)a * (signed)b.
 *
 * a and b are converted to int32_t and widened to int64_t before the
 * multiply; the signed 64-bit product is then stored into the unsigned
 * accumulator, overwriting its previous value (no accumulation).
 *
 * acc: destination, written only.
 * a, b: 32-bit operands, interpreted as signed.
 */
static inline void __attribute__((gnu_inline,always_inline,unused))
-smull (
- uint64_t *acc,
- const uint32_t a,
- const uint32_t b
-) {
+smull(uint64_t *acc, const uint32_t a, const uint32_t b)
+{
*acc = (int64_t)(int32_t)a * (int64_t)(int32_t)b;
}
/*
 * Doubled signed multiply: *acc = 2 * (signed)a * (signed)b.
 *
 * a and b are converted to int32_t, widened to int64_t, multiplied, and the
 * product is doubled in signed 64-bit arithmetic; the result overwrites
 * *acc (no accumulation).
 *
 * NOTE(review): if both a and b convert to INT32_MIN the product is 2^62 and
 * the doubling reaches 2^63, which overflows int64_t -- confirm callers never
 * pass that pair.
 *
 * acc: destination, written only.
 * a, b: 32-bit operands, interpreted as signed.
 */
static inline void __attribute__((gnu_inline,always_inline,unused))
-smull2 (
- uint64_t *acc,
- const uint32_t a,
- const uint32_t b
-) {
+smull2(uint64_t *acc, const uint32_t a, const uint32_t b)
+{
*acc = (int64_t)(int32_t)a * (int64_t)(int32_t)b * 2;
}
/*
 * NOTE(review): presumably the field multiplication cs = as * bs, judging by
 * the name -- confirm against f_field.h.
 *
 * cs is __restrict__-qualified, so the output must not overlap the inputs
 * (assuming gf is a pointer/array type -- TODO confirm).
 */
-void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
+void gf_mul(gf_s *__restrict__ cs, const gf as, const gf bs)
+{
/*
 * Symbolic register names as string literals: _bl0 is NEON quad register q0,
 * and _bl0_0 / _bl0_1 are d0 / d1, the two double registers that alias its
 * low and high halves.  Presumably pasted into an inline-asm template --
 * verify at the point of use.
 */
#define _bl0 "q0"
#define _bl0_0 "d0"
#define _bl0_1 "d1"
);
}
/*
 * NOTE(review): presumably the field squaring cs = bs^2, judging by the
 * name -- confirm against f_field.h.
 *
 * cs is __restrict__-qualified, so the output must not overlap the input
 * (assuming gf is a pointer/array type -- TODO confirm).
 */
-void gf_sqr (gf_s *__restrict__ cs, const gf bs) {
+void gf_sqr(gf_s *__restrict__ cs, const gf bs)
+{
/*
 * View the output limb storage as 2 x 32-bit signed vectors.
 * Assumes cs->limb is suitably aligned and typed for access as int32x2_t --
 * TODO confirm.
 */
int32x2_t *vc = (int32x2_t*) cs->limb;
/* volatile: the asm statement must not be removed or merged by the compiler. */
__asm__ __volatile__ (
);
}
-void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
+void gf_mulw_unsigned(gf_s *__restrict__ cs, const gf as, uint32_t b)
+{
uint32x2_t vmask = {(1<<28) - 1, (1<<28)-1};
assert(b<(1<<28));