2 * Copyright 2017 The OpenSSL Project Authors. All Rights Reserved.
3 * Copyright 2014-2016 Cryptography Research, Inc.
5 * Licensed under the OpenSSL license (the "License"). You may not use
6 * this file except in compliance with the License. You can obtain a copy
7 * in the file LICENSE in the source distribution or at
8 * https://www.openssl.org/source/license.html
10 * Originally written by Mike Hamburg
12 #ifndef __ARCH_X86_64_ARCH_INTRINSICS_H__
13 # define __ARCH_X86_64_ARCH_INTRINSICS_H__
15 # define ARCH_WORD_BITS 64
17 # include <openssl/e_os2.h>
19 /* FUTURE: autogenerate */
/*
 * widemul: unsigned 64x64 -> 128-bit multiply of *a by *b.
 *
 * a, b: pointers to the two 64-bit factors; both are handed to the asm as
 *       memory operands.
 * Returns d:c as one 128-bit value (d in the upper 64 bits, c in the lower).
 *
 * Two inline-asm sequences appear below, one built on mulq and one on mulx.
 * NOTE(review): the conditional that picks between them is not among the
 * lines shown here - presumably a BMI2 feature test; confirm.
 */
20 static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b)
/* mulq form: *a is loaded into rax; c is taken from rax and d from rdx. */
25 ("movq %[a], %%rax;" "mulq %[b];":[c] "=&a"(c),[d] "=d"(d)
26 :[b] "m"(*b),[a] "m"(*a)
/* mulx form: *a is loaded into rdx; c (low) and d (high) may be any registers. */
30 ("movq %[a], %%rdx;" "mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
31 :[b] "m"(*b),[a] "m"(*a)
/* Reassemble the halves: d is the high word, c the low word. */
34 return (((__uint128_t) (d)) << 64) | c;
/*
 * widemul_rm: unsigned 64x64 -> 128-bit multiply of a by *b, with a passed
 * by value (register) and b by pointer (memory operand).
 *
 * Returns d:c as one 128-bit value (d in the upper 64 bits, c in the lower).
 *
 * NOTE(review): the conditional selecting between the mulq form and the
 * mulx form is not among the lines shown here - confirm.
 */
37 static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b)
/* mulq form: a arrives in any register and is copied into rax first. */
42 ("movq %[a], %%rax;" "mulq %[b];":[c] "=&a"(c),[d] "=d"(d)
43 :[b] "m"(*b),[a] "r"(a)
/* mulx form: a is bound directly to rdx by its constraint, so no copy is needed. */
47 ("mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
48 :[b] "m"(*b),[a] "d"(a));
/* Reassemble the halves: d is the high word, c the low word. */
50 return (((__uint128_t) (d)) << 64) | c;
/*
 * widemul_rr: unsigned 64x64 -> 128-bit multiply with both factors passed
 * by value.
 *
 * Returns d:c as one 128-bit value (d in the upper 64 bits, c in the lower).
 *
 * NOTE(review): the conditional selecting between the mulq form and the
 * mulx form is not among the lines shown here - confirm.
 */
53 static __inline__ __uint128_t widemul_rr(uint64_t a, uint64_t b)
/*
 * mulq form: c is taken from rax and d from rdx.
 * NOTE(review): the input operand list of this statement is not shown;
 * mulq multiplies rax by its operand, so a is presumably bound to rax by
 * an input constraint - confirm.
 */
58 ("mulq %[b];":[c] "=a"(c),[d] "=d"(d)
/* mulx form: b in any register, a bound to rdx by its constraint. */
63 ("mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
64 :[b] "r"(b),[a] "d"(a));
/* Reassemble the halves: d is the high word, c the low word. */
66 return (((__uint128_t) (d)) << 64) | c;
/*
 * widemul2: 128-bit product in which one factor is doubled before the
 * multiply.
 *
 * a, b: pointers to 64-bit values; both are handed to the asm as memory
 *       operands.
 * Returns d:c as one 128-bit value (d in the upper 64 bits, c in the lower).
 *
 * The doubling is done in a 64-bit register (addq rax,rax or a scaled leaq
 * on rdx), so the doubled factor is reduced modulo 2^64 before multiplying.
 * NOTE(review): the instruction that loads *a into rax / rdx ahead of the
 * doubling, and the conditional selecting between the two forms, are not
 * among the lines shown here - presumably the result is (2 * *a) * *b;
 * confirm.
 */
69 static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b)
/* mulq form: double rax, then multiply by *b; c from rax, d from rdx. */
75 "addq %%rax, %%rax; " "mulq %[b];":[c] "=&a"(c),[d] "=d"(d)
76 :[b] "m"(*b),[a] "m"(*a)
/* mulx form: double rdx with leaq, then multiply by *b into c (low) and d (high). */
81 "leaq (,%%rdx,2), %%rdx;" "mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
82 :[b] "m"(*b),[a] "m"(*a)
/* Reassemble the halves: d is the high word, c the low word. */
85 return (((__uint128_t) (d)) << 64) | c;
/*
 * mac: multiply-accumulate into a 128-bit accumulator.
 *
 * acc: accumulator; read at entry (split into 64-bit halves lo and hi) and
 *      written back at exit.
 * a:   pointer to a 64-bit factor, handed to the asm as a memory operand.
 * The asm input lists also bind a memory operand *b as the second factor.
 *
 * The visible asm adds a 128-bit quantity into hi:lo with an add/adc carry
 * chain (adcq into hi in both forms, addq into lo in the rax/rdx form).
 * NOTE(review): presumably that quantity is the full product (*a) * (*b),
 * i.e. *acc += a*b modulo 2^128. The lines shown here do not include every
 * instruction of either sequence, nor the conditional selecting between
 * the mulx form and the rax/rdx form - confirm against the full function.
 */
88 static __inline__ void mac(__uint128_t * acc, const uint64_t *a,
91 uint64_t lo = *acc, hi = *acc >> 64;
/* mulx form: product halves land in c (low) and d (high); d is added to hi with carry. */
97 "mulx %[b], %[c], %[d]; "
99 "adcq %[d], %[hi]; ":[c] "=&r"(c),[d] "=&r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
100 :[b] "m"(*b),[a] "m"(*a)
/* rax/rdx form: rax and rdx are added into lo and hi, and are declared as clobbers. */
104 ("movq %[a], %%rax; "
106 "addq %%rax, %[lo]; " "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
107 :[b] "m"(*b),[a] "m"(*a)
108 :"rax", "rdx", "cc");
/* Write the updated halves back as one 128-bit value. */
111 *acc = (((__uint128_t) (hi)) << 64) | lo;
/*
 * macac: multiply once and accumulate the result into two 128-bit
 * accumulators.
 *
 * acc, acc2: accumulators; each is read at entry (split into 64-bit halves)
 *            and written back at exit.
 * a, b:      pointers to the 64-bit factors, handed to the asm as memory
 *            operands.
 *
 * In the rax/rdx form the same rdx:rax pair is added to hi:lo and then to
 * hi2:lo2; in the mulx form the pair d:c is added to hi2:lo2.
 * NOTE(review): presumably both accumulators receive (*a) * (*b). The lines
 * shown here do not include the multiply of the rax/rdx form, the adds into
 * lo/hi of the mulx form, or the conditional selecting between the forms -
 * confirm against the full function.
 */
114 static __inline__ void macac(__uint128_t * acc, __uint128_t * acc2,
115 const uint64_t *a, const uint64_t *b)
117 uint64_t lo = *acc, hi = *acc >> 64;
118 uint64_t lo2 = *acc2, hi2 = *acc2 >> 64;
/* mulx form: *a goes to rdx, product halves land in c (low) and d (high). */
123 ("movq %[a], %%rdx; "
124 "mulx %[b], %[c], %[d]; "
127 "addq %[c], %[lo2]; "
128 "adcq %[d], %[hi2]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi),
129 [lo2] "+r"(lo2),[hi2] "+r"(hi2)
130 :[b] "m"(*b),[a] "m"(*a)
/* rax/rdx form: the rdx:rax pair is added first into hi:lo, then into hi2:lo2. */
134 ("movq %[a], %%rax; "
136 "addq %%rax, %[lo]; "
137 "adcq %%rdx, %[hi]; "
138 "addq %%rax, %[lo2]; "
139 "adcq %%rdx, %[hi2]; ":[lo] "+r"(lo),[hi] "+r"(hi),[lo2] "+r"(lo2),
141 :[b] "m"(*b),[a] "m"(*a)
142 :"rax", "rdx", "cc");
/* Write both updated accumulators back as 128-bit values. */
145 *acc = (((__uint128_t) (hi)) << 64) | lo;
146 *acc2 = (((__uint128_t) (hi2)) << 64) | lo2;
/*
 * mac_rm: multiply-accumulate into a 128-bit accumulator, with a passed by
 * value (register) and b by pointer (memory operand).
 *
 * acc: accumulator; read at entry (split into 64-bit halves lo and hi) and
 *      written back at exit.
 *
 * NOTE(review): presumably *acc += a * (*b) modulo 2^128. The lines shown
 * here do not include every instruction of either asm sequence, nor the
 * conditional selecting between the mulx form and the rax/rdx form -
 * confirm against the full function.
 */
149 static __inline__ void mac_rm(__uint128_t * acc, uint64_t a, const uint64_t *b)
151 uint64_t lo = *acc, hi = *acc >> 64;
/* mulx form: a is bound to rdx by its constraint; d (high half) is added to hi with carry. */
156 ("mulx %[b], %[c], %[d]; "
158 "adcq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
159 :[b] "m"(*b),[a] "d"(a)
/* rax/rdx form: a is copied into rax; rax and rdx are then added into lo and hi. */
163 ("movq %[a], %%rax; "
165 "addq %%rax, %[lo]; " "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
166 :[b] "m"(*b),[a] "r"(a)
167 :"rax", "rdx", "cc");
/* Write the updated halves back as one 128-bit value. */
170 *acc = (((__uint128_t) (hi)) << 64) | lo;
/*
 * mac_rr: multiply-accumulate into a 128-bit accumulator, with both factors
 * passed by value.
 *
 * acc: accumulator; read at entry (split into 64-bit halves lo and hi) and
 *      written back at exit.
 *
 * NOTE(review): presumably *acc += a * b modulo 2^128. The lines shown here
 * do not include every instruction of either asm sequence, nor the
 * conditional selecting between the mulx form and the rax/rdx form -
 * confirm against the full function.
 */
173 static __inline__ void mac_rr(__uint128_t * acc, uint64_t a, const uint64_t b)
175 uint64_t lo = *acc, hi = *acc >> 64;
/* mulx form: b in any register, a bound to rdx; d (high half) is added to hi with carry. */
180 ("mulx %[b], %[c], %[d]; "
182 "adcq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
183 :[b] "r"(b),[a] "d"(a)
/*
 * rax/rdx form: a is a read-write operand held in rax, so the asm is
 * allowed to overwrite it; rax and rdx are added into lo and hi.
 */
188 "addq %%rax, %[lo]; "
189 "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi), "+a"(a)
/* Write the updated halves back as one 128-bit value. */
194 *acc = (((__uint128_t) (hi)) << 64) | lo;
/*
 * mac2: multiply-accumulate into a 128-bit accumulator with one factor
 * doubled before the multiply.
 *
 * acc: accumulator; read at entry (split into 64-bit halves lo and hi) and
 *      written back at exit.
 * a:   pointer to the 64-bit factor that is loaded and then doubled in a
 *      64-bit register (so the doubled value is reduced modulo 2^64).
 * The asm input lists also bind a memory operand *b as the second factor.
 *
 * NOTE(review): presumably *acc += (2 * *a) * (*b) modulo 2^128. The lines
 * shown here do not include every instruction of either asm sequence, nor
 * the conditional selecting between the mulx form and the rax/rdx form -
 * confirm against the full function.
 */
197 static __inline__ void mac2(__uint128_t * acc, const uint64_t *a,
200 uint64_t lo = *acc, hi = *acc >> 64;
/* mulx form: load *a into rdx, double it, multiply by *b into c (low) and d (high). */
205 ("movq %[a], %%rdx; "
206 "addq %%rdx, %%rdx; "
207 "mulx %[b], %[c], %[d]; "
209 "adcq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
210 :[b] "m"(*b),[a] "m"(*a)
/* rax/rdx form: load *a into rax, double it; rax and rdx are then added into lo and hi. */
214 ("movq %[a], %%rax; "
215 "addq %%rax, %%rax; "
217 "addq %%rax, %[lo]; " "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
218 :[b] "m"(*b),[a] "m"(*a)
219 :"rax", "rdx", "cc");
/* Write the updated halves back as one 128-bit value. */
222 *acc = (((__uint128_t) (hi)) << 64) | lo;
/*
 * msb: multiply-subtract from a 128-bit accumulator.
 *
 * acc: accumulator; read at entry (split into 64-bit halves lo and hi) and
 *      written back at exit.
 * a:   pointer to a 64-bit factor, handed to the asm as a memory operand.
 * The asm input lists also bind a memory operand *b as the second factor.
 *
 * The visible asm subtracts a 128-bit quantity from hi:lo with a sub/sbb
 * borrow chain (sbbq from hi in both forms, subq from lo in the rax/rdx
 * form).
 * NOTE(review): presumably that quantity is the full product (*a) * (*b),
 * i.e. *acc -= a*b modulo 2^128. The lines shown here do not include every
 * instruction of either sequence, nor the conditional selecting between
 * the mulx form and the rax/rdx form - confirm against the full function.
 */
225 static __inline__ void msb(__uint128_t * acc, const uint64_t *a,
228 uint64_t lo = *acc, hi = *acc >> 64;
/* mulx form: product halves in c (low) and d (high); d is subtracted from hi with borrow. */
232 ("movq %[a], %%rdx; "
233 "mulx %[b], %[c], %[d]; "
235 "sbbq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
236 :[b] "m"(*b),[a] "m"(*a)
/* rax/rdx form: rax and rdx are subtracted from lo and hi, and are declared as clobbers. */
240 ("movq %[a], %%rax; "
242 "subq %%rax, %[lo]; " "sbbq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
243 :[b] "m"(*b),[a] "m"(*a)
244 :"rax", "rdx", "cc");
/* Write the updated halves back as one 128-bit value. */
246 *acc = (((__uint128_t) (hi)) << 64) | lo;
/*
 * msb2: multiply-subtract from a 128-bit accumulator with one factor
 * doubled before the multiply.
 *
 * acc: accumulator; read at entry (split into 64-bit halves lo and hi) and
 *      written back at exit.
 * a:   pointer to the 64-bit factor that is loaded and then doubled in a
 *      64-bit register (so the doubled value is reduced modulo 2^64).
 * The asm input lists also bind a memory operand *b as the second factor.
 *
 * NOTE(review): presumably *acc -= (2 * *a) * (*b) modulo 2^128. The lines
 * shown here do not include every instruction of either asm sequence, nor
 * the conditional selecting between the mulx form and the rax/rdx form -
 * confirm against the full function.
 */
249 static __inline__ void msb2(__uint128_t * acc, const uint64_t *a,
252 uint64_t lo = *acc, hi = *acc >> 64;
/* mulx form: load *a into rdx, double it, multiply by *b into c (low) and d (high). */
256 ("movq %[a], %%rdx; "
257 "addq %%rdx, %%rdx; "
258 "mulx %[b], %[c], %[d]; "
260 "sbbq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
261 :[b] "m"(*b),[a] "m"(*a)
/* rax/rdx form: load *a into rax, double it; rax and rdx are then subtracted from lo and hi. */
265 ("movq %[a], %%rax; "
266 "addq %%rax, %%rax; "
268 "subq %%rax, %[lo]; " "sbbq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
269 :[b] "m"(*b),[a] "m"(*a)
270 :"rax", "rdx", "cc");
/* Write the updated halves back as one 128-bit value. */
272 *acc = (((__uint128_t) (hi)) << 64) | lo;
/*
 * mrs: multiply, then subtract the old accumulator from the product and
 * store the result back into the accumulator.
 *
 * acc: accumulator; read at entry (split into 64-bit halves lo and hi) and
 *      overwritten at exit with d:c.
 * a:   pointer to a 64-bit factor, loaded into rdx for mulx.
 * The asm input list also binds a memory operand *b as the second factor.
 *
 * Unlike the accumulate helpers, the subtraction runs in the opposite
 * direction: hi is subtracted (with borrow) from d, and the value written
 * back is built from d and c rather than from hi and lo.
 * NOTE(review): presumably *acc = (*a) * (*b) - *acc modulo 2^128; the
 * instruction that subtracts lo from c is not among the lines shown here.
 * Only a mulx-based sequence is visible for this function - confirm whether
 * a fallback exists for targets without mulx.
 */
276 static __inline__ void mrs(__uint128_t * acc, const uint64_t *a,
279 uint64_t c, d, lo = *acc, hi = *acc >> 64;
/* Product halves land in c (low) and d (high); hi is then subtracted from d with borrow. */
281 ("movq %[a], %%rdx; "
282 "mulx %[b], %[c], %[d]; "
284 "sbbq %[hi], %[d]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
285 :[b] "m"(*b),[a] "m"(*a)
/* Store the difference: d is the high word, c the low word. */
287 *acc = (((__uint128_t) (d)) << 64) | c;
/*
 * word_is_zero: branch-free zero test on a 64-bit word.
 *
 * x: the word to test; it is also the read-write operand of the asm.
 *
 * NOTE(review): the return statement is not among the lines shown here.
 * Given the function name, presumably the complement of the asm result is
 * returned (all-ones when the input is zero, 0 otherwise) - confirm.
 */
290 static __inline__ uint64_t word_is_zero(uint64_t x)
/*
 * neg sets the carry flag exactly when x is nonzero; sbb x,x then yields
 * 0 - carry, so x becomes 0 for a zero input and all-ones otherwise.
 */
292 __asm__ volatile ("neg %0; sbb %0, %0;":"+r" (x));
/*
 * shrld: takes a 128-bit value x and an int n, and returns a 64-bit word.
 *
 * NOTE(review): the body is not among the lines shown here. From the name
 * and signature this presumably returns the low 64 bits of x shifted right
 * by n - confirm, including which range of n is valid.
 */
296 static inline uint64_t shrld(__uint128_t x, int n)
301 #endif /* __ARCH_X86_64_ARCH_INTRINSICS_H__ */