/*
 * Copyright 2017 The OpenSSL Project Authors. All Rights Reserved.
 * Copyright 2014-2016 Cryptography Research, Inc.
 *
 * Licensed under the OpenSSL license (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 *
 * Originally written by Mike Hamburg
 */
12 #ifndef __ARCH_X86_64_ARCH_INTRINSICS_H__
13 #define __ARCH_X86_64_ARCH_INTRINSICS_H__
15 #define ARCH_WORD_BITS 64
17 #include <openssl/e_os2.h>
/* FUTURE: autogenerate */
/*
 * Full 64x64 -> 128-bit unsigned multiply of *a by *b.
 *
 * Both factors are read from memory.  The high 64 bits of the product end
 * up in bits 127..64 of the return value and the low 64 bits in bits 63..0.
 */
static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b)
{
    uint64_t c, d;

#ifndef __BMI2__
    /* mulq multiplies rax by its operand and leaves the product in rdx:rax. */
    __asm__ volatile
        ("movq %[a], %%rax;"
         "mulq %[b];"
         : [c]"=&a"(c), [d]"=d"(d)
         : [b]"m"(*b), [a]"m"(*a)
         : "cc");
#else
    /* mulx takes one factor implicitly from rdx and does not touch flags. */
    __asm__ volatile
        ("movq %[a], %%rdx;"
         "mulx %[b], %[c], %[d];"
         : [c]"=r"(c), [d]"=r"(d)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx");
#endif
    return (((__uint128_t)(d)) << 64) | c;
}
/*
 * Full 64x64 -> 128-bit unsigned multiply of a by *b.
 *
 * Same result as a plain widening multiply; "rm" refers to the operand
 * forms: a is passed by value (register), b is read from memory.
 */
static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b)
{
    uint64_t c, d;

#ifndef __BMI2__
    /* mulq needs one factor in rax; the product comes back in rdx:rax. */
    __asm__ volatile
        ("movq %[a], %%rax;"
         "mulq %[b];"
         : [c]"=&a"(c), [d]"=d"(d)
         : [b]"m"(*b), [a]"r"(a)
         : "cc");
#else
    /* The "d" constraint places a in rdx, the implicit mulx source. */
    __asm__ volatile
        ("mulx %[b], %[c], %[d];"
         : [c]"=r"(c), [d]"=r"(d)
         : [b]"m"(*b), [a]"d"(a));
#endif
    return (((__uint128_t)(d)) << 64) | c;
}
/*
 * Full 64x64 -> 128-bit unsigned multiply of a by b, both passed by value.
 *
 * Returns the product with its high half in bits 127..64.
 */
static __inline__ __uint128_t widemul_rr(uint64_t a, uint64_t b)
{
    uint64_t c, d;

#ifndef __BMI2__
    /* a is placed in rax by its constraint; mulq leaves rdx:rax = a * b. */
    __asm__ volatile
        ("mulq %[b];"
         : [c]"=a"(c), [d]"=d"(d)
         : [b]"r"(b), "a"(a)
         : "cc");
#else
    /* a is placed in rdx by its constraint, as mulx requires. */
    __asm__ volatile
        ("mulx %[b], %[c], %[d];"
         : [c]"=r"(c), [d]"=r"(d)
         : [b]"r"(b), [a]"d"(a));
#endif
    return (((__uint128_t)(d)) << 64) | c;
}
/*
 * 128-bit product of (2 * *a) and *b.
 *
 * The doubling of *a is performed in a 64-bit register before the multiply,
 * so the top bit of *a is discarded; callers must keep *a below 2^63 if they
 * want the exact value 2 * a * b.
 */
static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b)
{
    uint64_t c, d;

#ifndef __BMI2__
    __asm__ volatile
        ("movq %[a], %%rax; "
         "addq %%rax, %%rax; "
         "mulq %[b];"
         : [c]"=&a"(c), [d]"=d"(d)
         : [b]"m"(*b), [a]"m"(*a)
         : "cc");
#else
    /* leaq doubles rdx without modifying the flags. */
    __asm__ volatile
        ("movq %[a], %%rdx;"
         "leaq (,%%rdx,2), %%rdx;"
         "mulx %[b], %[c], %[d];"
         : [c]"=r"(c), [d]"=r"(d)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx");
#endif
    return (((__uint128_t)(d)) << 64) | c;
}
/*
 * Multiply-accumulate: *acc += (*a) * (*b).
 *
 * The 128-bit accumulator is split into two 64-bit halves, the full product
 * is added with carry from the low half into the high half, and the halves
 * are recombined.  A carry out of the high half is dropped, so the sum is
 * taken modulo 2^128.
 */
static __inline__ void mac(__uint128_t *acc, const uint64_t *a,
                           const uint64_t *b)
{
    uint64_t lo = *acc, hi = *acc >> 64;

#ifdef __BMI2__
    uint64_t c, d;

    __asm__ volatile
        ("movq %[a], %%rdx; "
         "mulx %[b], %[c], %[d]; "
         "addq %[c], %[lo]; "
         "adcq %[d], %[hi]; "
         : [c]"=&r"(c), [d]"=&r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx", "cc");
#else
    /* mulq leaves the product in rdx:rax, which is then added to hi:lo. */
    __asm__ volatile
        ("movq %[a], %%rax; "
         "mulq %[b]; "
         "addq %%rax, %[lo]; "
         "adcq %%rdx, %[hi]; "
         : [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rax", "rdx", "cc");
#endif

    *acc = (((__uint128_t)(hi)) << 64) | lo;
}
/*
 * Double multiply-accumulate: adds the same product (*a) * (*b) to both
 * *acc and *acc2.
 *
 * The multiply is done once and its 128-bit result is added, with carry
 * between the 64-bit halves, to each accumulator.  Each sum is taken modulo
 * 2^128.
 */
static __inline__ void macac(__uint128_t *acc, __uint128_t *acc2,
                             const uint64_t *a, const uint64_t *b)
{
    uint64_t lo = *acc, hi = *acc >> 64;
    uint64_t lo2 = *acc2, hi2 = *acc2 >> 64;

#ifdef __BMI2__
    uint64_t c, d;

    __asm__ volatile
        ("movq %[a], %%rdx; "
         "mulx %[b], %[c], %[d]; "
         "addq %[c], %[lo]; "
         "adcq %[d], %[hi]; "
         "addq %[c], %[lo2]; "
         "adcq %[d], %[hi2]; "
         : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx", "cc");
#else
    __asm__ volatile
        ("movq %[a], %%rax; "
         "mulq %[b]; "
         "addq %%rax, %[lo]; "
         "adcq %%rdx, %[hi]; "
         "addq %%rax, %[lo2]; "
         "adcq %%rdx, %[hi2]; "
         : [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
         : [b]"m"(*b), [a]"m"(*a)
         : "rax", "rdx", "cc");
#endif

    *acc = (((__uint128_t)(hi)) << 64) | lo;
    *acc2 = (((__uint128_t)(hi2)) << 64) | lo2;
}
/*
 * Multiply-accumulate with a register factor: *acc += a * (*b).
 *
 * a is passed by value and b is read from memory.  The product is added to
 * the accumulator with carry between its 64-bit halves; the sum is taken
 * modulo 2^128.
 */
static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b)
{
    uint64_t lo = *acc, hi = *acc >> 64;

#ifdef __BMI2__
    uint64_t c, d;

    /* The "d" constraint places a in rdx, the implicit mulx source. */
    __asm__ volatile
        ("mulx %[b], %[c], %[d]; "
         "addq %[c], %[lo]; "
         "adcq %[d], %[hi]; "
         : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"d"(a)
         : "cc");
#else
    __asm__ volatile
        ("movq %[a], %%rax; "
         "mulq %[b]; "
         "addq %%rax, %[lo]; "
         "adcq %%rdx, %[hi]; "
         : [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"r"(a)
         : "rax", "rdx", "cc");
#endif

    *acc = (((__uint128_t)(hi)) << 64) | lo;
}
/*
 * Multiply-accumulate with both factors by value: *acc += a * b.
 *
 * The 128-bit product is added to the accumulator with carry between its
 * 64-bit halves; the sum is taken modulo 2^128.
 */
static __inline__ void mac_rr(__uint128_t *acc, uint64_t a, const uint64_t b)
{
    uint64_t lo = *acc, hi = *acc >> 64;

#ifdef __BMI2__
    uint64_t c, d;

    /* The "d" constraint places a in rdx, the implicit mulx source. */
    __asm__ volatile
        ("mulx %[b], %[c], %[d]; "
         "addq %[c], %[lo]; "
         "adcq %[d], %[hi]; "
         : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"r"(b), [a]"d"(a)
         : "cc");
#else
    /*
     * a enters in rax and is overwritten by the low half of the product,
     * hence the read-write "+a" operand on the local copy.
     */
    __asm__ volatile
        ("mulq %[b]; "
         "addq %%rax, %[lo]; "
         "adcq %%rdx, %[hi]; "
         : [lo]"+r"(lo), [hi]"+r"(hi), "+a"(a)
         : [b]"r"(b)
         : "rdx", "cc");
#endif

    *acc = (((__uint128_t)(hi)) << 64) | lo;
}
/*
 * Doubled multiply-accumulate: *acc += (2 * *a) * (*b).
 *
 * *a is doubled in a 64-bit register before the multiply, so its top bit is
 * discarded.  The product is added to the accumulator with carry between
 * its 64-bit halves; the sum is taken modulo 2^128.
 */
static __inline__ void mac2(__uint128_t *acc, const uint64_t *a,
                            const uint64_t *b)
{
    uint64_t lo = *acc, hi = *acc >> 64;

#ifdef __BMI2__
    uint64_t c, d;

    __asm__ volatile
        ("movq %[a], %%rdx; "
         "addq %%rdx, %%rdx; "
         "mulx %[b], %[c], %[d]; "
         "addq %[c], %[lo]; "
         "adcq %[d], %[hi]; "
         : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx", "cc");
#else
    __asm__ volatile
        ("movq %[a], %%rax; "
         "addq %%rax, %%rax; "
         "mulq %[b]; "
         "addq %%rax, %[lo]; "
         "adcq %%rdx, %[hi]; "
         : [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rax", "rdx", "cc");
#endif

    *acc = (((__uint128_t)(hi)) << 64) | lo;
}
/*
 * Multiply-subtract: *acc -= (*a) * (*b).
 *
 * The 128-bit product is subtracted from the accumulator with borrow from
 * the low 64-bit half into the high half; the result is taken modulo 2^128.
 */
static __inline__ void msb(__uint128_t *acc, const uint64_t *a,
                           const uint64_t *b)
{
    uint64_t lo = *acc, hi = *acc >> 64;

#ifdef __BMI2__
    uint64_t c, d;

    __asm__ volatile
        ("movq %[a], %%rdx; "
         "mulx %[b], %[c], %[d]; "
         "subq %[c], %[lo]; "
         "sbbq %[d], %[hi]; "
         : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx", "cc");
#else
    __asm__ volatile
        ("movq %[a], %%rax; "
         "mulq %[b]; "
         "subq %%rax, %[lo]; "
         "sbbq %%rdx, %[hi]; "
         : [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rax", "rdx", "cc");
#endif
    *acc = (((__uint128_t)(hi)) << 64) | lo;
}
/*
 * Doubled multiply-subtract: *acc -= (2 * *a) * (*b).
 *
 * *a is doubled in a 64-bit register before the multiply, so its top bit is
 * discarded.  The product is subtracted from the accumulator with borrow
 * between its 64-bit halves; the result is taken modulo 2^128.
 */
static __inline__ void msb2(__uint128_t *acc, const uint64_t *a,
                            const uint64_t *b)
{
    uint64_t lo = *acc, hi = *acc >> 64;

#ifdef __BMI2__
    uint64_t c, d;

    __asm__ volatile
        ("movq %[a], %%rdx; "
         "addq %%rdx, %%rdx; "
         "mulx %[b], %[c], %[d]; "
         "subq %[c], %[lo]; "
         "sbbq %[d], %[hi]; "
         : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx", "cc");
#else
    __asm__ volatile
        ("movq %[a], %%rax; "
         "addq %%rax, %%rax; "
         "mulq %[b]; "
         "subq %%rax, %[lo]; "
         "sbbq %%rdx, %[hi]; "
         : [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rax", "rdx", "cc");
#endif
    *acc = (((__uint128_t)(hi))<<64) | lo;
}
/*
 * Multiply, reverse-subtract: *acc = (*a) * (*b) - *acc.
 *
 * The old accumulator value is subtracted from the 128-bit product, with
 * borrow between the 64-bit halves, and the difference (modulo 2^128)
 * becomes the new accumulator.
 *
 * NOTE(review): the direction of the subtraction (product minus
 * accumulator) follows from the product registers being the ones written
 * back to *acc; confirm against callers.
 */
static __inline__ void mrs(__uint128_t *acc, const uint64_t *a,
                           const uint64_t *b)
{
    uint64_t c, d, lo = *acc, hi = *acc >> 64;

#ifdef __BMI2__
    /*
     * c and d are early-clobber: mulx writes them before lo and hi are
     * read, so they must not share registers with those inputs.
     */
    __asm__ volatile
        ("movq %[a], %%rdx; "
         "mulx %[b], %[c], %[d]; "
         "subq %[lo], %[c]; "
         "sbbq %[hi], %[d]; "
         : [c]"=&r"(c), [d]"=&r"(d)
         : [b]"m"(*b), [a]"m"(*a), [lo]"r"(lo), [hi]"r"(hi)
         : "rdx", "cc");
#else
    /* Fallback for builds without BMI2: mulq leaves the product in rdx:rax. */
    __asm__ volatile
        ("movq %[a], %%rax; "
         "mulq %[b]; "
         "subq %[lo], %%rax; "
         "sbbq %[hi], %%rdx; "
         : [c]"=&a"(c), [d]"=&d"(d)
         : [b]"m"(*b), [a]"m"(*a), [lo]"r"(lo), [hi]"r"(hi)
         : "cc");
#endif
    *acc = (((__uint128_t)(d)) << 64) | c;
}
/*
 * Branch-free zero test producing a mask.
 *
 * Returns all-ones (0xFFFFFFFFFFFFFFFF) when x == 0 and 0 otherwise.
 *
 * neg sets the carry flag exactly when x is non-zero; "sbb x, x" then turns
 * that carry into 0 (no carry) or all-ones (carry).  The final complement
 * flips that into the "is zero" mask.
 *
 * NOTE(review): the complement on return is inferred from the function
 * name; confirm that callers expect all-ones for a zero input.
 */
static __inline__ uint64_t word_is_zero(uint64_t x)
{
    /* Both instructions modify the flags, hence the "cc" clobber. */
    __asm__ volatile("neg %0; sbb %0, %0;" : "+r"(x) : : "cc");
    return ~x;
}
/*
 * Takes a 128-bit value x and an int n and returns a 64-bit result.
 *
 * NOTE(review): only the signature is visible here; the body is not in
 * view.  Presumably this is a right shift of x by n truncated to 64 bits,
 * judging by the name - confirm before relying on it.
 */
333 static inline uint64_t shrld(__uint128_t x, int n)
338 #endif /* __ARCH_X86_64_ARCH_INTRINSICS_H__ */