2 * Copyright 2009-2021 The OpenSSL Project Authors. All Rights Reserved.
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
16 #if defined(__linux) || defined(_AIX)
17 # include <sys/utsname.h>
19 #if defined(_AIX53) /* defined even on post-5.3 */
20 # include <sys/systemcfg.h>
21 # if !defined(__power_set)
22 # define __power_set(a) (_system_configuration.implementation & (a))
25 #if defined(__APPLE__) && defined(__MACH__)
26 # include <sys/types.h>
27 # include <sys/sysctl.h>
29 #include <openssl/crypto.h>
30 #include <openssl/bn.h>
31 #include <internal/cryptlib.h>
32 #include <crypto/chacha.h>
33 #include "bn/bn_local.h"
37 unsigned int OPENSSL_ppccap_P = 0;
39 static sigset_t all_masked;
42 #ifdef OPENSSL_BN_ASM_MONT
43 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
44 const BN_ULONG *np, const BN_ULONG *n0, int num)
46 int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
47 const BN_ULONG *np, const BN_ULONG *n0, int num);
48 int bn_mul4x_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
49 const BN_ULONG *np, const BN_ULONG *n0, int num);
50 int bn_mul_mont_fixed_n6(BN_ULONG *rp, const BN_ULONG *ap,
51 const BN_ULONG *bp, const BN_ULONG *np,
52 const BN_ULONG *n0, int num);
53 int bn_mul_mont_300_fixed_n6(BN_ULONG *rp, const BN_ULONG *ap,
54 const BN_ULONG *bp, const BN_ULONG *np,
55 const BN_ULONG *n0, int num);
61 return bn_mul4x_mont_int(rp, ap, bp, np, n0, num);
64 * There used to be [optional] call to bn_mul_mont_fpu64 here,
65 * but above subroutine is faster on contemporary processors.
66 * Formulation means that there might be old processors where
67 * FPU code path would be faster, POWER6 perhaps, but there was
68 * no opportunity to figure it out...
72 if (OPENSSL_ppccap_P & PPC_MADD300)
73 return bn_mul_mont_300_fixed_n6(rp, ap, bp, np, n0, num);
75 return bn_mul_mont_fixed_n6(rp, ap, bp, np, n0, num);
77 return bn_mul_mont_int(rp, ap, bp, np, n0, num);
80 void sha256_block_p8(void *ctx, const void *inp, size_t len);
81 void sha256_block_ppc(void *ctx, const void *inp, size_t len);
82 void sha256_block_data_order(void *ctx, const void *inp, size_t len);
83 void sha256_block_data_order(void *ctx, const void *inp, size_t len)
85 OPENSSL_ppccap_P & PPC_CRYPTO207 ? sha256_block_p8(ctx, inp, len) :
86 sha256_block_ppc(ctx, inp, len);
89 void sha512_block_p8(void *ctx, const void *inp, size_t len);
90 void sha512_block_ppc(void *ctx, const void *inp, size_t len);
91 void sha512_block_data_order(void *ctx, const void *inp, size_t len);
92 void sha512_block_data_order(void *ctx, const void *inp, size_t len)
94 OPENSSL_ppccap_P & PPC_CRYPTO207 ? sha512_block_p8(ctx, inp, len) :
95 sha512_block_ppc(ctx, inp, len);
99 # ifndef OPENSSL_NO_CHACHA
100 void ChaCha20_ctr32_int(unsigned char *out, const unsigned char *inp,
101 size_t len, const unsigned int key[8],
102 const unsigned int counter[4]);
103 void ChaCha20_ctr32_vmx(unsigned char *out, const unsigned char *inp,
104 size_t len, const unsigned int key[8],
105 const unsigned int counter[4]);
106 void ChaCha20_ctr32_vsx(unsigned char *out, const unsigned char *inp,
107 size_t len, const unsigned int key[8],
108 const unsigned int counter[4]);
109 void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp,
110 size_t len, const unsigned int key[8],
111 const unsigned int counter[4])
113 OPENSSL_ppccap_P & PPC_CRYPTO207
114 ? ChaCha20_ctr32_vsx(out, inp, len, key, counter)
115 : OPENSSL_ppccap_P & PPC_ALTIVEC
116 ? ChaCha20_ctr32_vmx(out, inp, len, key, counter)
117 : ChaCha20_ctr32_int(out, inp, len, key, counter);
121 # ifndef OPENSSL_NO_POLY1305
122 void poly1305_init_int(void *ctx, const unsigned char key[16]);
123 void poly1305_blocks(void *ctx, const unsigned char *inp, size_t len,
124 unsigned int padbit);
125 void poly1305_emit(void *ctx, unsigned char mac[16],
126 const unsigned int nonce[4]);
127 void poly1305_init_fpu(void *ctx, const unsigned char key[16]);
128 void poly1305_blocks_fpu(void *ctx, const unsigned char *inp, size_t len,
129 unsigned int padbit);
130 void poly1305_emit_fpu(void *ctx, unsigned char mac[16],
131 const unsigned int nonce[4]);
132 void poly1305_init_vsx(void *ctx, const unsigned char key[16]);
133 void poly1305_blocks_vsx(void *ctx, const unsigned char *inp, size_t len,
134 unsigned int padbit);
135 void poly1305_emit_vsx(void *ctx, unsigned char mac[16],
136 const unsigned int nonce[4]);
137 int poly1305_init(void *ctx, const unsigned char key[16], void *func[2]);
138 int poly1305_init(void *ctx, const unsigned char key[16], void *func[2])
140 if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
141 poly1305_init_int(ctx, key);
142 func[0] = (void*)(uintptr_t)poly1305_blocks_vsx;
143 func[1] = (void*)(uintptr_t)poly1305_emit;
144 } else if (sizeof(size_t) == 4 && (OPENSSL_ppccap_P & PPC_FPU)) {
145 poly1305_init_fpu(ctx, key);
146 func[0] = (void*)(uintptr_t)poly1305_blocks_fpu;
147 func[1] = (void*)(uintptr_t)poly1305_emit_fpu;
149 poly1305_init_int(ctx, key);
150 func[0] = (void*)(uintptr_t)poly1305_blocks;
151 func[1] = (void*)(uintptr_t)poly1305_emit;
156 #endif /* FIPS_MODULE */
158 #ifdef ECP_NISTZ256_ASM
159 void ecp_nistz256_mul_mont(unsigned long res[4], const unsigned long a[4],
160 const unsigned long b[4]);
162 void ecp_nistz256_to_mont(unsigned long res[4], const unsigned long in[4]);
163 void ecp_nistz256_to_mont(unsigned long res[4], const unsigned long in[4])
165 static const unsigned long RR[] = { 0x0000000000000003U,
168 0x00000004fffffffdU };
170 ecp_nistz256_mul_mont(res, in, RR);
173 void ecp_nistz256_from_mont(unsigned long res[4], const unsigned long in[4]);
174 void ecp_nistz256_from_mont(unsigned long res[4], const unsigned long in[4])
176 static const unsigned long one[] = { 1, 0, 0, 0 };
178 ecp_nistz256_mul_mont(res, in, one);
182 static sigjmp_buf ill_jmp;
183 static void ill_handler(int sig)
185 siglongjmp(ill_jmp, sig);
188 void OPENSSL_fpu_probe(void);
189 void OPENSSL_ppc64_probe(void);
190 void OPENSSL_altivec_probe(void);
191 void OPENSSL_crypto207_probe(void);
192 void OPENSSL_madd300_probe(void);
194 long OPENSSL_rdtsc_mftb(void);
195 long OPENSSL_rdtsc_mfspr268(void);
197 uint32_t OPENSSL_rdtsc(void)
199 if (OPENSSL_ppccap_P & PPC_MFTB)
200 return OPENSSL_rdtsc_mftb();
201 else if (OPENSSL_ppccap_P & PPC_MFSPR268)
202 return OPENSSL_rdtsc_mfspr268();
207 size_t OPENSSL_instrument_bus_mftb(unsigned int *, size_t);
208 size_t OPENSSL_instrument_bus_mfspr268(unsigned int *, size_t);
210 size_t OPENSSL_instrument_bus(unsigned int *out, size_t cnt)
212 if (OPENSSL_ppccap_P & PPC_MFTB)
213 return OPENSSL_instrument_bus_mftb(out, cnt);
214 else if (OPENSSL_ppccap_P & PPC_MFSPR268)
215 return OPENSSL_instrument_bus_mfspr268(out, cnt);
220 size_t OPENSSL_instrument_bus2_mftb(unsigned int *, size_t, size_t);
221 size_t OPENSSL_instrument_bus2_mfspr268(unsigned int *, size_t, size_t);
223 size_t OPENSSL_instrument_bus2(unsigned int *out, size_t cnt, size_t max)
225 if (OPENSSL_ppccap_P & PPC_MFTB)
226 return OPENSSL_instrument_bus2_mftb(out, cnt, max);
227 else if (OPENSSL_ppccap_P & PPC_MFSPR268)
228 return OPENSSL_instrument_bus2_mfspr268(out, cnt, max);
233 #if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
234 # if __GLIBC_PREREQ(2, 16)
235 # include <sys/auxv.h>
236 # define OSSL_IMPLEMENT_GETAUXVAL
240 #if defined(__FreeBSD__)
241 # include <sys/param.h>
242 # if __FreeBSD_version >= 1200000
243 # include <sys/auxv.h>
244 # define OSSL_IMPLEMENT_GETAUXVAL
246 static unsigned long getauxval(unsigned long key)
248 unsigned long val = 0ul;
250 if (elf_aux_info((int)key, &val, sizeof(val)) != 0)
258 /* I wish <sys/auxv.h> was universally available */
259 #define HWCAP 16 /* AT_HWCAP */
260 #define HWCAP_PPC64 (1U << 30)
261 #define HWCAP_ALTIVEC (1U << 28)
262 #define HWCAP_FPU (1U << 27)
263 #define HWCAP_POWER6_EXT (1U << 9)
264 #define HWCAP_VSX (1U << 7)
266 #define HWCAP2 26 /* AT_HWCAP2 */
267 #define HWCAP_VEC_CRYPTO (1U << 25)
268 #define HWCAP_ARCH_3_00 (1U << 23)
270 # if defined(__GNUC__) && __GNUC__>=2
271 __attribute__ ((constructor))
273 void OPENSSL_cpuid_setup(void)
276 struct sigaction ill_oact, ill_act;
278 static int trigger = 0;
284 if ((e = getenv("OPENSSL_ppccap"))) {
285 OPENSSL_ppccap_P = strtoul(e, NULL, 0);
289 OPENSSL_ppccap_P = 0;
292 OPENSSL_ppccap_P |= PPC_FPU;
294 if (sizeof(size_t) == 4) {
296 # if defined(_SC_AIX_KERNEL_BITMODE)
297 if (sysconf(_SC_AIX_KERNEL_BITMODE) != 64)
300 if (uname(&uts) != 0 || atoi(uts.version) < 6)
304 # if defined(__power_set)
306 * Value used in __power_set is a single-bit 1<<n one denoting
307 * specific processor class. Incidentally 0xffffffff<<n can be
308 * used to denote specific processor and its successors.
310 if (sizeof(size_t) == 4) {
311 /* In 32-bit case PPC_FPU64 is always fastest [if option] */
312 if (__power_set(0xffffffffU<<13)) /* POWER5 and later */
313 OPENSSL_ppccap_P |= PPC_FPU64;
315 /* In 64-bit case PPC_FPU64 is fastest only on POWER6 */
316 if (__power_set(0x1U<<14)) /* POWER6 */
317 OPENSSL_ppccap_P |= PPC_FPU64;
320 if (__power_set(0xffffffffU<<14)) /* POWER6 and later */
321 OPENSSL_ppccap_P |= PPC_ALTIVEC;
323 if (__power_set(0xffffffffU<<16)) /* POWER8 and later */
324 OPENSSL_ppccap_P |= PPC_CRYPTO207;
326 if (__power_set(0xffffffffU<<17)) /* POWER9 and later */
327 OPENSSL_ppccap_P |= PPC_MADD300;
333 #if defined(__APPLE__) && defined(__MACH__)
334 OPENSSL_ppccap_P |= PPC_FPU;
338 size_t len = sizeof(val);
340 if (sysctlbyname("hw.optional.64bitops", &val, &len, NULL, 0) == 0) {
342 OPENSSL_ppccap_P |= PPC_FPU64;
346 if (sysctlbyname("hw.optional.altivec", &val, &len, NULL, 0) == 0) {
348 OPENSSL_ppccap_P |= PPC_ALTIVEC;
355 #ifdef OSSL_IMPLEMENT_GETAUXVAL
357 unsigned long hwcap = getauxval(HWCAP);
358 unsigned long hwcap2 = getauxval(HWCAP2);
360 if (hwcap & HWCAP_FPU) {
361 OPENSSL_ppccap_P |= PPC_FPU;
363 if (sizeof(size_t) == 4) {
364 /* In 32-bit case PPC_FPU64 is always fastest [if option] */
365 if (hwcap & HWCAP_PPC64)
366 OPENSSL_ppccap_P |= PPC_FPU64;
368 /* In 64-bit case PPC_FPU64 is fastest only on POWER6 */
369 if (hwcap & HWCAP_POWER6_EXT)
370 OPENSSL_ppccap_P |= PPC_FPU64;
374 if (hwcap & HWCAP_ALTIVEC) {
375 OPENSSL_ppccap_P |= PPC_ALTIVEC;
377 if ((hwcap & HWCAP_VSX) && (hwcap2 & HWCAP_VEC_CRYPTO))
378 OPENSSL_ppccap_P |= PPC_CRYPTO207;
381 if (hwcap2 & HWCAP_ARCH_3_00) {
382 OPENSSL_ppccap_P |= PPC_MADD300;
387 sigfillset(&all_masked);
388 sigdelset(&all_masked, SIGILL);
389 sigdelset(&all_masked, SIGTRAP);
391 sigdelset(&all_masked, SIGEMT);
393 sigdelset(&all_masked, SIGFPE);
394 sigdelset(&all_masked, SIGBUS);
395 sigdelset(&all_masked, SIGSEGV);
397 memset(&ill_act, 0, sizeof(ill_act));
398 ill_act.sa_handler = ill_handler;
399 ill_act.sa_mask = all_masked;
401 sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset);
402 sigaction(SIGILL, &ill_act, &ill_oact);
404 #ifndef OSSL_IMPLEMENT_GETAUXVAL
405 if (sigsetjmp(ill_jmp,1) == 0) {
407 OPENSSL_ppccap_P |= PPC_FPU;
409 if (sizeof(size_t) == 4) {
412 if (uname(&uts) == 0 && strcmp(uts.machine, "ppc64") == 0)
414 if (sigsetjmp(ill_jmp, 1) == 0) {
415 OPENSSL_ppc64_probe();
416 OPENSSL_ppccap_P |= PPC_FPU64;
420 * Wanted code detecting POWER6 CPU and setting PPC_FPU64
425 if (sigsetjmp(ill_jmp, 1) == 0) {
426 OPENSSL_altivec_probe();
427 OPENSSL_ppccap_P |= PPC_ALTIVEC;
428 if (sigsetjmp(ill_jmp, 1) == 0) {
429 OPENSSL_crypto207_probe();
430 OPENSSL_ppccap_P |= PPC_CRYPTO207;
434 if (sigsetjmp(ill_jmp, 1) == 0) {
435 OPENSSL_madd300_probe();
436 OPENSSL_ppccap_P |= PPC_MADD300;
440 if (sigsetjmp(ill_jmp, 1) == 0) {
441 OPENSSL_rdtsc_mftb();
442 OPENSSL_ppccap_P |= PPC_MFTB;
443 } else if (sigsetjmp(ill_jmp, 1) == 0) {
444 OPENSSL_rdtsc_mfspr268();
445 OPENSSL_ppccap_P |= PPC_MFSPR268;
448 sigaction(SIGILL, &ill_oact, NULL);
449 sigprocmask(SIG_SETMASK, &oset, NULL);