-int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num)
- {
- int bn_mul_mont_fpu64(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num);
- int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num);
-
- if (sizeof(size_t)==4)
- {
-#if (defined(__APPLE__) && defined(__MACH__))
- if (num>=8 && (num&3)==0 && (OPENSSL_ppccap_P&PPC_FPU64))
- return bn_mul_mont_fpu64(rp,ap,bp,np,n0,num);
-#else
- /* boundary of 32 was experimentally determined on
- Linux 2.6.22, might have to be adjusted on AIX... */
- if (num>=32 && (num&3)==0 && (OPENSSL_ppccap_P&PPC_FPU64))
- {
- sigset_t oset;
- int ret;
-
- sigprocmask(SIG_SETMASK,&all_masked,&oset);
- ret=bn_mul_mont_fpu64(rp,ap,bp,np,n0,num);
- sigprocmask(SIG_SETMASK,&oset,NULL);
-
- return ret;
- }
+/*
+ * Dispatcher for bn_mul_mont: forwards its arguments unchanged to one of
+ * the two routines declared in the body, chosen by |num|.
+ *
+ * Returns 0 without calling either routine when |num| is below 4;
+ * otherwise returns whatever the selected routine returns.
+ */
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                const BN_ULONG *np, const BN_ULONG *n0, int num)
+{
+    int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                        const BN_ULONG *np, const BN_ULONG *n0, int num);
+    int bn_mul4x_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                          const BN_ULONG *np, const BN_ULONG *n0, int num);
+
+    /* Inputs shorter than 4 are not handled by this dispatcher. */
+    if (num < 4)
+        return 0;
+
+    /*
+     * An [optional] call to bn_mul_mont_fpu64 used to live here, but
+     * bn_mul4x_mont_int is faster on contemporary processors. The wording
+     * is deliberate: there may be old processors (POWER6 perhaps) where
+     * the FPU code path would be faster, but there was no opportunity to
+     * figure it out...
+     */
+
+    /* Multiples of four take the 4x routine; everything else the generic one. */
+    return (num & 3) != 0 ? bn_mul_mont_int(rp, ap, bp, np, n0, num)
+                          : bn_mul4x_mont_int(rp, ap, bp, np, n0, num);
+}
+#endif
+
+void sha256_block_p8(void *ctx, const void *inp, size_t len);
+void sha256_block_ppc(void *ctx, const void *inp, size_t len);
+
+/*
+ * Forwards (ctx, inp, len) to sha256_block_p8 when the PPC_CRYPTO207 bit
+ * is set in OPENSSL_ppccap_P, and to sha256_block_ppc otherwise.
+ */
+void sha256_block_data_order(void *ctx, const void *inp, size_t len)
+{
+    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
+        sha256_block_p8(ctx, inp, len);
+    } else {
+        sha256_block_ppc(ctx, inp, len);
+    }
+}
+
+void sha512_block_p8(void *ctx, const void *inp, size_t len);
+void sha512_block_ppc(void *ctx, const void *inp, size_t len);
+
+/*
+ * Forwards (ctx, inp, len) to sha512_block_p8 when the PPC_CRYPTO207 bit
+ * is set in OPENSSL_ppccap_P, and to sha512_block_ppc otherwise.
+ */
+void sha512_block_data_order(void *ctx, const void *inp, size_t len)
+{
+    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
+        sha512_block_p8(ctx, inp, len);
+    } else {
+        sha512_block_ppc(ctx, inp, len);
+    }
+}
+
+#ifndef OPENSSL_NO_CHACHA
+void ChaCha20_ctr32_int(unsigned char *out, const unsigned char *inp,
+                        size_t len, const unsigned int key[8],
+                        const unsigned int counter[4]);
+void ChaCha20_ctr32_vmx(unsigned char *out, const unsigned char *inp,
+                        size_t len, const unsigned int key[8],
+                        const unsigned int counter[4]);
+
+/*
+ * Forwards all arguments to ChaCha20_ctr32_vmx when the PPC_ALTIVEC bit is
+ * set in OPENSSL_ppccap_P, and to ChaCha20_ctr32_int otherwise.
+ */
+void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp,
+                    size_t len, const unsigned int key[8],
+                    const unsigned int counter[4])
+{
+    if (OPENSSL_ppccap_P & PPC_ALTIVEC) {
+        ChaCha20_ctr32_vmx(out, inp, len, key, counter);
+        return;
+    }
+
+    ChaCha20_ctr32_int(out, inp, len, key, counter);
+}
+#endif
+
+#ifndef OPENSSL_NO_POLY1305
+void poly1305_init_int(void *ctx, const unsigned char key[16]);
+void poly1305_blocks(void *ctx, const unsigned char *inp, size_t len,
+                     unsigned int padbit);
+void poly1305_emit(void *ctx, unsigned char mac[16],
+                   const unsigned int nonce[4]);
+void poly1305_init_fpu(void *ctx, const unsigned char key[16]);
+void poly1305_blocks_fpu(void *ctx, const unsigned char *inp, size_t len,
+                         unsigned int padbit);
+void poly1305_emit_fpu(void *ctx, unsigned char mac[16],
+                       const unsigned int nonce[4]);
+
+/*
+ * Initialises |ctx| from |key| through one of two back ends and reports the
+ * matching routines to the caller via |func|.
+ *
+ * The *_fpu set is chosen only when size_t is 4 bytes wide and the PPC_FPU
+ * bit is set in OPENSSL_ppccap_P; in every other case the plain set is used.
+ *
+ * On return func[0] holds the address of the selected "blocks" routine and
+ * func[1] the address of the selected "emit" routine.
+ *
+ * Always returns 1.
+ */
+int poly1305_init(void *ctx, const unsigned char key[16], void *func[2])
+{
+    /*
+     * ISO C has no implicit conversion from a function pointer to void *,
+     * so the addresses are converted explicitly by way of an integer type.
+     */
+    if (sizeof(size_t) == 4 && (OPENSSL_ppccap_P & PPC_FPU)) {
+        poly1305_init_fpu(ctx, key);
+        func[0] = (void *)(size_t)poly1305_blocks_fpu;
+        func[1] = (void *)(size_t)poly1305_emit_fpu;
+    } else {
+        poly1305_init_int(ctx, key);
+        func[0] = (void *)(size_t)poly1305_blocks;
+        func[1] = (void *)(size_t)poly1305_emit;
+    }
+    return 1;
+}