ARM assembler pack: add platform run-time detection.
author Andy Polyakov <appro@openssl.org>
Sun, 17 Jul 2011 17:40:29 +0000 (17:40 +0000)
committer Andy Polyakov <appro@openssl.org>
Sun, 17 Jul 2011 17:40:29 +0000 (17:40 +0000)
Configure
TABLE
config
crypto/arm_arch.h
crypto/armcap.c [new file with mode: 0644]
crypto/armv4cpuid.S [new file with mode: 0644]
crypto/bn/asm/armv4-gf2m.pl
crypto/modes/gcm128.c
crypto/sha/asm/sha512-armv4.pl

index 46d5cb259b1960d3efe3fa5c82a33b862761765a..8ae48e75f32ca7aa6873e6cb788cbf52ba64c1e5 100755 (executable)
--- a/Configure
+++ b/Configure
@@ -135,7 +135,7 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-a
 my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::::::";
 my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o:::::::";
 my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o";
-my $armv4_asm=":bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o:void";
+my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o:void";
 my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:32";
 my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:64";
 my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o:::::::";
diff --git a/TABLE b/TABLE
index 2e405ec2301a12d3647a47d71d29f27e6cfe4f17..b7c764a9b9221bda6fe4b213d617a3910c69ad46 100644 (file)
--- a/TABLE
+++ b/TABLE
@@ -1032,7 +1032,7 @@ $thread_cflag = -D_REENTRANT
 $sys_id       = 
 $lflags       = -ldl
 $bn_ops       = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
-$cpuid_obj    = 
+$cpuid_obj    = armcap.o armv4cpuid.o
 $bn_obj       = bn_asm.o armv4-mont.o armv4-gf2m.o
 $des_obj      = 
 $aes_obj      = aes_cbc.o aes-armv4.o
@@ -3688,7 +3688,7 @@ $thread_cflag = -D_REENTRANT
 $sys_id       = 
 $lflags       = -ldl
 $bn_ops       = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
-$cpuid_obj    = 
+$cpuid_obj    = armcap.o armv4cpuid.o
 $bn_obj       = bn_asm.o armv4-mont.o armv4-gf2m.o
 $des_obj      = 
 $aes_obj      = aes_cbc.o aes-armv4.o
diff --git a/config b/config
index e82868721e31d797df6c6de27e674ea1dddbfb52..32bec89db2e0a102bb06cc91cdd1385b28755879 100755 (executable)
--- a/config
+++ b/config
@@ -630,6 +630,7 @@ case "$GUESSOS" in
        options="$options -DB_ENDIAN -mschedule=$CPUSCHEDULE -march=$CPUARCH"
        OUT="linux-generic32" ;;
   armv[1-3]*-*-linux2) OUT="linux-generic32" ;;
+  armv[7-9]*-*-linux2) OUT="linux-armv4"; options="$options -march=armv7-a" ;;
   arm*-*-linux2) OUT="linux-armv4" ;;
   sh*b-*-linux2) OUT="linux-generic32"; options="$options -DB_ENDIAN" ;;
   sh*-*-linux2)  OUT="linux-generic32"; options="$options -DL_ENDIAN" ;;
index 82401add1914137c412fcc51947015213ec99937..15027ed3de3c0ae5d1c40c31bb311445934d9d5a 100644 (file)
@@ -18,7 +18,7 @@
    */
 #  if  defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)     || \
        defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__)     || \
-       defined(__ARM_ARCH_7EM)
+       defined(__ARM_ARCH_7EM__)
 #   define __ARM_ARCH__ 7
 #  elif        defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__)     || \
        defined(__ARM_ARCH_6K__)|| defined(__ARM_ARCH_6M__)     || \
 #include <openssl/fipssyms.h>
 #endif
 
+#if !__ASSEMBLER__ /* skipped when __ASSEMBLER__ is defined nonzero (this header is also included from .S files) */
+extern unsigned int OPENSSL_armcap_P; /* capability bit mask filled in by OPENSSL_cpuid_setup() */
+                                     
+#define ARMV7_NEON      (1<<0) /* set when _armv7_neon_probe() ran without raising SIGILL */
+#define ARMV7_TICK      (1<<1) /* set when _armv7_tick() ran without raising SIGILL */
+#endif
+
 #endif
 #endif
diff --git a/crypto/armcap.c b/crypto/armcap.c
new file mode 100644 (file)
index 0000000..74c2c57
--- /dev/null
@@ -0,0 +1,77 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <crypto.h>
+
+#include "arm_arch.h"
+
+unsigned int OPENSSL_armcap_P;
+
+static sigset_t all_masked; /* built in OPENSSL_cpuid_setup(): every signal except SIGILL, SIGTRAP, SIGFPE, SIGBUS and SIGSEGV */
+
+static sigjmp_buf ill_jmp; /* resume point armed by sigsetjmp() in OPENSSL_cpuid_setup() */
+static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); } /* signal handler: jump back to ill_jmp, making sigsetjmp() return sig */
+
+/*
+ * The following subroutines could have been inlined, but not all
+ * ARM compilers support inline assembler...
+ */
+void _armv7_neon_probe(void);
+unsigned int _armv7_tick(void);
+
+unsigned int OPENSSL_rdtsc(void) /* returns _armv7_tick(), or 0 when ARMV7_TICK was not detected */
+       {
+       if (OPENSSL_armcap_P&ARMV7_TICK) /* test the bit with '&'; the former '|' was always true and could SIGILL */
+               return _armv7_tick();
+       else
+               return 0;
+       }
+
+void OPENSSL_cpuid_setup(void) /* fill OPENSSL_armcap_P, from the environment or by probing instructions */
+       {
+       char *e;
+       struct sigaction        ill_oact,ill_act;
+       sigset_t                oset;
+       static int trigger=0; /* nonzero once this function has been entered */
+
+       if (trigger) return; /* only the first call does any work */
+       trigger=1;
+       if ((e=getenv("OPENSSL_armcap"))) /* override: take the mask from the environment and skip probing */
+               {
+               OPENSSL_armcap_P=strtoul(e,NULL,0); /* base 0, so strtoul picks the radix from the prefix */
+               return;
+               }
+
+       sigfillset(&all_masked); /* start with every signal in the set ... */
+       sigdelset(&all_masked,SIGILL); /* ... then remove the five below so they stay deliverable */
+       sigdelset(&all_masked,SIGTRAP);
+       sigdelset(&all_masked,SIGFPE);
+       sigdelset(&all_masked,SIGBUS);
+       sigdelset(&all_masked,SIGSEGV);
+
+       OPENSSL_armcap_P = 0; /* no capability assumed until a probe succeeds */
+
+       memset(&ill_act,0,sizeof(ill_act));
+       ill_act.sa_handler = ill_handler;
+       ill_act.sa_mask    = all_masked;
+
+       sigprocmask(SIG_SETMASK,&ill_act.sa_mask,&oset); /* apply the mask now; previous mask kept in oset */
+       sigaction(SIGILL,&ill_act,&ill_oact); /* install ill_handler; previous action kept in ill_oact */
+
+       if (sigsetjmp(ill_jmp,1) == 0) /* 0 on the direct call; nonzero when ill_handler jumps back here */
+               {
+               _armv7_neon_probe();
+               OPENSSL_armcap_P |= ARMV7_NEON; /* reached only if the probe returned normally */
+               }
+       if (sigsetjmp(ill_jmp,1) == 0) /* re-arm the resume point for the second probe */
+               {
+               _armv7_tick();
+               OPENSSL_armcap_P |= ARMV7_TICK; /* reached only if the probe returned normally */
+               }
+
+       sigaction (SIGILL,&ill_oact,NULL); /* restore the previous SIGILL action */
+       sigprocmask(SIG_SETMASK,&oset,NULL); /* restore the previous signal mask */
+       }
diff --git a/crypto/armv4cpuid.S b/crypto/armv4cpuid.S
new file mode 100644 (file)
index 0000000..c9102ca
--- /dev/null
@@ -0,0 +1,154 @@
+#include "arm_arch.h"
+
+.text
+.code  32
+
+.align 5
+.global        _armv7_neon_probe
+.type  _armv7_neon_probe,%function
+_armv7_neon_probe: @ NEON probe: OPENSSL_cpuid_setup calls this with a SIGILL handler installed
+       .word   0xf26ee1fe      @ vorr  q15,q15,q15
+       .word   0xe12fff1e      @ bx    lr
+.size  _armv7_neon_probe,.-_armv7_neon_probe
+
+.global        _armv7_tick
+.type  _armv7_tick,%function
+_armv7_tick: @ returns a coprocessor register value in r0; also used as a probe by OPENSSL_cpuid_setup
+       mrc     p15,0,r0,c9,c13,0 @ r0 = p15 register c9,c13,0 (presumably the cycle counter - TODO confirm)
+       .word   0xe12fff1e      @ bx    lr
+.size  _armv7_tick,.-_armv7_tick
+
+.global        OPENSSL_atomic_add
+.type  OPENSSL_atomic_add,%function
+OPENSSL_atomic_add: @ r0 = address of the word to update, r1 = amount to add
+#if __ARM_ARCH__>=6
+.Ladd: ldrex   r2,[r0] @ exclusive load of the current value
+       add     r3,r2,r1 @ r3 = current value + amount
+       strex   r2,r3,[r0] @ attempt the store; r2 receives the status
+       cmp     r2,#0
+       bne     .Ladd @ nonzero status: retry from the load
+       mov     r0,r3 @ return the updated value
+       .word   0xe12fff1e      @ bx    lr
+#else
+       stmdb   sp!,{r4-r6,lr}
+       ldr     r2,.Lspinlock @ r2 = offset of atomic_add_spinlock from .Lspinlock
+       adr     r3,.Lspinlock
+       mov     r4,r0 @ keep both arguments in r4/r5 across the sched_yield call
+       mov     r5,r1
+       add     r6,r3,r2        @ &spinlock
+       b       .+8 @ first pass: skip over the sched_yield call
+.Lspin:        bl      sched_yield
+       mov     r0,#-1
+       swp     r0,r0,[r6] @ store -1 into the lock, previous lock value into r0
+       cmp     r0,#0
+       bne     .Lspin @ lock was not free: yield and try again
+
+       ldr     r2,[r4]
+       add     r2,r5
+       str     r2,[r4]
+       str     r0,[r6]         @ release spinlock
+       ldmia   sp!,{r4-r6,lr} @ NOTE(review): r0 is still 0 here, so this path returns 0 and not the sum - confirm intended
+       tst     lr,#1 @ bit 0 of lr clear: return with mov pc,lr, otherwise fall through to bx lr
+       moveq   pc,lr
+       .word   0xe12fff1e      @ bx    lr
+#endif
+.size  OPENSSL_atomic_add,.-OPENSSL_atomic_add
+
+.global        OPENSSL_cleanse
+.type  OPENSSL_cleanse,%function
+OPENSSL_cleanse: @ zero r1 bytes starting at address r0
+       eor     ip,ip,ip @ ip = 0, the value stored everywhere below
+       cmp     r1,#7
+       subhs   r1,#4 @ length >= 7: bias the count by -4 for the word loop
+       bhs     .Lot
+       cmp     r1,#0
+       beq     .Lcleanse_done @ nothing to do for zero length
+.Little: @ byte loop: short buffers and the tail left by the word loop
+       strb    ip,[r0],#1
+       subs    r1,#1
+       bhi     .Little @ repeat while the remaining count is nonzero
+       b       .Lcleanse_done
+
+.Lot:  tst     r0,#3 @ store single bytes until r0 is 4-byte aligned
+       beq     .Laligned
+       strb    ip,[r0],#1
+       sub     r1,#1
+       b       .Lot
+.Laligned: @ word loop
+       str     ip,[r0],#4
+       subs    r1,#4
+       bhs     .Laligned @ repeat while the subtraction did not borrow
+       adds    r1,#4 @ undo the last subtraction: r1 = bytes still to clear
+       bne     .Little @ clear them one byte at a time
+.Lcleanse_done:
+       tst     lr,#1 @ bit 0 of lr clear: return with mov pc,lr, otherwise fall through to bx lr
+       moveq   pc,lr
+       .word   0xe12fff1e      @ bx    lr
+.size  OPENSSL_cleanse,.-OPENSSL_cleanse
+
+.global        OPENSSL_wipe_cpu
+.type  OPENSSL_wipe_cpu,%function
+OPENSSL_wipe_cpu: @ zero r2, r3, ip and, when NEON was detected, the q registers listed below; returns sp in r0
+       ldr     r0,.LOPENSSL_armcap @ r0 = offset of OPENSSL_armcap_P from .LOPENSSL_armcap
+       adr     r1,.LOPENSSL_armcap
+       ldr     r0,[r1,r0] @ r0 = OPENSSL_armcap_P
+       eor     r2,r2,r2
+       eor     r3,r3,r3
+       eor     ip,ip,ip
+       tst     r0,#1 @ bit 0 is ARMV7_NEON in arm_arch.h
+       beq     .Lwipe_done @ bit clear: skip the NEON instructions
+       .word   0xf3000150      @ veor    q0, q0, q0
+       .word   0xf3022152      @ veor    q1, q1, q1
+       .word   0xf3044154      @ veor    q2, q2, q2
+       .word   0xf3066156      @ veor    q3, q3, q3
+       .word   0xf34001f0      @ veor    q8, q8, q8
+       .word   0xf34221f2      @ veor    q9, q9, q9
+       .word   0xf34441f4      @ veor    q10, q10, q10
+       .word   0xf34661f6      @ veor    q11, q11, q11
+       .word   0xf34881f8      @ veor    q12, q12, q12
+       .word   0xf34aa1fa      @ veor    q13, q13, q13
+       .word   0xf34cc1fc      @ veor    q14, q14, q14
+       .word   0xf34ee1fe      @ veor    q15, q15, q15
+.Lwipe_done:
+       mov     r0,sp @ return value: the current stack pointer
+       tst     lr,#1 @ bit 0 of lr clear: return with mov pc,lr, otherwise fall through to bx lr
+       moveq   pc,lr
+       .word   0xe12fff1e      @ bx    lr
+.size  OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
+
+.global        OPENSSL_instrument_bus
+.type  OPENSSL_instrument_bus,%function
+OPENSSL_instrument_bus: @ stub: does no work and returns 0
+       eor     r0,r0,r0 @ r0 = 0
+       tst     lr,#1 @ bit 0 of lr clear: return with mov pc,lr, otherwise fall through to bx lr
+       moveq   pc,lr
+       .word   0xe12fff1e      @ bx    lr
+.size  OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
+
+.global        OPENSSL_instrument_bus2
+.type  OPENSSL_instrument_bus2,%function
+OPENSSL_instrument_bus2: @ stub: does no work and returns 0
+       eor     r0,r0,r0 @ r0 = 0
+       tst     lr,#1 @ bit 0 of lr clear: return with mov pc,lr, otherwise fall through to bx lr
+       moveq   pc,lr
+       .word   0xe12fff1e      @ bx    lr
+.size  OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
+
+.align 5
+.LOPENSSL_armcap:
+.word  OPENSSL_armcap_P-.LOPENSSL_armcap @ label-relative offset read by OPENSSL_wipe_cpu
+#if __ARM_ARCH__>=6
+.align 5
+#else
+.Lspinlock:
+.word  atomic_add_spinlock-.Lspinlock @ label-relative offset read by the pre-ARMv6 OPENSSL_atomic_add
+.align 5
+
+.data
+.align 2
+atomic_add_spinlock:
+.word  0 @ initial value 0; OPENSSL_atomic_add swaps in -1 to take the lock and stores 0 to release it
+#endif
+
+.comm  OPENSSL_armcap_P,4,4
+.hidden        OPENSSL_armcap_P
index 4fe9db9894aecb28cc82789d047c4b85847fd651..9928dae872e52c8fb3fcffe5c414caed2ee6b0f0 100644 (file)
@@ -264,12 +264,12 @@ $code.=<<___;
 #if __ARM_ARCH__>=7
 .align 5
 .LOPENSSL_armcap:
-.word  OPENSSL_armcap-(.Lpic+8)
+.word  OPENSSL_armcap_P-(.Lpic+8)
 #endif
-.asciz "GF2m Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
+.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
 .align 5
 
-.comm  OPENSSL_armcap,4,4
+.comm  OPENSSL_armcap_P,4,4
 ___
 
 $code =~ s/\`([^\`]*)\`/eval $1/gem;
index 8b9070a45b76aac0d88bd011350d26ad1ccdbe0b..2e42e7180437a37a726ca8fe011d9046770e94a2 100644 (file)
@@ -668,8 +668,6 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
 #  if __ARM_ARCH__>=7
 #   define GHASH_ASM_ARM
 #   define GCM_FUNCREF_4BIT
-extern unsigned int OPENSSL_armcap;
-
 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 #  endif
@@ -715,7 +713,8 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
 #elif  TABLE_BITS==4
 # if   defined(GHASH_ASM_X86_OR_64)
 #  if  !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
-       if (OPENSSL_ia32cap_P[1]&(1<<1)) {      /* check PCLMULQDQ bit */
+       if (OPENSSL_ia32cap_P[0]&(1<<24) &&     /* check FXSR bit */
+           OPENSSL_ia32cap_P[1]&(1<<1) ) {     /* check PCLMULQDQ bit */
                gcm_init_clmul(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_clmul;
                ctx->ghash = gcm_ghash_clmul;
@@ -736,7 +735,7 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
        ctx->ghash = gcm_ghash_4bit;
 #  endif
 # elif defined(GHASH_ASM_ARM)
-       if (OPENSSL_armcap & 1) {
+       if (OPENSSL_armcap_P & ARMV7_NEON) {
                ctx->gmult = gcm_gmult_neon;
                ctx->ghash = gcm_ghash_neon;
        } else {
index 8ba56e3d16dff9c4d22bae56daf6f0358297895d..7faf37b1479029e152ec99200b4c55b551850131 100644 (file)
@@ -221,7 +221,7 @@ WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
 WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
 .size  K512,.-K512
 .LOPENSSL_armcap:
-.word  OPENSSL_armcap-sha512_block_data_order
+.word  OPENSSL_armcap_P-sha512_block_data_order
 .skip  32-4
 
 .global        sha512_block_data_order
@@ -231,7 +231,7 @@ sha512_block_data_order:
        add     $len,$inp,$len,lsl#7    @ len to point at the end of inp
 #if __ARM_ARCH__>=7
        ldr     r12,.LOPENSSL_armcap
-       ldr     r12,[r3,r12]            @ OPENSSL_armcap
+       ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
        tst     r12,#1
        bne     .LNEON
 #endif
@@ -573,7 +573,7 @@ $code.=<<___;
 .size  sha512_block_data_order,.-sha512_block_data_order
 .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
 .align 2
-.comm  OPENSSL_armcap,4,4
+.comm  OPENSSL_armcap_P,4,4
 ___
 
 $code =~ s/\`([^\`]*)\`/eval $1/gem;