Add linux-aarch64 target.
author Andy Polyakov <appro@openssl.org>
Sun, 1 Jun 2014 15:21:06 +0000 (17:21 +0200)
committer Andy Polyakov <appro@openssl.org>
Sun, 1 Jun 2014 15:21:06 +0000 (17:21 +0200)
armcap.c is shared between 32- and 64-bit builds and features link-time
detection of getauxval.

Submitted by: Ard Biesheuvel.

Configure
TABLE
config
crypto/arm64cpuid.S [new file with mode: 0644]
crypto/arm_arch.h
crypto/armcap.c

index 175f8d86a44fdb37b236491a2d34bc02ba8a0284..5fc7acca438828411f0ea52d10c0d7b23233f81e 100755 (executable)
--- a/Configure
+++ b/Configure
@@ -137,6 +137,7 @@ my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha25
 my $mips32_asm=$mips64_asm; $mips32_asm =~ s/\s*sha512\-mips\.o//;
 my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o:";
 my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o bsaes-armv7.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void";
+my $aarch64_asm="armcap.o arm64cpuid.o mem_clr.o::::::::::::::";
 my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32";
 my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64";
 my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::";
@@ -352,6 +353,7 @@ my %table=(
 # It's believed that majority of ARM toolchains predefine appropriate -march.
 # If you compiler does not, do complement config command line with one!
 "linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"linux-aarch64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${aarch64_asm}:linux64:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 # Configure script adds minimally required -march for assembly support,
 # if no -march was specified at command line. mips32 and mips64 below
 # refer to contemporary MIPS Architecture specifications, MIPS32 and
@@ -1592,7 +1594,7 @@ if ($rmd160_obj =~ /\.o$/)
        }
 if ($aes_obj =~ /\.o$/)
        {
-       $cflags.=" -DAES_ASM";
+       $cflags.=" -DAES_ASM" if ($aes_obj =~ m/\baes\-/);;
        # aes-ctr.o is not a real file, only indication that assembler
        # module implements AES_ctr32_encrypt...
        $cflags.=" -DAES_CTR_ASM" if ($aes_obj =~ s/\s*aes\-ctr\.o//);
diff --git a/TABLE b/TABLE
index bf2dba3b8d0fd02a255572f8316bc838ba337cb7..5c144eb259150e5a314a286fa8fd706b4a3e89c1 100644 (file)
--- a/TABLE
+++ b/TABLE
@@ -1883,7 +1883,7 @@ $multilib     =
 
 *** debug-ben-debug-64-clang
 $cc           = clang
-$cflags       = -Wall -pedantic -DPEDANTIC -Wno-long-long -Wsign-compare -Wmissing-prototypes -Wshadow -Wformat -Werror -DCRYPTO_MDEBUG_ALL -DCRYPTO_MDEBUG_ABORT -DREF_CHECK -DOPENSSL_NO_DEPRECATED -Wno-error=overlength-strings -Wno-error=extended-offsetof -Qunused-arguments -DBN_DEBUG -DCONF_DEBUG -DDEBUG_SAFESTACK -DDEBUG_UNUSED -g3 -O3 -pipe
+$cflags       = -Wall -pedantic -DPEDANTIC -Wno-long-long -Wsign-compare -Wmissing-prototypes -Wshadow -Wformat -Werror -DCRYPTO_MDEBUG_ALL -DCRYPTO_MDEBUG_ABORT -DREF_CHECK -DOPENSSL_NO_DEPRECATED -fsanitize=undefined -Wno-error=overlength-strings -Wno-error=extended-offsetof -Wno-error=language-extension-token -Wstrict-overflow -Qunused-arguments -DBN_DEBUG -DCONF_DEBUG -DDEBUG_SAFESTACK -DDEBUG_UNUSED -g3 -O3 -pipe
 $unistd       = 
 $thread_cflag = -pthread -D_THREAD_SAFE -D_REENTRANT
 $sys_id       = 
@@ -3960,6 +3960,39 @@ $ranlib       =
 $arflags      = 
 $multilib     = 64
 
+*** linux-aarch64
+$cc           = gcc
+$cflags       = -DTERMIO -O3 -Wall
+$unistd       = 
+$thread_cflag = -D_REENTRANT
+$sys_id       = 
+$lflags       = -ldl
+$bn_ops       = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
+$cpuid_obj    = armcap.o arm64cpuid.o mem_clr.o
+$bn_obj       = 
+$des_obj      = 
+$aes_obj      = 
+$bf_obj       = 
+$md5_obj      = 
+$sha1_obj     = 
+$cast_obj     = 
+$rc4_obj      = 
+$rmd160_obj   = 
+$rc5_obj      = 
+$wp_obj       = 
+$cmll_obj     = 
+$modes_obj    = 
+$engines_obj  = 
+$perlasm_scheme = linux64
+$dso_scheme   = dlfcn
+$shared_target= linux-shared
+$shared_cflag = -fPIC
+$shared_ldflag = 
+$shared_extension = .so.$(SHLIB_MAJOR).$(SHLIB_MINOR)
+$ranlib       = 
+$arflags      = 
+$multilib     = 
+
 *** linux-alpha+bwx-ccc
 $cc           = ccc
 $cflags       = -fast -readonly_strings -DL_ENDIAN -DTERMIO
diff --git a/config b/config
index c6488ee55a644373f1fb578627dffc1f02c5349a..5d9e5351db06fe0b90aaef185afaf383a0ce12fa 100755 (executable)
--- a/config
+++ b/config
@@ -662,7 +662,7 @@ case "$GUESSOS" in
   armv[1-3]*-*-linux2) OUT="linux-generic32" ;;
   armv[7-9]*-*-linux2) OUT="linux-armv4"; options="$options -march=armv7-a" ;;
   arm*-*-linux2) OUT="linux-armv4" ;;
-  aarch64-*-linux2) OUT="linux-generic64" ;;
+  aarch64-*-linux2) OUT="linux-aarch64" ;;
   sh*b-*-linux2) OUT="linux-generic32"; options="$options -DB_ENDIAN" ;;
   sh*-*-linux2)  OUT="linux-generic32"; options="$options -DL_ENDIAN" ;;
   m68k*-*-linux2) OUT="linux-generic32"; options="$options -DB_ENDIAN" ;;
diff --git a/crypto/arm64cpuid.S b/crypto/arm64cpuid.S
new file mode 100644 (file)
index 0000000..4778ac1
--- /dev/null
@@ -0,0 +1,46 @@
+#include "arm_arch.h"
+
+.text
+.arch  armv8-a+crypto
+
+.align 5
+.global        _armv7_neon_probe
+.type  _armv7_neon_probe,%function
+_armv7_neon_probe:
+       orr     v15.16b, v15.16b, v15.16b
+       ret
+.size  _armv7_neon_probe,.-_armv7_neon_probe
+
+.global        _armv7_tick
+.type  _armv7_tick,%function
+_armv7_tick:
+       mrs     x0, CNTVCT_EL0
+       ret
+.size  _armv7_tick,.-_armv7_tick
+
+.global        _armv8_aes_probe
+.type  _armv8_aes_probe,%function
+_armv8_aes_probe:
+       aese    v0.16b, v0.16b
+       ret
+.size  _armv8_aes_probe,.-_armv8_aes_probe
+
+.global        _armv8_sha1_probe
+.type  _armv8_sha1_probe,%function
+_armv8_sha1_probe:
+       sha1h   s0, s0
+       ret
+.size  _armv8_sha1_probe,.-_armv8_sha1_probe
+
+.global        _armv8_sha256_probe
+.type  _armv8_sha256_probe,%function
+_armv8_sha256_probe:
+       sha256su0       v0.4s, v0.4s
+       ret
+.size  _armv8_sha256_probe,.-_armv8_sha256_probe
+.global        _armv8_pmull_probe
+.type  _armv8_pmull_probe,%function
+_armv8_pmull_probe:
+       pmull   v0.1q, v0.1d, v0.1d
+       ret
+.size  _armv8_pmull_probe,.-_armv8_pmull_probe
index d68318c851919416a1c3bb07d1964d1aea3bf010..6fa87244d1b929fb56ca1d87cd384e59e0872a28 100644 (file)
 #   define __ARMEL__
 #  endif
 # elif defined(__GNUC__)
+#  if  defined(__aarch64__)
+#   define __ARM_ARCH__ 8
+#   if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
+#    define __ARMEB__
+#   else
+#    define __ARMEL__
+#   endif
   /*
    * Why doesn't gcc define __ARM_ARCH__? Instead it defines
    * bunch of below macros. See all_architectires[] table in
    * gcc/config/arm/arm.c. On a side note it defines
    * __ARMEL__/__ARMEB__ for little-/big-endian.
    */
-#  if  defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)     || \
+#  elif defined(__ARM_ARCH)
+#   define __ARM_ARCH__ __ARM_ARCH
+#  elif        defined(__ARM_ARCH_8A__)
+#   define __ARM_ARCH__ 8
+#  elif        defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)     || \
        defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__)     || \
        defined(__ARM_ARCH_7EM__)
 #   define __ARM_ARCH__ 7
@@ -43,6 +54,7 @@
 
 #if !__ASSEMBLER__
 extern unsigned int OPENSSL_armcap_P;
+#endif
                                      
 #define ARMV7_NEON      (1<<0)
 #define ARMV7_TICK      (1<<1)
@@ -50,6 +62,5 @@ extern unsigned int OPENSSL_armcap_P;
 #define ARMV8_SHA1      (1<<3)
 #define ARMV8_SHA256    (1<<4)
 #define ARMV8_PMULL     (1<<5)
-#endif
 
 #endif
index 550414425d6bd9a8d1c56a5ac377abe03e57ac95..7e46d07a3232c06cb359fbc64110c21680c80e84 100644 (file)
@@ -23,9 +23,9 @@ void _armv8_aes_probe(void);
 void _armv8_sha1_probe(void);
 void _armv8_sha256_probe(void);
 void _armv8_pmull_probe(void);
-unsigned int _armv7_tick(void);
+unsigned long _armv7_tick(void);
 
-unsigned int OPENSSL_rdtsc(void)
+unsigned long OPENSSL_rdtsc(void)
        {
        if (OPENSSL_armcap_P & ARMV7_TICK)
                return _armv7_tick();
@@ -33,9 +33,41 @@ unsigned int OPENSSL_rdtsc(void)
                return 0;
        }
 
+/*
+ * Use a weak reference to getauxval() so we can use it if it is available but
+ * don't break the build if it is not.
+ */
 #if defined(__GNUC__) && __GNUC__>=2
 void OPENSSL_cpuid_setup(void) __attribute__((constructor));
+extern unsigned long getauxval(unsigned long type) __attribute__((weak));
+#else
+static unsigned long (*getauxval)(unsigned long) = NULL;
 #endif
+
+/*
+ * ARM puts the feature bits for Crypto Extensions in AT_HWCAP2, whereas
+ * AArch64 uses AT_HWCAP.
+ */
+#if defined(__arm__) || defined (__arm)
+# define HWCAP                 16      /* AT_HWCAP */
+# define HWCAP_NEON            (1 << 12)
+
+# define HWCAP_CE              26      /* AT_HWCAP2 */
+# define HWCAP_CE_AES          (1 << 0)
+# define HWCAP_CE_PMULL                (1 << 1)
+# define HWCAP_CE_SHA1         (1 << 2)
+# define HWCAP_CE_SHA256       (1 << 3)
+#elif defined(__aarch64__)
+# define HWCAP                 16      /* AT_HWCAP */
+# define HWCAP_NEON            (1 << 1)
+
+# define HWCAP_CE              HWCAP
+# define HWCAP_CE_AES          (1 << 3)
+# define HWCAP_CE_PMULL                (1 << 4)
+# define HWCAP_CE_SHA1         (1 << 5)
+# define HWCAP_CE_SHA256       (1 << 6)
+#endif
+
 void OPENSSL_cpuid_setup(void)
        {
        char *e;
@@ -48,7 +80,7 @@ void OPENSSL_cpuid_setup(void)
  
        if ((e=getenv("OPENSSL_armcap")))
                {
-               OPENSSL_armcap_P=strtoul(e,NULL,0);
+               OPENSSL_armcap_P=(unsigned int)strtoul(e,NULL,0);
                return;
                }
 
@@ -68,11 +100,37 @@ void OPENSSL_cpuid_setup(void)
        sigprocmask(SIG_SETMASK,&ill_act.sa_mask,&oset);
        sigaction(SIGILL,&ill_act,&ill_oact);
 
-       if (sigsetjmp(ill_jmp,1) == 0)
+       if (getauxval != NULL)
+               {
+               if (getauxval(HWCAP) & HWCAP_NEON)
+                       {
+                       unsigned long hwcap = getauxval(HWCAP_CE);
+
+                       OPENSSL_armcap_P |= ARMV7_NEON;
+
+                       if (hwcap & HWCAP_CE_AES)
+                               OPENSSL_armcap_P |= ARMV8_AES;
+
+                       if (hwcap & HWCAP_CE_PMULL)
+                               OPENSSL_armcap_P |= ARMV8_PMULL;
+
+                       if (hwcap & HWCAP_CE_SHA1)
+                               OPENSSL_armcap_P |= ARMV8_SHA1;
+
+                       if (hwcap & HWCAP_CE_SHA256)
+                               OPENSSL_armcap_P |= ARMV8_SHA256;
+                       }
+               }
+       else if (sigsetjmp(ill_jmp,1) == 0)
                {
                _armv7_neon_probe();
                OPENSSL_armcap_P |= ARMV7_NEON;
                if (sigsetjmp(ill_jmp,1) == 0)
+                       {
+                       _armv8_pmull_probe();
+                       OPENSSL_armcap_P |= ARMV8_PMULL|ARMV8_AES;
+                       }
+               else if (sigsetjmp(ill_jmp,1) == 0)
                        {
                        _armv8_aes_probe();
                        OPENSSL_armcap_P |= ARMV8_AES;
@@ -87,11 +145,6 @@ void OPENSSL_cpuid_setup(void)
                        _armv8_sha256_probe();
                        OPENSSL_armcap_P |= ARMV8_SHA256;
                        }
-               if (sigsetjmp(ill_jmp,1) == 0)
-                       {
-                       _armv8_pmull_probe();
-                       OPENSSL_armcap_P |= ARMV8_PMULL;
-                       }
                }
        if (sigsetjmp(ill_jmp,1) == 0)
                {