Loop unrolling and the use of EOR3 can improve performance on N2
by up to 32%.
Signed-off-by: XiaokangQian <xiaokang.qian@arm.com>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <pauli@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/18350)
# define ARM_CPU_PART_CORTEX_A72 0xD08
# define ARM_CPU_PART_N1 0xD0C
# define ARM_CPU_PART_V1 0xD40
+# define ARM_CPU_PART_N2 0xD49
# define MIDR_PARTNUM_SHIFT 4
# define MIDR_PARTNUM_MASK (0xfff << MIDR_PARTNUM_SHIFT)
(OPENSSL_armcap_P & ARMV7_NEON)) {
OPENSSL_armv8_rsa_neonized = 1;
}
- if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V1)) &&
+ if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V1) ||
+ MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_N2)) &&
(OPENSSL_armcap_P & ARMV8_SHA3))
OPENSSL_armcap_P |= ARMV8_UNROLL8_EOR3;
# endif