From 459217237640369a092084ccb80175b5758f40b1 Mon Sep 17 00:00:00 2001 From: Markus Stockhausen Date: Mon, 11 Feb 2019 18:38:46 +0100 Subject: [PATCH] MIPS32R2 provides the EXT instruction to extract bits from registers. As the AES table is already 1K aligned we can use it everywhere and speed up table address calculation by 10%. Performance numbers: decryption 16B 64B 256B 1024B 8192B ------------------------------------------------------------------- aes-256-cbc 5636.84k 6443.26k 6689.02k 6752.94k 6766.59k bef. aes-256-cbc 6200.31k 7195.71k 7504.30k 7585.11k 7599.45k aft. ------------------------------------------------------------------- aes-128-cbc 7313.85k 8653.67k 9079.55k 9188.35k 9205.08k bef. aes-128-cbc 7925.38k 9557.99k 10092.37k 10232.15k 10272.77k aft. encryption 16B 64B 256B 1024B 8192B ------------------------------------------------------------------- aes-256-cbc 6009.65k 6592.70k 6766.59k 6806.87k 6815.74k bef. aes-256-cbc 6643.93k 7388.69k 7605.33k 7657.81k 7675.90k aft. ------------------------------------------------------------------- aes-128-cbc 7862.09k 8892.48k 9214.04k 9291.78k 9311.57k bef. aes-128-cbc 8639.29k 9881.17k 10265.86k 10363.56k 10392.92k aft. Reviewed-by: Paul Dale Reviewed-by: Richard Levitte (Merged from https://github.com/openssl/openssl/pull/8206) --- crypto/aes/asm/aes-mips.pl | 134 +++++++++++++++++++++++-------------- 1 file changed, 85 insertions(+), 49 deletions(-) diff --git a/crypto/aes/asm/aes-mips.pl b/crypto/aes/asm/aes-mips.pl index 66c97cd32f..c93edc6ea3 100644 --- a/crypto/aes/asm/aes-mips.pl +++ b/crypto/aes/asm/aes-mips.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2010-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the Apache License 2.0 (the "License"). You may not use # this file except in compliance with the License. 
You can obtain a copy @@ -34,6 +34,11 @@ # instead, code path is chosen upon pre-process time, pass -mips32r2 # or/and -msmartmips. +# February 2019 +# +# Normalize MIPS32R2 AES table address calculation by always using EXT +# instruction. This reduces the standard codebase by another 10%. + ###################################################################### # There is a number of MIPS ABI in use, O32 and N32/64 are most # widely used. Then there is a new contender: NUBI. It appears that if @@ -223,6 +228,33 @@ _mips_AES_encrypt: ext $i0,$s1,16,8 _xtr $i0,$s1,16-2 +#else +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + move $i0,$Tbl + move $i1,$Tbl + move $i2,$Tbl + move $i3,$Tbl + ext $t0,$s1,16,8 +.Loop_enc: + ext $t1,$s2,16,8 + ext $t2,$s3,16,8 + ext $t3,$s0,16,8 + $PTR_INS $i0,$t0,2,8 + $PTR_INS $i1,$t1,2,8 + $PTR_INS $i2,$t2,2,8 + $PTR_INS $i3,$t3,2,8 + lw $t0,0($i0) # Te1[s1>>16] + ext $t4,$s2,8,8 + lw $t1,0($i1) # Te1[s2>>16] + ext $t5,$s3,8,8 + lw $t2,0($i2) # Te1[s3>>16] + ext $t6,$s0,8,8 + lw $t3,0($i3) # Te1[s0>>16] + ext $t7,$s1,8,8 + $PTR_INS $i0,$t4,2,8 + $PTR_INS $i1,$t5,2,8 + $PTR_INS $i2,$t6,2,8 + $PTR_INS $i3,$t7,2,8 #else _xtr $i0,$s1,16-2 .Loop_enc: @@ -237,16 +269,6 @@ _mips_AES_encrypt: $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl -#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) - lw $t0,0($i0) # Te1[s1>>16] - _xtr $i0,$s2,8-2 - lw $t1,0($i1) # Te1[s2>>16] - _xtr $i1,$s3,8-2 - lw $t2,0($i2) # Te1[s3>>16] - _xtr $i2,$s0,8-2 - lw $t3,0($i3) # Te1[s0>>16] - _xtr $i3,$s1,8-2 -#else lwl $t0,3($i0) # Te1[s1>>16] lwl $t1,3($i1) # Te1[s2>>16] lwl $t2,3($i2) # Te1[s3>>16] @@ -259,7 +281,6 @@ _mips_AES_encrypt: _xtr $i2,$s0,8-2 lwr $t3,2($i3) # Te1[s0>>16] _xtr $i3,$s1,8-2 -#endif and $i0,0x3fc and $i1,0x3fc and $i2,0x3fc @@ -268,6 +289,7 @@ _mips_AES_encrypt: $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl +#endif #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) rotr $t0,$t0,8 rotr 
$t1,$t1,8 @@ -275,22 +297,18 @@ _mips_AES_encrypt: rotr $t3,$t3,8 # if defined(_MIPSEL) lw $t4,0($i0) # Te2[s2>>8] - _xtr $i0,$s3,0-2 + ext $t8,$s3,0,8 lw $t5,0($i1) # Te2[s3>>8] - _xtr $i1,$s0,0-2 + ext $t9,$s0,0,8 lw $t6,0($i2) # Te2[s0>>8] - _xtr $i2,$s1,0-2 + ext $t10,$s1,0,8 lw $t7,0($i3) # Te2[s1>>8] - _xtr $i3,$s2,0-2 + ext $t11,$s2,0,8 + $PTR_INS $i0,$t8,2,8 + $PTR_INS $i1,$t9,2,8 + $PTR_INS $i2,$t10,2,8 + $PTR_INS $i3,$t11,2,8 - and $i0,0x3fc - and $i1,0x3fc - and $i2,0x3fc - and $i3,0x3fc - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl lw $t8,0($i0) # Te3[s3] $PTR_INS $i0,$s0,2,8 lw $t9,0($i1) # Te3[s0] @@ -411,6 +429,9 @@ _mips_AES_encrypt: xor $s3,$t3 .set noreorder bnez $cnt,.Loop_enc +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + ext $t0,$s1,16,8 +#endif _xtr $i0,$s1,16-2 #endif @@ -811,6 +832,33 @@ _mips_AES_decrypt: ext $i0,$s3,16,8 _xtr $i0,$s3,16-2 +#else +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + move $i0,$Tbl + move $i1,$Tbl + move $i2,$Tbl + move $i3,$Tbl + ext $t0,$s3,16,8 +.Loop_dec: + ext $t1,$s0,16,8 + ext $t2,$s1,16,8 + ext $t3,$s2,16,8 + $PTR_INS $i0,$t0,2,8 + $PTR_INS $i1,$t1,2,8 + $PTR_INS $i2,$t2,2,8 + $PTR_INS $i3,$t3,2,8 + lw $t0,0($i0) # Td1[s3>>16] + ext $t4,$s2,8,8 + lw $t1,0($i1) # Td1[s0>>16] + ext $t5,$s3,8,8 + lw $t2,0($i2) # Td1[s1>>16] + ext $t6,$s0,8,8 + lw $t3,0($i3) # Td1[s2>>16] + ext $t7,$s1,8,8 + $PTR_INS $i0,$t4,2,8 + $PTR_INS $i1,$t5,2,8 + $PTR_INS $i2,$t6,2,8 + $PTR_INS $i3,$t7,2,8 #else _xtr $i0,$s3,16-2 .Loop_dec: @@ -825,16 +873,6 @@ _mips_AES_decrypt: $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl -#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) - lw $t0,0($i0) # Td1[s3>>16] - _xtr $i0,$s2,8-2 - lw $t1,0($i1) # Td1[s0>>16] - _xtr $i1,$s3,8-2 - lw $t2,0($i2) # Td1[s1>>16] - _xtr $i2,$s0,8-2 - lw $t3,0($i3) # Td1[s2>>16] - _xtr $i3,$s1,8-2 -#else lwl $t0,3($i0) # Td1[s3>>16] lwl $t1,3($i1) # Td1[s0>>16] lwl 
$t2,3($i2) # Td1[s1>>16] @@ -847,8 +885,6 @@ _mips_AES_decrypt: _xtr $i2,$s0,8-2 lwr $t3,2($i3) # Td1[s2>>16] _xtr $i3,$s1,8-2 -#endif - and $i0,0x3fc and $i1,0x3fc and $i2,0x3fc @@ -857,6 +893,7 @@ _mips_AES_decrypt: $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl +#endif #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) rotr $t0,$t0,8 rotr $t1,$t1,8 @@ -864,22 +901,17 @@ _mips_AES_decrypt: rotr $t3,$t3,8 # if defined(_MIPSEL) lw $t4,0($i0) # Td2[s2>>8] - _xtr $i0,$s1,0-2 + ext $t8,$s1,0,8 lw $t5,0($i1) # Td2[s3>>8] - _xtr $i1,$s2,0-2 + ext $t9,$s2,0,8 lw $t6,0($i2) # Td2[s0>>8] - _xtr $i2,$s3,0-2 + ext $t10,$s3,0,8 lw $t7,0($i3) # Td2[s1>>8] - _xtr $i3,$s0,0-2 - - and $i0,0x3fc - and $i1,0x3fc - and $i2,0x3fc - and $i3,0x3fc - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl + ext $t11,$s0,0,8 + $PTR_INS $i0,$t8,2,8 + $PTR_INS $i1,$t9,2,8 + $PTR_INS $i2,$t10,2,8 + $PTR_INS $i3,$t11,2,8 lw $t8,0($i0) # Td3[s1] $PTR_INS $i0,$s0,2,8 lw $t9,0($i1) # Td3[s2] @@ -1001,6 +1033,10 @@ _mips_AES_decrypt: xor $s3,$t3 .set noreorder bnez $cnt,.Loop_dec +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + ext $t0,$s3,16,8 +#endif + _xtr $i0,$s3,16-2 #endif -- 2.34.1