2 # Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
17 # SHA1 block procedure for MIPS.
19 # Performance improvement is 30% on unaligned input. The "secret" is
20 # to deploy an lwl/lwr pair to load unaligned input. One could have
21 # vectorized Xupdate on MIPSIII/IV, but the goal was to code MIPS32-
22 # compatible subroutine. There is room for minor optimization on
23 # little-endian platforms...
27 # Add MIPS32r2 code (>25% fewer instructions).
29 ######################################################################
30 # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
31 # widely used. Then there is a new contender: NUBI. It appears that if
32 # one picks the latter, it's possible to arrange code in ABI neutral
33 # manner. Therefore let's stick to NUBI register layout:
# Symbolic register names used throughout the generated assembly. Each
# map("\$$_", ...) turns a register number N into the literal text "$N".
35 ($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); # $0,$1,$2,$24,$25
36 ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); # $4-$11
37 ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); # $12-$23
38 ($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); # $3,$28,$29,$30,$31
40 # The return value is placed in $a0. The following coding rules facilitate
43 # - never ever touch $tp, "thread pointer", former $gp;
44 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
46 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
48 # For reference here is register layout for N32/64 MIPS ABIs:
50 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
51 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
52 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
53 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
54 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
# Command-line handling. Both statements operate on @ARGV (pop/shift at
# file scope default to it), and the order matters: $output is removed from
# the end of @ARGV before $flavour is looked for at the front.
56 # $output is the last argument if it looks like a file, i.e. it ends in a
# dot followed by word characters (an extension); otherwise it is undef.
57 # $flavour is the first remaining argument if it contains no dot at all.
58 $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
59 # supported flavours are o32,n32,64,nubi32,nubi64, default is o32
# (a single file-like argument is consumed as $output above, so $flavour
# then falls back to "o32").
60 $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : "o32";
62 if ($flavour =~ /64|n32/i) {
63 $PTR_ADD="daddu"; # incidentally works even on n32
64 $PTR_SUB="dsubu"; # incidentally works even on n32
67 $PTR_SLL="dsll"; # incidentally works even on n32
80 ######################################################################
# If CC is set in the environment, ask that compiler about endianness: the
# literal text "MIPSEB" is piped through its preprocessor ("-E -"). If the
# text comes back unchanged the result is 0 (little-endian); if it does not,
# the result is 1 (big-endian) -- presumably because big-endian MIPS
# toolchains predefine MIPSEB as a macro, so the token gets expanded.
# NOTE(review): empty preprocessor output (e.g. the command failing) also
# fails the match and therefore yields 1 -- confirm this is acceptable.
# When CC is not set, $big_endian is left undefined by this statement.
82 $big_endian=(`echo MIPSEB | $ENV{CC} -E -`=~/MIPSEB/)?0:1 if ($ENV{CC});
# Redirect STDOUT to the requested output file, if one was given on the
# command line. The three-argument form of open keeps the file name from
# being parsed as part of the mode string, and a failed open now aborts
# immediately with the reason instead of being silently ignored.
84 if ($output) { open(STDOUT,">",$output) or die "can't open $output: $!"; }
# Fallback used only when $big_endian is still undefined at this point:
# probe the byte order of the machine running this script. pack('N',1)
# builds a 32-bit big-endian 1; reading it back in native byte order ('L')
# yields 1 only on a big-endian host.
86 if (!defined($big_endian))
87 { $big_endian=(unpack('L',pack('N',1))==1); }
89 # offsets of the Most and Least Significant Bytes
# @X: sixteen registers ($8..$23); the round bodies index it as @X[$i] and
# @X[$j%16].
93 @X=map("\$$_",(8..23)); # a4-a7,s0-s11
# $E takes register $24, which the NUBI register map assigned to $t1.
# @V is the list of five working registers handed to every BODY_* call.
102 $E="\$24"; @V=($A,$B,$C,$D,$E);
# $t1 is re-pointed at $num's register, so it no longer names $24.
104 $t1=$num; # $num is offloaded to stack
109 my ($i,$a,$b,$c,$d,$e)=@_;
111 $code.=<<___ if (!$big_endian);
112 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
113 wsbh @X[$i],@X[$i] # byte swap($i)
114 rotr @X[$i],@X[$i],16
116 srl $t0,@X[$i],24 # byte swap($i)
118 andi $t2,@X[$i],0xFF00
128 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
134 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
137 lwl @X[$j],$j*4+$MSB($inp)
138 lwr @X[$j],$j*4+$LSB($inp)
145 lwl @X[$j],$j*4+$MSB($inp)
148 lwr @X[$j],$j*4+$LSB($inp)
165 my ($i,$a,$b,$c,$d,$e)=@_;
168 $code.=<<___ if (!$big_endian && $i==15);
169 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
170 wsbh @X[$i],@X[$i] # byte swap($i)
171 rotr @X[$i],@X[$i],16
173 srl $t0,@X[$i],24 # byte swap($i)
175 andi $t2,@X[$i],0xFF00
185 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
187 xor @X[$j%16],@X[($j+2)%16]
190 xor @X[$j%16],@X[($j+8)%16]
193 xor @X[$j%16],@X[($j+13)%16]
196 rotr @X[$j%16],@X[$j%16],31
200 xor @X[$j%16],@X[($j+2)%16]
205 xor @X[$j%16],@X[($j+8)%16]
208 xor @X[$j%16],@X[($j+13)%16]
212 addu @X[$j%16],@X[$j%16]
224 my ($i,$a,$b,$c,$d,$e)=@_;
226 $code.=<<___ if ($i<79);
227 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
228 xor @X[$j%16],@X[($j+2)%16]
231 xor @X[$j%16],@X[($j+8)%16]
234 xor @X[$j%16],@X[($j+13)%16]
237 rotr @X[$j%16],@X[$j%16],31
241 xor @X[$j%16],@X[($j+2)%16]
246 xor @X[$j%16],@X[($j+8)%16]
249 xor @X[$j%16],@X[($j+13)%16]
253 addu @X[$j%16],@X[$j%16]
261 $code.=<<___ if ($i==79);
262 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
299 my ($i,$a,$b,$c,$d,$e)=@_;
301 $code.=<<___ if ($i<79);
302 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
305 xor @X[$j%16],@X[($j+2)%16]
308 xor @X[$j%16],@X[($j+8)%16]
311 xor @X[$j%16],@X[($j+13)%16]
314 rotr @X[$j%16],@X[$j%16],31
318 xor @X[$j%16],@X[($j+2)%16]
323 xor @X[$j%16],@X[($j+8)%16]
326 xor @X[$j%16],@X[($j+13)%16]
331 addu @X[$j%16],@X[$j%16]
# Stack frame size counted in register-sized slots; it is multiplied by
# $SZREG wherever it is used in the prologue and epilogue.
342 $FRAMESIZE=16; # large enough to accommodate NUBI saved registers
# Operand for the .mask directive. 0xc0fff008 has bits 31,30,23..12 and 3
# set, matching $ra,$fp,$s11..$s0 and $gp, which the prologue stores for
# nubi flavours. 0xc0ff0000 has bits 31,30 and 23..16 set, matching
# $ra,$fp and $s11..$s4, the registers stored for every flavour.
343 $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";
346 #include "mips_arch.h"
353 .globl sha1_block_data_order
354 .ent sha1_block_data_order
355 sha1_block_data_order:
356 .frame $sp,$FRAMESIZE*$SZREG,$ra
357 .mask $SAVED_REGS_MASK,-$SZREG
359 $PTR_SUB $sp,$FRAMESIZE*$SZREG
360 $REG_S $ra,($FRAMESIZE-1)*$SZREG($sp)
361 $REG_S $fp,($FRAMESIZE-2)*$SZREG($sp)
362 $REG_S $s11,($FRAMESIZE-3)*$SZREG($sp)
363 $REG_S $s10,($FRAMESIZE-4)*$SZREG($sp)
364 $REG_S $s9,($FRAMESIZE-5)*$SZREG($sp)
365 $REG_S $s8,($FRAMESIZE-6)*$SZREG($sp)
366 $REG_S $s7,($FRAMESIZE-7)*$SZREG($sp)
367 $REG_S $s6,($FRAMESIZE-8)*$SZREG($sp)
368 $REG_S $s5,($FRAMESIZE-9)*$SZREG($sp)
369 $REG_S $s4,($FRAMESIZE-10)*$SZREG($sp)
371 $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
372 $REG_S $s3,($FRAMESIZE-11)*$SZREG($sp)
373 $REG_S $s2,($FRAMESIZE-12)*$SZREG($sp)
374 $REG_S $s1,($FRAMESIZE-13)*$SZREG($sp)
375 $REG_S $s0,($FRAMESIZE-14)*$SZREG($sp)
376 $REG_S $gp,($FRAMESIZE-15)*$SZREG($sp)
391 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
394 ori $K,0x7999 # K_00_19
399 ori $K,0x7999 # K_00_19
# Emit rounds 0..14 with BODY_00_14 and rounds 15..19 with BODY_15_19.
# After each round @V is rotated right by one (its last element is moved
# to the front), so the next round body receives the five working
# registers in shifted order. $i is shared by all the round loops and
# simply keeps counting from one loop to the next.
402 for ($i=0;$i<15;$i++) { &BODY_00_14($i,@V); unshift(@V,pop(@V)); }
403 for (;$i<20;$i++) { &BODY_15_19($i,@V); unshift(@V,pop(@V)); }
406 ori $K,0xeba1 # K_20_39
# Emit rounds 20..39 with BODY_20_39, rotating @V right by one per round.
408 for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
411 ori $K,0xbcdc # K_40_59
# Emit rounds 40..59 with BODY_40_59, rotating @V right by one per round.
413 for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
416 ori $K,0xc1d6 # K_60_79
# Emit rounds 60..79. These reuse BODY_20_39 (there is no separate body
# for this range); @V is rotated right by one per round as before.
418 for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
438 $REG_L $ra,($FRAMESIZE-1)*$SZREG($sp)
439 $REG_L $fp,($FRAMESIZE-2)*$SZREG($sp)
440 $REG_L $s11,($FRAMESIZE-3)*$SZREG($sp)
441 $REG_L $s10,($FRAMESIZE-4)*$SZREG($sp)
442 $REG_L $s9,($FRAMESIZE-5)*$SZREG($sp)
443 $REG_L $s8,($FRAMESIZE-6)*$SZREG($sp)
444 $REG_L $s7,($FRAMESIZE-7)*$SZREG($sp)
445 $REG_L $s6,($FRAMESIZE-8)*$SZREG($sp)
446 $REG_L $s5,($FRAMESIZE-9)*$SZREG($sp)
447 $REG_L $s4,($FRAMESIZE-10)*$SZREG($sp)
449 $code.=<<___ if ($flavour =~ /nubi/i);
450 $REG_L $s3,($FRAMESIZE-11)*$SZREG($sp)
451 $REG_L $s2,($FRAMESIZE-12)*$SZREG($sp)
452 $REG_L $s1,($FRAMESIZE-13)*$SZREG($sp)
453 $REG_L $s0,($FRAMESIZE-14)*$SZREG($sp)
454 $REG_L $gp,($FRAMESIZE-15)*$SZREG($sp)
458 $PTR_ADD $sp,$FRAMESIZE*$SZREG
459 .end sha1_block_data_order
461 .asciiz "SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
# Close STDOUT explicitly and abort if that fails, so that an error
# reported at close time (e.g. buffered output that could not be written)
# is not silently ignored.
464 close STDOUT or die "error closing STDOUT: $!";