2 # Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
17 # I let hardware handle unaligned input(*), except on page boundaries
18 # (see below for details). Otherwise straightforward implementation
19 # with X vector in register bank.
21 # (*) this means that this module is inappropriate for PPC403? Does
22 # anybody know if pre-POWER3 can sustain unaligned load?
25 # ----------------------------------
26 # PPC970,gcc-4.0.0 +76% +59%
27 # Power6,xlc-7 +68% +33%
29 # $output is the last argument if it looks like a file (it has an extension)
30 # $flavour is the first argument if it doesn't look like a file
# Command-line handling: both arguments are optional, and each one is
# removed from @ARGV when it is recognised; otherwise the variable is undef.
31 $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
32 $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
# The flavour string must contain either 64 or 32 (64 is tested first); any
# other value, including a missing flavour, aborts the script.
34 if ($flavour =~ /64/) {
41 } elsif ($flavour =~ /32/) {
48 } else { die "nonsense $flavour"; }
50 # Define endianness based on flavour
# A flavour ending in "le" selects little-endian: $LITTLE_ENDIAN is then set
# to $SIZE_T, otherwise to 0. It is later tested for truth when choosing
# which load sequence to emit.
52 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
# Take the directory part of this script's path ($0), including its trailing
# slash or backslash, as $dir.
54 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
# Look for ppc-xlate.pl next to this script first, then under
# ../../perlasm/ relative to it; give up if neither candidate is a file.
55 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
56 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
57 die "can't locate ppc-xlate.pl";
# Reopen STDOUT as a pipe into a child perl ($^X) running the translator
# with the flavour and the quoted output name, so that everything written to
# STDOUT from here on is fed to ppc-xlate.pl.
59 open STDOUT,"| $^X $xlate $flavour \"$output\""
60 or die "can't call $xlate: $!";
# @V holds the six working-register names handed to every round body; the
# round loops rotate it by one position after each round.
81 @V=($A,$B,$C,$D,$E,$T);
# @X names the sixteen registers r16..r31 that hold the X vector; the round
# bodies index it modulo 16.
82 @X=("r16","r17","r18","r19","r20","r21","r22","r23",
83 "r24","r25","r26","r27","r28","r29","r30","r31");
# Arguments: $dst is the register that receives the word, $src the memory
# operand it is loaded from, and $temp_reg a scratch register used by the
# little-endian path.
86 my ($dst, $src, $temp_reg) = @_;
# Exactly one of the two templates below is appended to $code, chosen by
# $LITTLE_ENDIAN. The little-endian one finishes with a
# rotlwi/rlwimi/rlwimi sequence that leaves the byte-reversed value of
# $temp_reg in $dst.
87 $code.=<<___ if (!$LITTLE_ENDIAN);
90 $code.=<<___ if ($LITTLE_ENDIAN);
92 rotlwi $dst,$temp_reg,8
93 rlwimi $dst,$temp_reg,24,0,7
94 rlwimi $dst,$temp_reg,24,16,23
# Arguments: the round index $i, followed by the six working-register names
# in their current rotation (the round loop passes @V).
99 my ($i,$a,$b,$c,$d,$e,$f)=@_;
102 # Since the last value of $f is discarded, we can use
103 # it as a temp reg to swap byte-order when needed.
# Round 0 loads input word $i itself; in addition, every round before 15
# loads word $j. The backticked byte offsets are arithmetic expressions that
# are evaluated later by the substitution pass applied to $code.
104 loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0);
105 loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15);
# Two templates follow: one for rounds before 15 and one for rounds 15 and
# up. The latter also recomputes an X entry in place: @X[$j%16] is xor-ed
# with the entries at $j+2, $j+8 and $j+13 (all modulo 16) and then rotated
# left by one bit.
106 $code.=<<___ if ($i<15);
117 $code.=<<___ if ($i>=15);
120 xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
123 xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
128 xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
130 rotlwi @X[$j%16],@X[$j%16],1
# Arguments: the round index $i, followed by the six working-register names
# in their current rotation (the round loops pass @V).
135 my ($i,$a,$b,$c,$d,$e,$f)=@_;
# Rounds before 79 use a template that also recomputes @X[$j%16] (xor with
# the entries at $j+2, $j+8 and $j+13 modulo 16, then rotate left by one
# bit); round 79 gets its own template.
137 $code.=<<___ if ($i<79);
141 xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
144 xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
147 xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
149 rotlwi @X[$j%16],@X[$j%16],1
151 $code.=<<___ if ($i==79);
# Arguments: the round index $i, followed by the six working-register names
# in their current rotation (the round loop passes @V). The template
# appended by this body recomputes @X[$j%16] in place: xor with the entries
# at $j+2, $j+8 and $j+13 (all modulo 16), then rotate left by one bit.
169 my ($i,$a,$b,$c,$d,$e,$f)=@_;
174 xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
177 xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
181 xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
184 rotlwi @X[$j%16],@X[$j%16],1
193 .globl .sha1_block_data_order
195 .sha1_block_data_order:
196 $STU $sp,-$FRAME($sp)
198 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
199 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
200 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
201 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
202 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
203 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
204 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
205 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
206 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
207 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
208 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
209 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
210 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
211 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
212 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
213 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
214 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
215 $PUSH r0,`$FRAME+$LRSAVE`($sp)
225 bl Lsha1_block_private
228 ; PowerPC specification allows an implementation to be ill-behaved
229 ; upon unaligned access which crosses page boundary. "Better safe
230 ; than sorry" principle makes me treat it specially. But I don't
231 ; look for particular offending word, but rather for 64-byte input
232 ; block which crosses the boundary. Once found that block is aligned
233 ; and hashed separately...
237 andi. $t1,$t1,4095 ; distance to closest page boundary
238 srwi. $t1,$t1,6 ; t1/=64
241 ble Laligned ; didn't cross the page boundary
244 bl Lsha1_block_private
248 addi r20,$sp,$LOCALS ; spot within the frame
262 $PUSH $inp,`$FRAME-$SIZE_T*18`($sp)
264 addi $inp,$sp,$LOCALS
266 bl Lsha1_block_private
267 $POP $inp,`$FRAME-$SIZE_T*18`($sp)
272 $POP r0,`$FRAME+$LRSAVE`($sp)
273 $POP r15,`$FRAME-$SIZE_T*17`($sp)
274 $POP r16,`$FRAME-$SIZE_T*16`($sp)
275 $POP r17,`$FRAME-$SIZE_T*15`($sp)
276 $POP r18,`$FRAME-$SIZE_T*14`($sp)
277 $POP r19,`$FRAME-$SIZE_T*13`($sp)
278 $POP r20,`$FRAME-$SIZE_T*12`($sp)
279 $POP r21,`$FRAME-$SIZE_T*11`($sp)
280 $POP r22,`$FRAME-$SIZE_T*10`($sp)
281 $POP r23,`$FRAME-$SIZE_T*9`($sp)
282 $POP r24,`$FRAME-$SIZE_T*8`($sp)
283 $POP r25,`$FRAME-$SIZE_T*7`($sp)
284 $POP r26,`$FRAME-$SIZE_T*6`($sp)
285 $POP r27,`$FRAME-$SIZE_T*5`($sp)
286 $POP r28,`$FRAME-$SIZE_T*4`($sp)
287 $POP r29,`$FRAME-$SIZE_T*3`($sp)
288 $POP r30,`$FRAME-$SIZE_T*2`($sp)
289 $POP r31,`$FRAME-$SIZE_T*1`($sp)
294 .byte 0,12,4,1,0x80,18,3,0
298 # This is private block function, which uses tailored calling
299 # interface, namely upon entry SHA_CTX is pre-loaded to given
300 # registers and counter register contains amount of chunks to
# The 80 rounds are emitted in four groups of 20. Each group starts with a
# template tagged with the constant it loads (K_00_19 .. K_60_79). Every
# round is followed by unshift(@V,pop(@V)), which rotates the register names
# so that the next round sees them shifted by one position. $i carries over
# from one loop to the next, and rounds 60..79 reuse BODY_20_39.
306 $code.=<<___; # load K_00_19
310 for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
311 $code.=<<___; # load K_20_39
315 for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
316 $code.=<<___; # load K_40_59
320 for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
321 $code.=<<___; # load K_60_79
325 for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
342 addi $inp,$inp,`16*4`
343 bdnz Lsha1_block_private
346 .byte 0,12,0x14,0,0,0,0,0
347 .size .sha1_block_data_order,.-.sha1_block_data_order
350 .asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
# Evaluate every backtick-quoted expression embedded in the generated code
# and replace it with its value (/e evaluates the replacement, /g applies it
# to all occurrences).
353 $code =~ s/\`([^\`]*)\`/eval $1/gem;
# STDOUT is a pipe into the translator, so closing it is where a failure of
# that pipe surfaces. Include $! in the message so the cause is not lost.
355 close STDOUT or die "error closing STDOUT: $!";