3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # I let hardware handle unaligned input(*), except on page boundaries
11 # (see below for details). Otherwise it is a straightforward implementation
12 # with the X vector in a register bank. The module is big-endian [which is
13 # not a big deal as there are no little-endian targets left around].
15 # (*) Does this mean that this module is inappropriate for PPC403? Does
16 # anybody know if pre-POWER3 can sustain unaligned loads?
19 # ----------------------------------
20 # PPC970,gcc-4.0.0 +76% +59%
21 # Power6,xlc-7 +68% +33%
25 if ($output =~ /64\.s/) {  # output name contains "64.s"
31 } elsif ($output =~ /32\.s/) {  # output name contains "32.s"
37 } else { die "nonsense $output"; }  # any other output name aborts generation
39 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;  # $dir = part of $0 up to and including its last / or \
40 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or  # first choice: ppc-xlate.pl in the same directory as this script
41 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or  # second choice: ../../perlasm/ relative to that directory
42 die "can't locate ppc-xlate.pl";
44 ( defined shift || open STDOUT,"| $^X $xlate $output" ) ||  # if another argument is present STDOUT is left alone, otherwise it is piped into "$^X $xlate $output"
45 die "can't call $xlate: $!";
65 @V=($A,$B,$C,$D,$E,$T);  # list handed to the BODY_* generators; rotated by one position after every round
66 @X=("r16","r17","r18","r19","r20","r21","r22","r23",  # sixteen registers r16..r31, indexed as @X[...] by the round generators
67 "r24","r25","r26","r27","r28","r29","r30","r31");
70 my ($i,$a,$b,$c,$d,$e,$f)=@_;
72 $code.=<<___ if ($i==0);
73 lwz @X[$i],`$i*4`($inp)
75 $code.=<<___ if ($i<15);
76 lwz @X[$j],`$j*4`($inp)
87 $code.=<<___ if ($i>=15);
90 xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
93 xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
98 xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
100 rotlwi @X[$j%16],@X[$j%16],1
105 my ($i,$a,$b,$c,$d,$e,$f)=@_;
107 $code.=<<___ if ($i<79);
110 xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
113 xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
117 xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
119 rotlwi @X[$j%16],@X[$j%16],1
121 $code.=<<___ if ($i==79);
139 my ($i,$a,$b,$c,$d,$e,$f)=@_;
144 xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
147 xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
151 xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
154 rotlwi @X[$j%16],@X[$j%16],1
162 .globl .sha1_block_data_order
164 .sha1_block_data_order:
166 $STU $sp,`-($FRAME+64)`($sp)
167 $PUSH r0,`$FRAME-$SIZE_T*18`($sp)
168 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
169 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
170 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
171 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
172 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
173 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
174 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
175 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
176 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
177 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
178 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
179 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
180 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
181 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
182 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
183 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
184 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
194 bl Lsha1_block_private
196 $POP r0,`$FRAME-$SIZE_T*18`($sp)
197 $POP r15,`$FRAME-$SIZE_T*17`($sp)
198 $POP r16,`$FRAME-$SIZE_T*16`($sp)
199 $POP r17,`$FRAME-$SIZE_T*15`($sp)
200 $POP r18,`$FRAME-$SIZE_T*14`($sp)
201 $POP r19,`$FRAME-$SIZE_T*13`($sp)
202 $POP r20,`$FRAME-$SIZE_T*12`($sp)
203 $POP r21,`$FRAME-$SIZE_T*11`($sp)
204 $POP r22,`$FRAME-$SIZE_T*10`($sp)
205 $POP r23,`$FRAME-$SIZE_T*9`($sp)
206 $POP r24,`$FRAME-$SIZE_T*8`($sp)
207 $POP r25,`$FRAME-$SIZE_T*7`($sp)
208 $POP r26,`$FRAME-$SIZE_T*6`($sp)
209 $POP r27,`$FRAME-$SIZE_T*5`($sp)
210 $POP r28,`$FRAME-$SIZE_T*4`($sp)
211 $POP r29,`$FRAME-$SIZE_T*3`($sp)
212 $POP r30,`$FRAME-$SIZE_T*2`($sp)
213 $POP r31,`$FRAME-$SIZE_T*1`($sp)
215 addi $sp,$sp,`$FRAME+64`
219 # The PowerPC specification allows an implementation to be ill-behaved
220 # upon an unaligned access which crosses a page boundary. The "better
221 # safe than sorry" principle makes me treat it specially. I don't look
222 # for the particular offending word, but rather for a 64-byte input
223 # block which crosses the boundary. Once found, that block is aligned
224 # and hashed separately...
229 andi. $t1,$t1,4095 ; distance to closest page boundary
230 srwi. $t1,$t1,6 ; t1/=64
233 ble- Laligned ; didn't cross the page boundary
236 bl Lsha1_block_private
240 addi r20,$sp,$FRAME ; spot below the frame
254 $PUSH $inp,`$FRAME-$SIZE_T*19`($sp)
258 bl Lsha1_block_private
259 $POP $inp,`$FRAME-$SIZE_T*19`($sp)
265 # This is a private block function, which uses a tailored calling
266 # interface: upon entry SHA_CTX is pre-loaded into the given
267 # registers and the counter register contains the number of chunks to
273 $code.=<<___; # load K_00_19
277 for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }  # rounds 0..19; after each round the last element of @V moves to the front
278 $code.=<<___; # load K_20_39
282 for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }  # rounds 20..39; $i carries over from the previous loop
283 $code.=<<___; # load K_40_59
287 for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }  # rounds 40..59
288 $code.=<<___; # load K_60_79
292 for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }  # rounds 60..79 are generated by BODY_20_39 again
309 addi $inp,$inp,`16*4`
310 bdnz- Lsha1_block_private
314 .asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
317 $code =~ s/\`([^\`]*)\`/eval $1/gem;  # replace every `...` span in $code with the result of eval'ing its contents as Perl