2 # Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
19 # The module implements "4-bit" GCM GHASH function and underlying
20 # single multiplication operation in GF(2^128). "4-bit" means that it
21 # uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
22 # it processes one byte in 19.6 cycles, which is more than twice as
23 # fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
24 # 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
25 # processed byte. This is ~2.2x faster than 64-bit code generated by
26 # vendor compiler (which used to be very hard to beat:-).
28 # Special thanks to polarhome.com for providing HP-UX account.
# Command-line handling.
#   $output  - the last argument, taken only when it looks like a file name
#              (it ends in ".<ext>"); otherwise undef and the generated
#              code goes to the inherited STDOUT.
#   $flavour - the first remaining argument, taken only when it contains
#              no dot (i.e. it does not look like a file); otherwise undef.
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Redirect STDOUT to the requested file.  Three-argument open keeps the
# file name from ever being parsed as part of the mode, and a failure is
# reported instead of silently emitting the output somewhere else.
if ($output) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
# Flavour-dependent build settings and register naming.  Only part of this
# section is visible in this view.
# This branch is taken when the flavour string contains "64".
37 if ($flavour =~ /64/) {
# NOTE(review): presumably the value for the assembler .LEVEL directive;
# the ".ALLOW 2.0" variant is kept only inside the trailing comment.
48 $LEVEL ="1.0"; #"\n\t.ALLOW\t2.0";
# Frame size: ten $SIZE_T-sized slots plus $FRAME_MARKER.
59 $FRAME=10*$SIZE_T+$FRAME_MARKER;# NREGS saved regs + frame marker
60 # [+ argument transfer]
62 ################# volatile registers
63 $Xi="%r26"; # argument block
# $Hhh reuses whatever register name $Htbl holds.
67 $Hhh=$Htbl; # variables
76 ################# preserved registers
# %r6 is among the registers saved/restored by the generated prologue and
# epilogue.
90 $rem2="%r6"; # used in PA-RISC 2.0 code
95 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
97 .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
101 .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
103 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
104 $PUSHMA %r3,$FRAME(%sp)
105 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
106 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
107 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
109 $code.=<<___ if ($SIZE_T==4);
110 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
111 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
112 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
113 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
114 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
120 andcm $rem_4bit,$rem,$rem_4bit
122 ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
125 $code.=<<___ if ($SIZE_T==4);
128 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
137 and $mask0xf0,$nlo,$nhi
138 depd,z $nlo,59,4,$nlo
143 depd,z $Zll,60,4,$rem
144 shrpd $Zhh,$Zll,4,$Zll
145 extrd,u $Zhh,59,60,$Zhh
150 and $mask0xf0,$nlo,$nhi
151 depd,z $nlo,59,4,$nlo
155 ldd $rem($rem_4bit),$rem
161 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
162 depd,z $Zll,60,4,$rem
164 shrpd $Zhh,$Zll,4,$Zll
165 extrd,u $Zhh,59,60,$Zhh
171 ldd $rem($rem_4bit),$rem
174 depd,z $Zll,60,4,$rem
177 shrpd $Zhh,$Zll,4,$Zll
178 extrd,u $Zhh,59,60,$Zhh
182 and $mask0xf0,$nlo,$nhi
183 depd,z $nlo,59,4,$nlo
184 ldd $rem($rem_4bit),$rem
187 addib,uv -1,$cnt,L\$oop_gmult_pa2
191 depd,z $Zll,60,4,$rem
193 shrpd $Zhh,$Zll,4,$Zll
194 extrd,u $Zhh,59,60,$Zhh
200 ldd $rem($rem_4bit),$rem
203 depd,z $Zll,60,4,$rem
205 shrpd $Zhh,$Zll,4,$Zll
206 extrd,u $Zhh,59,60,$Zhh
212 ldd $rem($rem_4bit),$rem
219 $code.=<<___ if ($SIZE_T==4);
229 and $mask0xf0,$nlo,$nhi
238 ldwx $rem($rem_4bit),$rem
239 shrpw $Zlh,$Zll,4,$Zll
241 shrpw $Zhl,$Zlh,4,$Zlh
243 shrpw $Zhh,$Zhl,4,$Zhl
245 extru $Zhh,27,28,$Zhh
248 and $mask0xf0,$nlo,$nhi
264 ldwx $rem($rem_4bit),$rem
265 shrpw $Zlh,$Zll,4,$Zll
267 shrpw $Zhl,$Zlh,4,$Zlh
271 shrpw $Zhh,$Zhl,4,$Zhl
274 extru $Zhh,27,28,$Zhh
281 shrpw $Zlh,$Zll,4,$Zll
282 ldwx $rem($rem_4bit),$rem
283 shrpw $Zhl,$Zlh,4,$Zlh
284 shrpw $Zhh,$Zhl,4,$Zhl
285 and $mask0xf0,$nlo,$nhi
286 extru $Zhh,27,28,$Zhh
293 addib,uv -1,$cnt,L\$oop_gmult_pa1
299 ldwx $rem($rem_4bit),$rem
300 shrpw $Zlh,$Zll,4,$Zll
302 shrpw $Zhl,$Zlh,4,$Zlh
305 shrpw $Zhh,$Zhl,4,$Zhl
308 extru $Zhh,27,28,$Zhh
315 ldwx $rem($rem_4bit),$rem
316 shrpw $Zlh,$Zll,4,$Zll
317 shrpw $Zhl,$Zlh,4,$Zlh
318 shrpw $Zhh,$Zhl,4,$Zhl
319 extru $Zhh,27,28,$Zhh
332 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
333 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
334 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
335 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
337 $code.=<<___ if ($SIZE_T==4);
338 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
339 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
340 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
341 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
342 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
347 $POPMB -$FRAME(%sp),%r3
350 .EXPORT gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
354 .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=11
356 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
357 $PUSHMA %r3,$FRAME(%sp)
358 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
359 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
360 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
362 $code.=<<___ if ($SIZE_T==4);
363 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
364 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
365 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
366 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
367 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
373 andcm $rem_4bit,$rem,$rem_4bit
375 ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
378 $code.=<<___ if ($SIZE_T==4);
381 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
393 and $mask0xf0,$nlo,$nhi
394 depd,z $nlo,59,4,$nlo
399 depd,z $Zll,60,4,$rem
400 shrpd $Zhh,$Zll,4,$Zll
401 extrd,u $Zhh,59,60,$Zhh
408 and $mask0xf0,$nlo,$nhi
409 depd,z $nlo,59,4,$nlo
413 ldd $rem($rem_4bit),$rem
419 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
420 depd,z $Zll,60,4,$rem2
422 shrpd $Zhh,$Zll,4,$Zll
423 extrd,u $Zhh,59,60,$Zhh
430 ldbx $cnt($inp),$byte
432 depd,z $Zll,60,4,$rem
433 shrpd $Zhh,$Zll,4,$Zll
434 ldd $rem2($rem_4bit),$rem2
441 and $mask0xf0,$nlo,$nhi
442 depd,z $nlo,59,4,$nlo
444 extrd,u $Zhh,59,60,$Zhh
447 ldd $rem($rem_4bit),$rem
448 addib,uv -1,$cnt,L\$oop_ghash_pa2
452 depd,z $Zll,60,4,$rem2
454 shrpd $Zhh,$Zll,4,$Zll
455 extrd,u $Zhh,59,60,$Zhh
462 depd,z $Zll,60,4,$rem
463 shrpd $Zhh,$Zll,4,$Zll
464 ldd $rem2($rem_4bit),$rem2
470 extrd,u $Zhh,59,60,$Zhh
473 ldd $rem($rem_4bit),$rem
479 cmpb,*<> $inp,$len,L\$outer_ghash_pa2
483 $code.=<<___ if ($SIZE_T==4);
496 and $mask0xf0,$nlo,$nhi
506 ldwx $rem($rem_4bit),$rem
507 shrpw $Zlh,$Zll,4,$Zll
509 shrpw $Zhl,$Zlh,4,$Zlh
511 shrpw $Zhh,$Zhl,4,$Zhl
513 extru $Zhh,27,28,$Zhh
517 and $mask0xf0,$nlo,$nhi
533 ldwx $rem($rem_4bit),$rem
534 shrpw $Zlh,$Zll,4,$Zll
536 shrpw $Zhl,$Zlh,4,$Zlh
540 shrpw $Zhh,$Zhl,4,$Zhl
541 ldbx $cnt($inp),$byte
544 extru $Zhh,27,28,$Zhh
551 shrpw $Zlh,$Zll,4,$Zll
552 ldwx $rem($rem_4bit),$rem
553 shrpw $Zhl,$Zlh,4,$Zlh
555 shrpw $Zhh,$Zhl,4,$Zhl
556 and $mask0xf0,$nlo,$nhi
557 extru $Zhh,27,28,$Zhh
564 addib,uv -1,$cnt,L\$oop_ghash_pa1
570 ldwx $rem($rem_4bit),$rem
571 shrpw $Zlh,$Zll,4,$Zll
573 shrpw $Zhl,$Zlh,4,$Zlh
576 shrpw $Zhh,$Zhl,4,$Zhl
579 extru $Zhh,27,28,$Zhh
586 ldwx $rem($rem_4bit),$rem
587 shrpw $Zlh,$Zll,4,$Zll
588 shrpw $Zhl,$Zlh,4,$Zlh
589 shrpw $Zhh,$Zhl,4,$Zhl
590 extru $Zhh,27,28,$Zhh
601 comb,<> $inp,$len,L\$outer_ghash_pa1
606 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
607 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
608 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
609 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
611 $code.=<<___ if ($SIZE_T==4);
612 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
613 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
614 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
615 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
616 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
621 $POPMB -$FRAME(%sp),%r3
626 .WORD `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
627 .WORD `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
628 .WORD `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
629 .WORD `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
630 .STRINGZ "GHASH for PA-RISC, GRYPTOGAMS by <appro\@openssl.org>"
634 # Explicitly encode PA-RISC 2.0 instructions used in this module, so
635 # that it can be compiled with .LEVEL 1.0. It should be noted that I
636 # wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
# Encoder for the "ldd" instruction: converts the textual operands into a
# raw "\t.WORD\t0x........" line and keeps the original instruction text as
# a trailing ";" comment.
#   $mod  - completer suffix that followed the mnemonic (may be empty)
#   $args - operand string
# Two operand shapes are recognised below; the handling of any other shape
# is on lines not visible in this view.
640 my ($mod,$args) = @_;
641 my $orig = "ldd$mod\t$args";
# Shape "%rX(%rB),%rT": %rB is shifted to bit 21, %rX to bit 16 and %rT
# occupies the low bits of the word.
643 if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 4
644 { my $opcode=(0x03<<26)|($2<<21)|($1<<16)|(3<<6)|$3;
645 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
# Shape "d(%rB),%rT" with a (possibly negative) decimal displacement d.
647 elsif ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 5
648 { my $opcode=(0x03<<26)|($2<<21)|(1<<12)|(3<<6)|$3;
# Only five bits of the displacement are used: bits 0-3 go to bits 17-20
# and bit 4 (0x10) goes to bit 16.
649 $opcode|=(($1&0xF)<<17)|(($1&0x10)<<12); # encode offset
# Any completer starting with ",m" sets bit 5; ",mb" additionally sets
# bit 13.
650 $opcode|=(1<<5) if ($mod =~ /^,m/);
651 $opcode|=(1<<13) if ($mod =~ /^,mb/);
652 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
# Encoder for the "std" instruction: produces a raw "\t.WORD\t0x........"
# line with the original instruction text as a trailing ";" comment.
#   $mod  - completer suffix that followed the mnemonic (may be empty)
#   $args - operand string
658 my ($mod,$args) = @_;
659 my $orig = "std$mod\t$args";
# Shape "%rS,d(%rB)": %rB is shifted to bit 21 and %rS to bit 16.  The
# displacement contributes (d & 0x1FF8) shifted left by one, and its bit 13
# becomes bit 0 of the word.
661 if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
662 { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
663 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
# Encoder for the "extrd" instruction: produces a raw "\t.WORD\t0x........"
# line with the original instruction text as a trailing ";" comment.
#   $mod  - completer suffix that followed the mnemonic
#   $args - operand string
669 my ($mod,$args) = @_;
670 my $orig = "extrd$mod\t$args";
672 # I only have ",u" completer, it's implicitly encoded...
# Shape "%rS,pos,len,%rT": %rS is shifted to bit 21 and %rT to bit 16.
673 if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
674 { my $opcode=(0x36<<26)|($1<<21)|($4<<16);
# pos is split in two: its bit 5 goes to bit 11, its low five bits to
# bits 5-9.
676 $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
# NOTE(review): $len is assigned on a line not visible in this view.
677 $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
678 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
# Shape "%rS,%sar,len,%rT": the position comes from %sar instead of an
# immediate.
680 elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
681 { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
683 $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
# A "=" condition in the completer (written ",=" or ",*=") sets bit 13.
684 $opcode |= (1<<13) if ($mod =~ /,\**=/);
685 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
# Encoder for the "shrpd" instruction: produces a raw "\t.WORD\t0x........"
# line with the original instruction text as a trailing ";" comment.
#   $mod  - completer suffix that followed the mnemonic (may be empty)
#   $args - operand string
691 my ($mod,$args) = @_;
692 my $orig = "shrpd$mod\t$args";
# Shape "%rA,%rB,sa,%rT" with an immediate shift amount: %rB is shifted to
# bit 21, %rA to bit 16 and %rT occupies the low bits.
694 if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
695 { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
# NOTE(review): $cpos is computed on a line not visible in this view.  Its
# bit 5 goes to bit 11 and its low five bits to bits 5-9.
697 $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
698 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
# Shape "%rA,%rB,%sar,%rT": the shift amount comes from %sar, so no
# immediate field is encoded.
700 elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11
701 { sprintf "\t.WORD\t0x%08x\t; %s",
702 (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig;
# Encoder for the "depd" instruction: produces a raw "\t.WORD\t0x........"
# line with the original instruction text as a trailing ";" comment.
#   $mod  - completer suffix that followed the mnemonic
#   $args - operand string
708 my ($mod,$args) = @_;
709 my $orig = "depd$mod\t$args";
711 # I only have ",z" completer, it's implicitly encoded...
# Shape "%rS,pos,len,%rT": here the target %rT is shifted to bit 21 and the
# source %rS to bit 16.
712 if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 16
713 { my $opcode=(0x3c<<26)|($4<<21)|($1<<16);
# NOTE(review): $cpos and $len are assigned on lines not visible in this
# view.  Each is split into its bit 5 and its low five bits before being
# merged into the word.
716 $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode pos
717 $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
718 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
# Dispatch a single instruction.
#   $mnemonic - instruction name
#   $mod      - completer suffix that followed the mnemonic (may be empty)
#   $args     - operand string
# Returns the text produced by a matching encoder, or the instruction
# re-assembled as "\t<mnemonic><mod>\t<args>" when there is none.
724 my ($mnemonic,$mod,$args)=@_;
# String eval looks up the scalar variable whose name equals the mnemonic.
725 my $opcode = eval("\$$mnemonic");
# Only a code reference counts as an encoder; anything else passes through.
727 ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
# Probe the assembler behind $ENV{CC}: assemble /dev/null with the
# assembler's -v flag passed through and check the combined stdout/stderr
# for the "GNU assembler" banner.
730 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
731 =~ /GNU assembler/) {
# Post-process the accumulated $code one line at a time.
735 foreach (split("\n",$code)) {
# Replace every backquoted expression in the line with the result of
# evaluating it as Perl.
736 s/\`([^\`]*)\`/eval $1/ge;
# Hand "mnemonic[completer] operands" to assemble().  Any condition
# guarding this substitution is not visible in this view.
738 s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e;
# Directive rewrites applied only when $gnuas is set and $SIZE_T is 8:
# lower-case the trailing "W" of ".LEVEL 2.0W", turn ".SPACE $TEXT$" into
# ".text", and blank out ".SUBSPA" directives.
743 s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8);
744 s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8);
745 s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8);
# With $SIZE_T==8 the first stand-alone "bv" on the line becomes "bve".
746 s/\bbv\b/bve/ if ($SIZE_T==8);