2 # Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
19 # The module implements "4-bit" GCM GHASH function and underlying
20 # single multiplication operation in GF(2^128). "4-bit" means that it
21 # uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
22 # it processes one byte in 19.6 cycles, which is more than twice as
23 # fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
24 # 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
25 # processed byte. This is ~2.2x faster than 64-bit code generated by
26 # vendor compiler (which used to be very hard to beat:-).
28 # Special thanks to polarhome.com for providing HP-UX account.
32 open STDOUT,">$output";
34 if ($flavour =~ /64/) {
45 $LEVEL ="1.0"; #"\n\t.ALLOW\t2.0";
56 $FRAME=10*$SIZE_T+$FRAME_MARKER;# NREGS saved regs + frame marker
57 # [+ argument transfer]
59 ################# volatile registers
60 $Xi="%r26"; # argument block
64 $Hhh=$Htbl; # variables
73 ################# preserved registers
87 $rem2="%r6"; # used in PA-RISC 2.0 code
92 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
94 .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
98 .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
100 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
101 $PUSHMA %r3,$FRAME(%sp)
102 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
103 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
104 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
106 $code.=<<___ if ($SIZE_T==4);
107 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
108 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
109 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
110 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
111 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
117 andcm $rem_4bit,$rem,$rem_4bit
119 ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
122 $code.=<<___ if ($SIZE_T==4);
125 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
134 and $mask0xf0,$nlo,$nhi
135 depd,z $nlo,59,4,$nlo
140 depd,z $Zll,60,4,$rem
141 shrpd $Zhh,$Zll,4,$Zll
142 extrd,u $Zhh,59,60,$Zhh
147 and $mask0xf0,$nlo,$nhi
148 depd,z $nlo,59,4,$nlo
152 ldd $rem($rem_4bit),$rem
158 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
159 depd,z $Zll,60,4,$rem
161 shrpd $Zhh,$Zll,4,$Zll
162 extrd,u $Zhh,59,60,$Zhh
168 ldd $rem($rem_4bit),$rem
171 depd,z $Zll,60,4,$rem
174 shrpd $Zhh,$Zll,4,$Zll
175 extrd,u $Zhh,59,60,$Zhh
179 and $mask0xf0,$nlo,$nhi
180 depd,z $nlo,59,4,$nlo
181 ldd $rem($rem_4bit),$rem
184 addib,uv -1,$cnt,L\$oop_gmult_pa2
188 depd,z $Zll,60,4,$rem
190 shrpd $Zhh,$Zll,4,$Zll
191 extrd,u $Zhh,59,60,$Zhh
197 ldd $rem($rem_4bit),$rem
200 depd,z $Zll,60,4,$rem
202 shrpd $Zhh,$Zll,4,$Zll
203 extrd,u $Zhh,59,60,$Zhh
209 ldd $rem($rem_4bit),$rem
216 $code.=<<___ if ($SIZE_T==4);
226 and $mask0xf0,$nlo,$nhi
235 ldwx $rem($rem_4bit),$rem
236 shrpw $Zlh,$Zll,4,$Zll
238 shrpw $Zhl,$Zlh,4,$Zlh
240 shrpw $Zhh,$Zhl,4,$Zhl
242 extru $Zhh,27,28,$Zhh
245 and $mask0xf0,$nlo,$nhi
261 ldwx $rem($rem_4bit),$rem
262 shrpw $Zlh,$Zll,4,$Zll
264 shrpw $Zhl,$Zlh,4,$Zlh
268 shrpw $Zhh,$Zhl,4,$Zhl
271 extru $Zhh,27,28,$Zhh
278 shrpw $Zlh,$Zll,4,$Zll
279 ldwx $rem($rem_4bit),$rem
280 shrpw $Zhl,$Zlh,4,$Zlh
281 shrpw $Zhh,$Zhl,4,$Zhl
282 and $mask0xf0,$nlo,$nhi
283 extru $Zhh,27,28,$Zhh
290 addib,uv -1,$cnt,L\$oop_gmult_pa1
296 ldwx $rem($rem_4bit),$rem
297 shrpw $Zlh,$Zll,4,$Zll
299 shrpw $Zhl,$Zlh,4,$Zlh
302 shrpw $Zhh,$Zhl,4,$Zhl
305 extru $Zhh,27,28,$Zhh
312 ldwx $rem($rem_4bit),$rem
313 shrpw $Zlh,$Zll,4,$Zll
314 shrpw $Zhl,$Zlh,4,$Zlh
315 shrpw $Zhh,$Zhl,4,$Zhl
316 extru $Zhh,27,28,$Zhh
329 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
330 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
331 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
332 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
334 $code.=<<___ if ($SIZE_T==4);
335 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
336 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
337 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
338 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
339 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
344 $POPMB -$FRAME(%sp),%r3
347 .EXPORT gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
351 .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=11
353 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
354 $PUSHMA %r3,$FRAME(%sp)
355 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
356 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
357 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
359 $code.=<<___ if ($SIZE_T==4);
360 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
361 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
362 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
363 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
364 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
370 andcm $rem_4bit,$rem,$rem_4bit
372 ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
375 $code.=<<___ if ($SIZE_T==4);
378 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
390 and $mask0xf0,$nlo,$nhi
391 depd,z $nlo,59,4,$nlo
396 depd,z $Zll,60,4,$rem
397 shrpd $Zhh,$Zll,4,$Zll
398 extrd,u $Zhh,59,60,$Zhh
405 and $mask0xf0,$nlo,$nhi
406 depd,z $nlo,59,4,$nlo
410 ldd $rem($rem_4bit),$rem
416 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
417 depd,z $Zll,60,4,$rem2
419 shrpd $Zhh,$Zll,4,$Zll
420 extrd,u $Zhh,59,60,$Zhh
427 ldbx $cnt($inp),$byte
429 depd,z $Zll,60,4,$rem
430 shrpd $Zhh,$Zll,4,$Zll
431 ldd $rem2($rem_4bit),$rem2
438 and $mask0xf0,$nlo,$nhi
439 depd,z $nlo,59,4,$nlo
441 extrd,u $Zhh,59,60,$Zhh
444 ldd $rem($rem_4bit),$rem
445 addib,uv -1,$cnt,L\$oop_ghash_pa2
449 depd,z $Zll,60,4,$rem2
451 shrpd $Zhh,$Zll,4,$Zll
452 extrd,u $Zhh,59,60,$Zhh
459 depd,z $Zll,60,4,$rem
460 shrpd $Zhh,$Zll,4,$Zll
461 ldd $rem2($rem_4bit),$rem2
467 extrd,u $Zhh,59,60,$Zhh
470 ldd $rem($rem_4bit),$rem
476 cmpb,*<> $inp,$len,L\$outer_ghash_pa2
480 $code.=<<___ if ($SIZE_T==4);
493 and $mask0xf0,$nlo,$nhi
503 ldwx $rem($rem_4bit),$rem
504 shrpw $Zlh,$Zll,4,$Zll
506 shrpw $Zhl,$Zlh,4,$Zlh
508 shrpw $Zhh,$Zhl,4,$Zhl
510 extru $Zhh,27,28,$Zhh
514 and $mask0xf0,$nlo,$nhi
530 ldwx $rem($rem_4bit),$rem
531 shrpw $Zlh,$Zll,4,$Zll
533 shrpw $Zhl,$Zlh,4,$Zlh
537 shrpw $Zhh,$Zhl,4,$Zhl
538 ldbx $cnt($inp),$byte
541 extru $Zhh,27,28,$Zhh
548 shrpw $Zlh,$Zll,4,$Zll
549 ldwx $rem($rem_4bit),$rem
550 shrpw $Zhl,$Zlh,4,$Zlh
552 shrpw $Zhh,$Zhl,4,$Zhl
553 and $mask0xf0,$nlo,$nhi
554 extru $Zhh,27,28,$Zhh
561 addib,uv -1,$cnt,L\$oop_ghash_pa1
567 ldwx $rem($rem_4bit),$rem
568 shrpw $Zlh,$Zll,4,$Zll
570 shrpw $Zhl,$Zlh,4,$Zlh
573 shrpw $Zhh,$Zhl,4,$Zhl
576 extru $Zhh,27,28,$Zhh
583 ldwx $rem($rem_4bit),$rem
584 shrpw $Zlh,$Zll,4,$Zll
585 shrpw $Zhl,$Zlh,4,$Zlh
586 shrpw $Zhh,$Zhl,4,$Zhl
587 extru $Zhh,27,28,$Zhh
598 comb,<> $inp,$len,L\$outer_ghash_pa1
603 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
604 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
605 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
606 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
608 $code.=<<___ if ($SIZE_T==4);
609 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
610 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
611 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
612 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
613 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
618 $POPMB -$FRAME(%sp),%r3
623 .WORD `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
624 .WORD `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
625 .WORD `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
626 .WORD `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
627 .STRINGZ "GHASH for PA-RISC, GRYPTOGAMS by <appro\@openssl.org>"
631 # Explicitly encode PA-RISC 2.0 instructions used in this module, so
632 # that it can be compiled with .LEVEL 1.0. It should be noted that I
633 # wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
637 my ($mod,$args) = @_;
638 my $orig = "ldd$mod\t$args";
640 if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 4
641 { my $opcode=(0x03<<26)|($2<<21)|($1<<16)|(3<<6)|$3;
642 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
644 elsif ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 5
645 { my $opcode=(0x03<<26)|($2<<21)|(1<<12)|(3<<6)|$3;
646 $opcode|=(($1&0xF)<<17)|(($1&0x10)<<12); # encode offset
647 $opcode|=(1<<5) if ($mod =~ /^,m/);
648 $opcode|=(1<<13) if ($mod =~ /^,mb/);
649 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
655 my ($mod,$args) = @_;
656 my $orig = "std$mod\t$args";
658 if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
659 { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
660 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
666 my ($mod,$args) = @_;
667 my $orig = "extrd$mod\t$args";
669 # I only have ",u" completer, it's implicitly encoded...
670 if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
671 { my $opcode=(0x36<<26)|($1<<21)|($4<<16);
673 $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
674 $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
675 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
677 elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
678 { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
680 $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
681 $opcode |= (1<<13) if ($mod =~ /,\**=/);
682 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
688 my ($mod,$args) = @_;
689 my $orig = "shrpd$mod\t$args";
691 if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
692 { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
694 $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
695 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
697 elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11
698 { sprintf "\t.WORD\t0x%08x\t; %s",
699 (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig;
705 my ($mod,$args) = @_;
706 my $orig = "depd$mod\t$args";
708 # I only have ",z" completer, it's implicitly encoded...
709 if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 16
710 { my $opcode=(0x3c<<26)|($4<<21)|($1<<16);
713 $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode pos
714 $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
715 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
721 my ($mnemonic,$mod,$args)=@_;
722 my $opcode = eval("\$$mnemonic");
724 ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
727 foreach (split("\n",$code)) {
728 s/\`([^\`]*)\`/eval $1/ge;
730 s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e;
734 s/\bbv\b/bve/ if ($SIZE_T==8);