d5d38965cf760a8ea0f799789329cab42de1e724
[openssl.git] / crypto / cast / asm / cast-586.pl
1 #! /usr/bin/env perl
2 # Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the OpenSSL license (the "License").  You may not use
5 # this file except in compliance with the License.  You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9
# This flag makes the inner loop one cycle longer, but generates
# code that runs 30% faster on the Pentium Pro/II, 44% faster
# on PIII, while only 7% slower on the Pentium.
# By default, this flag is on.
$ppro=1;

# Directory part of this script's path (including the trailing separator),
# or the empty string when the script was invoked without one.  It is used
# to find the perlasm helpers relative to the script.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
require "cbc.pl";

# The last command-line argument names the output file.  Fail loudly if it
# cannot be opened instead of carrying on and producing nothing.  When no
# output argument is given, STDOUT is left as inherited.
$output=pop;
if (defined($output) && $output ne "") {
    open(STDOUT, ">", $output) or die "can't open $output: $!";
}

# First remaining argument is handed to asm_init; the second argument is
# true when the last remaining argument is the literal string "386".
&asm_init($ARGV[0],$ARGV[$#ARGV] eq "386");

$CAST_ROUNDS=16;

# Register assignment shared by CAST_encrypt and E_CAST.
$L="edi";       # one half of the data block
$R="esi";       # the other half of the data block
$K="ebp";       # pointer to the key schedule
$tmp1="ecx";
$tmp2="ebx";
$tmp3="eax";
$tmp4="edx";

# Symbol names of the four S-box tables referenced by E_CAST.
$S1="CAST_S_table0";
$S2="CAST_S_table1";
$S3="CAST_S_table2";
$S4="CAST_S_table3";

# The three operation orderings passed to E_CAST as (OP1, OP2, OP3).
@F1=("add","xor","sub");
@F2=("xor","sub","add");
@F3=("sub","add","xor");

&CAST_encrypt("CAST_encrypt",1);
&CAST_encrypt("CAST_decrypt",0);
# NOTE(review): the numeric arguments are interpreted by cbc() in cbc.pl;
# their meaning is not visible in this file.
&cbc("CAST_cbc_encrypt","CAST_encrypt","CAST_decrypt",1,4,5,3,-1,-1);

&asm_finish();

# Buffered write errors (e.g. a full disk) only surface at close time, so
# check it: a truncated assembly file must not look like success.
close STDOUT or die "error closing STDOUT: $!";
50
# CAST_encrypt(NAME, ENC)
#
# Emit one complete routine called NAME through the perlasm helpers.
# When ENC is true the rounds are emitted in ascending order (0..15);
# otherwise they are emitted in descending order (15..0).
#
# The generated routine reads two 32-bit words through its first argument,
# takes the key schedule pointer from its second argument, and reads a
# "short key" flag from offset 128 of the key schedule.  When that flag is
# non-zero, rounds 12..15 are skipped at run time.  The two words are
# written back in swapped positions.
#
# Helpers are called with a leading '&' on purpose: several of them
# (push, pop, or, xor) share names with Perl built-ins and operators.
sub CAST_encrypt {
    my ($name, $enc) = @_;

    my $win_ex = <<"EOF";
EXTERN  _CAST_S_table0:DWORD
EXTERN  _CAST_S_table1:DWORD
EXTERN  _CAST_S_table2:DWORD
EXTERN  _CAST_S_table3:DWORD
EOF
    &main::external_label(
        "CAST_S_table0",
        "CAST_S_table1",
        "CAST_S_table2",
        "CAST_S_table3",
    );

    &function_begin_B($name, $win_ex);

    &comment("");

    &push("ebp");
    &push("ebx");
    &mov($tmp2, &wparam(0));
    &mov($K, &wparam(1));
    &push("esi");
    &push("edi");

    &comment("Load the 2 words");
    &mov($L, &DWP(0, $tmp2, "", 0));
    &mov($R, &DWP(4, $tmp2, "", 0));

    &comment('Get short key flag');
    &mov($tmp3, &DWP(128, $K, "", 0));
    if ($enc) {
        # Keep the flag on the stack; it is popped and tested after round 11.
        &push($tmp3);
    }
    else {
        # Decryption tests the flag up front and jumps past rounds 15..12.
        &or($tmp3, $tmp3);
        &jnz(&label('cast_dec_skip'));
    }

    &xor($tmp3, $tmp3);

    # The operation set repeats with period three over the round number.
    my @ops_for = (\@F1, \@F2, \@F3);

    # Emit round $i.  The half that receives the round output alternates
    # every round: encryption updates $L on even rounds, decryption on odd
    # rounds.  The second E_CAST argument is a placeholder E_CAST never reads.
    my $emit_round = sub {
        my ($i) = @_;
        my ($dst, $src) = (($i & 1) xor $enc) ? ($L, $R) : ($R, $L);
        &E_CAST($i, undef, $dst, $src, $K, @{ $ops_for[$i % 3] },
                $tmp1, $tmp2, $tmp3, $tmp4);
    };

    if ($enc) {
        for my $i (0 .. 11) {
            $emit_round->($i);
        }
        &comment('test short key flag');
        &pop($tmp4);
        &or($tmp4, $tmp4);
        &jnz(&label('cast_enc_done'));
        for my $i (12 .. 15) {
            $emit_round->($i);
        }
    }
    else {
        for my $i (reverse(12 .. 15)) {
            $emit_round->($i);
        }
        &set_label('cast_dec_skip');
        for my $i (reverse(0 .. 11)) {
            $emit_round->($i);
        }
    }

    &set_label('cast_enc_done') if $enc;
    # Why the nop? - Ben 17/1/99
    &nop();
    # Store the halves back in swapped order: $L at offset 4, $R at offset 0.
    &mov($tmp3, &wparam(0));
    &mov(&DWP(4, $tmp3, "", 0), $L);
    &mov(&DWP(0, $tmp3, "", 0), $R);
    &function_end($name);
}
144
# E_CAST(I, S, L, R, K, OP1, OP2, OP3, TMP1, TMP2, TMP3, TMP4)
#
# Emit the instructions for round I.
#
#   I           round number; selects the pair of 32-bit key words at
#               offsets I*8 and I*8+4 from K
#   S           unused
#   L           register that receives the round output (xor-ed in)
#   R           register that supplies the round input
#   K           register holding the key schedule pointer
#   OP1..OP3    names of the helper subs ("add", "xor", "sub") used to
#               combine values; called by name
#   TMP1..TMP4  scratch registers
#
# The emitted code computes OP1(key word, R), rotates it left by the low
# byte of the second key word, uses four byte-sized pieces of the result
# (marked A, B, C, D below) as indices into the four S-box tables named by
# the globals $S1..$S4, combines the looked-up words with OP2, OP3 and OP1,
# and xors the result into L.
#
# The statement order is the instruction schedule; do not reorder.
sub E_CAST {
    my ($i,$S,$L,$R,$K,$OP1,$OP2,$OP3,$tmp1,$tmp2,$tmp3,$tmp4)=@_;
    # Ri needs to have 16 pre added.

    &comment("round $i");
    &mov(       $tmp4,          &DWP($i*8,$K,"",1));

    &mov(       $tmp1,          &DWP($i*8+4,$K,"",1));
    &$OP1(      $tmp4,          $R);

    &rotl(      $tmp4,          &LB($tmp1));

    if ($ppro) {
        # Clear the index registers before writing their low bytes.
        # NOTE(review): presumably this avoids partial-register stalls on
        # the Pentium Pro family - confirm.
        &xor(   $tmp1,          $tmp1);
        &mov(   $tmp2,          0xff);

        &movb(  &LB($tmp1),     &HB($tmp4));    # A
        &and(   $tmp2,          $tmp4);         # B

        &shr(   $tmp4,          16);            #
        &xor(   $tmp3,          $tmp3);
    } else {
        # This path does not clear $tmp1 or $tmp3 here; only their low
        # bytes are replaced, so whatever is in the upper bits is kept.
        &mov(   $tmp2,          $tmp4);         # B
        &movb(  &LB($tmp1),     &HB($tmp4));    # A     # BAD BAD BAD

        &shr(   $tmp4,          16);            #
        &and(   $tmp2,          0xff);
    }

    &movb(      &LB($tmp3),     &HB($tmp4));    # C     # BAD BAD BAD
    &and(       $tmp4,          0xff);          # D

    # Table lookups, combined as ((S1[A] OP2 S2[B]) OP3 S3[C]) OP1 S4[D].
    &mov(       $tmp1,          &DWP($S1,"",$tmp1,4));
    &mov(       $tmp2,          &DWP($S2,"",$tmp2,4));

    &$OP2(      $tmp1,          $tmp2);
    &mov(       $tmp2,          &DWP($S3,"",$tmp3,4));

    &$OP3(      $tmp1,          $tmp2);
    &mov(       $tmp2,          &DWP($S4,"",$tmp4,4));

    &$OP1(      $tmp1,          $tmp2);
    # XXX

    &xor(       $L,             $tmp1);
    # XXX
}
192