Padlock engine: make it independent of inline assembler.
[openssl.git] / engines / asm / e_padlock-x86_86.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # September 2011
11 #
12 # Assembler helpers for Padlock engine.
13
# Command line: [flavour] output.  If the first argument contains a dot it
# is taken to be the output file name and no flavour is assumed.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 argument registers are selected for the nasm/masm/mingw64 flavours
# or whenever the output file ends in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script first, then in the perlasm
# directory relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# The interpreter and translator paths are quoted so that directories with
# spaces survive the shell, and a failure to start the pipe is fatal rather
# than silently producing no output.
open STDOUT,"| \"$^X\" \"$xlate\" $flavour $output"
    or die "can't call $xlate: $!";
26
# Accumulator for the generated assembly; it is printed in one go at the end.
$code = ".text\n";

# Size in bytes of one processing chunk.
$PADLOCK_CHUNK = 512;   # Must be a power of 2 larger than 16

# Symbolic register names used throughout the assembly templates.
($ctx, $out, $inp, $len, $chunk) = ("%rdx", "%rdi", "%rsi", "%rcx", "%rbx");

# Raw argument registers for routines that are not given a translated
# \@function prologue.
($arg1, $arg2, $arg3, $arg4) = $win64
    ? ("%rcx", "%rdx", "%r8",  "%r9")          # Win64 order
    : ("%rdi", "%rsi", "%rdx", "%rcx");        # Unix order
39
# padlock_capability: executes CPUID leaf 0 and compares the vendor
# registers against "Cent"/"aurH"/"auls".  On a match it queries leaf
# 0xC0000000 and, if leaf 0xC0000001 is available, returns that leaf's %edx
# with bit 4 forced on.  In every other case it returns 0.  %rbx is
# preserved through %r8 because CPUID overwrites it.
$code.=<<___;
.globl  padlock_capability
.type   padlock_capability,\@abi-omnipotent
.align  16
padlock_capability:
        mov     %rbx,%r8
        xor     %eax,%eax
        cpuid
        xor     %eax,%eax
        cmp     \$`"0x".unpack("H*",'tneC')`,%ebx
        jne     .Lnoluck
        cmp     \$`"0x".unpack("H*",'Hrua')`,%edx
        jne     .Lnoluck
        cmp     \$`"0x".unpack("H*",'slua')`,%ecx
        jne     .Lnoluck
        mov     \$0xC0000000,%eax
        cpuid
        mov     %eax,%edx
        xor     %eax,%eax
        cmp     \$0xC0000001,%edx
        jb      .Lnoluck
        mov     \$0xC0000001,%eax
        cpuid
        mov     %edx,%eax
        and     \$0xffffffef,%eax
        or      \$0x10,%eax             # set Nano bit#4
.Lnoluck:
        mov     %r8,%rbx
        ret
.size   padlock_capability,.-padlock_capability

___

# padlock_key_bswap: byte-swaps, in place, the 32-bit words of the key
# schedule addressed by the first argument.  The dword at offset 240 is
# presumably the AES round count (AES_KEY.rounds) -- confirm against the
# caller.  An expanded AES key holds 4*(rounds+1) words, so the count is
# scaled accordingly; using the round count itself as the word count would
# leave most of the schedule unswapped.
$code.=<<___;
.globl  padlock_key_bswap
.type   padlock_key_bswap,\@abi-omnipotent,0
.align  16
padlock_key_bswap:
        mov     240($arg1),%edx
        inc     %edx
        shl     \$2,%edx                # 4*(rounds+1) words
.Lbswap_loop:
        mov     ($arg1),%eax
        bswap   %eax
        mov     %eax,($arg1)
        lea     4($arg1),$arg1
        sub     \$1,%edx
        jnz     .Lbswap_loop
        ret
.size   padlock_key_bswap,.-padlock_key_bswap

___

# padlock_verify_context: public wrapper.  Moves the first argument into the
# context register, pushes the flags, points %rax at the saved-context slot
# and calls _padlock_verify_ctx; the pushed flags are dropped afterwards.
#
# _padlock_verify_ctx: expects the caller's pushed flags at 8(%rsp), the
# context pointer in the context register and the saved-context slot in
# %rax.  If bit 30 of the saved flags is set and the slot holds a different
# context, it executes pushf/popf (presumably what makes the unit reload its
# key -- confirm against VIA documentation).  The slot is always updated.
$code.=<<___;
.globl  padlock_verify_context
.type   padlock_verify_context,\@abi-omnipotent
.align  16
padlock_verify_context:
        mov     $arg1,$ctx
        pushf
        lea     .Lpadlock_saved_context(%rip),%rax
        call    _padlock_verify_ctx
        lea     8(%rsp),%rsp
        ret
.size   padlock_verify_context,.-padlock_verify_context

.type   _padlock_verify_ctx,\@abi-omnipotent
.align  16
_padlock_verify_ctx:
        mov     8(%rsp),%r8
        bt      \$30,%r8
        jnc     .Lverified
        cmp     (%rax),$ctx
        je      .Lverified
        pushf
        popf
.Lverified:
        mov     $ctx,(%rax)
        ret
.size   _padlock_verify_ctx,.-_padlock_verify_ctx

___

# padlock_reload_key: unconditional pushf/popf pair.
$code.=<<___;
.globl  padlock_reload_key
.type   padlock_reload_key,\@abi-omnipotent
.align  16
padlock_reload_key:
        pushf
        popf
        ret
.size   padlock_reload_key,.-padlock_reload_key

___

# padlock_aes_block(out, inp, ctx): runs "rep xcryptecb" with a count of one
# block, taking the control word from ctx+16 and the key from ctx+32.
# %rbx is preserved through %r8.
$code.=<<___;
.globl  padlock_aes_block
.type   padlock_aes_block,\@function,3
.align  16
padlock_aes_block:
        mov     %rbx,%r8
        mov     \$1,$len
        lea     32($ctx),%rbx           # key
        lea     16($ctx),$ctx           # control word
        .byte   0xf3,0x0f,0xa7,0xc8     # rep xcryptecb
        mov     %r8,%rbx
        ret
.size   padlock_aes_block,.-padlock_aes_block

___

# padlock_xstore: copies the 32-bit second argument into %edx and executes
# xstore with the first argument left in %rdi.  %eax is not touched here, so
# the return value is whatever the instruction leaves in it.
$code.=<<___;
.globl  padlock_xstore
.type   padlock_xstore,\@function,2
.align  16
padlock_xstore:
        mov     %esi,%edx
        .byte   0x0f,0xa7,0xc0          # xstore
        ret
.size   padlock_xstore,.-padlock_xstore

___

# Hash helpers.  Each moves its third argument into %rcx and executes
# "rep xsha1" or "rep xsha256" with the first two arguments left in
# %rdi/%rsi.  The *_oneshot variants enter with %rax = 0, the others with
# %rax = -1.
$code.=<<___;
.globl  padlock_sha1_oneshot
.type   padlock_sha1_oneshot,\@function,3
.align  16
padlock_sha1_oneshot:
        xor     %rax,%rax
        mov     %rdx,%rcx
        .byte   0xf3,0x0f,0xa6,0xc8     # rep xsha1
        ret
.size   padlock_sha1_oneshot,.-padlock_sha1_oneshot

.globl  padlock_sha1
.type   padlock_sha1,\@function,3
.align  16
padlock_sha1:
        mov     \$-1,%rax
        mov     %rdx,%rcx
        .byte   0xf3,0x0f,0xa6,0xc8     # rep xsha1
        ret
.size   padlock_sha1,.-padlock_sha1

.globl  padlock_sha256_oneshot
.type   padlock_sha256_oneshot,\@function,3
.align  16
padlock_sha256_oneshot:
        xor     %rax,%rax
        mov     %rdx,%rcx
        .byte   0xf3,0x0f,0xa6,0xd0     # rep xsha256
        ret
.size   padlock_sha256_oneshot,.-padlock_sha256_oneshot

.globl  padlock_sha256
.type   padlock_sha256,\@function,3
.align  16
padlock_sha256:
        mov     \$-1,%rax
        mov     %rdx,%rcx
        .byte   0xf3,0x0f,0xa6,0xd0     # rep xsha256
        ret
.size   padlock_sha256,.-padlock_sha256
___
184
# Append to $code the assembly for one bulk-encryption entry point.
#
#   $mode   - mode name; used in the symbol padlock_${mode}_encrypt and in
#             the local labels.
#   $opcode - last byte of the "rep xcrypt*" instruction encoding for this
#             mode.
#
# The generated routine returns 0 without doing anything if ctx or len is
# not a multiple of 16, and 1 otherwise.  If the align bit of the control
# word is set, or both buffers are 16-byte aligned, the whole input is
# handed to a single "rep xcrypt*".  Otherwise the data is processed in
# pieces of at most PADLOCK_CHUNK bytes: a misaligned input is first copied
# to the output buffer or, if the output is misaligned too, to a stack
# buffer, which is zeroed before returning.  For every mode except ecb and
# ctr* the 16 bytes addressed by %rax after the instruction are copied back
# into the iv slot.
sub generate_mode {
my ($mode,$opcode) = @_;
# int padlock_$mode_encrypt(void *out, const void *inp,
#               struct padlock_cipher_data *ctx, size_t len);
$code.=<<___;
.globl  padlock_${mode}_encrypt
.type   padlock_${mode}_encrypt,\@function,4
.align  16
padlock_${mode}_encrypt:
        push    %rbp
        push    %rbx

        xor     %eax,%eax
        test    \$15,$ctx
        jnz     .L${mode}_abort
        test    \$15,$len
        jnz     .L${mode}_abort
        lea     .Lpadlock_saved_context(%rip),%rax
        pushf
        cld
        call    _padlock_verify_ctx
        lea     16($ctx),$ctx           # control word
        xor     %eax,%eax
        xor     %ebx,%ebx
        testl   \$`1<<5`,($ctx)         # align bit in control word
        jnz     .L${mode}_aligned
        test    \$0x0f,$out
        setz    %al                     # !out_misaligned
        test    \$0x0f,$inp
        setz    %bl                     # !inp_misaligned
        test    %ebx,%eax
        jnz     .L${mode}_aligned
        neg     %rax
        mov     \$$PADLOCK_CHUNK,$chunk
        not     %rax                    # out_misaligned?-1:0
        lea     (%rsp),%rbp
        cmp     $chunk,$len
        cmovc   $len,$chunk             # chunk=len>PADLOCK_CHUNK?PADLOCK_CHUNK:len
        and     $chunk,%rax             # out_misaligned?chunk:0
        mov     $len,$chunk
        neg     %rax
        and     \$$PADLOCK_CHUNK-1,$chunk       # chunk%=PADLOCK_CHUNK
        lea     (%rax,%rbp),%rsp
        jmp     .L${mode}_loop
.align  16
.L${mode}_loop:
        mov     $out,%r8                # save parameters
        mov     $inp,%r9
        mov     $len,%r10
        mov     $chunk,$len
        mov     $chunk,%r11
        test    \$0x0f,$out             # out_misaligned
        cmovnz  %rsp,$out
        test    \$0x0f,$inp             # inp_misaligned
        jz      .L${mode}_inp_aligned
        shr     \$3,$len
        .byte   0xf3,0x48,0xa5          # rep movsq
        sub     $chunk,$out
        mov     $chunk,$len
        mov     $out,$inp
.L${mode}_inp_aligned:
        lea     -16($ctx),%rax          # ivp
        lea     16($ctx),%rbx           # key
        shr     \$4,$len
        .byte   0xf3,0x0f,0xa7,$opcode  # rep xcrypt*
___
# Chunked path: copy the iv back for the modes that carry one.
$code.=<<___ if ($mode !~ /ecb|ctr/);
        movdqa  (%rax),%xmm0
        movdqa  %xmm0,-16($ctx)         # copy [or refresh] iv
___
$code.=<<___;
        mov     %r8,$out                # restore parameters
        mov     %r11,$chunk
        test    \$0x0f,$out
        jz      .L${mode}_out_aligned
        mov     $chunk,$len
        shr     \$3,$len
        lea     (%rsp),$inp
        .byte   0xf3,0x48,0xa5          # rep movsq
        sub     $chunk,$out
.L${mode}_out_aligned:
        mov     %r9,$inp
        mov     %r10,$len
        add     $chunk,$out
        add     $chunk,$inp
        sub     $chunk,$len
        mov     \$$PADLOCK_CHUNK,$chunk
        jnz     .L${mode}_loop

        test    \$0x0f,$out
        jz      .L${mode}_done

        mov     %rbp,$len
        mov     %rsp,$out
        sub     %rsp,$len
        xor     %rax,%rax
        shr     \$3,$len
        .byte   0xf3,0x48,0xab          # rep stosq
.L${mode}_done:
        lea     (%rbp),%rsp
        jmp     .L${mode}_exit

.align  16
.L${mode}_aligned:
        lea     -16($ctx),%rax          # ivp
        lea     16($ctx),%rbx           # key
        shr     \$4,$len                # len/=AES_BLOCK_SIZE
        .byte   0xf3,0x0f,0xa7,$opcode  # rep xcrypt*
___
# Single-shot path: same iv copy-back as above.
$code.=<<___ if ($mode !~ /ecb|ctr/);
        movdqa  (%rax),%xmm0
        movdqa  %xmm0,-16($ctx)         # copy [or refresh] iv
___
# Common exit: the lea drops the flags pushed before _padlock_verify_ctx;
# the abort label is reached before that push and therefore skips it.
$code.=<<___;
.L${mode}_exit:
        mov     \$1,%eax
        lea     8(%rsp),%rsp
.L${mode}_abort:
        pop     %rbx
        pop     %rbp
        ret
.size   padlock_${mode}_encrypt,.-padlock_${mode}_encrypt
___
}
308
# Emit one encrypt routine per mode, paired with the final byte of its
# "rep xcrypt*" encoding.  The order fixes the layout of the output.
foreach my $spec (["ecb",   0xc8],
                  ["cbc",   0xd0],
                  ["cfb",   0xe0],
                  ["ofb",   0xe8],
                  ["ctr16", 0xd8]) {
    generate_mode(@$spec);
}
314
# Trailer: identification string, then the writable slot in which
# _padlock_verify_ctx records the most recently used context pointer.
$code .= join("\n",
    '.asciz  "VIA Padlock x86_64 module, CRYPTOGAMS by <appro@openssl.org>"',
    '.align  16',
    '.data',
    '.align  8',
    '.Lpadlock_saved_context:',
    '        .quad   0',
    '');

# Replace every `...` span in the accumulated assembly with the value of the
# Perl expression it contains.
$code =~ s{`([^`]*)`}{eval $1}gem;
324
# Hand the finished assembly to the translator on the other end of the pipe.
print $code;

# Closing a piped handle flushes buffered output and waits for the child, so
# a write error or a non-zero translator exit status only shows up here.
# Fail loudly rather than leave a truncated or missing output file behind.
close STDOUT or die "error closing STDOUT: $! (exit status $?)";