d5cc5fe00e785b7cccf02c3abac0953054a484a3
[openssl.git] / engines / asm / e_padlock-x86_64.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # September 2011
11 #
12 # Assembler helpers for Padlock engine.
13
# Command line: [flavour] [output]. A lone argument containing a dot is
# taken to be the output file name, leaving the flavour undefined.
14 $flavour = shift;
15 $output  = shift;
16 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
17
# Win64 calling convention is selected by flavour or by an .asm output suffix.
18 $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
19
# Look for the x86_64-xlate.pl translator next to this script first, then
# in ../../crypto/perlasm relative to it.
20 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
21 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
22 ( $xlate="${dir}../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or
23 die "can't locate x86_64-xlate.pl";
24
# Everything printed from here on is piped through the translator. Fail
# loudly if the pipe cannot be set up instead of silently generating nothing.
25 open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
26
27 $code=".text\n";
28
29 $PADLOCK_CHUNK=512;     # Must be a power of 2 larger than 16
30
# Register roles inside the padlock_*_encrypt routines. $out/$inp/$ctx/$len
# are the 1st/2nd/3rd/4th Unix-order argument registers, matching the
# (out, inp, ctx, len) prototype documented in generate_mode.
31 $ctx="%rdx";
32 $out="%rdi";
33 $inp="%rsi";
34 $len="%rcx";
35 $chunk="%rbx";          # bytes handled per loop pass; callee-saved, so pushed/popped
36
# Native argument registers, used by the \@abi-omnipotent routines below.
37 ($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
38                                  ("%rdi","%rsi","%rdx","%rcx"); # Unix order
39
40 $code.=<<___;
# padlock_capability: takes no arguments. Returns 0 in eax unless CPUID
# leaf 0 reports the vendor string "CentaurHauls" and eax of leaf 0xC0000000
# is at least 0xC0000001. Otherwise returns edx of leaf 0xC0000001 with
# bit 4 forced on (presumably the PadLock feature flags - confirm against
# the caller).
41 .globl  padlock_capability
42 .type   padlock_capability,\@abi-omnipotent
43 .align  16
44 padlock_capability:
45         mov     %rbx,%r8                # cpuid overwrites ebx; park caller's rbx in r8
46         xor     %eax,%eax               # leaf 0
47         cpuid
48         xor     %eax,%eax               # return value for every .Lnoluck exit below
49         cmp     \$`"0x".unpack("H*",'tneC')`,%ebx       # "Cent" as a little-endian dword
50         jne     .Lnoluck
51         cmp     \$`"0x".unpack("H*",'Hrua')`,%edx       # "aurH"
52         jne     .Lnoluck
53         cmp     \$`"0x".unpack("H*",'slua')`,%ecx       # "auls"
54         jne     .Lnoluck
55         mov     \$0xC0000000,%eax
56         cpuid
57         mov     %eax,%edx               # keep the leaf's eax for the compare
58         xor     %eax,%eax               # return 0 if the next check fails
59         cmp     \$0xC0000001,%edx
60         jb      .Lnoluck                # unsigned: reported value below 0xC0000001
61         mov     \$0xC0000001,%eax
62         cpuid
63         mov     %edx,%eax               # result = edx of leaf 0xC0000001
64         and     \$0xffffffef,%eax
65         or      \$0x10,%eax             # set Nano bit#4
66 .Lnoluck:
67         mov     %r8,%rbx                # restore callee-saved rbx
68         ret
69 .size   padlock_capability,.-padlock_capability
70
# padlock_key_bswap: byte-swaps consecutive 32-bit words in place, starting
# at the pointer passed as the first argument. The word count is the 32-bit
# value stored at offset 240 from that pointer (presumably the rounds field
# of an AES key schedule - confirm against the caller).
# NOTE(review): a count of 0 is not special-cased; the counter would wrap
# and the loop would keep going.
71 .globl  padlock_key_bswap
72 .type   padlock_key_bswap,\@abi-omnipotent,0
73 .align  16
74 padlock_key_bswap:
75         mov     240($arg1),%edx         # loop count
76 .Lbswap_loop:
77         mov     ($arg1),%eax
78         bswap   %eax
79         mov     %eax,($arg1)
80         lea     4($arg1),$arg1          # advance to the next word; lea leaves flags alone
81         sub     \$1,%edx
82         jnz     .Lbswap_loop
83         ret
84 .size   padlock_key_bswap,.-padlock_key_bswap
85
# padlock_verify_context: public wrapper around _padlock_verify_ctx. The
# first argument is the context pointer. It sets up what the helper expects:
# context in rdx, address of the saved-context slot in rax, and a pushed
# copy of the flags directly above the helper's return address.
86 .globl  padlock_verify_context
87 .type   padlock_verify_context,\@abi-omnipotent
88 .align  16
89 padlock_verify_context:
90         mov     $arg1,$ctx
91         pushf                           # flags image inspected by the helper
92         lea     .Lpadlock_saved_context(%rip),%rax
93         call    _padlock_verify_ctx
94         lea     8(%rsp),%rsp            # drop the pushed flags without touching flags
95         ret
96 .size   padlock_verify_context,.-padlock_verify_context
97
# _padlock_verify_ctx: internal helper.
# In:  rdx = context pointer, rax = address of the saved-context slot,
#      8(rsp) = flags image pushed by the caller just before the call.
# If bit 30 of that flags image is set and the context differs from the
# saved one, it executes pushf/popf - the same sequence as
# padlock_reload_key, presumably to make the unit reload its key.
# It always records the context in the saved slot. Clobbers r8 and flags.
98 .type   _padlock_verify_ctx,\@abi-omnipotent
99 .align  16
100 _padlock_verify_ctx:
101         mov     8(%rsp),%r8             # caller's pushed flags (0(rsp) is our return address)
102         bt      \$30,%r8
103         jnc     .Lverified              # bit 30 clear: nothing to do
104         cmp     (%rax),$ctx
105         je      .Lverified              # same context as last time
106         pushf
107         popf
108 .Lverified:
109         mov     $ctx,(%rax)             # remember this context for the next call
110         ret
111 .size   _padlock_verify_ctx,.-_padlock_verify_ctx
112
# padlock_reload_key: takes no arguments; pushes the flags register and
# pops it straight back. Per the name, this is presumably what makes the
# PadLock unit reload its key material - confirm against VIA documentation.
113 .globl  padlock_reload_key
114 .type   padlock_reload_key,\@abi-omnipotent
115 .align  16
116 padlock_reload_key:
117         pushf
118         popf
119         ret
120 .size   padlock_reload_key,.-padlock_reload_key
121
# padlock_aes_block: three-argument function that issues rep xcryptecb with
# a count of 1. The first two arguments stay in rdi/rsi (presumably output
# and input, matching the register aliases in this file). The third is the
# context: the control word is at offset 16 and the key at offset 32.
122 .globl  padlock_aes_block
123 .type   padlock_aes_block,\@function,3
124 .align  16
125 padlock_aes_block:
126         mov     %rbx,%r8                # rbx is callee-saved; park it in r8
127         mov     \$1,$len                # count register = 1
128         lea     32($ctx),%rbx           # key
129         lea     16($ctx),$ctx           # control word
130         .byte   0xf3,0x0f,0xa7,0xc8     # rep xcryptecb
131         mov     %r8,%rbx                # restore rbx
132         ret
133 .size   padlock_aes_block,.-padlock_aes_block
134
# padlock_xstore: two-argument function. Copies the low 32 bits of the
# second argument into edx and issues xstore; the first argument stays in
# rdi. eax is not set here, so the return value is whatever xstore leaves
# in it (presumably a status word - confirm against VIA documentation).
135 .globl  padlock_xstore
136 .type   padlock_xstore,\@function,2
137 .align  16
138 padlock_xstore:
139         mov     %esi,%edx               # second argument -> edx
140         .byte   0x0f,0xa7,0xc0          # xstore
141         ret
142 .size   padlock_xstore,.-padlock_xstore
143
# padlock_sha1_oneshot: three-argument function. Issues rep xsha1 with
# rax = 0 and rcx = third argument; the first two arguments stay in rdi/rsi.
# Compare padlock_sha1_blocks, which differs only in passing rax = -1.
144 .globl  padlock_sha1_oneshot
145 .type   padlock_sha1_oneshot,\@function,3
146 .align  16
147 padlock_sha1_oneshot:
148         xor     %rax,%rax               # rax = 0 selects the one-shot variant (per the name)
149         mov     %rdx,%rcx               # third argument -> count register
150         .byte   0xf3,0x0f,0xa6,0xc8     # rep xsha1
151         ret
152 .size   padlock_sha1_oneshot,.-padlock_sha1_oneshot
153
# padlock_sha1_blocks: three-argument function. Issues rep xsha1 with
# rax = -1 and rcx = third argument; the first two arguments stay in rdi/rsi.
# Compare padlock_sha1_oneshot, which differs only in passing rax = 0.
154 .globl  padlock_sha1_blocks
155 .type   padlock_sha1_blocks,\@function,3
156 .align  16
157 padlock_sha1_blocks:
158         mov     \$-1,%rax               # rax = -1 selects the block-wise variant (per the name)
159         mov     %rdx,%rcx               # third argument -> count register
160         .byte   0xf3,0x0f,0xa6,0xc8     # rep xsha1
161         ret
162 .size   padlock_sha1_blocks,.-padlock_sha1_blocks
163
# padlock_sha256_oneshot: three-argument function. Issues rep xsha256 with
# rax = 0 and rcx = third argument; the first two arguments stay in rdi/rsi.
# Compare padlock_sha256_blocks, which differs only in passing rax = -1.
164 .globl  padlock_sha256_oneshot
165 .type   padlock_sha256_oneshot,\@function,3
166 .align  16
167 padlock_sha256_oneshot:
168         xor     %rax,%rax               # rax = 0 selects the one-shot variant (per the name)
169         mov     %rdx,%rcx               # third argument -> count register
170         .byte   0xf3,0x0f,0xa6,0xd0     # rep xsha256
171         ret
172 .size   padlock_sha256_oneshot,.-padlock_sha256_oneshot
173
# padlock_sha256_blocks: three-argument function. Issues rep xsha256 with
# rax = -1 and rcx = third argument; the first two arguments stay in rdi/rsi.
# Compare padlock_sha256_oneshot, which differs only in passing rax = 0.
174 .globl  padlock_sha256_blocks
175 .type   padlock_sha256_blocks,\@function,3
176 .align  16
177 padlock_sha256_blocks:
178         mov     \$-1,%rax               # rax = -1 selects the block-wise variant (per the name)
179         mov     %rdx,%rcx               # third argument -> count register
180         .byte   0xf3,0x0f,0xa6,0xd0     # rep xsha256
181         ret
182 .size   padlock_sha256_blocks,.-padlock_sha256_blocks
183
# padlock_sha512_blocks: three-argument function. Copies the third argument
# into rcx and issues the 0xf3,0x0f,0xa6,0xe0 instruction; the first two
# arguments stay in rdi/rsi. Unlike the sha1/sha256 helpers, rax is not
# initialised here.
184 .globl  padlock_sha512_blocks
185 .type   padlock_sha512_blocks,\@function,3
186 .align  16
187 padlock_sha512_blocks:
188         mov     %rdx,%rcx               # third argument -> count register
189         .byte   0xf3,0x0f,0xa6,0xe0     # rep xsha512
190         ret
191 .size   padlock_sha512_blocks,.-padlock_sha512_blocks
192 ___
193
194 sub generate_mode {
195 my ($mode,$opcode) = @_;
# Appends the routine padlock_${mode}_encrypt to $code. $opcode is the last
# byte of the "rep xcrypt*" encoding for this mode.
196 # int padlock_$mode_encrypt(void *out, const void *inp,
197 #               struct padlock_cipher_data *ctx, size_t len);
# The generated routine returns 0 without processing anything when ctx is
# not 16-byte aligned or len is not a multiple of 16, and 1 otherwise.
# If the control word's align bit is set, or both out and inp are 16-byte
# aligned, the data is processed with a single rep xcrypt*. Otherwise it
# is processed in passes of at most PADLOCK_CHUNK bytes. A misaligned
# input is first copied to the aligned destination and processed in place;
# a misaligned output is produced in a stack buffer and copied out, and
# that buffer is zeroed before returning.
198 $code.=<<___;
199 .globl  padlock_${mode}_encrypt
200 .type   padlock_${mode}_encrypt,\@function,4
201 .align  16
202 padlock_${mode}_encrypt:
203         push    %rbp
204         push    %rbx
205
206         xor     %eax,%eax               # return value on the abort path
207         test    \$15,$ctx               # ctx must be 16-byte aligned
208         jnz     .L${mode}_abort
209         test    \$15,$len               # len must be a multiple of 16
210         jnz     .L${mode}_abort
211         lea     .Lpadlock_saved_context(%rip),%rax
212         pushf                           # flags image for _padlock_verify_ctx; dropped at exit
213         cld                             # string copies below must run forward
214         call    _padlock_verify_ctx
215         lea     16($ctx),$ctx           # control word
216         xor     %eax,%eax
217         xor     %ebx,%ebx
218         testl   \$`1<<5`,($ctx)         # align bit in control word
219         jnz     .L${mode}_aligned
220         test    \$0x0f,$out
221         setz    %al                     # !out_misaligned
222         test    \$0x0f,$inp
223         setz    %bl                     # !inp_misaligned
224         test    %ebx,%eax               # both aligned?
225         jnz     .L${mode}_aligned
226         neg     %rax
227         mov     \$$PADLOCK_CHUNK,$chunk
228         not     %rax                    # out_misaligned?-1:0
229         lea     (%rsp),%rbp             # remember stack pointer for the exit path
230         cmp     $chunk,$len
231         cmovc   $len,$chunk             # chunk=len>PADLOCK_CHUNK?PADLOCK_CHUNK:len
232         and     $chunk,%rax             # out_misaligned?chunk:0
233         mov     $len,$chunk
234         neg     %rax
235         and     \$$PADLOCK_CHUNK-1,$chunk       # chunk%=PADLOCK_CHUNK
# NOTE(review): the first pass handles only this remainder. When it is 0
# the first pass runs with a zero count; confirm rep xcrypt is a no-op then.
236         lea     (%rax,%rbp),%rsp        # reserve the bounce buffer (0 bytes if out is aligned)
237         jmp     .L${mode}_loop
238 .align  16
239 .L${mode}_loop:
240         mov     $out,%r8                # save parameters
241         mov     $inp,%r9
242         mov     $len,%r10
243         mov     $chunk,$len
244         mov     $chunk,%r11             # rbx is reused for the key pointer below
245         test    \$0x0f,$out             # out_misaligned
246         cmovnz  %rsp,$out               # then produce output in the stack buffer
247         test    \$0x0f,$inp             # inp_misaligned
248         jz      .L${mode}_inp_aligned
249         shr     \$3,$len                # bytes -> quadwords
250         .byte   0xf3,0x48,0xa5          # rep movsq
251         sub     $chunk,$out             # rewind to the start of the copy
252         mov     $chunk,$len
253         mov     $out,$inp               # process the copy in place
254 .L${mode}_inp_aligned:
255         lea     -16($ctx),%rax          # ivp
256         lea     16($ctx),%rbx           # key
257         shr     \$4,$len                # bytes -> 16-byte blocks
258         .byte   0xf3,0x0f,0xa7,$opcode  # rep xcrypt*
259 ___
260 $code.=<<___                            if ($mode !~ /ecb|ctr/);
261         movdqa  (%rax),%xmm0
262         movdqa  %xmm0,-16($ctx)         # copy [or refresh] iv
263 ___
264 $code.=<<___;
265         mov     %r8,$out                # restore parameters
266         mov     %r11,$chunk
267         test    \$0x0f,$out
268         jz      .L${mode}_out_aligned
269         mov     $chunk,$len
270         shr     \$3,$len                # bytes -> quadwords
271         lea     (%rsp),$inp
272         .byte   0xf3,0x48,0xa5          # rep movsq
273         sub     $chunk,$out             # undo the copy's advance; re-added below
274 .L${mode}_out_aligned:
275         mov     %r9,$inp
276         mov     %r10,$len
277         add     $chunk,$out
278         add     $chunk,$inp
279         sub     $chunk,$len             # sets ZF when nothing is left
280         mov     \$$PADLOCK_CHUNK,$chunk # later passes use full chunks; mov keeps the flags
281         jnz     .L${mode}_loop
282
283         test    \$0x0f,$out
284         jz      .L${mode}_done          # no stack buffer was used
285
286         mov     %rbp,$len
287         mov     %rsp,$out
288         sub     %rsp,$len               # size of the stack buffer in bytes
289         xor     %rax,%rax
290         shr     \$3,$len
291         .byte   0xf3,0x48,0xab          # rep stosq
292 .L${mode}_done:
293         lea     (%rbp),%rsp             # release the stack buffer
294         jmp     .L${mode}_exit
295
296 .align  16
297 .L${mode}_aligned:
298         lea     -16($ctx),%rax          # ivp
299         lea     16($ctx),%rbx           # key
300         shr     \$4,$len                # len/=AES_BLOCK_SIZE
301         .byte   0xf3,0x0f,0xa7,$opcode  # rep xcrypt*
302 ___
303 $code.=<<___                            if ($mode !~ /ecb|ctr/);
304         movdqa  (%rax),%xmm0
305         movdqa  %xmm0,-16($ctx)         # copy [or refresh] iv
306 ___
307 $code.=<<___;
308 .L${mode}_exit:
309         mov     \$1,%eax                # success
310         lea     8(%rsp),%rsp            # discard the flags image pushed after the checks
311 .L${mode}_abort:
312         pop     %rbx
313         pop     %rbp
314         ret
315 .size   padlock_${mode}_encrypt,.-padlock_${mode}_encrypt
316 ___
317 }
318
# Emit one padlock_<mode>_encrypt routine per mode; the second argument is
# the last byte of that mode's "rep xcrypt*" encoding.
319 &generate_mode("ecb",0xc8);
320 &generate_mode("cbc",0xd0);
321 &generate_mode("cfb",0xe0);
322 &generate_mode("ofb",0xe8);
323 &generate_mode("ctr16",0xd8);
324
# Identification string, followed by the one mutable datum: the context
# pointer last recorded by _padlock_verify_ctx.
325 $code.=<<___;
326 .asciz  "VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"
327 .align  16
328 .data
329 .align  8
330 .Lpadlock_saved_context:
331         .quad   0
332 ___
# Replace every backtick-quoted expression in the generated text with the
# result of evaluating it as Perl (e.g. the unpack() calls and 1<<5 above).
333 $code =~ s/\`([^\`]*)\`/eval($1)/gem;
334
335 print $code;
336
# STDOUT is a pipe to the translator: closing it waits for the child and
# returns false if the child exited non-zero. Without this check a failed
# translation would leave truncated output while this script exits 0.
337 close STDOUT or die "error closing STDOUT: $! (child status $?)";