e_padlock-x86*.pl: Nano-related update.
[openssl.git] / engines / asm / e_padlock-x86_64.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # September 2011
11 #
12 # Assembler helpers for Padlock engine.
13
# Command line: [flavour] output.  When the first argument contains a dot
# it is taken to be the output file name and $flavour is left undefined.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 calling convention is selected by a nasm/masm/mingw64 flavour or
# by an output file name ending in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script first, then in
# ../../crypto/perlasm relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed from here on is piped through the translator.  The
# interpreter, translator and output paths are quoted so that directories
# containing spaces survive the shell, and a failure to start the pipe is
# fatal instead of silently producing no output.
open STDOUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
26
# Generated assembly is accumulated in $code and printed at the very end.
$code = ".text\n";

# Must be a power of 2 larger than 16
$PADLOCK_CHUNK = 512;

# Register aliases used by the bulk-encryption routines.
($ctx, $out, $inp, $len, $chunk) = ("%rdx", "%rdi", "%rsi", "%rcx", "%rbx");

# Argument registers for the routines that read their arguments directly.
if ($win64) {
    ($arg1, $arg2, $arg3, $arg4) = ("%rcx", "%rdx", "%r8",  "%r9");    # Win64 order
}
else {
    ($arg1, $arg2, $arg3, $arg4) = ("%rdi", "%rsi", "%rdx", "%rcx");   # Unix order
}
39
# Fixed helper routines, appended to $code:
#
#   padlock_capability      leaves 0 in %eax unless CPUID leaf 0 reports the
#                           vendor string "CentaurHauls" and leaf 0xC0000000
#                           reports at least 0xC0000001; otherwise returns
#                           %edx of leaf 0xC0000001 with bit 4 forced on.
#                           %rbx is preserved across cpuid via %r8.
#   padlock_key_bswap       byte-swaps in place the 32-bit words starting at
#                           the first argument; the word count is read from
#                           offset 240 of that same pointer.
#   padlock_verify_context  pushes the flags and calls _padlock_verify_ctx
#                           for the context passed as first argument.
#   _padlock_verify_ctx     if bit 30 of the flags image at 8(%rsp) is set and
#                           $ctx differs from .Lpadlock_saved_context, runs
#                           pushf/popf (the same sequence as
#                           padlock_reload_key); always records $ctx as the
#                           saved context.
#   padlock_reload_key      pushf/popf.
#   padlock_aes_block       a single-count "rep xcryptecb" with the control
#                           word at ctx+16 and the key at ctx+32.
#   padlock_xstore          copies the second argument to %edx, then xstore.
#   padlock_sha1_*, padlock_sha256_*
#                           "rep xsha1" / "rep xsha256" with the third
#                           argument as count in %rcx; %rax is 0 for the
#                           oneshot variants and -1 for the blocks variants.
#   padlock_sha512_blocks   "rep xsha512" with the third argument in %rcx.
#
# padlock_sha512_blocks used to carry its \@function,3 annotation on the
# .globl line and had no .type line at all; it is now declared the same
# way as every other \@function routine here.
$code.=<<___;
.globl  padlock_capability
.type   padlock_capability,\@abi-omnipotent
.align  16
padlock_capability:
        mov     %rbx,%r8                # cpuid clobbers rbx
        xor     %eax,%eax
        cpuid
        xor     %eax,%eax               # default answer: no capabilities
        cmp     \$`"0x".unpack("H*",'tneC')`,%ebx
        jne     .Lnoluck
        cmp     \$`"0x".unpack("H*",'Hrua')`,%edx
        jne     .Lnoluck
        cmp     \$`"0x".unpack("H*",'slua')`,%ecx
        jne     .Lnoluck
        mov     \$0xC0000000,%eax
        cpuid
        mov     %eax,%edx
        xor     %eax,%eax
        cmp     \$0xC0000001,%edx
        jb      .Lnoluck
        mov     \$0xC0000001,%eax
        cpuid
        mov     %edx,%eax
        and     \$0xffffffef,%eax
        or      \$0x10,%eax             # set Nano bit#4
.Lnoluck:
        mov     %r8,%rbx
        ret
.size   padlock_capability,.-padlock_capability

.globl  padlock_key_bswap
.type   padlock_key_bswap,\@abi-omnipotent,0
.align  16
padlock_key_bswap:
        mov     240($arg1),%edx         # number of 32-bit words to swap
.Lbswap_loop:
        mov     ($arg1),%eax
        bswap   %eax
        mov     %eax,($arg1)
        lea     4($arg1),$arg1
        sub     \$1,%edx
        jnz     .Lbswap_loop
        ret
.size   padlock_key_bswap,.-padlock_key_bswap

.globl  padlock_verify_context
.type   padlock_verify_context,\@abi-omnipotent
.align  16
padlock_verify_context:
        mov     $arg1,$ctx
        pushf
        lea     .Lpadlock_saved_context(%rip),%rax
        call    _padlock_verify_ctx
        lea     8(%rsp),%rsp            # drop the pushed flags
        ret
.size   padlock_verify_context,.-padlock_verify_context

.type   _padlock_verify_ctx,\@abi-omnipotent
.align  16
_padlock_verify_ctx:
        mov     8(%rsp),%r8             # flags pushed by the caller
        bt      \$30,%r8
        jnc     .Lverified
        cmp     (%rax),$ctx
        je      .Lverified
        pushf
        popf
.Lverified:
        mov     $ctx,(%rax)
        ret
.size   _padlock_verify_ctx,.-_padlock_verify_ctx

.globl  padlock_reload_key
.type   padlock_reload_key,\@abi-omnipotent
.align  16
padlock_reload_key:
        pushf
        popf
        ret
.size   padlock_reload_key,.-padlock_reload_key

.globl  padlock_aes_block
.type   padlock_aes_block,\@function,3
.align  16
padlock_aes_block:
        mov     %rbx,%r8
        mov     \$1,$len
        lea     32($ctx),%rbx           # key
        lea     16($ctx),$ctx           # control word
        .byte   0xf3,0x0f,0xa7,0xc8     # rep xcryptecb
        mov     %r8,%rbx
        ret
.size   padlock_aes_block,.-padlock_aes_block

.globl  padlock_xstore
.type   padlock_xstore,\@function,2
.align  16
padlock_xstore:
        mov     %esi,%edx
        .byte   0x0f,0xa7,0xc0          # xstore
        ret
.size   padlock_xstore,.-padlock_xstore

.globl  padlock_sha1_oneshot
.type   padlock_sha1_oneshot,\@function,3
.align  16
padlock_sha1_oneshot:
        xor     %rax,%rax
        mov     %rdx,%rcx
        .byte   0xf3,0x0f,0xa6,0xc8     # rep xsha1
        ret
.size   padlock_sha1_oneshot,.-padlock_sha1_oneshot

.globl  padlock_sha1_blocks
.type   padlock_sha1_blocks,\@function,3
.align  16
padlock_sha1_blocks:
        mov     \$-1,%rax
        mov     %rdx,%rcx
        .byte   0xf3,0x0f,0xa6,0xc8     # rep xsha1
        ret
.size   padlock_sha1_blocks,.-padlock_sha1_blocks

.globl  padlock_sha256_oneshot
.type   padlock_sha256_oneshot,\@function,3
.align  16
padlock_sha256_oneshot:
        xor     %rax,%rax
        mov     %rdx,%rcx
        .byte   0xf3,0x0f,0xa6,0xd0     # rep xsha256
        ret
.size   padlock_sha256_oneshot,.-padlock_sha256_oneshot

.globl  padlock_sha256_blocks
.type   padlock_sha256_blocks,\@function,3
.align  16
padlock_sha256_blocks:
        mov     \$-1,%rax
        mov     %rdx,%rcx
        .byte   0xf3,0x0f,0xa6,0xd0     # rep xsha256
        ret
.size   padlock_sha256_blocks,.-padlock_sha256_blocks

.globl  padlock_sha512_blocks
.type   padlock_sha512_blocks,\@function,3
.align  16
padlock_sha512_blocks:
        mov     %rdx,%rcx
        .byte   0xf3,0x0f,0xa6,0xe0     # rep xsha512
        ret
.size   padlock_sha512_blocks,.-padlock_sha512_blocks
___
192
sub generate_mode {
    # Appends to $code the routine
    #
    # int padlock_$mode_encrypt(void *out, const void *inp,
    #               struct padlock_cipher_data *ctx, size_t len);
    #
    # $mode   - mode name used in the symbol and in the local labels
    # $opcode - last byte of the "rep xcrypt*" instruction encoding
    my ($mode,$opcode) = @_;

    # Fragment emitted right after each "rep xcrypt*"; left empty for
    # modes whose name matches ecb or ctr.
    my $iv_refresh = "";
    $iv_refresh = <<___ unless ($mode =~ /ecb|ctr/);
        movdqa  (%rax),%xmm0
        movdqa  %xmm0,-16($ctx)         # copy [or refresh] iv
___

    # Entry checks, stack buffer set-up and the head of the chunked loop,
    # up to and including its xcrypt instruction.
    my $loop_head = <<___;
.globl  padlock_${mode}_encrypt
.type   padlock_${mode}_encrypt,\@function,4
.align  16
padlock_${mode}_encrypt:
        push    %rbp
        push    %rbx

        xor     %eax,%eax
        test    \$15,$ctx
        jnz     .L${mode}_abort
        test    \$15,$len
        jnz     .L${mode}_abort
        lea     .Lpadlock_saved_context(%rip),%rax
        pushf
        cld
        call    _padlock_verify_ctx
        lea     16($ctx),$ctx           # control word
        xor     %eax,%eax
        xor     %ebx,%ebx
        testl   \$`1<<5`,($ctx)         # align bit in control word
        jnz     .L${mode}_aligned
        test    \$0x0f,$out
        setz    %al                     # !out_misaligned
        test    \$0x0f,$inp
        setz    %bl                     # !inp_misaligned
        test    %ebx,%eax
        jnz     .L${mode}_aligned
        neg     %rax
        mov     \$$PADLOCK_CHUNK,$chunk
        not     %rax                    # out_misaligned?-1:0
        lea     (%rsp),%rbp
        cmp     $chunk,$len
        cmovc   $len,$chunk             # chunk=len>PADLOCK_CHUNK?PADLOCK_CHUNK:len
        and     $chunk,%rax             # out_misaligned?chunk:0
        mov     $len,$chunk
        neg     %rax
        and     \$$PADLOCK_CHUNK-1,$chunk       # chunk%=PADLOCK_CHUNK
        lea     (%rax,%rbp),%rsp
        jmp     .L${mode}_loop
.align  16
.L${mode}_loop:
        mov     $out,%r8                # save parameters
        mov     $inp,%r9
        mov     $len,%r10
        mov     $chunk,$len
        mov     $chunk,%r11
        test    \$0x0f,$out             # out_misaligned
        cmovnz  %rsp,$out
        test    \$0x0f,$inp             # inp_misaligned
        jz      .L${mode}_inp_aligned
        shr     \$3,$len
        .byte   0xf3,0x48,0xa5          # rep movsq
        sub     $chunk,$out
        mov     $chunk,$len
        mov     $out,$inp
.L${mode}_inp_aligned:
        lea     -16($ctx),%rax          # ivp
        lea     16($ctx),%rbx           # key
        shr     \$4,$len
        .byte   0xf3,0x0f,0xa7,$opcode  # rep xcrypt*
___

    # Rest of the loop, scrubbing of the stack buffer, and the aligned
    # fast path up to and including its xcrypt instruction.
    my $loop_tail = <<___;
        mov     %r8,$out                # restore paramters
        mov     %r11,$chunk
        test    \$0x0f,$out
        jz      .L${mode}_out_aligned
        mov     $chunk,$len
        shr     \$3,$len
        lea     (%rsp),$inp
        .byte   0xf3,0x48,0xa5          # rep movsq
        sub     $chunk,$out
.L${mode}_out_aligned:
        mov     %r9,$inp
        mov     %r10,$len
        add     $chunk,$out
        add     $chunk,$inp
        sub     $chunk,$len
        mov     \$$PADLOCK_CHUNK,$chunk
        jnz     .L${mode}_loop

        test    \$0x0f,$out
        jz      .L${mode}_done

        mov     %rbp,$len
        mov     %rsp,$out
        sub     %rsp,$len
        xor     %rax,%rax
        shr     \$3,$len
        .byte   0xf3,0x48,0xab          # rep stosq
.L${mode}_done:
        lea     (%rbp),%rsp
        jmp     .L${mode}_exit

.align  16
.L${mode}_aligned:
        lea     -16($ctx),%rax          # ivp
        lea     16($ctx),%rbx           # key
        shr     \$4,$len                # len/=AES_BLOCK_SIZE
        .byte   0xf3,0x0f,0xa7,$opcode  # rep xcrypt*
___

    # Common exit: return 1, drop the pushed flags, restore registers.
    # The abort label is reached with %eax still 0.
    my $exit_path = <<___;
.L${mode}_exit:
        mov     \$1,%eax
        lea     8(%rsp),%rsp
.L${mode}_abort:
        pop     %rbx
        pop     %rbp
        ret
.size   padlock_${mode}_encrypt,.-padlock_${mode}_encrypt
___

    $code .= $loop_head . $iv_refresh . $loop_tail . $iv_refresh . $exit_path;
}
317
# Emit one padlock_<mode>_encrypt routine per entry: mode name followed by
# the last opcode byte of the matching "rep xcrypt*" instruction.
for my $mode_spec (["ecb",   0xc8],
                   ["cbc",   0xd0],
                   ["cfb",   0xe0],
                   ["ofb",   0xe8],
                   ["ctr16", 0xd8]) {
    generate_mode(@$mode_spec);
}
323
# Identification string, followed by the data-section slot that
# _padlock_verify_ctx uses to remember the last context pointer.
$code.=<<___;
.asciz  "VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"
.align  16
.data
.align  8
.Lpadlock_saved_context:
        .quad   0
___
# Replace every `...` span in the generated text with the value of the
# Perl expression it contains (vendor-string constants, 1<<5, ...).
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

print $code;

# STDOUT is a pipe into x86_64-xlate.pl: write errors and a failing
# translator only become visible when the pipe is closed, so treat a
# failed close as fatal rather than leaving a truncated output file.
close STDOUT or die "error closing STDOUT: $!";