3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
12 # Assembler helpers for Padlock engine. See also e_padlock-x86.pl for
# Command-line handling: when the flavour argument contains a dot it is
# really the output file name, so shift it over and leave the flavour unset.
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 conventions are selected for nasm/masm/mingw64 flavours, or whenever
# the output file carries an .asm extension.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate the x86_64-xlate.pl translator: first in the directory this script
# was started from, then in ../../crypto/perlasm relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Pipe everything written to STDOUT through the translator.  The interpreter
# path, the translator path and the output file name are double-quoted so
# that directories containing spaces survive the shell, unset arguments are
# simply omitted, and a failure to start the pipe is reported instead of
# being silently ignored.
{
    my $cmd = "\"$^X\" \"$xlate\"";
    $cmd .= " $flavour"    if (defined($flavour) && length($flavour));
    $cmd .= " \"$output\"" if (defined($output)  && length($output));
    open STDOUT,"| $cmd" or die "can't call $xlate: $!";
}
# Upper bound on the chunk handled per iteration by the generated
# padlock_*_encrypt code.  The value is emitted as an immediate and is also
# used as a mask ($PADLOCK_CHUNK-1), which is why it has to be a power of 2.
30 $PADLOCK_CHUNK=512; # Must be a power of 2 between 32 and 2^20
# Register names for the first four arguments, chosen by the $win64 flag.
# NOTE(review): their consumers are not visible in this excerpt - presumably
# they are bound to the named parameters of the individual routines.
38 ($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
39 ("%rdi","%rsi","%rdx","%rcx"); # Unix order
42 .globl padlock_capability
43 .type padlock_capability,\@abi-omnipotent
50 cmp \$`"0x".unpack("H*",'tneC')`,%ebx
52 cmp \$`"0x".unpack("H*",'Hrua')`,%edx
54 cmp \$`"0x".unpack("H*",'slua')`,%ecx
66 or \$0x10,%eax # set Nano bit#4
70 .size padlock_capability,.-padlock_capability
72 .globl padlock_key_bswap
73 .type padlock_key_bswap,\@abi-omnipotent,0
85 .size padlock_key_bswap,.-padlock_key_bswap
87 .globl padlock_verify_context
88 .type padlock_verify_context,\@abi-omnipotent
90 padlock_verify_context:
93 lea .Lpadlock_saved_context(%rip),%rax
94 call _padlock_verify_ctx
97 .size padlock_verify_context,.-padlock_verify_context
99 .type _padlock_verify_ctx,\@abi-omnipotent
112 .size _padlock_verify_ctx,.-_padlock_verify_ctx
114 .globl padlock_reload_key
115 .type padlock_reload_key,\@abi-omnipotent
121 .size padlock_reload_key,.-padlock_reload_key
123 .globl padlock_aes_block
124 .type padlock_aes_block,\@function,3
129 lea 32($ctx),%rbx # key
130 lea 16($ctx),$ctx # control word
131 .byte 0xf3,0x0f,0xa7,0xc8 # rep xcryptecb
134 .size padlock_aes_block,.-padlock_aes_block
136 .globl padlock_xstore
137 .type padlock_xstore,\@function,2
141 .byte 0x0f,0xa7,0xc0 # xstore
143 .size padlock_xstore,.-padlock_xstore
145 .globl padlock_sha1_oneshot
146 .type padlock_sha1_oneshot,\@function,3
148 padlock_sha1_oneshot:
150 mov %rdi,%rdx # put aside %rdi
151 movups (%rdi),%xmm0 # copy-in context
158 .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1
161 movups %xmm0,(%rdx) # copy-out context
164 .size padlock_sha1_oneshot,.-padlock_sha1_oneshot
166 .globl padlock_sha1_blocks
167 .type padlock_sha1_blocks,\@function,3
171 mov %rdi,%rdx # put aside %rdi
172 movups (%rdi),%xmm0 # copy-in context
179 .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1
182 movups %xmm0,(%rdx) # copy-out context
185 .size padlock_sha1_blocks,.-padlock_sha1_blocks
187 .globl padlock_sha256_oneshot
188 .type padlock_sha256_oneshot,\@function,3
190 padlock_sha256_oneshot:
192 mov %rdi,%rdx # put aside %rdi
193 movups (%rdi),%xmm0 # copy-in context
195 movups 16(%rdi),%xmm1
198 movaps %xmm1,16(%rsp)
200 .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256
202 movaps 16(%rsp),%xmm1
203 movups %xmm0,(%rdx) # copy-out context
204 movups %xmm1,16(%rdx)
206 .size padlock_sha256_oneshot,.-padlock_sha256_oneshot
208 .globl padlock_sha256_blocks
209 .type padlock_sha256_blocks,\@function,3
211 padlock_sha256_blocks:
213 mov %rdi,%rdx # put aside %rdi
214 movups (%rdi),%xmm0 # copy-in context
216 movups 16(%rdi),%xmm1
219 movaps %xmm1,16(%rsp)
221 .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256
223 movaps 16(%rsp),%xmm1
224 movups %xmm0,(%rdx) # copy-out context
225 movups %xmm1,16(%rdx)
227 .size padlock_sha256_blocks,.-padlock_sha256_blocks
229 .globl padlock_sha512_blocks
230 .type padlock_sha512_blocks,\@function,3
232 padlock_sha512_blocks:
234 mov %rdi,%rdx # put aside %rdi
235 movups (%rdi),%xmm0 # copy-in context
237 movups 16(%rdi),%xmm1
238 movups 32(%rdi),%xmm2
239 movups 48(%rdi),%xmm3
242 movaps %xmm1,16(%rsp)
243 movaps %xmm2,32(%rsp)
244 movaps %xmm3,48(%rsp)
245 .byte 0xf3,0x0f,0xa6,0xe0 # rep xsha512
247 movaps 16(%rsp),%xmm1
248 movaps 32(%rsp),%xmm2
249 movaps 48(%rsp),%xmm3
250 movups %xmm0,(%rdx) # copy-out context
251 movups %xmm1,16(%rdx)
252 movups %xmm2,32(%rdx)
253 movups %xmm3,48(%rdx)
255 .size padlock_sha512_blocks,.-padlock_sha512_blocks
259 my ($mode,$opcode) = @_;
260 # int padlock_$mode_encrypt(void *out, const void *inp,
261 # struct padlock_cipher_data *ctx, size_t len);
263 .globl padlock_${mode}_encrypt
264 .type padlock_${mode}_encrypt,\@function,4
266 padlock_${mode}_encrypt:
275 lea .Lpadlock_saved_context(%rip),%rax
278 call _padlock_verify_ctx
279 lea 16($ctx),$ctx # control word
282 testl \$`1<<5`,($ctx) # align bit in control word
283 jnz .L${mode}_aligned
285 setz %al # !out_misaligned
287 setz %bl # !inp_misaligned
289 jnz .L${mode}_aligned
291 mov \$$PADLOCK_CHUNK,$chunk
292 not %rax # out_misaligned?-1:0
295 cmovc $len,$chunk # chunk=len>PADLOCK_CHUNK?PADLOCK_CHUNK:len
296 and $chunk,%rax # out_misaligned?chunk:0
299 and \$$PADLOCK_CHUNK-1,$chunk # chunk%=PADLOCK_CHUNK
302 $code.=<<___ if ($mode eq "ctr32");
303 mov -4($ctx),%eax # pull 32-bit counter
306 and \$`$PADLOCK_CHUNK/16-1`,%eax
310 cmova %rax,$chunk # don't let counter cross PADLOCK_CHUNK
316 cmp $len,$chunk # ctr32 artefact
317 cmova $len,$chunk # ctr32 artefact
318 mov $out,%r8 # save parameters
323 test \$0x0f,$out # out_misaligned
325 test \$0x0f,$inp # inp_misaligned
326 jz .L${mode}_inp_aligned
328 .byte 0xf3,0x48,0xa5 # rep movsq
332 .L${mode}_inp_aligned:
333 lea -16($ctx),%rax # ivp
334 lea 16($ctx),%rbx # key
336 .byte 0xf3,0x0f,0xa7,$opcode # rep xcrypt*
338 $code.=<<___ if ($mode !~ /ecb|ctr/);
340 movdqa %xmm0,-16($ctx) # copy [or refresh] iv
342 $code.=<<___ if ($mode eq "ctr32");
343 mov -4($ctx),%eax # pull 32-bit counter
344 test \$0xffff0000,%eax
345 jnz .L${mode}_no_corr
353 mov %r8,$out # restore parameters
356 jz .L${mode}_out_aligned
360 .byte 0xf3,0x48,0xa5 # rep movsq
362 .L${mode}_out_aligned:
368 mov \$$PADLOCK_CHUNK,$chunk
379 .byte 0xf3,0x48,0xab # rep stosq
387 $code.=<<___ if ($mode eq "ctr32");
388 mov -4($ctx),%eax # pull 32-bit counter
389 mov \$`16*0x10000`,$chunk
395 jz .L${mode}_aligned_loop
398 cmova %rax,$chunk # don't let counter cross 2^16
399 jmp .L${mode}_aligned_loop
401 .L${mode}_aligned_loop:
404 mov $len,%r10 # save parameters
409 lea -16($ctx),%rax # ivp
410 lea 16($ctx),%rbx # key
411 shr \$4,$len # len/=AES_BLOCK_SIZE
412 .byte 0xf3,0x0f,0xa7,$opcode # rep xcrypt*
414 $code.=<<___ if ($mode !~ /ecb|ctr/);
416 movdqa %xmm0,-16($ctx) # copy [or refresh] iv
418 $code.=<<___ if ($mode eq "ctr32");
419 mov -4($ctx),%eax # pull 32-bit counter
425 mov %r11,$chunk # restore parameters
428 mov \$`16*0x10000`,$chunk
429 jnz .L${mode}_aligned_loop
439 .size padlock_${mode}_encrypt,.-padlock_${mode}_encrypt
# Emit one padlock_<mode>_encrypt routine per cipher mode.  The first
# argument is the mode name spliced into the symbol and label names; the
# second is interpolated as the last byte of the "rep xcrypt*" encoding
# (.byte 0xf3,0x0f,0xa7,$opcode) in the generated code.
443 &generate_mode("ecb",0xc8);
444 &generate_mode("cbc",0xd0);
445 &generate_mode("cfb",0xe0);
446 &generate_mode("ofb",0xe8);
447 &generate_mode("ctr32",0xd8); # all 64-bit CPUs have working CTR...
450 .asciz "VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"
454 .Lpadlock_saved_context:
# Expand every backtick-delimited span in $code by evaluating its contents
# as Perl and substituting the result, e.g. `1<<5` or
# `"0x".unpack("H*",'tneC')` become plain numeric/hex immediates.
457 $code =~ s/\`([^\`]*)\`/eval($1)/gem;