3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
12 # Assembler helpers for Padlock engine.
# A first argument that contains a dot is taken to be the output file name
# rather than an assembler flavour.
16 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
# Use the Win64 calling convention for nasm/masm/mingw64 flavours, or when
# the output file name ends in ".asm".
18 $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
# Locate the x86_64-xlate.pl translator: first in this script's directory,
# then in ../../crypto/perlasm relative to it.
20 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
21 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
22 ( $xlate="${dir}../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or
23 die "can't locate x86_64-xlate.pl";
# From here on STDOUT is a pipe into the translator. Failing to start it is
# fatal, rather than carrying on with an unusable output handle.
25 open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
29 $PADLOCK_CHUNK=512; # Must be a power of 2 larger than 16
# Registers that carry the first four integer arguments under the selected ABI.
37 ($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
38 ("%rdi","%rsi","%rdx","%rcx"); # Unix order
41 .globl padlock_capability
42 .type padlock_capability,\@abi-omnipotent
49 cmp \$`"0x".unpack("H*",'tneC')`,%ebx
51 cmp \$`"0x".unpack("H*",'Hrua')`,%edx
53 cmp \$`"0x".unpack("H*",'slua')`,%ecx
65 or \$0x10,%eax # set Nano bit#4
69 .size padlock_capability,.-padlock_capability
71 .globl padlock_key_bswap
72 .type padlock_key_bswap,\@abi-omnipotent,0
84 .size padlock_key_bswap,.-padlock_key_bswap
86 .globl padlock_verify_context
87 .type padlock_verify_context,\@abi-omnipotent
89 padlock_verify_context:
92 lea .Lpadlock_saved_context(%rip),%rax
93 call _padlock_verify_ctx
96 .size padlock_verify_context,.-padlock_verify_context
98 .type _padlock_verify_ctx,\@abi-omnipotent
111 .size _padlock_verify_ctx,.-_padlock_verify_ctx
113 .globl padlock_reload_key
114 .type padlock_reload_key,\@abi-omnipotent
120 .size padlock_reload_key,.-padlock_reload_key
122 .globl padlock_aes_block
123 .type padlock_aes_block,\@function,3
128 lea 32($ctx),%rbx # key
129 lea 16($ctx),$ctx # control word
130 .byte 0xf3,0x0f,0xa7,0xc8 # rep xcryptecb
133 .size padlock_aes_block,.-padlock_aes_block
135 .globl padlock_xstore
136 .type padlock_xstore,\@function,2
140 .byte 0x0f,0xa7,0xc0 # xstore
142 .size padlock_xstore,.-padlock_xstore
144 .globl padlock_sha1_oneshot
145 .type padlock_sha1_oneshot,\@function,3
147 padlock_sha1_oneshot:
150 .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1
152 .size padlock_sha1_oneshot,.-padlock_sha1_oneshot
154 .globl padlock_sha1_blocks
155 .type padlock_sha1_blocks,\@function,3
160 .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1
162 .size padlock_sha1_blocks,.-padlock_sha1_blocks
164 .globl padlock_sha256_oneshot
165 .type padlock_sha256_oneshot,\@function,3
167 padlock_sha256_oneshot:
170 .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256
172 .size padlock_sha256_oneshot,.-padlock_sha256_oneshot
174 .globl padlock_sha256_blocks
175 .type padlock_sha256_blocks,\@function,3
177 padlock_sha256_blocks:
180 .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256
182 .size padlock_sha256_blocks,.-padlock_sha256_blocks
184 .globl padlock_sha512_blocks
185 .type padlock_sha512_blocks,\@function,3
187 padlock_sha512_blocks:
189 .byte 0xf3,0x0f,0xa6,0xe0 # rep xsha512
191 .size padlock_sha512_blocks,.-padlock_sha512_blocks
195 my ($mode,$opcode) = @_;
196 # int padlock_$mode_encrypt(void *out, const void *inp,
197 # struct padlock_cipher_data *ctx, size_t len);
199 .globl padlock_${mode}_encrypt
200 .type padlock_${mode}_encrypt,\@function,4
202 padlock_${mode}_encrypt:
211 lea .Lpadlock_saved_context(%rip),%rax
214 call _padlock_verify_ctx
215 lea 16($ctx),$ctx # control word
218 testl \$`1<<5`,($ctx) # align bit in control word
219 jnz .L${mode}_aligned
221 setz %al # !out_misaligned
223 setz %bl # !inp_misaligned
225 jnz .L${mode}_aligned
227 mov \$$PADLOCK_CHUNK,$chunk
228 not %rax # out_misaligned?-1:0
231 cmovc $len,$chunk # chunk=len>PADLOCK_CHUNK?PADLOCK_CHUNK:len
232 and $chunk,%rax # out_misaligned?chunk:0
235 and \$$PADLOCK_CHUNK-1,$chunk # chunk%=PADLOCK_CHUNK
240 mov $out,%r8 # save parameters
245 test \$0x0f,$out # out_misaligned
247 test \$0x0f,$inp # inp_misaligned
248 jz .L${mode}_inp_aligned
250 .byte 0xf3,0x48,0xa5 # rep movsq
254 .L${mode}_inp_aligned:
255 lea -16($ctx),%rax # ivp
256 lea 16($ctx),%rbx # key
258 .byte 0xf3,0x0f,0xa7,$opcode # rep xcrypt*
260 $code.=<<___ if ($mode !~ /ecb|ctr/);
262 movdqa %xmm0,-16($ctx) # copy [or refresh] iv
265 mov %r8,$out # restore parameters
268 jz .L${mode}_out_aligned
272 .byte 0xf3,0x48,0xa5 # rep movsq
274 .L${mode}_out_aligned:
280 mov \$$PADLOCK_CHUNK,$chunk
291 .byte 0xf3,0x48,0xab # rep stosq
298 lea -16($ctx),%rax # ivp
299 lea 16($ctx),%rbx # key
300 shr \$4,$len # len/=AES_BLOCK_SIZE
301 .byte 0xf3,0x0f,0xa7,$opcode # rep xcrypt*
303 $code.=<<___ if ($mode !~ /ecb|ctr/);
305 movdqa %xmm0,-16($ctx) # copy [or refresh] iv
315 .size padlock_${mode}_encrypt,.-padlock_${mode}_encrypt
# Emit one padlock_<mode>_encrypt routine per cipher mode. The second
# argument is the final byte of the "rep xcrypt*" opcode that generate_mode
# places in the .byte sequence for that mode.
319 &generate_mode("ecb",0xc8);
320 &generate_mode("cbc",0xd0);
321 &generate_mode("cfb",0xe0);
322 &generate_mode("ofb",0xe8);
323 &generate_mode("ctr16",0xd8);
326 .asciz "VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"
330 .Lpadlock_saved_context:
# Replace every `...` span in the generated code with the result of
# evaluating its contents as a Perl expression (for example the constants
# written with unpack() or shift expressions in the assembly text).
333 $code =~ s/\`([^\`]*)\`/eval($1)/gem;