+
+ lea -16($ctx),%rax # ivp
+ lea 16($ctx),%rbx # key
+ shr \$4,$len # len/=AES_BLOCK_SIZE
+ .byte 0xf3,0x0f,0xa7,$opcode # rep xcrypt*
+
+ mov -4($ctx),%eax # pull 32-bit counter
+ bswap %eax
+ add \$0x10000,%eax
+ bswap %eax
+ mov %eax,-4($ctx)
+
+ mov %r10,$len # restore paramters
+ sub %r11,$len
+ mov \$`16*0x10000`,$chunk
+ jz .L${mode}_exit
+ cmp $chunk,$len
+ jae .L${mode}_aligned_loop
+
+.L${mode}_aligned_skip:
+___
+$code.=<<___ if ($PADLOCK_PREFETCH{$mode});
+ lea ($inp,$len),%rbp
+ neg %rbp
+ and \$0xfff,%rbp # distance to page boundary
+ xor %eax,%eax
+ cmp \$$PADLOCK_PREFETCH{$mode},%rbp
+ mov \$$PADLOCK_PREFETCH{$mode}-1,%rbp
+ cmovae %rax,%rbp
+ and $len,%rbp # remainder
+ sub %rbp,$len
+ jz .L${mode}_aligned_tail