+&set_label("${mode}_short",16);  # short-input path: copy $len bytes from $inp into a 16-byte-aligned stack buffer, then continue at ${mode}_loop (16 is presumably the label alignment -- confirm against set_label)
+ &xor ("eax","eax");  # eax = 0
+ &lea ("ebp",&DWP(-24,"esp"));  # ebp = esp - 24; NOTE(review): presumably used later (outside this chunk) to restore esp -- confirm
+ &sub ("eax",$len);  # eax = -$len
+ &lea ("esp",&DWP(0,"eax","ebp"));  # esp = ebp - $len, i.e. reserve $len bytes below the old esp - 24
+ &and ("esp",-16);  # round esp down to a 16-byte boundary so the movaps stores below are aligned
+ &xor ($chunk,$chunk);  # $chunk = 0; reused here as the running byte offset of the copy
+&set_label("${mode}_short_copy");  # copy loop: one 16-byte unit per iteration; runs at least once (assumes $len != 0 -- TODO confirm)
+ &movups ("xmm0",&QWP(0,$inp,$chunk));  # unaligned 16-byte load from $inp + $chunk; whole units only, so $len is presumably a multiple of 16 -- TODO confirm
+ &lea ($chunk,&DWP(16,$chunk));  # $chunk += 16 (offset of the next unit)
+ &cmp ($len,$chunk);  # sets flags for the ja below: is any input left beyond the new offset?
+ &movaps (&QWP(-16,"esp",$chunk),"xmm0");  # aligned store to esp + $chunk - 16 ($chunk was already advanced); movaps leaves the cmp flags untouched
+ &ja (&label("${mode}_short_copy"));  # loop while $len > $chunk (unsigned compare)
+ &mov ($inp,"esp");  # from here on the input pointer refers to the aligned stack copy
+ &mov ($chunk,$len);  # $chunk = $len: the whole (short) input is handed over as one chunk
+ &jmp (&label("${mode}_loop"));  # rejoin the main loop; label defined outside this chunk
+