+ &shl (&LB($nlo),4);
+ &and ($rem,0xf);
+ &psrlq ($Zlo,4);
+ &and ($nhi,0xf0);
+ &movq ($tmp,$Zhi);
+ &psrlq ($Zhi,4);
+ &pxor ($Zlo,&QWP(8,$Htbl,$nlo));
+ &psllq ($tmp,60);
+ &pxor ($Zhi,&QWP(0,$rem_4bit,$rem,8));
+ &movd ($rem,$Zlo);
+ &pxor ($Zhi,&QWP(0,$Htbl,$nlo));
+ &pxor ($Zlo,$tmp);
+
+ &psrlq ($Zlo,4);
+ &and ($rem,0xf);
+ &pxor ($Zlo,&QWP(8,$Htbl,$nhi));
+ &movq ($tmp,$Zhi);
+ &psrlq ($Zhi,4);
+ &psllq ($tmp,60);
+ &pxor ($Zhi,&QWP(0,$rem_4bit,$rem,8));
+ &movd ($rem,$Zlo);
+ &pxor ($Zhi,&QWP(0,$Htbl,$nhi));
+ &mov ($nhi,$nlo);
+ &pxor ($Zlo,$tmp);
+