# Sandy Bridge 17.4 14.2(+23%) 11.6(+50%(**)) 11.2 8.10(+38%(**))
# Ivy Bridge 12.6 10.5(+20%) 10.3(+22%) 8.17 7.22(+13%)
# Haswell 12.2 9.28(+31%) 7.80(+56%) 7.66 5.40(+42%)
+# Skylake 11.4 9.03(+26%) 7.70(+48%) 7.25 5.20(+40%)
# Bulldozer 21.1 13.6(+54%) 13.6(+54%(***)) 13.5 8.58(+57%)
# VIA Nano 23.0 16.5(+39%) - 14.7 -
# Atom 23.0 18.9(+22%) - 14.7 -
+# Silvermont 27.4 20.6(+33%) - 17.5 -
#
# (*) whichever best applicable;
# (**) the switch from ror to shrd accounts for a fair share of the improvement;
$avx = ($1>=10) + ($1>=11);
}
+# Probe $ENV{CC} for clang/LLVM, but only when $avx has not been set yet.
+# The major version may have more than one digit (clang 10 and later), so
+# match [0-9]+ instead of a single [3-9]; releases older than 3.0 still end
+# up with $avx = 0 through the numeric comparisons below.
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([0-9]+\.[0-9]+)/) {
+ $avx = ($2>=3.0) + ($2>3.0);	# 3.0 -> 1, anything newer -> 2
+}
+
# $shaext gates every "if ($SZ==4 && $shaext)" code fragment below.
$shaext=1; ### set to zero if compiling for 1.0.1
# When $shaext is switched off, any non-zero $avx level is capped at 1.
$avx=1 if (!$shaext && $avx);
pop %rsi
ret
.size se_handler,.-se_handler
+___
# Appended only for $SZ==4 with $shaext enabled: shaext_handler is a stub
# that jumps straight to .Lin_prologue (label defined outside this chunk).
+$code.=<<___ if ($SZ==4 && $shaext);
.type shaext_handler,\@abi-omnipotent
.align 16
shaext_handler:
jmp .Lin_prologue
.size shaext_handler,.-shaext_handler
+___
# Unconditionally open the .pdata section and emit the begin/end/info RVA
# triple for $func.
+$code.=<<___;
.section .pdata
.align 4
.rva .LSEH_begin_$func
.rva .LSEH_end_$func
.rva .LSEH_info_$func
___
-$code.=<<___ if ($SZ==4 && $shext);
+$code.=<<___ if ($SZ==4 && $shaext);
.rva .LSEH_begin_${func}_shaext
.rva .LSEH_end_${func}_shaext
.rva .LSEH_info_${func}_shaext
.rva se_handler
.rva .Lprologue,.Lepilogue # HandlerData[]
___
# Record labelled .LSEH_info_${func}_shaext, which references shaext_handler
# by RVA. The guard now also requires $shaext, matching the condition under
# which shaext_handler itself is emitted.
-$code.=<<___ if ($SZ==4);
+$code.=<<___ if ($SZ==4 && $shaext);
.LSEH_info_${func}_shaext:
.byte 9,0,0,0
.rva shaext_handler
+___
+$code.=<<___ if ($SZ==4);
.LSEH_info_${func}_ssse3:
.byte 9,0,0,0
.rva se_handler