bn/asm/x86[_64]-mont*.pl: complement alloca with page-walking.
[openssl.git] / crypto / bn / asm / x86-mont.pl
index 5cd3cd2ed50a968fa2ab60bcd361b185dce923ac..89f4de61e8964fce2793804a78c313fde9e27a3c 100755 (executable)
@@ -85,6 +85,21 @@ $frame=32;                           # size of above frame rounded up to 16n
 
        &and    ("esp",-64);            # align to cache line
 
+       # Some OSes, *cough*-dows, insist on the stack being "wired" to
+       # physical memory in a strictly sequential manner, i.e. if a stack
+       # allocation spans two pages, then a reference to the farthest one
+       # can be punished by SEGV. But page walking does good even on
+       # other OSes, because it guarantees that a villain thread hits
+       # the guard page before it can do damage to an innocent one...
+       &mov    ("eax","ebp");
+       &sub    ("eax","esp");
+       &and    ("eax",-4096);
+&set_label("page_walk");
+       &mov    ("edx",&DWP(0,"esp","eax"));
+       &sub    ("eax",4096);
+       &data_byte(0x2e);
+       &jnc    (&label("page_walk"));
+
        ################################# load argument block...
        &mov    ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
        &mov    ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
@@ -527,8 +542,10 @@ $sbit=$num;
        &jle    (&label("sqradd"));
 
        &mov    ($carry,"edx");
-       &lea    ("edx",&DWP(0,$sbit,"edx",2));
+       &add    ("edx","edx");
        &shr    ($carry,31);
+       &add    ("edx",$sbit);
+       &adc    ($carry,0);
 &set_label("sqrlast");
        &mov    ($word,$_n0);
        &mov    ($inp,$_np);