ec/asm/ecp_nistz256-*.pl: addition to perform stricter reduction.
[openssl.git] / crypto / ec / asm / ecp_nistz256-x86.pl
index e9fa038a0b3407a437e4224edbc888e565b7fb38..b96b1aa0171ca5e6405ff2a3f4445e3aa0b5313a 100755 (executable)
@@ -284,18 +284,41 @@ for(1..37) {
        &mov    (&DWP(16,"edi"),"eax");
        &adc    ("ecx",&DWP(24,"ebp"));
        &mov    (&DWP(20,"edi"),"ebx");
+       &mov    ("esi",0);
        &adc    ("edx",&DWP(28,"ebp"));
        &mov    (&DWP(24,"edi"),"ecx");
-       &sbb    ("esi","esi");                  # broadcast carry bit
+       &adc    ("esi",0);
        &mov    (&DWP(28,"edi"),"edx");
 
-       # if a+b carries, subtract modulus.
+       # if a+b >= modulus, subtract modulus.
        #
+       # But since comparison implies subtraction, we subtract modulus
+       # to see if it borrows, and then subtract it for real if
+       # subtraction didn't borrow.
+
+       &mov    ("eax",&DWP(0,"edi"));
+       &mov    ("ebx",&DWP(4,"edi"));
+       &mov    ("ecx",&DWP(8,"edi"));
+       &sub    ("eax",-1);
+       &mov    ("edx",&DWP(12,"edi"));
+       &sbb    ("ebx",-1);
+       &mov    ("eax",&DWP(16,"edi"));
+       &sbb    ("ecx",-1);
+       &mov    ("ebx",&DWP(20,"edi"));
+       &sbb    ("edx",0);
+       &mov    ("ecx",&DWP(24,"edi"));
+       &sbb    ("eax",0);
+       &mov    ("edx",&DWP(28,"edi"));
+       &sbb    ("ebx",0);
+       &sbb    ("ecx",1);
+       &sbb    ("edx",-1);
+       &sbb    ("esi",0);
+
        # Note that because mod has special form, i.e. consists of
        # 0xffffffff, 1 and 0s, we can conditionally synthesize it by
-       # assigning carry bit to one register, %ebp, and its negative
-       # to another, %esi. But we started by calculating %esi...
+       # using borrow.
 
+       &not    ("esi");
        &mov    ("eax",&DWP(0,"edi"));
        &mov    ("ebp","esi");
        &mov    ("ebx",&DWP(4,"edi"));