X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fbn%2Fbn_lcl.h;h=d24cffdfae65c4da6a50c26691e2ce35fdbdd3c3;hp=9ce0bc910bcb2bcc5f122ac2f182896af0ef299e;hb=fb81ac5e6be4041e59df9362cd63880b21e2cafc;hpb=dcbe935125cd85ee7196ade26cea2491c454b17a diff --git a/crypto/bn/bn_lcl.h b/crypto/bn/bn_lcl.h index 9ce0bc910b..d24cffdfae 100644 --- a/crypto/bn/bn_lcl.h +++ b/crypto/bn/bn_lcl.h @@ -86,6 +86,54 @@ extern "C" { #endif #endif +#if !defined(NO_ASM) && !defined(PEDANTIC) +/* + * BN_UMULT_HIGH section. + * + * No, I'm not trying to overwhelm you when stating that the + * product of N-bit numbers is 2*N bits wide:-) No, I don't expect + * you to be impressed when I say that if the compiler doesn't + * support 2*N integer type, then you have to replace every N*N + * multiplication with 4 (N/2)*(N/2) accompanied by some shifts + * and additions which unavoidably results in severe performance + * penalties. Of course provided that the hardware is capable of + * producing 2*N result... That's when you normally start + * considering assembler implementation. However! It should be + * pointed out that some CPUs (most notably Alpha, PowerPC and + * upcoming IA-64 family:-) provide *separate* instruction + * calculating the upper half of the product placing the result + * into a general purpose register. Now *if* the compiler supports + * inline assembler, then it's not impossible to implement the + * "bignum" routines (and have the compiler optimize 'em) + * exhibiting "native" performance in C. That's what BN_UMULT_HIGH + * macro is about:-) + * + * + */ +# if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) +# if defined(__DECC) +# include +# define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b)) +# elif defined(__GNUC__) +# define BN_UMULT_HIGH(a,b) ({ \ + register BN_ULONG ret; \ + asm ("umulh %1,%2,%0" \ + : "=r"(ret) \ + : "r"(a), "r"(b)); \ + ret; }) +# endif /* compiler */ +# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG) +# if defined(__GNUC__) +# define BN_UMULT_HIGH(a,b) ({ \ + register BN_ULONG ret; \ + asm ("mulhdu %0,%1,%2" \ + : "=r"(ret) \ + : "r"(a), "r"(b)); \ + ret; }) +# endif /* compiler */ +# endif /* cpu */ +#endif /* NO_ASM */ + /************************************************************* * Using the long long type */ @@ -151,6 +199,43 @@ extern "C" { (c)= Hw(t); \ } +#define sqr(r0,r1,a) { \ + BN_ULLONG t; \ + t=(BN_ULLONG)(a)*(a); \ + (r0)=Lw(t); \ + (r1)=Hw(t); ] + } + +#elif defined(BN_UMULT_HIGH) +#define mul_add(r,a,w,c) { \ + BN_ULONG high,low,ret,tmp=(a); \ + ret = (r); \ + high= BN_UMULT_HIGH(w,tmp); \ + ret += (c); \ + low = (w) * tmp; \ + (c) = (ret<(c))?1:0; \ + (c) += high; \ + ret += low; \ + (c) += (ret