$code.=<<___;
.extern OPENSSL_ia32cap_P
# Capability probe: returns non-zero in %eax iff the CPU advertises all of
# AVX512F, AVX512DQ, AVX512IFMA and AVX512VL (bits tested below).
# Rename: rsaz_avx512ifma_eligible -> ossl_rsaz_avx512ifma_eligible.
-.globl rsaz_avx512ifma_eligible
-.type rsaz_avx512ifma_eligible,\@abi-omnipotent
+.globl ossl_rsaz_avx512ifma_eligible
+.type ossl_rsaz_avx512ifma_eligible,\@abi-omnipotent
.align 32
-rsaz_avx512ifma_eligible:
+ossl_rsaz_avx512ifma_eligible:
# Load the third 32-bit word (offset +8) of the capability vector, RIP-relative.
mov OPENSSL_ia32cap_P+8(%rip), %ecx
xor %eax,%eax # default return: 0 (not eligible)
and \$`1<<31|1<<21|1<<17|1<<16`, %ecx # avx512vl + avx512ifma + avx512dq + avx512f
cmp \$`1<<31|1<<21|1<<17|1<<16`, %ecx
cmove %ecx,%eax # only if ALL four bits are set, return the (non-zero) mask
ret
-.size rsaz_avx512ifma_eligible, .-rsaz_avx512ifma_eligible
+.size ossl_rsaz_avx512ifma_eligible, .-ossl_rsaz_avx512ifma_eligible
___
###############################################################################
# This post-condition is true, provided the correct parameter |s| is chosen, i.e.
# s >= n + 2 * k, which matches our case: 1040 > 1024 + 2 * 1.
#
-# void RSAZ_amm52x20_x1_256(BN_ULONG *res,
+# void ossl_rsaz_amm52x20_x1_256(BN_ULONG *res,
# const BN_ULONG *a,
# const BN_ULONG *b,
# const BN_ULONG *m,
$code.=<<___;
.text
# Almost Montgomery Multiplication (AMM), 20 digits in radix 2^52, single input.
# Rename: RSAZ_amm52x20_x1_256 -> ossl_rsaz_amm52x20_x1_256.
-.globl RSAZ_amm52x20_x1_256
-.type RSAZ_amm52x20_x1_256,\@function,5
+.globl ossl_rsaz_amm52x20_x1_256
+.type ossl_rsaz_amm52x20_x1_256,\@function,5
.align 32
-RSAZ_amm52x20_x1_256:
+ossl_rsaz_amm52x20_x1_256:
.cfi_startproc
endbranch
push %rbx
# NOTE(review): the function body is elided in this excerpt -- as shown, the
# pushed %rbx is never popped before `ret`; confirm against the full source.
.Lrsaz_amm52x20_x1_256_epilogue:
ret
.cfi_endproc
-.size RSAZ_amm52x20_x1_256, .-RSAZ_amm52x20_x1_256
+.size ossl_rsaz_amm52x20_x1_256, .-ossl_rsaz_amm52x20_x1_256
___
$code.=<<___;
###############################################################################
# Dual Almost Montgomery Multiplication for 20-digit number in radix 2^52
#
-# See description of RSAZ_amm52x20_x1_256() above for details about Almost
+# See description of ossl_rsaz_amm52x20_x1_256() above for details about Almost
# Montgomery Multiplication algorithm and function input parameters description.
#
# This function does two AMMs for two independent inputs, hence dual.
#
-# void RSAZ_amm52x20_x2_256(BN_ULONG out[2][20],
+# void ossl_rsaz_amm52x20_x2_256(BN_ULONG out[2][20],
# const BN_ULONG a[2][20],
# const BN_ULONG b[2][20],
# const BN_ULONG m[2][20],
$code.=<<___;
.text
# Rename: RSAZ_amm52x20_x2_256 -> ossl_rsaz_amm52x20_x2_256.
-.globl RSAZ_amm52x20_x2_256
-.type RSAZ_amm52x20_x2_256,\@function,5
+.globl ossl_rsaz_amm52x20_x2_256
+.type ossl_rsaz_amm52x20_x2_256,\@function,5
.align 32
-RSAZ_amm52x20_x2_256:
+ossl_rsaz_amm52x20_x2_256:
.cfi_startproc
endbranch
push %rbx
# NOTE(review): function body elided in this excerpt -- the pushed %rbx has
# no matching pop before `ret` as shown; confirm against the full source.
.Lrsaz_amm52x20_x2_256_epilogue:
ret
.cfi_endproc
-.size RSAZ_amm52x20_x2_256, .-RSAZ_amm52x20_x2_256
+.size ossl_rsaz_amm52x20_x2_256, .-ossl_rsaz_amm52x20_x2_256
___
}
# Win64 structured exception handling (SEH): .pdata maps each function's code
# range (begin/end RVAs) to its unwind-info record; .xdata holds the unwind
# descriptors. The RVA labels below are renamed to match the new ossl_ symbols.
.section .pdata
.align 4
- .rva .LSEH_begin_RSAZ_amm52x20_x1_256
- .rva .LSEH_end_RSAZ_amm52x20_x1_256
- .rva .LSEH_info_RSAZ_amm52x20_x1_256
+ .rva .LSEH_begin_ossl_rsaz_amm52x20_x1_256
+ .rva .LSEH_end_ossl_rsaz_amm52x20_x1_256
+ .rva .LSEH_info_ossl_rsaz_amm52x20_x1_256
- .rva .LSEH_begin_RSAZ_amm52x20_x2_256
- .rva .LSEH_end_RSAZ_amm52x20_x2_256
- .rva .LSEH_info_RSAZ_amm52x20_x2_256
+ .rva .LSEH_begin_ossl_rsaz_amm52x20_x2_256
+ .rva .LSEH_end_ossl_rsaz_amm52x20_x2_256
+ .rva .LSEH_info_ossl_rsaz_amm52x20_x2_256
 .rva .LSEH_begin_ossl_extract_multiplier_2x20_win5
 .rva .LSEH_end_ossl_extract_multiplier_2x20_win5
.section .xdata
.align 8
-.LSEH_info_RSAZ_amm52x20_x1_256:
+.LSEH_info_ossl_rsaz_amm52x20_x1_256:
# .byte 9 selects the language-specific handler flavor; rsaz_def_handler
# restores the saved non-volatile state between body and epilogue labels.
.byte 9,0,0,0
.rva rsaz_def_handler
.rva .Lrsaz_amm52x20_x1_256_body,.Lrsaz_amm52x20_x1_256_epilogue
-.LSEH_info_RSAZ_amm52x20_x2_256:
+.LSEH_info_ossl_rsaz_amm52x20_x2_256:
.byte 9,0,0,0
.rva rsaz_def_handler
.rva .Lrsaz_amm52x20_x2_256_body,.Lrsaz_amm52x20_x2_256_epilogue
$code.=<<___;
.text
# Fallback stubs for builds without AVX512 assembly support: the eligibility
# probe always reports 0, so the IFMA entry points below must never be reached;
# if they are, they trap with ud2.
-.globl rsaz_avx512ifma_eligible
-.type rsaz_avx512ifma_eligible,\@abi-omnipotent
-rsaz_avx512ifma_eligible:
+.globl ossl_rsaz_avx512ifma_eligible
+.type ossl_rsaz_avx512ifma_eligible,\@abi-omnipotent
+ossl_rsaz_avx512ifma_eligible:
xor %eax,%eax # always "not eligible"
ret
-.size rsaz_avx512ifma_eligible, .-rsaz_avx512ifma_eligible
+.size ossl_rsaz_avx512ifma_eligible, .-ossl_rsaz_avx512ifma_eligible
-.globl RSAZ_amm52x20_x1_256
-.globl RSAZ_amm52x20_x2_256
+.globl ossl_rsaz_amm52x20_x1_256
+.globl ossl_rsaz_amm52x20_x2_256
.globl ossl_extract_multiplier_2x20_win5
-.type RSAZ_amm52x20_x1_256,\@abi-omnipotent
-RSAZ_amm52x20_x1_256:
-RSAZ_amm52x20_x2_256:
+.type ossl_rsaz_amm52x20_x1_256,\@abi-omnipotent
+ossl_rsaz_amm52x20_x1_256:
+ossl_rsaz_amm52x20_x2_256:
# All three stubs share one body: an undefined-instruction trap.
ossl_extract_multiplier_2x20_win5:
.byte 0x0f,0x0b # ud2
ret
-.size RSAZ_amm52x20_x1_256, .-RSAZ_amm52x20_x1_256
+.size ossl_rsaz_amm52x20_x1_256, .-ossl_rsaz_amm52x20_x1_256
___
}}}
BN_MONT_CTX *mont1 = NULL;
BN_MONT_CTX *mont2 = NULL;
/*
 * Fast path gate: take the AVX512-IFMA route only when the CPU is eligible
 * and both factors are exactly 1024 bits (16 x 64-bit words).
 */
- if (rsaz_avx512ifma_eligible() &&
+ if (ossl_rsaz_avx512ifma_eligible() &&
((a1->top == 16) && (p1->top == 16) && (BN_num_bits(m1) == 1024) &&
(a2->top == 16) && (p2->top == 16) && (BN_num_bits(m2) == 1024))) {
/*
 * NOTE(review): statements between the condition and `goto err` are elided
 * in this excerpt -- as shown the eligible path jumps straight to err,
 * which is presumably truncated; confirm against the full file.
 */
goto err;
}
- ret = RSAZ_mod_exp_avx512_x2(rr1->d, a1->d, p1->d, m1->d, mont1->RR.d,
- mont1->n0[0],
- rr2->d, a2->d, p2->d, m2->d, mont2->RR.d,
- mont2->n0[0],
- 1024 /* factor bit size */);
+ ret = ossl_rsaz_mod_exp_avx512_x2(rr1->d, a1->d, p1->d, m1->d,
+ mont1->RR.d, mont1->n0[0],
+ rr2->d, a2->d, p2->d, m2->d,
+ mont2->RR.d, mont2->n0[0],
+ 1024 /* factor bit size */);
/* Results are 1024-bit, i.e. 16 words, non-negative. */
rr1->top = 16;
rr1->neg = 0;
*/
/*AMM = Almost Montgomery Multiplication. */
/* Single-input AMM over 20 digits in radix 2^52 (assembly implementation). */
-void RSAZ_amm52x20_x1_256(BN_ULONG *res, const BN_ULONG *base,
- const BN_ULONG *exp, const BN_ULONG *m,
- BN_ULONG k0);
-void RSAZ_exp52x20_x2_256(BN_ULONG *res, const BN_ULONG *base,
- const BN_ULONG *exp[2], const BN_ULONG *m,
- const BN_ULONG *rr, const BN_ULONG k0[2]);
-void RSAZ_amm52x20_x2_256(BN_ULONG *out, const BN_ULONG *a,
- const BN_ULONG *b, const BN_ULONG *m,
- const BN_ULONG k0[2]);
+void ossl_rsaz_amm52x20_x1_256(BN_ULONG *res, const BN_ULONG *base,
+ const BN_ULONG *exp, const BN_ULONG *m,
+ BN_ULONG k0);
/* Dual modular exponentiation helper; now file-local (made static here). */
+static void RSAZ_exp52x20_x2_256(BN_ULONG *res, const BN_ULONG *base,
+ const BN_ULONG *exp[2], const BN_ULONG *m,
+ const BN_ULONG *rr, const BN_ULONG k0[2]);
/* Dual-input AMM: two independent 20-digit AMMs in one call. */
+void ossl_rsaz_amm52x20_x2_256(BN_ULONG *out, const BN_ULONG *a,
+ const BN_ULONG *b, const BN_ULONG *m,
+ const BN_ULONG k0[2]);
/* Constant-time extraction of one multiplier pair from the window-5 table. */
void ossl_extract_multiplier_2x20_win5(BN_ULONG *red_Y,
 const BN_ULONG *red_table,
 int red_table_idx, int tbl_idx);
* \return 0 in case of failure,
* 1 in case of success.
*/
/*
 * Dual 1024-bit modular exponentiation entry point (AVX512-IFMA path).
 * Rename: RSAZ_mod_exp_avx512_x2 -> ossl_rsaz_mod_exp_avx512_x2.
 */
-int RSAZ_mod_exp_avx512_x2(BN_ULONG *res1,
- const BN_ULONG *base1,
- const BN_ULONG *exp1,
- const BN_ULONG *m1,
- const BN_ULONG *rr1,
- BN_ULONG k0_1,
- BN_ULONG *res2,
- const BN_ULONG *base2,
- const BN_ULONG *exp2,
- const BN_ULONG *m2,
- const BN_ULONG *rr2,
- BN_ULONG k0_2,
- int factor_size)
+int ossl_rsaz_mod_exp_avx512_x2(BN_ULONG *res1,
+ const BN_ULONG *base1,
+ const BN_ULONG *exp1,
+ const BN_ULONG *m1,
+ const BN_ULONG *rr1,
+ BN_ULONG k0_1,
+ BN_ULONG *res2,
+ const BN_ULONG *base2,
+ const BN_ULONG *exp2,
+ const BN_ULONG *m2,
+ const BN_ULONG *rr2,
+ BN_ULONG k0_2,
+ int factor_size)
{
int ret = 0;
/* Only 1024-bit factor size is supported now */
/* Select the AMM and exponentiation kernels for the given factor width. */
switch (factor_size) {
case 1024:
- amm = RSAZ_amm52x20_x1_256;
+ amm = ossl_rsaz_amm52x20_x1_256;
 exp_x2 = RSAZ_exp52x20_x2_256;
 break;
default:
*
* \return (void).
*/
/* Dual modular exponentiation over redundant 52-bit digits; made static. */
-void RSAZ_exp52x20_x2_256(BN_ULONG *out, /* [2][20] */
- const BN_ULONG *base, /* [2][20] */
- const BN_ULONG *exp[2], /* 2x16 */
- const BN_ULONG *m, /* [2][20] */
- const BN_ULONG *rr, /* [2][20] */
- const BN_ULONG k0[2])
+static void RSAZ_exp52x20_x2_256(BN_ULONG *out, /* [2][20] */
+ const BN_ULONG *base, /* [2][20] */
+ const BN_ULONG *exp[2], /* 2x16 */
+ const BN_ULONG *m, /* [2][20] */
+ const BN_ULONG *rr, /* [2][20] */
+ const BN_ULONG k0[2])
{
# define BITSIZE_MODULUS (1024)
# define EXP_WIN_SIZE (5)
*/
/* 20 redundant 52-bit digits hold a 1024-bit value (16 x 64-bit words). */
# define RED_DIGITS (20)
# define EXP_DIGITS (16)
/* DAMM = Dual Almost Montgomery Multiplication (assembly kernel). */
-# define DAMM RSAZ_amm52x20_x2_256
+# define DAMM ossl_rsaz_amm52x20_x2_256
/*
 * Squaring is done using multiplication now. That can be a subject of
 * optimization in future.
 */
# define DAMS(r,a,m,k0) \
- RSAZ_amm52x20_x2_256((r),(a),(a),(m),(k0))
+ ossl_rsaz_amm52x20_x2_256((r),(a),(a),(m),(k0))
/* Allocate stack for red(undant) result Y and multiplier X */
ALIGN64 BN_ULONG red_Y[2][RED_DIGITS];