1
0
Fork 0
mirror of https://github.com/ossrs/srs.git synced 2025-03-09 15:49:59 +00:00

AppleM1: Update openssl to v1.1.1l

This commit is contained in:
winlin 2022-08-14 19:05:01 +08:00
parent 1fe12b8e8c
commit b787656eea
990 changed files with 13406 additions and 18710 deletions

View file

@ -1,5 +1,5 @@
#! /usr/bin/env perl
# Copyright 2015-2018 The OpenSSL Project Authors. All Rights Reserved.
# Copyright 2015-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@ -1394,7 +1394,7 @@ my ($Z1sqr, $Z2sqr) = ($Hsqr, $Rsqr);
# 256-bit vectors on top. Then note that we push
# starting from r0, which means that we have copy of
# input arguments just below these temporary vectors.
# We use three of them for !in1infty, !in2intfy and
# We use three of them for ~in1infty, ~in2infty and
# result of check for zero.
$code.=<<___;
@ -1424,7 +1424,7 @@ ecp_nistz256_point_add:
#endif
movne r12,#-1
stmia r3,{r4-r11}
str r12,[sp,#32*18+8] @ !in2infty
str r12,[sp,#32*18+8] @ ~in2infty
ldmia $a_ptr!,{r4-r11} @ copy in1_x
add r3,sp,#$in1_x
@ -1445,7 +1445,7 @@ ecp_nistz256_point_add:
#endif
movne r12,#-1
stmia r3,{r4-r11}
str r12,[sp,#32*18+4] @ !in1infty
str r12,[sp,#32*18+4] @ ~in1infty
add $a_ptr,sp,#$in2_z
add $b_ptr,sp,#$in2_z
@ -1510,33 +1510,20 @@ ecp_nistz256_point_add:
orr $a0,$a0,$a2
orr $a4,$a4,$a6
orr $a0,$a0,$a7
orrs $a0,$a0,$a4
orr $a0,$a0,$a4 @ ~is_equal(U1,U2)
bne .Ladd_proceed @ is_equal(U1,U2)?
ldr $t0,[sp,#32*18+4] @ ~in1infty
ldr $t1,[sp,#32*18+8] @ ~in2infty
ldr $t2,[sp,#32*18+12] @ ~is_equal(S1,S2)
mvn $t0,$t0 @ -1/0 -> 0/-1
mvn $t1,$t1 @ -1/0 -> 0/-1
orr $a0,$a0,$t0
orr $a0,$a0,$t1
orrs $a0,$a0,$t2 @ set flags
ldr $t0,[sp,#32*18+4]
ldr $t1,[sp,#32*18+8]
ldr $t2,[sp,#32*18+12]
tst $t0,$t1
beq .Ladd_proceed @ (in1infty || in2infty)?
tst $t2,$t2
beq .Ladd_double @ is_equal(S1,S2)?
@ if(~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2))
bne .Ladd_proceed
ldr $r_ptr,[sp,#32*18+16]
eor r4,r4,r4
eor r5,r5,r5
eor r6,r6,r6
eor r7,r7,r7
eor r8,r8,r8
eor r9,r9,r9
eor r10,r10,r10
eor r11,r11,r11
stmia $r_ptr!,{r4-r11}
stmia $r_ptr!,{r4-r11}
stmia $r_ptr!,{r4-r11}
b .Ladd_done
.align 4
.Ladd_double:
ldr $a_ptr,[sp,#32*18+20]
add sp,sp,#32*(18-5)+16 @ difference in frame sizes
@ -1601,15 +1588,15 @@ ecp_nistz256_point_add:
add $b_ptr,sp,#$S2
bl __ecp_nistz256_sub_from @ p256_sub(res_y, res_y, S2);
ldr r11,[sp,#32*18+4] @ !in1intfy
ldr r12,[sp,#32*18+8] @ !in2intfy
ldr r11,[sp,#32*18+4] @ ~in1infty
ldr r12,[sp,#32*18+8] @ ~in2infty
add r1,sp,#$res_x
add r2,sp,#$in2_x
and r10,r11,r12
and r10,r11,r12 @ ~in1infty & ~in2infty
mvn r11,r11
add r3,sp,#$in1_x
and r11,r11,r12
mvn r12,r12
and r11,r11,r12 @ in1infty & ~in2infty
mvn r12,r12 @ in2infty
ldr $r_ptr,[sp,#32*18+16]
___
for($i=0;$i<96;$i+=8) { # conditional moves
@ -1617,11 +1604,11 @@ $code.=<<___;
ldmia r1!,{r4-r5} @ res_x
ldmia r2!,{r6-r7} @ in2_x
ldmia r3!,{r8-r9} @ in1_x
and r4,r4,r10
and r4,r4,r10 @ ~in1infty & ~in2infty
and r5,r5,r10
and r6,r6,r11
and r6,r6,r11 @ in1infty & ~in2infty
and r7,r7,r11
and r8,r8,r12
and r8,r8,r12 @ in2infty
and r9,r9,r12
orr r4,r4,r6
orr r5,r5,r7
@ -1656,7 +1643,7 @@ my $Z1sqr = $S2;
# 256-bit vectors on top. Then note that we push
# starting from r0, which means that we have copy of
# input arguments just below these temporary vectors.
# We use two of them for !in1infty, !in2intfy.
# We use two of them for ~in1infty, ~in2infty.
my @ONE_mont=(1,0,0,-1,-1,-1,-2,0);
@ -1687,7 +1674,7 @@ ecp_nistz256_point_add_affine:
#endif
movne r12,#-1
stmia r3,{r4-r11}
str r12,[sp,#32*15+4] @ !in1infty
str r12,[sp,#32*15+4] @ ~in1infty
ldmia $b_ptr!,{r4-r11} @ copy in2_x
add r3,sp,#$in2_x
@ -1714,7 +1701,7 @@ ecp_nistz256_point_add_affine:
it ne
#endif
movne r12,#-1
str r12,[sp,#32*15+8] @ !in2infty
str r12,[sp,#32*15+8] @ ~in2infty
add $a_ptr,sp,#$in1_z
add $b_ptr,sp,#$in1_z
@ -1796,15 +1783,15 @@ ecp_nistz256_point_add_affine:
add $b_ptr,sp,#$S2
bl __ecp_nistz256_sub_from @ p256_sub(res_y, res_y, S2);
ldr r11,[sp,#32*15+4] @ !in1intfy
ldr r12,[sp,#32*15+8] @ !in2intfy
ldr r11,[sp,#32*15+4] @ ~in1infty
ldr r12,[sp,#32*15+8] @ ~in2infty
add r1,sp,#$res_x
add r2,sp,#$in2_x
and r10,r11,r12
and r10,r11,r12 @ ~in1infty & ~in2infty
mvn r11,r11
add r3,sp,#$in1_x
and r11,r11,r12
mvn r12,r12
and r11,r11,r12 @ in1infty & ~in2infty
mvn r12,r12 @ in2infty
ldr $r_ptr,[sp,#32*15]
___
for($i=0;$i<64;$i+=8) { # conditional moves
@ -1812,11 +1799,11 @@ $code.=<<___;
ldmia r1!,{r4-r5} @ res_x
ldmia r2!,{r6-r7} @ in2_x
ldmia r3!,{r8-r9} @ in1_x
and r4,r4,r10
and r4,r4,r10 @ ~in1infty & ~in2infty
and r5,r5,r10
and r6,r6,r11
and r6,r6,r11 @ in1infty & ~in2infty
and r7,r7,r11
and r8,r8,r12
and r8,r8,r12 @ in2infty
and r9,r9,r12
orr r4,r4,r6
orr r5,r5,r7
@ -1862,4 +1849,4 @@ foreach (split("\n",$code)) {
print $_,"\n";
}
close STDOUT; # enforce flush
close STDOUT or die "error closing STDOUT: $!"; # enforce flush

View file

@ -1,5 +1,5 @@
#! /usr/bin/env perl
# Copyright 2015-2019 The OpenSSL Project Authors. All Rights Reserved.
# Copyright 2015-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@ -722,7 +722,7 @@ $code.=<<___;
.align 5
ecp_nistz256_point_double:
.inst 0xd503233f // paciasp
stp x29,x30,[sp,#-80]!
stp x29,x30,[sp,#-96]!
add x29,sp,#0
stp x19,x20,[sp,#16]
stp x21,x22,[sp,#32]
@ -855,7 +855,7 @@ ecp_nistz256_point_double:
add sp,x29,#0 // destroy frame
ldp x19,x20,[x29,#16]
ldp x21,x22,[x29,#32]
ldp x29,x30,[sp],#80
ldp x29,x30,[sp],#96
.inst 0xd50323bf // autiasp
ret
.size ecp_nistz256_point_double,.-ecp_nistz256_point_double
@ -872,7 +872,7 @@ my ($res_x,$res_y,$res_z,
my ($Z1sqr, $Z2sqr) = ($Hsqr, $Rsqr);
# above map() describes stack layout with 12 temporary
# 256-bit vectors on top.
my ($rp_real,$ap_real,$bp_real,$in1infty,$in2infty,$temp)=map("x$_",(21..26));
my ($rp_real,$ap_real,$bp_real,$in1infty,$in2infty,$temp0,$temp1,$temp2)=map("x$_",(21..28));
$code.=<<___;
.globl ecp_nistz256_point_add
@ -880,12 +880,13 @@ $code.=<<___;
.align 5
ecp_nistz256_point_add:
.inst 0xd503233f // paciasp
stp x29,x30,[sp,#-80]!
stp x29,x30,[sp,#-96]!
add x29,sp,#0
stp x19,x20,[sp,#16]
stp x21,x22,[sp,#32]
stp x23,x24,[sp,#48]
stp x25,x26,[sp,#64]
stp x27,x28,[sp,#80]
sub sp,sp,#32*12
ldp $a0,$a1,[$bp,#64] // in2_z
@ -899,7 +900,7 @@ ecp_nistz256_point_add:
orr $t2,$a2,$a3
orr $in2infty,$t0,$t2
cmp $in2infty,#0
csetm $in2infty,ne // !in2infty
csetm $in2infty,ne // ~in2infty
add $rp,sp,#$Z2sqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z2sqr, in2_z);
@ -909,7 +910,7 @@ ecp_nistz256_point_add:
orr $t2,$a2,$a3
orr $in1infty,$t0,$t2
cmp $in1infty,#0
csetm $in1infty,ne // !in1infty
csetm $in1infty,ne // ~in1infty
add $rp,sp,#$Z1sqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);
@ -950,7 +951,7 @@ ecp_nistz256_point_add:
orr $acc0,$acc0,$acc1 // see if result is zero
orr $acc2,$acc2,$acc3
orr $temp,$acc0,$acc2
orr $temp0,$acc0,$acc2 // ~is_equal(S1,S2)
add $bp,sp,#$Z2sqr
add $rp,sp,#$U1
@ -971,32 +972,21 @@ ecp_nistz256_point_add:
orr $acc0,$acc0,$acc1 // see if result is zero
orr $acc2,$acc2,$acc3
orr $acc0,$acc0,$acc2
tst $acc0,$acc0
b.ne .Ladd_proceed // is_equal(U1,U2)?
orr $acc0,$acc0,$acc2 // ~is_equal(U1,U2)
tst $in1infty,$in2infty
b.eq .Ladd_proceed // (in1infty || in2infty)?
mvn $temp1,$in1infty // -1/0 -> 0/-1
mvn $temp2,$in2infty // -1/0 -> 0/-1
orr $acc0,$acc0,$temp1
orr $acc0,$acc0,$temp2
orr $acc0,$acc0,$temp0
cbnz $acc0,.Ladd_proceed // if(~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2))
tst $temp,$temp
b.eq .Ladd_double // is_equal(S1,S2)?
eor $a0,$a0,$a0
eor $a1,$a1,$a1
stp $a0,$a1,[$rp_real]
stp $a0,$a1,[$rp_real,#16]
stp $a0,$a1,[$rp_real,#32]
stp $a0,$a1,[$rp_real,#48]
stp $a0,$a1,[$rp_real,#64]
stp $a0,$a1,[$rp_real,#80]
b .Ladd_done
.align 4
.Ladd_double:
mov $ap,$ap_real
mov $rp,$rp_real
ldp x23,x24,[x29,#48]
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
add sp,sp,#32*(12-4) // difference in stack frames
b .Ldouble_shortcut
@ -1081,14 +1071,14 @@ ___
for($i=0;$i<64;$i+=32) { # conditional moves
$code.=<<___;
ldp $acc0,$acc1,[$ap_real,#$i] // in1
cmp $in1infty,#0 // !$in1intfy, remember?
cmp $in1infty,#0 // ~$in1intfy, remember?
ldp $acc2,$acc3,[$ap_real,#$i+16]
csel $t0,$a0,$t0,ne
csel $t1,$a1,$t1,ne
ldp $a0,$a1,[sp,#$res_x+$i+32] // res
csel $t2,$a2,$t2,ne
csel $t3,$a3,$t3,ne
cmp $in2infty,#0 // !$in2intfy, remember?
cmp $in2infty,#0 // ~$in2intfy, remember?
ldp $a2,$a3,[sp,#$res_x+$i+48]
csel $acc0,$t0,$acc0,ne
csel $acc1,$t1,$acc1,ne
@ -1102,13 +1092,13 @@ ___
}
$code.=<<___;
ldp $acc0,$acc1,[$ap_real,#$i] // in1
cmp $in1infty,#0 // !$in1intfy, remember?
cmp $in1infty,#0 // ~$in1intfy, remember?
ldp $acc2,$acc3,[$ap_real,#$i+16]
csel $t0,$a0,$t0,ne
csel $t1,$a1,$t1,ne
csel $t2,$a2,$t2,ne
csel $t3,$a3,$t3,ne
cmp $in2infty,#0 // !$in2intfy, remember?
cmp $in2infty,#0 // ~$in2intfy, remember?
csel $acc0,$t0,$acc0,ne
csel $acc1,$t1,$acc1,ne
csel $acc2,$t2,$acc2,ne
@ -1122,7 +1112,8 @@ $code.=<<___;
ldp x21,x22,[x29,#32]
ldp x23,x24,[x29,#48]
ldp x25,x26,[x29,#64]
ldp x29,x30,[sp],#80
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
.inst 0xd50323bf // autiasp
ret
.size ecp_nistz256_point_add,.-ecp_nistz256_point_add
@ -1166,7 +1157,7 @@ ecp_nistz256_point_add_affine:
orr $t2,$a2,$a3
orr $in1infty,$t0,$t2
cmp $in1infty,#0
csetm $in1infty,ne // !in1infty
csetm $in1infty,ne // ~in1infty
ldp $acc0,$acc1,[$bp] // in2_x
ldp $acc2,$acc3,[$bp,#16]
@ -1180,7 +1171,7 @@ ecp_nistz256_point_add_affine:
orr $t0,$t0,$t2
orr $in2infty,$acc0,$t0
cmp $in2infty,#0
csetm $in2infty,ne // !in2infty
csetm $in2infty,ne // ~in2infty
add $rp,sp,#$Z1sqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);
@ -1290,14 +1281,14 @@ ___
for($i=0;$i<64;$i+=32) { # conditional moves
$code.=<<___;
ldp $acc0,$acc1,[$ap_real,#$i] // in1
cmp $in1infty,#0 // !$in1intfy, remember?
cmp $in1infty,#0 // ~$in1intfy, remember?
ldp $acc2,$acc3,[$ap_real,#$i+16]
csel $t0,$a0,$t0,ne
csel $t1,$a1,$t1,ne
ldp $a0,$a1,[sp,#$res_x+$i+32] // res
csel $t2,$a2,$t2,ne
csel $t3,$a3,$t3,ne
cmp $in2infty,#0 // !$in2intfy, remember?
cmp $in2infty,#0 // ~$in2intfy, remember?
ldp $a2,$a3,[sp,#$res_x+$i+48]
csel $acc0,$t0,$acc0,ne
csel $acc1,$t1,$acc1,ne
@ -1314,13 +1305,13 @@ ___
}
$code.=<<___;
ldp $acc0,$acc1,[$ap_real,#$i] // in1
cmp $in1infty,#0 // !$in1intfy, remember?
cmp $in1infty,#0 // ~$in1intfy, remember?
ldp $acc2,$acc3,[$ap_real,#$i+16]
csel $t0,$a0,$t0,ne
csel $t1,$a1,$t1,ne
csel $t2,$a2,$t2,ne
csel $t3,$a3,$t3,ne
cmp $in2infty,#0 // !$in2intfy, remember?
cmp $in2infty,#0 // ~$in2intfy, remember?
csel $acc0,$t0,$acc0,ne
csel $acc1,$t1,$acc1,ne
csel $acc2,$t2,$acc2,ne
@ -1880,4 +1871,4 @@ foreach (split("\n",$code)) {
print $_,"\n";
}
close STDOUT; # enforce flush
close STDOUT or die "error closing STDOUT: $!"; # enforce flush

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
#! /usr/bin/env perl
# Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved.
# Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@ -2379,4 +2379,4 @@ foreach (split("\n",$code)) {
print $_,"\n";
}
close STDOUT; # enforce flush
close STDOUT or die "error closing STDOUT: $!"; # enforce flush

View file

@ -1,5 +1,5 @@
#! /usr/bin/env perl
# Copyright 2015-2018 The OpenSSL Project Authors. All Rights Reserved.
# Copyright 2015-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@ -2301,7 +2301,6 @@ my ($Z1sqr, $Z2sqr) = ($Hsqr, $Rsqr);
# !in1infty, !in2infty and result of check for zero.
$code.=<<___;
.globl ecp_nistz256_point_add_vis3
.align 32
ecp_nistz256_point_add_vis3:
save %sp,-STACK64_FRAME-32*18-32,%sp
@ -3058,4 +3057,4 @@ foreach (split("\n",$code)) {
print $_,"\n";
}
close STDOUT;
close STDOUT or die "error closing STDOUT: $!";

View file

@ -1,5 +1,5 @@
#! /usr/bin/env perl
# Copyright 2015-2018 The OpenSSL Project Authors. All Rights Reserved.
# Copyright 2015-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@ -1388,7 +1388,7 @@ for ($i=0;$i<7;$i++) {
# above map() describes stack layout with 18 temporary
# 256-bit vectors on top, then we take extra words for
# !in1infty, !in2infty, result of check for zero and
# ~in1infty, ~in2infty, result of check for zero and
# OPENSSL_ia32cap_P copy. [one unused word for padding]
&stack_push(8*18+5);
if ($sse2) {
@ -1419,7 +1419,7 @@ for ($i=0;$i<7;$i++) {
&sub ("eax","ebp");
&or ("ebp","eax");
&sar ("ebp",31);
&mov (&DWP(32*18+4,"esp"),"ebp"); # !in2infty
&mov (&DWP(32*18+4,"esp"),"ebp"); # ~in2infty
&lea ("edi",&DWP($in1_x,"esp"));
for($i=0;$i<96;$i+=16) {
@ -1441,7 +1441,7 @@ for ($i=0;$i<7;$i++) {
&sub ("eax","ebp");
&or ("ebp","eax");
&sar ("ebp",31);
&mov (&DWP(32*18+0,"esp"),"ebp"); # !in1infty
&mov (&DWP(32*18+0,"esp"),"ebp"); # ~in1infty
&mov ("eax",&DWP(32*18+12,"esp")); # OPENSSL_ia32cap_P copy
&lea ("esi",&DWP($in2_z,"esp"));
@ -1516,23 +1516,19 @@ for ($i=0;$i<7;$i++) {
&or ("eax",&DWP(0,"edi"));
&or ("eax",&DWP(4,"edi"));
&or ("eax",&DWP(8,"edi"));
&or ("eax",&DWP(12,"edi"));
&or ("eax",&DWP(12,"edi")); # ~is_equal(U1,U2)
&mov ("ebx",&DWP(32*18+0,"esp")); # ~in1infty
&not ("ebx"); # -1/0 -> 0/-1
&or ("eax","ebx");
&mov ("ebx",&DWP(32*18+4,"esp")); # ~in2infty
&not ("ebx"); # -1/0 -> 0/-1
&or ("eax","ebx");
&or ("eax",&DWP(32*18+8,"esp")); # ~is_equal(S1,S2)
# if (~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2))
&data_byte(0x3e); # predict taken
&jnz (&label("add_proceed")); # is_equal(U1,U2)?
&mov ("eax",&DWP(32*18+0,"esp"));
&and ("eax",&DWP(32*18+4,"esp"));
&mov ("ebx",&DWP(32*18+8,"esp"));
&jz (&label("add_proceed")); # (in1infty || in2infty)?
&test ("ebx","ebx");
&jz (&label("add_double")); # is_equal(S1,S2)?
&mov ("edi",&wparam(0));
&xor ("eax","eax");
&mov ("ecx",96/4);
&data_byte(0xfc,0xf3,0xab); # cld; stosd
&jmp (&label("add_done"));
&jnz (&label("add_proceed"));
&set_label("add_double",16);
&mov ("esi",&wparam(1));
@ -1614,34 +1610,34 @@ for ($i=0;$i<7;$i++) {
&lea ("edi",&DWP($res_y,"esp"));
&call ("_ecp_nistz256_sub"); # p256_sub(res_y, res_y, S2);
&mov ("ebp",&DWP(32*18+0,"esp")); # !in1infty
&mov ("esi",&DWP(32*18+4,"esp")); # !in2infty
&mov ("ebp",&DWP(32*18+0,"esp")); # ~in1infty
&mov ("esi",&DWP(32*18+4,"esp")); # ~in2infty
&mov ("edi",&wparam(0));
&mov ("edx","ebp");
&not ("ebp");
&and ("edx","esi");
&and ("ebp","esi");
&not ("esi");
&and ("edx","esi"); # ~in1infty & ~in2infty
&and ("ebp","esi"); # in1infty & ~in2infty
&not ("esi"); # in2infty
########################################
# conditional moves
for($i=64;$i<96;$i+=4) {
&mov ("eax","edx");
&mov ("eax","edx"); # ~in1infty & ~in2infty
&and ("eax",&DWP($res_x+$i,"esp"));
&mov ("ebx","ebp");
&mov ("ebx","ebp"); # in1infty & ~in2infty
&and ("ebx",&DWP($in2_x+$i,"esp"));
&mov ("ecx","esi");
&mov ("ecx","esi"); # in2infty
&and ("ecx",&DWP($in1_x+$i,"esp"));
&or ("eax","ebx");
&or ("eax","ecx");
&mov (&DWP($i,"edi"),"eax");
}
for($i=0;$i<64;$i+=4) {
&mov ("eax","edx");
&mov ("eax","edx"); # ~in1infty & ~in2infty
&and ("eax",&DWP($res_x+$i,"esp"));
&mov ("ebx","ebp");
&mov ("ebx","ebp"); # in1infty & ~in2infty
&and ("ebx",&DWP($in2_x+$i,"esp"));
&mov ("ecx","esi");
&mov ("ecx","esi"); # in2infty
&and ("ecx",&DWP($in1_x+$i,"esp"));
&or ("eax","ebx");
&or ("eax","ecx");
@ -1668,7 +1664,7 @@ for ($i=0;$i<7;$i++) {
# above map() describes stack layout with 15 temporary
# 256-bit vectors on top, then we take extra words for
# !in1infty, !in2infty, and OPENSSL_ia32cap_P copy.
# ~in1infty, ~in2infty, and OPENSSL_ia32cap_P copy.
&stack_push(8*15+3);
if ($sse2) {
&call ("_picup_eax");
@ -1698,7 +1694,7 @@ for ($i=0;$i<7;$i++) {
&sub ("eax","ebp");
&or ("ebp","eax");
&sar ("ebp",31);
&mov (&DWP(32*15+0,"esp"),"ebp"); # !in1infty
&mov (&DWP(32*15+0,"esp"),"ebp"); # ~in1infty
&lea ("edi",&DWP($in2_x,"esp"));
for($i=0;$i<64;$i+=16) {
@ -1724,7 +1720,7 @@ for ($i=0;$i<7;$i++) {
&lea ("ebp",&DWP($in1_z,"esp"));
&sar ("ebx",31);
&lea ("edi",&DWP($Z1sqr,"esp"));
&mov (&DWP(32*15+4,"esp"),"ebx"); # !in2infty
&mov (&DWP(32*15+4,"esp"),"ebx"); # ~in2infty
&call ("_ecp_nistz256_mul_mont"); # p256_sqr_mont(Z1sqr, in1_z);
@ -1823,14 +1819,14 @@ for ($i=0;$i<7;$i++) {
&lea ("edi",&DWP($res_y,"esp"));
&call ("_ecp_nistz256_sub"); # p256_sub(res_y, res_y, S2);
&mov ("ebp",&DWP(32*15+0,"esp")); # !in1infty
&mov ("esi",&DWP(32*15+4,"esp")); # !in2infty
&mov ("ebp",&DWP(32*15+0,"esp")); # ~in1infty
&mov ("esi",&DWP(32*15+4,"esp")); # ~in2infty
&mov ("edi",&wparam(0));
&mov ("edx","ebp");
&not ("ebp");
&and ("edx","esi");
&and ("ebp","esi");
&not ("esi");
&and ("edx","esi"); # ~in1infty & ~in2infty
&and ("ebp","esi"); # in1infty & ~in2infty
&not ("esi"); # in2infty
########################################
# conditional moves
@ -1848,11 +1844,11 @@ for ($i=0;$i<7;$i++) {
&mov (&DWP($i,"edi"),"eax");
}
for($i=0;$i<64;$i+=4) {
&mov ("eax","edx");
&mov ("eax","edx"); # ~in1infty & ~in2infty
&and ("eax",&DWP($res_x+$i,"esp"));
&mov ("ebx","ebp");
&mov ("ebx","ebp"); # in1infty & ~in2infty
&and ("ebx",&DWP($in2_x+$i,"esp"));
&mov ("ecx","esi");
&mov ("ecx","esi"); # in2infty
&and ("ecx",&DWP($in1_x+$i,"esp"));
&or ("eax","ebx");
&or ("eax","ecx");
@ -1863,4 +1859,4 @@ for ($i=0;$i<7;$i++) {
&asm_finish();
close STDOUT;
close STDOUT or die "error closing STDOUT: $!";

View file

@ -1,5 +1,5 @@
#! /usr/bin/env perl
# Copyright 2014-2019 The OpenSSL Project Authors. All Rights Reserved.
# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
# Copyright (c) 2014, Intel Corporation. All Rights Reserved.
# Copyright (c) 2015 CloudFlare, Inc.
#
@ -72,7 +72,7 @@ if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
$addx = ($1>=12);
}
if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9])\.([0-9]+)/) {
if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+)\.([0-9]+)/) {
my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10
$avx = ($ver>=3.0) + ($ver>=3.01);
$addx = ($ver>=3.03);
@ -1301,7 +1301,7 @@ ecp_nistz256_ord_mul_montx:
################################# reduction
mulx 8*0+128(%r14), $t0, $t1
adcx $t0, $acc3 # guranteed to be zero
adcx $t0, $acc3 # guaranteed to be zero
adox $t1, $acc4
mulx 8*1+128(%r14), $t0, $t1
@ -1579,6 +1579,7 @@ $code.=<<___;
.type ecp_nistz256_to_mont,\@function,2
.align 32
ecp_nistz256_to_mont:
.cfi_startproc
___
$code.=<<___ if ($addx);
mov \$0x80100, %ecx
@ -1587,6 +1588,7 @@ ___
$code.=<<___;
lea .LRR(%rip), $b_org
jmp .Lmul_mont
.cfi_endproc
.size ecp_nistz256_to_mont,.-ecp_nistz256_to_mont
################################################################################
@ -2562,6 +2564,7 @@ $code.=<<___;
.type ecp_nistz256_scatter_w5,\@abi-omnipotent
.align 32
ecp_nistz256_scatter_w5:
.cfi_startproc
lea -3($index,$index,2), $index
movdqa 0x00($in_t), %xmm0
shl \$5, $index
@ -2578,6 +2581,7 @@ ecp_nistz256_scatter_w5:
movdqa %xmm5, 0x50($val,$index)
ret
.cfi_endproc
.size ecp_nistz256_scatter_w5,.-ecp_nistz256_scatter_w5
################################################################################
@ -2685,6 +2689,7 @@ $code.=<<___;
.type ecp_nistz256_scatter_w7,\@abi-omnipotent
.align 32
ecp_nistz256_scatter_w7:
.cfi_startproc
movdqu 0x00($in_t), %xmm0
shl \$6, $index
movdqu 0x10($in_t), %xmm1
@ -2696,6 +2701,7 @@ ecp_nistz256_scatter_w7:
movdqa %xmm3, 0x30($val,$index)
ret
.cfi_endproc
.size ecp_nistz256_scatter_w7,.-ecp_nistz256_scatter_w7
################################################################################
@ -3020,8 +3026,10 @@ $code.=<<___;
.type ecp_nistz256_avx2_gather_w7,\@function,3
.align 32
ecp_nistz256_avx2_gather_w7:
.cfi_startproc
.byte 0x0f,0x0b # ud2
ret
.cfi_endproc
.size ecp_nistz256_avx2_gather_w7,.-ecp_nistz256_avx2_gather_w7
___
}
@ -3617,29 +3625,19 @@ $code.=<<___;
call __ecp_nistz256_sub_from$x # p256_sub(H, U2, U1);
or $acc5, $acc4 # see if result is zero
or $acc0, $acc4
or $acc1, $acc4 # !is_equal(U1, U2)
movq %xmm2, $acc0 # in1infty | in2infty
movq %xmm3, $acc1 # !is_equal(S1, S2)
or $acc0, $acc4
or $acc1, $acc4
# if (!is_equal(U1, U2) | in1infty | in2infty | !is_equal(S1, S2))
.byte 0x3e # predict taken
jnz .Ladd_proceed$x # is_equal(U1,U2)?
movq %xmm2, $acc0
movq %xmm3, $acc1
test $acc0, $acc0
jnz .Ladd_proceed$x # (in1infty || in2infty)?
test $acc1, $acc1
jz .Ladd_double$x # is_equal(S1,S2)?
jnz .Ladd_proceed$x
movq %xmm0, $r_ptr # restore $r_ptr
pxor %xmm0, %xmm0
movdqu %xmm0, 0x00($r_ptr)
movdqu %xmm0, 0x10($r_ptr)
movdqu %xmm0, 0x20($r_ptr)
movdqu %xmm0, 0x30($r_ptr)
movdqu %xmm0, 0x40($r_ptr)
movdqu %xmm0, 0x50($r_ptr)
jmp .Ladd_done$x
.align 32
.Ladd_double$x:
movq %xmm1, $a_ptr # restore $a_ptr
movq %xmm0, $r_ptr # restore $r_ptr
@ -4738,4 +4736,4 @@ ___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT;
close STDOUT or die "error closing STDOUT: $!";

View file

@ -1,5 +1,5 @@
#! /usr/bin/env perl
# Copyright 2018 The OpenSSL Project Authors. All Rights Reserved.
# Copyright 2018-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@ -451,7 +451,7 @@ x25519_fe64_tobytes:
and $t0,$t0,$t1
sldi $a3,$a3,1
add $t0,$t0,$t1 # compare to modulus in the same go
srdi $a3,$a3,1 # most signifcant bit cleared
srdi $a3,$a3,1 # most significant bit cleared
addc $a0,$a0,$t0
addze $a1,$a1
@ -462,7 +462,7 @@ x25519_fe64_tobytes:
sradi $t0,$a3,63 # most significant bit -> mask
sldi $a3,$a3,1
andc $t0,$t1,$t0
srdi $a3,$a3,1 # most signifcant bit cleared
srdi $a3,$a3,1 # most significant bit cleared
subi $rp,$rp,1
subfc $a0,$t0,$a0
@ -821,4 +821,4 @@ ___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT;
close STDOUT or die "error closing STDOUT: $!";

View file

@ -1,5 +1,5 @@
#!/usr/bin/env perl
# Copyright 2018 The OpenSSL Project Authors. All Rights Reserved.
# Copyright 2018-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@ -90,7 +90,7 @@ if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
$addx = ($1>=12);
}
if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9])\.([0-9]+)/) {
if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+)\.([0-9]+)/) {
my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10
$addx = ($ver>=3.03);
}
@ -488,12 +488,14 @@ $code.=<<___;
.type x25519_fe64_eligible,\@abi-omnipotent
.align 32
x25519_fe64_eligible:
.cfi_startproc
mov OPENSSL_ia32cap_P+8(%rip),%ecx
xor %eax,%eax
and \$0x80100,%ecx
cmp \$0x80100,%ecx
cmove %ecx,%eax
ret
.cfi_endproc
.size x25519_fe64_eligible,.-x25519_fe64_eligible
.globl x25519_fe64_mul
@ -722,6 +724,7 @@ x25519_fe64_sqr:
.align 32
x25519_fe64_mul121666:
.Lfe64_mul121666_body:
.cfi_startproc
mov \$121666,%edx
mulx 8*0(%rsi),$acc0,%rcx
mulx 8*1(%rsi),$acc1,%rax
@ -750,6 +753,7 @@ x25519_fe64_mul121666:
.Lfe64_mul121666_epilogue:
ret
.cfi_endproc
.size x25519_fe64_mul121666,.-x25519_fe64_mul121666
.globl x25519_fe64_add
@ -757,6 +761,7 @@ x25519_fe64_mul121666:
.align 32
x25519_fe64_add:
.Lfe64_add_body:
.cfi_startproc
mov 8*0(%rsi),$acc0
mov 8*1(%rsi),$acc1
mov 8*2(%rsi),$acc2
@ -785,6 +790,7 @@ x25519_fe64_add:
.Lfe64_add_epilogue:
ret
.cfi_endproc
.size x25519_fe64_add,.-x25519_fe64_add
.globl x25519_fe64_sub
@ -792,6 +798,7 @@ x25519_fe64_add:
.align 32
x25519_fe64_sub:
.Lfe64_sub_body:
.cfi_startproc
mov 8*0(%rsi),$acc0
mov 8*1(%rsi),$acc1
mov 8*2(%rsi),$acc2
@ -820,6 +827,7 @@ x25519_fe64_sub:
.Lfe64_sub_epilogue:
ret
.cfi_endproc
.size x25519_fe64_sub,.-x25519_fe64_sub
.globl x25519_fe64_tobytes
@ -827,6 +835,7 @@ x25519_fe64_sub:
.align 32
x25519_fe64_tobytes:
.Lfe64_to_body:
.cfi_startproc
mov 8*0(%rsi),$acc0
mov 8*1(%rsi),$acc1
mov 8*2(%rsi),$acc2
@ -862,6 +871,7 @@ x25519_fe64_tobytes:
.Lfe64_to_epilogue:
ret
.cfi_endproc
.size x25519_fe64_tobytes,.-x25519_fe64_tobytes
___
} else {
@ -870,8 +880,10 @@ $code.=<<___;
.type x25519_fe64_eligible,\@abi-omnipotent
.align 32
x25519_fe64_eligible:
.cfi_startproc
xor %eax,%eax
ret
.cfi_endproc
.size x25519_fe64_eligible,.-x25519_fe64_eligible
.globl x25519_fe64_mul
@ -887,8 +899,10 @@ x25519_fe64_mul121666:
x25519_fe64_add:
x25519_fe64_sub:
x25519_fe64_tobytes:
.cfi_startproc
.byte 0x0f,0x0b # ud2
ret
.cfi_endproc
.size x25519_fe64_mul,.-x25519_fe64_mul
___
}
@ -1114,4 +1128,4 @@ ___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT;
close STDOUT or die "error closing STDOUT: $!";