mirror of
https://github.com/ossrs/srs.git
synced 2025-03-09 15:49:59 +00:00
Upgrade openssl from 1.1.0e to 1.1.1b, with source code. 4.0.78
This commit is contained in:
parent
8f1c992379
commit
96dbd7bced
1476 changed files with 616554 additions and 4 deletions
241
trunk/3rdparty/openssl-1.1-fit/crypto/bn/README.pod
vendored
Normal file
241
trunk/3rdparty/openssl-1.1-fit/crypto/bn/README.pod
vendored
Normal file
|
@ -0,0 +1,241 @@
|
|||
=pod
|
||||
|
||||
=head1 NAME
|
||||
|
||||
bn_mul_words, bn_mul_add_words, bn_sqr_words, bn_div_words,
|
||||
bn_add_words, bn_sub_words, bn_mul_comba4, bn_mul_comba8,
|
||||
bn_sqr_comba4, bn_sqr_comba8, bn_cmp_words, bn_mul_normal,
|
||||
bn_mul_low_normal, bn_mul_recursive, bn_mul_part_recursive,
|
||||
bn_mul_low_recursive, bn_sqr_normal, bn_sqr_recursive,
|
||||
bn_expand, bn_wexpand, bn_expand2, bn_fix_top, bn_check_top,
|
||||
bn_print, bn_dump, bn_set_max, bn_set_high, bn_set_low - BIGNUM
|
||||
library internal functions
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
#include <openssl/bn.h>
|
||||
|
||||
BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w);
|
||||
BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num,
|
||||
BN_ULONG w);
|
||||
void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num);
|
||||
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d);
|
||||
BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,
|
||||
int num);
|
||||
BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,
|
||||
int num);
|
||||
|
||||
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
|
||||
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
|
||||
void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a);
|
||||
void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a);
|
||||
|
||||
int bn_cmp_words(BN_ULONG *a, BN_ULONG *b, int n);
|
||||
|
||||
void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b,
|
||||
int nb);
|
||||
void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n);
|
||||
void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
|
||||
int dna, int dnb, BN_ULONG *tmp);
|
||||
void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
|
||||
int n, int tna, int tnb, BN_ULONG *tmp);
|
||||
void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
|
||||
int n2, BN_ULONG *tmp);
|
||||
|
||||
void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp);
|
||||
void bn_sqr_recursive(BN_ULONG *r, BN_ULONG *a, int n2, BN_ULONG *tmp);
|
||||
|
||||
void mul(BN_ULONG r, BN_ULONG a, BN_ULONG w, BN_ULONG c);
|
||||
void mul_add(BN_ULONG r, BN_ULONG a, BN_ULONG w, BN_ULONG c);
|
||||
void sqr(BN_ULONG r0, BN_ULONG r1, BN_ULONG a);
|
||||
|
||||
BIGNUM *bn_expand(BIGNUM *a, int bits);
|
||||
BIGNUM *bn_wexpand(BIGNUM *a, int n);
|
||||
BIGNUM *bn_expand2(BIGNUM *a, int n);
|
||||
void bn_fix_top(BIGNUM *a);
|
||||
|
||||
void bn_check_top(BIGNUM *a);
|
||||
void bn_print(BIGNUM *a);
|
||||
void bn_dump(BN_ULONG *d, int n);
|
||||
void bn_set_max(BIGNUM *a);
|
||||
void bn_set_high(BIGNUM *r, BIGNUM *a, int n);
|
||||
void bn_set_low(BIGNUM *r, BIGNUM *a, int n);
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This page documents the internal functions used by the OpenSSL
|
||||
B<BIGNUM> implementation. They are described here to facilitate
|
||||
debugging and extending the library. They are I<not> to be used by
|
||||
applications.
|
||||
|
||||
=head2 The BIGNUM structure
|
||||
|
||||
typedef struct bignum_st BIGNUM;
|
||||
|
||||
struct bignum_st
|
||||
{
|
||||
BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */
|
||||
int top; /* Index of last used d +1. */
|
||||
/* The next are internal book keeping for bn_expand. */
|
||||
int dmax; /* Size of the d array. */
|
||||
int neg; /* one if the number is negative */
|
||||
int flags;
|
||||
};
|
||||
|
||||
|
||||
The integer value is stored in B<d>, a malloc()ed array of words (B<BN_ULONG>),
|
||||
least significant word first. A B<BN_ULONG> can be either 16, 32 or 64 bits
|
||||
in size, depending on the 'number of bits' (B<BN_BITS2>) specified in
|
||||
C<openssl/bn.h>.
|
||||
|
||||
B<dmax> is the size of the B<d> array that has been allocated. B<top>
|
||||
is the number of words being used, so for a value of 4, bn.d[0]=4 and
|
||||
bn.top=1. B<neg> is 1 if the number is negative. When a B<BIGNUM> is
|
||||
B<0>, the B<d> field can be B<NULL> and B<top> == B<0>.
|
||||
|
||||
B<flags> is a bit field of flags which are defined in C<openssl/bn.h>. The
|
||||
flags begin with B<BN_FLG_>. The macros BN_set_flags(b, n) and
|
||||
BN_get_flags(b, n) exist to enable or fetch flag(s) B<n> from B<BIGNUM>
|
||||
structure B<b>.
|
||||
|
||||
Various routines in this library require the use of temporary
|
||||
B<BIGNUM> variables during their execution. Since dynamic memory
|
||||
allocation to create B<BIGNUM>s is rather expensive when used in
|
||||
conjunction with repeated subroutine calls, the B<BN_CTX> structure is
|
||||
used. This structure contains B<BN_CTX_NUM> B<BIGNUM>s, see
|
||||
L<BN_CTX_start(3)>.
|
||||
|
||||
=head2 Low-level arithmetic operations
|
||||
|
||||
These functions are implemented in C and for several platforms in
|
||||
assembly language:
|
||||
|
||||
bn_mul_words(B<rp>, B<ap>, B<num>, B<w>) operates on the B<num> word
|
||||
arrays B<rp> and B<ap>. It computes B<ap> * B<w>, places the result
|
||||
in B<rp>, and returns the high word (carry).
|
||||
|
||||
bn_mul_add_words(B<rp>, B<ap>, B<num>, B<w>) operates on the B<num>
|
||||
word arrays B<rp> and B<ap>. It computes B<ap> * B<w> + B<rp>, places
|
||||
the result in B<rp>, and returns the high word (carry).
|
||||
|
||||
bn_sqr_words(B<rp>, B<ap>, B<num>) operates on the B<num> word array
|
||||
B<ap> and the 2*B<num> word array B<rp>. It computes B<ap> * B<ap>
|
||||
word-wise, and places the low and high words of the result in B<rp>.
|
||||
|
||||
bn_div_words(B<h>, B<l>, B<d>) divides the two word number (B<h>, B<l>)
|
||||
by B<d> and returns the result.
|
||||
|
||||
bn_add_words(B<rp>, B<ap>, B<bp>, B<num>) operates on the B<num> word
|
||||
arrays B<ap>, B<bp> and B<rp>. It computes B<ap> + B<bp>, places the
|
||||
result in B<rp>, and returns the high word (carry).
|
||||
|
||||
bn_sub_words(B<rp>, B<ap>, B<bp>, B<num>) operates on the B<num> word
|
||||
arrays B<ap>, B<bp> and B<rp>. It computes B<ap> - B<bp>, places the
|
||||
result in B<rp>, and returns the carry (1 if B<bp> E<gt> B<ap>, 0
|
||||
otherwise).
|
||||
|
||||
bn_mul_comba4(B<r>, B<a>, B<b>) operates on the 4 word arrays B<a> and
|
||||
B<b> and the 8 word array B<r>. It computes B<a>*B<b> and places the
|
||||
result in B<r>.
|
||||
|
||||
bn_mul_comba8(B<r>, B<a>, B<b>) operates on the 8 word arrays B<a> and
|
||||
B<b> and the 16 word array B<r>. It computes B<a>*B<b> and places the
|
||||
result in B<r>.
|
||||
|
||||
bn_sqr_comba4(B<r>, B<a>) operates on the 4 word array B<a> and
|
||||
the 8 word array B<r>.
|
||||
|
||||
bn_sqr_comba8(B<r>, B<a>) operates on the 8 word array B<a> and
|
||||
the 16 word array B<r>.
|
||||
|
||||
The following functions are implemented in C:
|
||||
|
||||
bn_cmp_words(B<a>, B<b>, B<n>) operates on the B<n> word arrays B<a>
|
||||
and B<b>. It returns 1, 0 or -1 if B<a> is greater than, equal to or
|
||||
less than B<b>, respectively.
|
||||
|
||||
bn_mul_normal(B<r>, B<a>, B<na>, B<b>, B<nb>) operates on the B<na>
|
||||
word array B<a>, the B<nb> word array B<b> and the B<na>+B<nb> word
|
||||
array B<r>. It computes B<a>*B<b> and places the result in B<r>.
|
||||
|
||||
bn_mul_low_normal(B<r>, B<a>, B<b>, B<n>) operates on the B<n> word
|
||||
arrays B<r>, B<a> and B<b>. It computes the B<n> low words of
|
||||
B<a>*B<b> and places the result in B<r>.
|
||||
|
||||
bn_mul_recursive(B<r>, B<a>, B<b>, B<n2>, B<dna>, B<dnb>, B<tmp>) operates
|
||||
on the word arrays B<a> and B<b> of length B<n2>+B<dna> and B<n2>+B<dnb>
|
||||
(B<dna> and B<dnb> are currently allowed to be 0 or negative) and the 2*B<n2>
|
||||
word arrays B<r> and B<tmp>. B<n2> must be a power of 2. It computes
|
||||
B<a>*B<b> and places the result in B<r>.
|
||||
|
||||
bn_mul_part_recursive(B<r>, B<a>, B<b>, B<n>, B<tna>, B<tnb>, B<tmp>)
|
||||
operates on the word arrays B<a> and B<b> of length B<n>+B<tna> and
|
||||
B<n>+B<tnb> and the 4*B<n> word arrays B<r> and B<tmp>.
|
||||
|
||||
bn_mul_low_recursive(B<r>, B<a>, B<b>, B<n2>, B<tmp>) operates on the
|
||||
B<n2> word arrays B<r> and B<tmp> and the B<n2>/2 word arrays B<a>
|
||||
and B<b>.
|
||||
|
||||
BN_mul() calls bn_mul_normal(), or an optimized implementation if the
|
||||
factors have the same size: bn_mul_comba8() is used if they are 8
|
||||
words long, bn_mul_recursive() if they are larger than
|
||||
B<BN_MULL_SIZE_NORMAL> and the size is an exact multiple of the word
|
||||
size, and bn_mul_part_recursive() for others that are larger than
|
||||
B<BN_MULL_SIZE_NORMAL>.
|
||||
|
||||
bn_sqr_normal(B<r>, B<a>, B<n>, B<tmp>) operates on the B<n> word array
|
||||
B<a> and the 2*B<n> word arrays B<tmp> and B<r>.
|
||||
|
||||
The implementations use the following macros which, depending on the
|
||||
architecture, may use "long long" C operations or inline assembler.
|
||||
They are defined in C<bn_lcl.h>.
|
||||
|
||||
mul(B<r>, B<a>, B<w>, B<c>) computes B<w>*B<a>+B<c> and places the
|
||||
low word of the result in B<r> and the high word in B<c>.
|
||||
|
||||
mul_add(B<r>, B<a>, B<w>, B<c>) computes B<w>*B<a>+B<r>+B<c> and
|
||||
places the low word of the result in B<r> and the high word in B<c>.
|
||||
|
||||
sqr(B<r0>, B<r1>, B<a>) computes B<a>*B<a> and places the low word
|
||||
of the result in B<r0> and the high word in B<r1>.
|
||||
|
||||
=head2 Size changes
|
||||
|
||||
bn_expand() ensures that B<a> has enough space for a B<bits> bit
|
||||
number. bn_wexpand() ensures that B<a> has enough space for an
|
||||
B<n> word number. If the number has to be expanded, both macros
|
||||
call bn_expand2(), which allocates a new B<d> array and copies the
|
||||
data. They return B<NULL> on error, B<a> otherwise.
|
||||
|
||||
The bn_fix_top() macro reduces B<a-E<gt>top> to point to the most
|
||||
significant non-zero word plus one when B<a> has shrunk.
|
||||
|
||||
=head2 Debugging
|
||||
|
||||
bn_check_top() verifies that C<((a)-E<gt>top E<gt>= 0 && (a)-E<gt>top
|
||||
E<lt>= (a)-E<gt>dmax)>. A violation will cause the program to abort.
|
||||
|
||||
bn_print() prints B<a> to stderr. bn_dump() prints B<n> words at B<d>
|
||||
(in reverse order, i.e. most significant word first) to stderr.
|
||||
|
||||
bn_set_max() makes B<a> a static number with a B<dmax> of its current size.
|
||||
This is used by bn_set_low() and bn_set_high() to make B<r> a read-only
|
||||
B<BIGNUM> that contains the B<n> low or high words of B<a>.
|
||||
|
||||
If B<BN_DEBUG> is not defined, bn_check_top(), bn_print(), bn_dump()
|
||||
and bn_set_max() are defined as empty macros.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<bn(3)>
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the OpenSSL license (the "License"). You may not use
|
||||
this file except in compliance with the License. You can obtain a copy
|
||||
in the file LICENSE in the source distribution or at
|
||||
L<https://www.openssl.org/source/license.html>.
|
||||
|
||||
=cut
|
328
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/alpha-mont.pl
vendored
Normal file
328
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/alpha-mont.pl
vendored
Normal file
|
@ -0,0 +1,328 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2006-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
#
|
||||
# On 21264 RSA sign performance improves by 70/35/20/15 percent for
|
||||
# 512/1024/2048/4096 bit key lengths. This is against vendor compiler
|
||||
# instructed to '-tune host' code with in-line assembler. Other
|
||||
# benchmarks improve by 15-20%. To anchor it to something else, the
|
||||
# code provides approximately the same performance per GHz as AMD64.
|
||||
# I.e. if you compare 1GHz 21264 and 2GHz Opteron, you'll observe ~2x
|
||||
# difference.
|
||||
|
||||
$output=pop;
|
||||
open STDOUT,">$output";
|
||||
|
||||
# int bn_mul_mont(
|
||||
$rp="a0"; # BN_ULONG *rp,
|
||||
$ap="a1"; # const BN_ULONG *ap,
|
||||
$bp="a2"; # const BN_ULONG *bp,
|
||||
$np="a3"; # const BN_ULONG *np,
|
||||
$n0="a4"; # const BN_ULONG *n0,
|
||||
$num="a5"; # int num);
|
||||
|
||||
$lo0="t0";
|
||||
$hi0="t1";
|
||||
$lo1="t2";
|
||||
$hi1="t3";
|
||||
$aj="t4";
|
||||
$bi="t5";
|
||||
$nj="t6";
|
||||
$tp="t7";
|
||||
$alo="t8";
|
||||
$ahi="t9";
|
||||
$nlo="t10";
|
||||
$nhi="t11";
|
||||
$tj="t12";
|
||||
$i="s3";
|
||||
$j="s4";
|
||||
$m1="s5";
|
||||
|
||||
$code=<<___;
|
||||
#ifdef __linux__
|
||||
#include <asm/regdef.h>
|
||||
#else
|
||||
#include <asm.h>
|
||||
#include <regdef.h>
|
||||
#endif
|
||||
|
||||
.text
|
||||
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
||||
.globl bn_mul_mont
|
||||
.align 5
|
||||
.ent bn_mul_mont
|
||||
bn_mul_mont:
|
||||
lda sp,-48(sp)
|
||||
stq ra,0(sp)
|
||||
stq s3,8(sp)
|
||||
stq s4,16(sp)
|
||||
stq s5,24(sp)
|
||||
stq fp,32(sp)
|
||||
mov sp,fp
|
||||
.mask 0x0400f000,-48
|
||||
.frame fp,48,ra
|
||||
.prologue 0
|
||||
|
||||
.align 4
|
||||
.set reorder
|
||||
sextl $num,$num
|
||||
mov 0,v0
|
||||
cmplt $num,4,AT
|
||||
bne AT,.Lexit
|
||||
|
||||
ldq $hi0,0($ap) # ap[0]
|
||||
s8addq $num,16,AT
|
||||
ldq $aj,8($ap)
|
||||
subq sp,AT,sp
|
||||
ldq $bi,0($bp) # bp[0]
|
||||
lda AT,-4096(zero) # mov -4096,AT
|
||||
ldq $n0,0($n0)
|
||||
and sp,AT,sp
|
||||
|
||||
mulq $hi0,$bi,$lo0
|
||||
ldq $hi1,0($np) # np[0]
|
||||
umulh $hi0,$bi,$hi0
|
||||
ldq $nj,8($np)
|
||||
|
||||
mulq $lo0,$n0,$m1
|
||||
|
||||
mulq $hi1,$m1,$lo1
|
||||
umulh $hi1,$m1,$hi1
|
||||
|
||||
addq $lo1,$lo0,$lo1
|
||||
cmpult $lo1,$lo0,AT
|
||||
addq $hi1,AT,$hi1
|
||||
|
||||
mulq $aj,$bi,$alo
|
||||
mov 2,$j
|
||||
umulh $aj,$bi,$ahi
|
||||
mov sp,$tp
|
||||
|
||||
mulq $nj,$m1,$nlo
|
||||
s8addq $j,$ap,$aj
|
||||
umulh $nj,$m1,$nhi
|
||||
s8addq $j,$np,$nj
|
||||
.align 4
|
||||
.L1st:
|
||||
.set noreorder
|
||||
ldq $aj,0($aj)
|
||||
addl $j,1,$j
|
||||
ldq $nj,0($nj)
|
||||
lda $tp,8($tp)
|
||||
|
||||
addq $alo,$hi0,$lo0
|
||||
mulq $aj,$bi,$alo
|
||||
cmpult $lo0,$hi0,AT
|
||||
addq $nlo,$hi1,$lo1
|
||||
|
||||
mulq $nj,$m1,$nlo
|
||||
addq $ahi,AT,$hi0
|
||||
cmpult $lo1,$hi1,v0
|
||||
cmplt $j,$num,$tj
|
||||
|
||||
umulh $aj,$bi,$ahi
|
||||
addq $nhi,v0,$hi1
|
||||
addq $lo1,$lo0,$lo1
|
||||
s8addq $j,$ap,$aj
|
||||
|
||||
umulh $nj,$m1,$nhi
|
||||
cmpult $lo1,$lo0,v0
|
||||
addq $hi1,v0,$hi1
|
||||
s8addq $j,$np,$nj
|
||||
|
||||
stq $lo1,-8($tp)
|
||||
nop
|
||||
unop
|
||||
bne $tj,.L1st
|
||||
.set reorder
|
||||
|
||||
addq $alo,$hi0,$lo0
|
||||
addq $nlo,$hi1,$lo1
|
||||
cmpult $lo0,$hi0,AT
|
||||
cmpult $lo1,$hi1,v0
|
||||
addq $ahi,AT,$hi0
|
||||
addq $nhi,v0,$hi1
|
||||
|
||||
addq $lo1,$lo0,$lo1
|
||||
cmpult $lo1,$lo0,v0
|
||||
addq $hi1,v0,$hi1
|
||||
|
||||
stq $lo1,0($tp)
|
||||
|
||||
addq $hi1,$hi0,$hi1
|
||||
cmpult $hi1,$hi0,AT
|
||||
stq $hi1,8($tp)
|
||||
stq AT,16($tp)
|
||||
|
||||
mov 1,$i
|
||||
.align 4
|
||||
.Louter:
|
||||
s8addq $i,$bp,$bi
|
||||
ldq $hi0,0($ap)
|
||||
ldq $aj,8($ap)
|
||||
ldq $bi,0($bi)
|
||||
ldq $hi1,0($np)
|
||||
ldq $nj,8($np)
|
||||
ldq $tj,0(sp)
|
||||
|
||||
mulq $hi0,$bi,$lo0
|
||||
umulh $hi0,$bi,$hi0
|
||||
|
||||
addq $lo0,$tj,$lo0
|
||||
cmpult $lo0,$tj,AT
|
||||
addq $hi0,AT,$hi0
|
||||
|
||||
mulq $lo0,$n0,$m1
|
||||
|
||||
mulq $hi1,$m1,$lo1
|
||||
umulh $hi1,$m1,$hi1
|
||||
|
||||
addq $lo1,$lo0,$lo1
|
||||
cmpult $lo1,$lo0,AT
|
||||
mov 2,$j
|
||||
addq $hi1,AT,$hi1
|
||||
|
||||
mulq $aj,$bi,$alo
|
||||
mov sp,$tp
|
||||
umulh $aj,$bi,$ahi
|
||||
|
||||
mulq $nj,$m1,$nlo
|
||||
s8addq $j,$ap,$aj
|
||||
umulh $nj,$m1,$nhi
|
||||
.align 4
|
||||
.Linner:
|
||||
.set noreorder
|
||||
ldq $tj,8($tp) #L0
|
||||
nop #U1
|
||||
ldq $aj,0($aj) #L1
|
||||
s8addq $j,$np,$nj #U0
|
||||
|
||||
ldq $nj,0($nj) #L0
|
||||
nop #U1
|
||||
addq $alo,$hi0,$lo0 #L1
|
||||
lda $tp,8($tp)
|
||||
|
||||
mulq $aj,$bi,$alo #U1
|
||||
cmpult $lo0,$hi0,AT #L0
|
||||
addq $nlo,$hi1,$lo1 #L1
|
||||
addl $j,1,$j
|
||||
|
||||
mulq $nj,$m1,$nlo #U1
|
||||
addq $ahi,AT,$hi0 #L0
|
||||
addq $lo0,$tj,$lo0 #L1
|
||||
cmpult $lo1,$hi1,v0 #U0
|
||||
|
||||
umulh $aj,$bi,$ahi #U1
|
||||
cmpult $lo0,$tj,AT #L0
|
||||
addq $lo1,$lo0,$lo1 #L1
|
||||
addq $nhi,v0,$hi1 #U0
|
||||
|
||||
umulh $nj,$m1,$nhi #U1
|
||||
s8addq $j,$ap,$aj #L0
|
||||
cmpult $lo1,$lo0,v0 #L1
|
||||
cmplt $j,$num,$tj #U0 # borrow $tj
|
||||
|
||||
addq $hi0,AT,$hi0 #L0
|
||||
addq $hi1,v0,$hi1 #U1
|
||||
stq $lo1,-8($tp) #L1
|
||||
bne $tj,.Linner #U0
|
||||
.set reorder
|
||||
|
||||
ldq $tj,8($tp)
|
||||
addq $alo,$hi0,$lo0
|
||||
addq $nlo,$hi1,$lo1
|
||||
cmpult $lo0,$hi0,AT
|
||||
cmpult $lo1,$hi1,v0
|
||||
addq $ahi,AT,$hi0
|
||||
addq $nhi,v0,$hi1
|
||||
|
||||
addq $lo0,$tj,$lo0
|
||||
cmpult $lo0,$tj,AT
|
||||
addq $hi0,AT,$hi0
|
||||
|
||||
ldq $tj,16($tp)
|
||||
addq $lo1,$lo0,$j
|
||||
cmpult $j,$lo0,v0
|
||||
addq $hi1,v0,$hi1
|
||||
|
||||
addq $hi1,$hi0,$lo1
|
||||
stq $j,0($tp)
|
||||
cmpult $lo1,$hi0,$hi1
|
||||
addq $lo1,$tj,$lo1
|
||||
cmpult $lo1,$tj,AT
|
||||
addl $i,1,$i
|
||||
addq $hi1,AT,$hi1
|
||||
stq $lo1,8($tp)
|
||||
cmplt $i,$num,$tj # borrow $tj
|
||||
stq $hi1,16($tp)
|
||||
bne $tj,.Louter
|
||||
|
||||
s8addq $num,sp,$tj # &tp[num]
|
||||
mov $rp,$bp # put rp aside
|
||||
mov sp,$tp
|
||||
mov sp,$ap
|
||||
mov 0,$hi0 # clear borrow bit
|
||||
|
||||
.align 4
|
||||
.Lsub: ldq $lo0,0($tp)
|
||||
ldq $lo1,0($np)
|
||||
lda $tp,8($tp)
|
||||
lda $np,8($np)
|
||||
subq $lo0,$lo1,$lo1 # tp[i]-np[i]
|
||||
cmpult $lo0,$lo1,AT
|
||||
subq $lo1,$hi0,$lo0
|
||||
cmpult $lo1,$lo0,$hi0
|
||||
or $hi0,AT,$hi0
|
||||
stq $lo0,0($rp)
|
||||
cmpult $tp,$tj,v0
|
||||
lda $rp,8($rp)
|
||||
bne v0,.Lsub
|
||||
|
||||
subq $hi1,$hi0,$hi0 # handle upmost overflow bit
|
||||
mov sp,$tp
|
||||
mov $bp,$rp # restore rp
|
||||
|
||||
.align 4
|
||||
.Lcopy: ldq $aj,0($tp) # conditional copy
|
||||
ldq $nj,0($rp)
|
||||
lda $tp,8($tp)
|
||||
lda $rp,8($rp)
|
||||
cmoveq $hi0,$nj,$aj
|
||||
stq zero,-8($tp) # zap tp
|
||||
cmpult $tp,$tj,AT
|
||||
stq $aj,-8($rp)
|
||||
bne AT,.Lcopy
|
||||
mov 1,v0
|
||||
|
||||
.Lexit:
|
||||
.set noreorder
|
||||
mov fp,sp
|
||||
/*ldq ra,0(sp)*/
|
||||
ldq s3,8(sp)
|
||||
ldq s4,16(sp)
|
||||
ldq s5,24(sp)
|
||||
ldq fp,32(sp)
|
||||
lda sp,48(sp)
|
||||
ret (ra)
|
||||
.end bn_mul_mont
|
||||
.ascii "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 2
|
||||
___
|
||||
|
||||
# Emit the generated assembly. close() is where buffered output is finally
# flushed, so a failure here means the output is incomplete: report it
# instead of exiting successfully.
print $code;
|
||||
close STDOUT or die "error closing STDOUT: $!";
|
332
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/armv4-gf2m.pl
vendored
Normal file
332
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/armv4-gf2m.pl
vendored
Normal file
|
@ -0,0 +1,332 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
#
|
||||
# May 2011
|
||||
#
|
||||
# The module implements bn_GF2m_mul_2x2 polynomial multiplication
|
||||
# used in bn_gf2m.c. It's kind of low-hanging mechanical port from
|
||||
# C for the time being... Except that it has two code paths: pure
|
||||
# integer code suitable for any ARMv4 and later CPU and NEON code
|
||||
# suitable for ARMv7. Pure integer 1x1 multiplication subroutine runs
|
||||
# in ~45 cycles on dual-issue core such as Cortex A8, which is ~50%
|
||||
# faster than compiler-generated code. For ECDH and ECDSA verify (but
|
||||
# not for ECDSA sign) it means 25%-45% improvement depending on key
|
||||
# length, more for longer keys. Even though NEON 1x1 multiplication
|
||||
# runs in even less cycles, ~30, improvement is measurable only on
|
||||
# longer keys. One has to optimize code elsewhere to get NEON glow...
|
||||
#
|
||||
# April 2014
|
||||
#
|
||||
# Double bn_GF2m_mul_2x2 performance by using algorithm from paper
|
||||
# referred below, which improves ECDH and ECDSA verify benchmarks
|
||||
# by 18-40%.
|
||||
#
|
||||
# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
|
||||
# Polynomial Multiplication on ARM Processors using the NEON Engine.
|
||||
#
|
||||
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
|
||||
|
||||
# Command-line handling: the first argument is the perlasm flavour unless it
# already looks like a file name (word characters, a dot, an extension), in
# which case it is taken as the output file and no flavour is set. Otherwise
# further arguments are consumed until one looks like a file name or the
# argument list is exhausted.
$flavour = shift;
|
||||
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
|
||||
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
|
||||
|
||||
# With a real flavour the generated text is piped through arm-xlate.pl, which
# is looked up next to this script and then in ../../perlasm/.
if ($flavour && $flavour ne "void") {
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
|
||||
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
|
||||
die "can't locate arm-xlate.pl";
|
||||
|
||||
# Fail loudly if the translator process cannot be started.
open STDOUT,"| \"$^X\" $xlate $flavour $output" or die "can't call $xlate: $!";
|
||||
} else {
|
||||
# NOTE(review): this open is left unchecked so the script can presumably
# still be run without an output file name - confirm before adding a die.
open STDOUT,">$output";
|
||||
}
|
||||
|
||||
$code=<<___;
|
||||
#include "arm_arch.h"
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__)
|
||||
.syntax unified
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
___
|
||||
################
|
||||
# private interface to mul_1x1_ialu
|
||||
#
|
||||
# Registers holding the two words multiplied by mul_1x1_ialu.
$a="r1";
|
||||
$b="r0";
|
||||
|
||||
# Chained list assignment: both name lists receive r4..r9, so every
# table-building temporary ($a0..$a14) shares its register with one of the
# lookup/accumulate names ($hi,$lo,$t0,$t1,$i0,$i1). The trailing 12 has no
# variable to land in and is discarded.
($a0,$a1,$a2,$a12,$a4,$a14)=
|
||||
($hi,$lo,$t0,$t1, $i0,$i1 )=map("r$_",(4..9),12);
|
||||
|
||||
# Mask register; bn_GF2m_mul_2x2 loads it with 7<<2 before calling
# mul_1x1_ialu, which uses it to turn 3-bit slices of $b into table offsets.
$mask="r12";
|
||||
|
||||
$code.=<<___;
|
||||
.type mul_1x1_ialu,%function
|
||||
.align 5
|
||||
mul_1x1_ialu:
|
||||
mov $a0,#0
|
||||
bic $a1,$a,#3<<30 @ a1=a&0x3fffffff
|
||||
str $a0,[sp,#0] @ tab[0]=0
|
||||
add $a2,$a1,$a1 @ a2=a1<<1
|
||||
str $a1,[sp,#4] @ tab[1]=a1
|
||||
eor $a12,$a1,$a2 @ a1^a2
|
||||
str $a2,[sp,#8] @ tab[2]=a2
|
||||
mov $a4,$a1,lsl#2 @ a4=a1<<2
|
||||
str $a12,[sp,#12] @ tab[3]=a1^a2
|
||||
eor $a14,$a1,$a4 @ a1^a4
|
||||
str $a4,[sp,#16] @ tab[4]=a4
|
||||
eor $a0,$a2,$a4 @ a2^a4
|
||||
str $a14,[sp,#20] @ tab[5]=a1^a4
|
||||
eor $a12,$a12,$a4 @ a1^a2^a4
|
||||
str $a0,[sp,#24] @ tab[6]=a2^a4
|
||||
and $i0,$mask,$b,lsl#2
|
||||
str $a12,[sp,#28] @ tab[7]=a1^a2^a4
|
||||
|
||||
and $i1,$mask,$b,lsr#1
|
||||
ldr $lo,[sp,$i0] @ tab[b & 0x7]
|
||||
and $i0,$mask,$b,lsr#4
|
||||
ldr $t1,[sp,$i1] @ tab[b >> 3 & 0x7]
|
||||
and $i1,$mask,$b,lsr#7
|
||||
ldr $t0,[sp,$i0] @ tab[b >> 6 & 0x7]
|
||||
eor $lo,$lo,$t1,lsl#3 @ stall
|
||||
mov $hi,$t1,lsr#29
|
||||
ldr $t1,[sp,$i1] @ tab[b >> 9 & 0x7]
|
||||
|
||||
and $i0,$mask,$b,lsr#10
|
||||
eor $lo,$lo,$t0,lsl#6
|
||||
eor $hi,$hi,$t0,lsr#26
|
||||
ldr $t0,[sp,$i0] @ tab[b >> 12 & 0x7]
|
||||
|
||||
and $i1,$mask,$b,lsr#13
|
||||
eor $lo,$lo,$t1,lsl#9
|
||||
eor $hi,$hi,$t1,lsr#23
|
||||
ldr $t1,[sp,$i1] @ tab[b >> 15 & 0x7]
|
||||
|
||||
and $i0,$mask,$b,lsr#16
|
||||
eor $lo,$lo,$t0,lsl#12
|
||||
eor $hi,$hi,$t0,lsr#20
|
||||
ldr $t0,[sp,$i0] @ tab[b >> 18 & 0x7]
|
||||
|
||||
and $i1,$mask,$b,lsr#19
|
||||
eor $lo,$lo,$t1,lsl#15
|
||||
eor $hi,$hi,$t1,lsr#17
|
||||
ldr $t1,[sp,$i1] @ tab[b >> 21 & 0x7]
|
||||
|
||||
and $i0,$mask,$b,lsr#22
|
||||
eor $lo,$lo,$t0,lsl#18
|
||||
eor $hi,$hi,$t0,lsr#14
|
||||
ldr $t0,[sp,$i0] @ tab[b >> 24 & 0x7]
|
||||
|
||||
and $i1,$mask,$b,lsr#25
|
||||
eor $lo,$lo,$t1,lsl#21
|
||||
eor $hi,$hi,$t1,lsr#11
|
||||
ldr $t1,[sp,$i1] @ tab[b >> 27 & 0x7]
|
||||
|
||||
tst $a,#1<<30
|
||||
and $i0,$mask,$b,lsr#28
|
||||
eor $lo,$lo,$t0,lsl#24
|
||||
eor $hi,$hi,$t0,lsr#8
|
||||
ldr $t0,[sp,$i0] @ tab[b >> 30 ]
|
||||
|
||||
#ifdef __thumb2__
|
||||
itt ne
|
||||
#endif
|
||||
eorne $lo,$lo,$b,lsl#30
|
||||
eorne $hi,$hi,$b,lsr#2
|
||||
tst $a,#1<<31
|
||||
eor $lo,$lo,$t1,lsl#27
|
||||
eor $hi,$hi,$t1,lsr#5
|
||||
#ifdef __thumb2__
|
||||
itt ne
|
||||
#endif
|
||||
eorne $lo,$lo,$b,lsl#31
|
||||
eorne $hi,$hi,$b,lsr#1
|
||||
eor $lo,$lo,$t0,lsl#30
|
||||
eor $hi,$hi,$t0,lsr#2
|
||||
|
||||
mov pc,lr
|
||||
.size mul_1x1_ialu,.-mul_1x1_ialu
|
||||
___
|
||||
################
|
||||
# void bn_GF2m_mul_2x2(BN_ULONG *r,
|
||||
# BN_ULONG a1,BN_ULONG a0,
|
||||
# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0
|
||||
{
|
||||
$code.=<<___;
|
||||
.global bn_GF2m_mul_2x2
|
||||
.type bn_GF2m_mul_2x2,%function
|
||||
.align 5
|
||||
bn_GF2m_mul_2x2:
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
stmdb sp!,{r10,lr}
|
||||
ldr r12,.LOPENSSL_armcap
|
||||
adr r10,.LOPENSSL_armcap
|
||||
ldr r12,[r12,r10]
|
||||
#ifdef __APPLE__
|
||||
ldr r12,[r12]
|
||||
#endif
|
||||
tst r12,#ARMV7_NEON
|
||||
itt ne
|
||||
ldrne r10,[sp],#8
|
||||
bne .LNEON
|
||||
stmdb sp!,{r4-r9}
|
||||
#else
|
||||
stmdb sp!,{r4-r10,lr}
|
||||
#endif
|
||||
___
|
||||
$ret="r10"; # reassigned 1st argument
|
||||
$code.=<<___;
|
||||
mov $ret,r0 @ reassign 1st argument
|
||||
mov $b,r3 @ $b=b1
|
||||
sub r7,sp,#36
|
||||
mov r8,sp
|
||||
and r7,r7,#-32
|
||||
ldr r3,[sp,#32] @ load b0
|
||||
mov $mask,#7<<2
|
||||
mov sp,r7 @ allocate tab[8]
|
||||
str r8,[r7,#32]
|
||||
|
||||
bl mul_1x1_ialu @ a1·b1
|
||||
str $lo,[$ret,#8]
|
||||
str $hi,[$ret,#12]
|
||||
|
||||
eor $b,$b,r3 @ flip b0 and b1
|
||||
eor $a,$a,r2 @ flip a0 and a1
|
||||
eor r3,r3,$b
|
||||
eor r2,r2,$a
|
||||
eor $b,$b,r3
|
||||
eor $a,$a,r2
|
||||
bl mul_1x1_ialu @ a0·b0
|
||||
str $lo,[$ret]
|
||||
str $hi,[$ret,#4]
|
||||
|
||||
eor $a,$a,r2
|
||||
eor $b,$b,r3
|
||||
bl mul_1x1_ialu @ (a1+a0)·(b1+b0)
|
||||
___
|
||||
@r=map("r$_",(6..9));
|
||||
$code.=<<___;
|
||||
ldmia $ret,{@r[0]-@r[3]}
|
||||
eor $lo,$lo,$hi
|
||||
ldr sp,[sp,#32] @ destroy tab[8]
|
||||
eor $hi,$hi,@r[1]
|
||||
eor $lo,$lo,@r[0]
|
||||
eor $hi,$hi,@r[2]
|
||||
eor $lo,$lo,@r[3]
|
||||
eor $hi,$hi,@r[3]
|
||||
str $hi,[$ret,#8]
|
||||
eor $lo,$lo,$hi
|
||||
str $lo,[$ret,#4]
|
||||
|
||||
#if __ARM_ARCH__>=5
|
||||
ldmia sp!,{r4-r10,pc}
|
||||
#else
|
||||
ldmia sp!,{r4-r10,lr}
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
bx lr @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
___
|
||||
}
|
||||
{
|
||||
# $r=q0, $t0..$t2=q1..q3, $t3=q8; the surplus q9..q12 are discarded.
my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
|
||||
# $a=d26, $b=d27, $k48=d28, $k32=d29, $k16=d30; the surplus d31 is discarded.
my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
|
||||
|
||||
$code.=<<___;
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
.align 5
|
||||
.LNEON:
|
||||
ldr r12, [sp] @ 5th argument
|
||||
vmov $a, r2, r1
|
||||
vmov $b, r12, r3
|
||||
vmov.i64 $k48, #0x0000ffffffffffff
|
||||
vmov.i64 $k32, #0x00000000ffffffff
|
||||
vmov.i64 $k16, #0x000000000000ffff
|
||||
|
||||
vext.8 $t0#lo, $a, $a, #1 @ A1
|
||||
vmull.p8 $t0, $t0#lo, $b @ F = A1*B
|
||||
vext.8 $r#lo, $b, $b, #1 @ B1
|
||||
vmull.p8 $r, $a, $r#lo @ E = A*B1
|
||||
vext.8 $t1#lo, $a, $a, #2 @ A2
|
||||
vmull.p8 $t1, $t1#lo, $b @ H = A2*B
|
||||
vext.8 $t3#lo, $b, $b, #2 @ B2
|
||||
vmull.p8 $t3, $a, $t3#lo @ G = A*B2
|
||||
vext.8 $t2#lo, $a, $a, #3 @ A3
|
||||
veor $t0, $t0, $r @ L = E + F
|
||||
vmull.p8 $t2, $t2#lo, $b @ J = A3*B
|
||||
vext.8 $r#lo, $b, $b, #3 @ B3
|
||||
veor $t1, $t1, $t3 @ M = G + H
|
||||
vmull.p8 $r, $a, $r#lo @ I = A*B3
|
||||
veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8
|
||||
vand $t0#hi, $t0#hi, $k48
|
||||
vext.8 $t3#lo, $b, $b, #4 @ B4
|
||||
veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16
|
||||
vand $t1#hi, $t1#hi, $k32
|
||||
vmull.p8 $t3, $a, $t3#lo @ K = A*B4
|
||||
veor $t2, $t2, $r @ N = I + J
|
||||
veor $t0#lo, $t0#lo, $t0#hi
|
||||
veor $t1#lo, $t1#lo, $t1#hi
|
||||
veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24
|
||||
vand $t2#hi, $t2#hi, $k16
|
||||
vext.8 $t0, $t0, $t0, #15
|
||||
veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 $t3#hi, #0
|
||||
vext.8 $t1, $t1, $t1, #14
|
||||
veor $t2#lo, $t2#lo, $t2#hi
|
||||
vmull.p8 $r, $a, $b @ D = A*B
|
||||
vext.8 $t3, $t3, $t3, #12
|
||||
vext.8 $t2, $t2, $t2, #13
|
||||
veor $t0, $t0, $t1
|
||||
veor $t2, $t2, $t3
|
||||
veor $r, $r, $t0
|
||||
veor $r, $r, $t2
|
||||
|
||||
vst1.32 {$r}, [r0]
|
||||
ret @ bx lr
|
||||
#endif
|
||||
___
|
||||
}
|
||||
$code.=<<___;
|
||||
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.align 5
|
||||
.LOPENSSL_armcap:
|
||||
.word OPENSSL_armcap_P-.
|
||||
#endif
|
||||
.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 5
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.comm OPENSSL_armcap_P,4,4
|
||||
#endif
|
||||
___
|
||||
|
||||
# Post-process the accumulated template line by line and print the result.
foreach (split("\n",$code)) {
|
||||
# Replace every `...` span with the result of evaluating it as Perl.
s/\`([^\`]*)\`/eval $1/geo;
|
||||
|
||||
# The three substitutions are chained with "or": a later one runs only if
# the earlier ones changed nothing on this line.
#   qN#lo / qN#hi  ->  d(2N) / d(2N+1)
#   ret            ->  bx lr   (not re-encoded by the next rule)
#   literal bx lr  ->  raw .word encoding
s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
|
||||
s/\bret\b/bx lr/go or
|
||||
s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
|
||||
|
||||
print $_,"\n";
|
||||
}
|
||||
# STDOUT may be a pipe to arm-xlate.pl; checking close() surfaces a failed
# flush (and, for a pipe, a failing child) instead of exiting successfully.
close STDOUT or die "error closing STDOUT: $!"; # enforce flush
|
757
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/armv4-mont.pl
vendored
Normal file
757
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/armv4-mont.pl
vendored
Normal file
|
@ -0,0 +1,757 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
|
||||
# January 2007.
|
||||
|
||||
# Montgomery multiplication for ARMv4.
|
||||
#
|
||||
# Performance improvement naturally varies among CPU implementations
|
||||
# and compilers. The code was observed to provide +65-35% improvement
|
||||
# [depending on key length, less for longer keys] on ARM920T, and
|
||||
# +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code
|
||||
# base and compiler generated code with in-lined umull and even umlal
|
||||
# instructions. The latter means that this code didn't really have an
|
||||
# "advantage" of utilizing some "secret" instruction.
|
||||
#
|
||||
# The code is interoperable with Thumb ISA and is rather compact, less
|
||||
# than 1/2KB. Windows CE port would be trivial, as it's exclusively
|
||||
# about decorations, ABI and instruction syntax are identical.
|
||||
|
||||
# November 2013
|
||||
#
|
||||
# Add NEON code path, which handles lengths divisible by 8. RSA/DSA
|
||||
# performance improvement on Cortex-A8 is ~45-100% depending on key
|
||||
# length, more for longer keys. On Cortex-A15 the span is ~10-105%.
|
||||
# On Snapdragon S4 improvement was measured to vary from ~70% to
|
||||
# incredible ~380%, yes, 4.8x faster, for RSA4096 sign. But this is
|
||||
# rather because original integer-only code seems to perform
|
||||
# suboptimally on S4. Situation on Cortex-A9 is unfortunately
|
||||
# different. It's being looked into, but the trouble is that
|
||||
# performance for vectors longer than 256 bits is actually a couple
|
||||
# of percent worse than for integer-only code. The code is chosen
|
||||
# for execution on all NEON-capable processors, because gain on
|
||||
# others outweighs the marginal loss on Cortex-A9.
|
||||
|
||||
# September 2015
|
||||
#
|
||||
# Align Cortex-A9 performance with November 2013 improvements, i.e.
|
||||
# NEON code is now ~20-105% faster than integer-only one on this
|
||||
# processor. But this optimization further improved performance even
|
||||
# on other processors: NEON code path is ~45-180% faster than original
|
||||
# integer-only on Cortex-A8, ~10-210% on Cortex-A15, ~70-450% on
|
||||
# Snapdragon S4.
|
||||
|
||||
$flavour = shift;
|
||||
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
|
||||
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
|
||||
|
||||
if ($flavour && $flavour ne "void") {
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
|
||||
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
|
||||
die "can't locate arm-xlate.pl";
|
||||
|
||||
# Pipe everything printed from here on through arm-xlate.pl. The output
# path is quoted so that names containing spaces reach the translator as a
# single argument, and a failure to start the translator aborts generation.
open STDOUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
|
||||
} else {
|
||||
open STDOUT,">$output";
|
||||
}
|
||||
|
||||
# Register allocation for the integer-only bn_mul_mont code path.
# r0 receives num (copied from ip after it is loaded from the stack) and is
# later repointed at &tp[num-1].
$num="r0"; # starts as num argument, but holds &tp[num-1]
|
||||
$ap="r1";
|
||||
# r2 is shared: bp on entry, the current word b[i] inside the loops, and rp
# (reloaded from the argument block) for the final subtract/copy.
$bp="r2"; $bi="r2"; $rp="r2";
|
||||
$np="r3";
|
||||
$tp="r4";
|
||||
$aj="r5";
|
||||
$nj="r6";
|
||||
$tj="r7";
|
||||
$n0="r8";
|
||||
########### # r9 is reserved by ELF as platform specific, e.g. TLS pointer
|
||||
$alo="r10"; # sl, gcc uses it to keep @GOT
|
||||
$ahi="r11"; # fp
|
||||
$nlo="r12"; # ip
|
||||
########### # r13 is stack pointer
|
||||
$nhi="r14"; # lr
|
||||
########### # r15 is program counter
|
||||
|
||||
#### argument block layout relative to &tp[num-1], a.k.a. $num
# Word offsets from $num, as set up by the bn_mul_mont prologue:
#   0       tp[num-1]
#   1       tp[num]                 (the "+extra dword")
#   2..11   saved r4-r12,lr         (10 registers)
#   12      r0 = rp                 (pushed by "stmdb sp!,{r0,r2}")
#   13      r2 = bp
#   14      5th argument, pointer to n0 (caller's stack); the slot is later
#           overwritten with the loaded n0 value
#   15      6th argument, num (caller's stack); reused to hold &bp[num]
$_rp="$num,#12*4";
|
||||
# ap permanently resides in r1
|
||||
$_bp="$num,#13*4";
|
||||
# np permanently resides in r3
|
||||
$_n0="$num,#14*4";
|
||||
# $_bpend aliases the num slot: once num has been consumed the slot stores
# the end-of-bp pointer used to terminate the outer loop.
$_num="$num,#15*4"; $_bpend=$_num;
|
||||
|
||||
$code=<<___;
|
||||
#include "arm_arch.h"
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__)
|
||||
.syntax unified
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.align 5
|
||||
.LOPENSSL_armcap:
|
||||
.word OPENSSL_armcap_P-.Lbn_mul_mont
|
||||
#endif
|
||||
|
||||
.global bn_mul_mont
|
||||
.type bn_mul_mont,%function
|
||||
|
||||
.align 5
|
||||
bn_mul_mont:
|
||||
.Lbn_mul_mont:
|
||||
ldr ip,[sp,#4] @ load num
|
||||
stmdb sp!,{r0,r2} @ sp points at argument block
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
tst ip,#7
|
||||
bne .Lialu
|
||||
adr r0,.Lbn_mul_mont
|
||||
ldr r2,.LOPENSSL_armcap
|
||||
ldr r0,[r0,r2]
|
||||
#ifdef __APPLE__
|
||||
ldr r0,[r0]
|
||||
#endif
|
||||
tst r0,#ARMV7_NEON @ NEON available?
|
||||
ldmia sp, {r0,r2}
|
||||
beq .Lialu
|
||||
add sp,sp,#8
|
||||
b bn_mul8x_mont_neon
|
||||
.align 4
|
||||
.Lialu:
|
||||
#endif
|
||||
cmp ip,#2
|
||||
mov $num,ip @ load num
|
||||
#ifdef __thumb2__
|
||||
ittt lt
|
||||
#endif
|
||||
movlt r0,#0
|
||||
addlt sp,sp,#2*4
|
||||
blt .Labrt
|
||||
|
||||
stmdb sp!,{r4-r12,lr} @ save 10 registers
|
||||
|
||||
mov $num,$num,lsl#2 @ rescale $num for byte count
|
||||
sub sp,sp,$num @ alloca(4*num)
|
||||
sub sp,sp,#4 @ +extra dword
|
||||
sub $num,$num,#4 @ "num=num-1"
|
||||
add $tp,$bp,$num @ &bp[num-1]
|
||||
|
||||
add $num,sp,$num @ $num to point at &tp[num-1]
|
||||
ldr $n0,[$_n0] @ &n0
|
||||
ldr $bi,[$bp] @ bp[0]
|
||||
ldr $aj,[$ap],#4 @ ap[0],ap++
|
||||
ldr $nj,[$np],#4 @ np[0],np++
|
||||
ldr $n0,[$n0] @ *n0
|
||||
str $tp,[$_bpend] @ save &bp[num]
|
||||
|
||||
umull $alo,$ahi,$aj,$bi @ ap[0]*bp[0]
|
||||
str $n0,[$_n0] @ save n0 value
|
||||
mul $n0,$alo,$n0 @ "tp[0]"*n0
|
||||
mov $nlo,#0
|
||||
umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"t[0]"
|
||||
mov $tp,sp
|
||||
|
||||
.L1st:
|
||||
ldr $aj,[$ap],#4 @ ap[j],ap++
|
||||
mov $alo,$ahi
|
||||
ldr $nj,[$np],#4 @ np[j],np++
|
||||
mov $ahi,#0
|
||||
umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[0]
|
||||
mov $nhi,#0
|
||||
umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
|
||||
adds $nlo,$nlo,$alo
|
||||
str $nlo,[$tp],#4 @ tp[j-1]=,tp++
|
||||
adc $nlo,$nhi,#0
|
||||
cmp $tp,$num
|
||||
bne .L1st
|
||||
|
||||
adds $nlo,$nlo,$ahi
|
||||
ldr $tp,[$_bp] @ restore bp
|
||||
mov $nhi,#0
|
||||
ldr $n0,[$_n0] @ restore n0
|
||||
adc $nhi,$nhi,#0
|
||||
str $nlo,[$num] @ tp[num-1]=
|
||||
mov $tj,sp
|
||||
str $nhi,[$num,#4] @ tp[num]=
|
||||
|
||||
.Louter:
|
||||
sub $tj,$num,$tj @ "original" $num-1 value
|
||||
sub $ap,$ap,$tj @ "rewind" ap to &ap[1]
|
||||
ldr $bi,[$tp,#4]! @ *(++bp)
|
||||
sub $np,$np,$tj @ "rewind" np to &np[1]
|
||||
ldr $aj,[$ap,#-4] @ ap[0]
|
||||
ldr $alo,[sp] @ tp[0]
|
||||
ldr $nj,[$np,#-4] @ np[0]
|
||||
ldr $tj,[sp,#4] @ tp[1]
|
||||
|
||||
mov $ahi,#0
|
||||
umlal $alo,$ahi,$aj,$bi @ ap[0]*bp[i]+tp[0]
|
||||
str $tp,[$_bp] @ save bp
|
||||
mul $n0,$alo,$n0
|
||||
mov $nlo,#0
|
||||
umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"tp[0]"
|
||||
mov $tp,sp
|
||||
|
||||
.Linner:
|
||||
ldr $aj,[$ap],#4 @ ap[j],ap++
|
||||
adds $alo,$ahi,$tj @ +=tp[j]
|
||||
ldr $nj,[$np],#4 @ np[j],np++
|
||||
mov $ahi,#0
|
||||
umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[i]
|
||||
mov $nhi,#0
|
||||
umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
|
||||
adc $ahi,$ahi,#0
|
||||
ldr $tj,[$tp,#8] @ tp[j+1]
|
||||
adds $nlo,$nlo,$alo
|
||||
str $nlo,[$tp],#4 @ tp[j-1]=,tp++
|
||||
adc $nlo,$nhi,#0
|
||||
cmp $tp,$num
|
||||
bne .Linner
|
||||
|
||||
adds $nlo,$nlo,$ahi
|
||||
mov $nhi,#0
|
||||
ldr $tp,[$_bp] @ restore bp
|
||||
adc $nhi,$nhi,#0
|
||||
ldr $n0,[$_n0] @ restore n0
|
||||
adds $nlo,$nlo,$tj
|
||||
ldr $tj,[$_bpend] @ restore &bp[num]
|
||||
adc $nhi,$nhi,#0
|
||||
str $nlo,[$num] @ tp[num-1]=
|
||||
str $nhi,[$num,#4] @ tp[num]=
|
||||
|
||||
cmp $tp,$tj
|
||||
#ifdef __thumb2__
|
||||
itt ne
|
||||
#endif
|
||||
movne $tj,sp
|
||||
bne .Louter
|
||||
|
||||
ldr $rp,[$_rp] @ pull rp
|
||||
mov $aj,sp
|
||||
add $num,$num,#4 @ $num to point at &tp[num]
|
||||
sub $aj,$num,$aj @ "original" num value
|
||||
mov $tp,sp @ "rewind" $tp
|
||||
mov $ap,$tp @ "borrow" $ap
|
||||
sub $np,$np,$aj @ "rewind" $np to &np[0]
|
||||
|
||||
subs $tj,$tj,$tj @ "clear" carry flag
|
||||
.Lsub: ldr $tj,[$tp],#4
|
||||
ldr $nj,[$np],#4
|
||||
sbcs $tj,$tj,$nj @ tp[j]-np[j]
|
||||
str $tj,[$rp],#4 @ rp[j]=
|
||||
teq $tp,$num @ preserve carry
|
||||
bne .Lsub
|
||||
sbcs $nhi,$nhi,#0 @ upmost carry
|
||||
mov $tp,sp @ "rewind" $tp
|
||||
sub $rp,$rp,$aj @ "rewind" $rp
|
||||
|
||||
.Lcopy: ldr $tj,[$tp] @ conditional copy
|
||||
ldr $aj,[$rp]
|
||||
str sp,[$tp],#4 @ zap tp
|
||||
#ifdef __thumb2__
|
||||
it cc
|
||||
#endif
|
||||
movcc $aj,$tj
|
||||
str $aj,[$rp],#4
|
||||
teq $tp,$num @ preserve carry
|
||||
bne .Lcopy
|
||||
|
||||
mov sp,$num
|
||||
add sp,sp,#4 @ skip over tp[num+1]
|
||||
ldmia sp!,{r4-r12,lr} @ restore registers
|
||||
add sp,sp,#2*4 @ skip over {r0,r2}
|
||||
mov r0,#1
|
||||
.Labrt:
|
||||
#if __ARM_ARCH__>=5
|
||||
ret @ bx lr
|
||||
#else
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
bx lr @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
.size bn_mul_mont,.-bn_mul_mont
|
||||
___
|
||||
{
|
||||
my ($A0,$A1,$A2,$A3)=map("d$_",(0..3));
|
||||
my ($N0,$N1,$N2,$N3)=map("d$_",(4..7));
|
||||
my ($Z,$Temp)=("q4","q5");
|
||||
my @ACC=map("q$_",(6..13));
|
||||
my ($Bi,$Ni,$M0)=map("d$_",(28..31));
|
||||
my $zero="$Z#lo";
|
||||
my $temp="$Temp#lo";
|
||||
|
||||
my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5));
|
||||
my ($tinptr,$toutptr,$inner,$outer,$bnptr)=map("r$_",(6..11));
|
||||
|
||||
$code.=<<___;
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
.type bn_mul8x_mont_neon,%function
|
||||
.align 5
|
||||
bn_mul8x_mont_neon:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4-r11}
|
||||
vstmdb sp!,{d8-d15} @ ABI specification says so
|
||||
ldmia ip,{r4-r5} @ load rest of parameter block
|
||||
mov ip,sp
|
||||
|
||||
cmp $num,#8
|
||||
bhi .LNEON_8n
|
||||
|
||||
@ special case for $num==8, everything is in register bank...
|
||||
|
||||
vld1.32 {${Bi}[0]}, [$bptr,:32]!
|
||||
veor $zero,$zero,$zero
|
||||
sub $toutptr,sp,$num,lsl#4
|
||||
vld1.32 {$A0-$A3}, [$aptr]! @ can't specify :32 :-(
|
||||
and $toutptr,$toutptr,#-64
|
||||
vld1.32 {${M0}[0]}, [$n0,:32]
|
||||
mov sp,$toutptr @ alloca
|
||||
vzip.16 $Bi,$zero
|
||||
|
||||
vmull.u32 @ACC[0],$Bi,${A0}[0]
|
||||
vmull.u32 @ACC[1],$Bi,${A0}[1]
|
||||
vmull.u32 @ACC[2],$Bi,${A1}[0]
|
||||
vshl.i64 $Ni,@ACC[0]#hi,#16
|
||||
vmull.u32 @ACC[3],$Bi,${A1}[1]
|
||||
|
||||
vadd.u64 $Ni,$Ni,@ACC[0]#lo
|
||||
veor $zero,$zero,$zero
|
||||
vmul.u32 $Ni,$Ni,$M0
|
||||
|
||||
vmull.u32 @ACC[4],$Bi,${A2}[0]
|
||||
vld1.32 {$N0-$N3}, [$nptr]!
|
||||
vmull.u32 @ACC[5],$Bi,${A2}[1]
|
||||
vmull.u32 @ACC[6],$Bi,${A3}[0]
|
||||
vzip.16 $Ni,$zero
|
||||
vmull.u32 @ACC[7],$Bi,${A3}[1]
|
||||
|
||||
vmlal.u32 @ACC[0],$Ni,${N0}[0]
|
||||
sub $outer,$num,#1
|
||||
vmlal.u32 @ACC[1],$Ni,${N0}[1]
|
||||
vmlal.u32 @ACC[2],$Ni,${N1}[0]
|
||||
vmlal.u32 @ACC[3],$Ni,${N1}[1]
|
||||
|
||||
vmlal.u32 @ACC[4],$Ni,${N2}[0]
|
||||
vmov $Temp,@ACC[0]
|
||||
vmlal.u32 @ACC[5],$Ni,${N2}[1]
|
||||
vmov @ACC[0],@ACC[1]
|
||||
vmlal.u32 @ACC[6],$Ni,${N3}[0]
|
||||
vmov @ACC[1],@ACC[2]
|
||||
vmlal.u32 @ACC[7],$Ni,${N3}[1]
|
||||
vmov @ACC[2],@ACC[3]
|
||||
vmov @ACC[3],@ACC[4]
|
||||
vshr.u64 $temp,$temp,#16
|
||||
vmov @ACC[4],@ACC[5]
|
||||
vmov @ACC[5],@ACC[6]
|
||||
vadd.u64 $temp,$temp,$Temp#hi
|
||||
vmov @ACC[6],@ACC[7]
|
||||
veor @ACC[7],@ACC[7]
|
||||
vshr.u64 $temp,$temp,#16
|
||||
|
||||
b .LNEON_outer8
|
||||
|
||||
.align 4
|
||||
.LNEON_outer8:
|
||||
vld1.32 {${Bi}[0]}, [$bptr,:32]!
|
||||
veor $zero,$zero,$zero
|
||||
vzip.16 $Bi,$zero
|
||||
vadd.u64 @ACC[0]#lo,@ACC[0]#lo,$temp
|
||||
|
||||
vmlal.u32 @ACC[0],$Bi,${A0}[0]
|
||||
vmlal.u32 @ACC[1],$Bi,${A0}[1]
|
||||
vmlal.u32 @ACC[2],$Bi,${A1}[0]
|
||||
vshl.i64 $Ni,@ACC[0]#hi,#16
|
||||
vmlal.u32 @ACC[3],$Bi,${A1}[1]
|
||||
|
||||
vadd.u64 $Ni,$Ni,@ACC[0]#lo
|
||||
veor $zero,$zero,$zero
|
||||
subs $outer,$outer,#1
|
||||
vmul.u32 $Ni,$Ni,$M0
|
||||
|
||||
vmlal.u32 @ACC[4],$Bi,${A2}[0]
|
||||
vmlal.u32 @ACC[5],$Bi,${A2}[1]
|
||||
vmlal.u32 @ACC[6],$Bi,${A3}[0]
|
||||
vzip.16 $Ni,$zero
|
||||
vmlal.u32 @ACC[7],$Bi,${A3}[1]
|
||||
|
||||
vmlal.u32 @ACC[0],$Ni,${N0}[0]
|
||||
vmlal.u32 @ACC[1],$Ni,${N0}[1]
|
||||
vmlal.u32 @ACC[2],$Ni,${N1}[0]
|
||||
vmlal.u32 @ACC[3],$Ni,${N1}[1]
|
||||
|
||||
vmlal.u32 @ACC[4],$Ni,${N2}[0]
|
||||
vmov $Temp,@ACC[0]
|
||||
vmlal.u32 @ACC[5],$Ni,${N2}[1]
|
||||
vmov @ACC[0],@ACC[1]
|
||||
vmlal.u32 @ACC[6],$Ni,${N3}[0]
|
||||
vmov @ACC[1],@ACC[2]
|
||||
vmlal.u32 @ACC[7],$Ni,${N3}[1]
|
||||
vmov @ACC[2],@ACC[3]
|
||||
vmov @ACC[3],@ACC[4]
|
||||
vshr.u64 $temp,$temp,#16
|
||||
vmov @ACC[4],@ACC[5]
|
||||
vmov @ACC[5],@ACC[6]
|
||||
vadd.u64 $temp,$temp,$Temp#hi
|
||||
vmov @ACC[6],@ACC[7]
|
||||
veor @ACC[7],@ACC[7]
|
||||
vshr.u64 $temp,$temp,#16
|
||||
|
||||
bne .LNEON_outer8
|
||||
|
||||
vadd.u64 @ACC[0]#lo,@ACC[0]#lo,$temp
|
||||
mov $toutptr,sp
|
||||
vshr.u64 $temp,@ACC[0]#lo,#16
|
||||
mov $inner,$num
|
||||
vadd.u64 @ACC[0]#hi,@ACC[0]#hi,$temp
|
||||
add $tinptr,sp,#96
|
||||
vshr.u64 $temp,@ACC[0]#hi,#16
|
||||
vzip.16 @ACC[0]#lo,@ACC[0]#hi
|
||||
|
||||
b .LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
.LNEON_8n:
|
||||
veor @ACC[0],@ACC[0],@ACC[0]
|
||||
sub $toutptr,sp,#128
|
||||
veor @ACC[1],@ACC[1],@ACC[1]
|
||||
sub $toutptr,$toutptr,$num,lsl#4
|
||||
veor @ACC[2],@ACC[2],@ACC[2]
|
||||
and $toutptr,$toutptr,#-64
|
||||
veor @ACC[3],@ACC[3],@ACC[3]
|
||||
mov sp,$toutptr @ alloca
|
||||
veor @ACC[4],@ACC[4],@ACC[4]
|
||||
add $toutptr,$toutptr,#256
|
||||
veor @ACC[5],@ACC[5],@ACC[5]
|
||||
sub $inner,$num,#8
|
||||
veor @ACC[6],@ACC[6],@ACC[6]
|
||||
veor @ACC[7],@ACC[7],@ACC[7]
|
||||
|
||||
.LNEON_8n_init:
|
||||
vst1.64 {@ACC[0]-@ACC[1]},[$toutptr,:256]!
|
||||
subs $inner,$inner,#8
|
||||
vst1.64 {@ACC[2]-@ACC[3]},[$toutptr,:256]!
|
||||
vst1.64 {@ACC[4]-@ACC[5]},[$toutptr,:256]!
|
||||
vst1.64 {@ACC[6]-@ACC[7]},[$toutptr,:256]!
|
||||
bne .LNEON_8n_init
|
||||
|
||||
add $tinptr,sp,#256
|
||||
vld1.32 {$A0-$A3},[$aptr]!
|
||||
add $bnptr,sp,#8
|
||||
vld1.32 {${M0}[0]},[$n0,:32]
|
||||
mov $outer,$num
|
||||
b .LNEON_8n_outer
|
||||
|
||||
.align 4
|
||||
.LNEON_8n_outer:
|
||||
vld1.32 {${Bi}[0]},[$bptr,:32]! @ *b++
|
||||
veor $zero,$zero,$zero
|
||||
vzip.16 $Bi,$zero
|
||||
add $toutptr,sp,#128
|
||||
vld1.32 {$N0-$N3},[$nptr]!
|
||||
|
||||
vmlal.u32 @ACC[0],$Bi,${A0}[0]
|
||||
vmlal.u32 @ACC[1],$Bi,${A0}[1]
|
||||
veor $zero,$zero,$zero
|
||||
vmlal.u32 @ACC[2],$Bi,${A1}[0]
|
||||
vshl.i64 $Ni,@ACC[0]#hi,#16
|
||||
vmlal.u32 @ACC[3],$Bi,${A1}[1]
|
||||
vadd.u64 $Ni,$Ni,@ACC[0]#lo
|
||||
vmlal.u32 @ACC[4],$Bi,${A2}[0]
|
||||
vmul.u32 $Ni,$Ni,$M0
|
||||
vmlal.u32 @ACC[5],$Bi,${A2}[1]
|
||||
vst1.32 {$Bi},[sp,:64] @ put aside smashed b[8*i+0]
|
||||
vmlal.u32 @ACC[6],$Bi,${A3}[0]
|
||||
vzip.16 $Ni,$zero
|
||||
vmlal.u32 @ACC[7],$Bi,${A3}[1]
|
||||
___
|
||||
for ($i=0; $i<7;) {
|
||||
$code.=<<___;
|
||||
vld1.32 {${Bi}[0]},[$bptr,:32]! @ *b++
|
||||
vmlal.u32 @ACC[0],$Ni,${N0}[0]
|
||||
veor $temp,$temp,$temp
|
||||
vmlal.u32 @ACC[1],$Ni,${N0}[1]
|
||||
vzip.16 $Bi,$temp
|
||||
vmlal.u32 @ACC[2],$Ni,${N1}[0]
|
||||
vshr.u64 @ACC[0]#lo,@ACC[0]#lo,#16
|
||||
vmlal.u32 @ACC[3],$Ni,${N1}[1]
|
||||
vmlal.u32 @ACC[4],$Ni,${N2}[0]
|
||||
vadd.u64 @ACC[0]#lo,@ACC[0]#lo,@ACC[0]#hi
|
||||
vmlal.u32 @ACC[5],$Ni,${N2}[1]
|
||||
vshr.u64 @ACC[0]#lo,@ACC[0]#lo,#16
|
||||
vmlal.u32 @ACC[6],$Ni,${N3}[0]
|
||||
vmlal.u32 @ACC[7],$Ni,${N3}[1]
|
||||
vadd.u64 @ACC[1]#lo,@ACC[1]#lo,@ACC[0]#lo
|
||||
vst1.32 {$Ni},[$bnptr,:64]! @ put aside smashed m[8*i+$i]
|
||||
___
|
||||
push(@ACC,shift(@ACC)); $i++;
|
||||
$code.=<<___;
|
||||
vmlal.u32 @ACC[0],$Bi,${A0}[0]
|
||||
vld1.64 {@ACC[7]},[$tinptr,:128]!
|
||||
vmlal.u32 @ACC[1],$Bi,${A0}[1]
|
||||
veor $zero,$zero,$zero
|
||||
vmlal.u32 @ACC[2],$Bi,${A1}[0]
|
||||
vshl.i64 $Ni,@ACC[0]#hi,#16
|
||||
vmlal.u32 @ACC[3],$Bi,${A1}[1]
|
||||
vadd.u64 $Ni,$Ni,@ACC[0]#lo
|
||||
vmlal.u32 @ACC[4],$Bi,${A2}[0]
|
||||
vmul.u32 $Ni,$Ni,$M0
|
||||
vmlal.u32 @ACC[5],$Bi,${A2}[1]
|
||||
vst1.32 {$Bi},[$bnptr,:64]! @ put aside smashed b[8*i+$i]
|
||||
vmlal.u32 @ACC[6],$Bi,${A3}[0]
|
||||
vzip.16 $Ni,$zero
|
||||
vmlal.u32 @ACC[7],$Bi,${A3}[1]
|
||||
___
|
||||
}
|
||||
$code.=<<___;
|
||||
vld1.32 {$Bi},[sp,:64] @ pull smashed b[8*i+0]
|
||||
vmlal.u32 @ACC[0],$Ni,${N0}[0]
|
||||
vld1.32 {$A0-$A3},[$aptr]!
|
||||
vmlal.u32 @ACC[1],$Ni,${N0}[1]
|
||||
vmlal.u32 @ACC[2],$Ni,${N1}[0]
|
||||
vshr.u64 @ACC[0]#lo,@ACC[0]#lo,#16
|
||||
vmlal.u32 @ACC[3],$Ni,${N1}[1]
|
||||
vmlal.u32 @ACC[4],$Ni,${N2}[0]
|
||||
vadd.u64 @ACC[0]#lo,@ACC[0]#lo,@ACC[0]#hi
|
||||
vmlal.u32 @ACC[5],$Ni,${N2}[1]
|
||||
vshr.u64 @ACC[0]#lo,@ACC[0]#lo,#16
|
||||
vmlal.u32 @ACC[6],$Ni,${N3}[0]
|
||||
vmlal.u32 @ACC[7],$Ni,${N3}[1]
|
||||
vadd.u64 @ACC[1]#lo,@ACC[1]#lo,@ACC[0]#lo
|
||||
vst1.32 {$Ni},[$bnptr,:64] @ put aside smashed m[8*i+$i]
|
||||
add $bnptr,sp,#8 @ rewind
|
||||
___
|
||||
push(@ACC,shift(@ACC));
|
||||
$code.=<<___;
|
||||
sub $inner,$num,#8
|
||||
b .LNEON_8n_inner
|
||||
|
||||
.align 4
|
||||
.LNEON_8n_inner:
|
||||
subs $inner,$inner,#8
|
||||
vmlal.u32 @ACC[0],$Bi,${A0}[0]
|
||||
vld1.64 {@ACC[7]},[$tinptr,:128]
|
||||
vmlal.u32 @ACC[1],$Bi,${A0}[1]
|
||||
vld1.32 {$Ni},[$bnptr,:64]! @ pull smashed m[8*i+0]
|
||||
vmlal.u32 @ACC[2],$Bi,${A1}[0]
|
||||
vld1.32 {$N0-$N3},[$nptr]!
|
||||
vmlal.u32 @ACC[3],$Bi,${A1}[1]
|
||||
it ne
|
||||
addne $tinptr,$tinptr,#16 @ don't advance in last iteration
|
||||
vmlal.u32 @ACC[4],$Bi,${A2}[0]
|
||||
vmlal.u32 @ACC[5],$Bi,${A2}[1]
|
||||
vmlal.u32 @ACC[6],$Bi,${A3}[0]
|
||||
vmlal.u32 @ACC[7],$Bi,${A3}[1]
|
||||
___
|
||||
for ($i=1; $i<8; $i++) {
|
||||
$code.=<<___;
|
||||
vld1.32 {$Bi},[$bnptr,:64]! @ pull smashed b[8*i+$i]
|
||||
vmlal.u32 @ACC[0],$Ni,${N0}[0]
|
||||
vmlal.u32 @ACC[1],$Ni,${N0}[1]
|
||||
vmlal.u32 @ACC[2],$Ni,${N1}[0]
|
||||
vmlal.u32 @ACC[3],$Ni,${N1}[1]
|
||||
vmlal.u32 @ACC[4],$Ni,${N2}[0]
|
||||
vmlal.u32 @ACC[5],$Ni,${N2}[1]
|
||||
vmlal.u32 @ACC[6],$Ni,${N3}[0]
|
||||
vmlal.u32 @ACC[7],$Ni,${N3}[1]
|
||||
vst1.64 {@ACC[0]},[$toutptr,:128]!
|
||||
___
|
||||
push(@ACC,shift(@ACC));
|
||||
$code.=<<___;
|
||||
vmlal.u32 @ACC[0],$Bi,${A0}[0]
|
||||
vld1.64 {@ACC[7]},[$tinptr,:128]
|
||||
vmlal.u32 @ACC[1],$Bi,${A0}[1]
|
||||
vld1.32 {$Ni},[$bnptr,:64]! @ pull smashed m[8*i+$i]
|
||||
vmlal.u32 @ACC[2],$Bi,${A1}[0]
|
||||
it ne
|
||||
addne $tinptr,$tinptr,#16 @ don't advance in last iteration
|
||||
vmlal.u32 @ACC[3],$Bi,${A1}[1]
|
||||
vmlal.u32 @ACC[4],$Bi,${A2}[0]
|
||||
vmlal.u32 @ACC[5],$Bi,${A2}[1]
|
||||
vmlal.u32 @ACC[6],$Bi,${A3}[0]
|
||||
vmlal.u32 @ACC[7],$Bi,${A3}[1]
|
||||
___
|
||||
}
|
||||
$code.=<<___;
|
||||
it eq
|
||||
subeq $aptr,$aptr,$num,lsl#2 @ rewind
|
||||
vmlal.u32 @ACC[0],$Ni,${N0}[0]
|
||||
vld1.32 {$Bi},[sp,:64] @ pull smashed b[8*i+0]
|
||||
vmlal.u32 @ACC[1],$Ni,${N0}[1]
|
||||
vld1.32 {$A0-$A3},[$aptr]!
|
||||
vmlal.u32 @ACC[2],$Ni,${N1}[0]
|
||||
add $bnptr,sp,#8 @ rewind
|
||||
vmlal.u32 @ACC[3],$Ni,${N1}[1]
|
||||
vmlal.u32 @ACC[4],$Ni,${N2}[0]
|
||||
vmlal.u32 @ACC[5],$Ni,${N2}[1]
|
||||
vmlal.u32 @ACC[6],$Ni,${N3}[0]
|
||||
vst1.64 {@ACC[0]},[$toutptr,:128]!
|
||||
vmlal.u32 @ACC[7],$Ni,${N3}[1]
|
||||
|
||||
bne .LNEON_8n_inner
|
||||
___
|
||||
push(@ACC,shift(@ACC));
|
||||
$code.=<<___;
|
||||
add $tinptr,sp,#128
|
||||
vst1.64 {@ACC[0]-@ACC[1]},[$toutptr,:256]!
|
||||
veor q2,q2,q2 @ $N0-$N1
|
||||
vst1.64 {@ACC[2]-@ACC[3]},[$toutptr,:256]!
|
||||
veor q3,q3,q3 @ $N2-$N3
|
||||
vst1.64 {@ACC[4]-@ACC[5]},[$toutptr,:256]!
|
||||
vst1.64 {@ACC[6]},[$toutptr,:128]
|
||||
|
||||
subs $outer,$outer,#8
|
||||
vld1.64 {@ACC[0]-@ACC[1]},[$tinptr,:256]!
|
||||
vld1.64 {@ACC[2]-@ACC[3]},[$tinptr,:256]!
|
||||
vld1.64 {@ACC[4]-@ACC[5]},[$tinptr,:256]!
|
||||
vld1.64 {@ACC[6]-@ACC[7]},[$tinptr,:256]!
|
||||
|
||||
itt ne
|
||||
subne $nptr,$nptr,$num,lsl#2 @ rewind
|
||||
bne .LNEON_8n_outer
|
||||
|
||||
add $toutptr,sp,#128
|
||||
vst1.64 {q2-q3}, [sp,:256]! @ start wiping stack frame
|
||||
vshr.u64 $temp,@ACC[0]#lo,#16
|
||||
vst1.64 {q2-q3},[sp,:256]!
|
||||
vadd.u64 @ACC[0]#hi,@ACC[0]#hi,$temp
|
||||
vst1.64 {q2-q3}, [sp,:256]!
|
||||
vshr.u64 $temp,@ACC[0]#hi,#16
|
||||
vst1.64 {q2-q3}, [sp,:256]!
|
||||
vzip.16 @ACC[0]#lo,@ACC[0]#hi
|
||||
|
||||
mov $inner,$num
|
||||
b .LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
.LNEON_tail:
|
||||
vadd.u64 @ACC[0]#lo,@ACC[0]#lo,$temp
|
||||
vshr.u64 $temp,@ACC[0]#lo,#16
|
||||
vld1.64 {@ACC[2]-@ACC[3]}, [$tinptr, :256]!
|
||||
vadd.u64 @ACC[0]#hi,@ACC[0]#hi,$temp
|
||||
vld1.64 {@ACC[4]-@ACC[5]}, [$tinptr, :256]!
|
||||
vshr.u64 $temp,@ACC[0]#hi,#16
|
||||
vld1.64 {@ACC[6]-@ACC[7]}, [$tinptr, :256]!
|
||||
vzip.16 @ACC[0]#lo,@ACC[0]#hi
|
||||
|
||||
.LNEON_tail_entry:
|
||||
___
|
||||
for ($i=1; $i<8; $i++) {
|
||||
$code.=<<___;
|
||||
vadd.u64 @ACC[1]#lo,@ACC[1]#lo,$temp
|
||||
vst1.32 {@ACC[0]#lo[0]}, [$toutptr, :32]!
|
||||
vshr.u64 $temp,@ACC[1]#lo,#16
|
||||
vadd.u64 @ACC[1]#hi,@ACC[1]#hi,$temp
|
||||
vshr.u64 $temp,@ACC[1]#hi,#16
|
||||
vzip.16 @ACC[1]#lo,@ACC[1]#hi
|
||||
___
|
||||
push(@ACC,shift(@ACC));
|
||||
}
|
||||
push(@ACC,shift(@ACC));
|
||||
$code.=<<___;
|
||||
vld1.64 {@ACC[0]-@ACC[1]}, [$tinptr, :256]!
|
||||
subs $inner,$inner,#8
|
||||
vst1.32 {@ACC[7]#lo[0]}, [$toutptr, :32]!
|
||||
bne .LNEON_tail
|
||||
|
||||
vst1.32 {${temp}[0]}, [$toutptr, :32] @ top-most bit
|
||||
sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr
|
||||
subs $aptr,sp,#0 @ clear carry flag
|
||||
add $bptr,sp,$num,lsl#2
|
||||
|
||||
.LNEON_sub:
|
||||
ldmia $aptr!, {r4-r7}
|
||||
ldmia $nptr!, {r8-r11}
|
||||
sbcs r8, r4,r8
|
||||
sbcs r9, r5,r9
|
||||
sbcs r10,r6,r10
|
||||
sbcs r11,r7,r11
|
||||
teq $aptr,$bptr @ preserves carry
|
||||
stmia $rptr!, {r8-r11}
|
||||
bne .LNEON_sub
|
||||
|
||||
ldr r10, [$aptr] @ load top-most bit
|
||||
mov r11,sp
|
||||
veor q0,q0,q0
|
||||
sub r11,$bptr,r11 @ this is num*4
|
||||
veor q1,q1,q1
|
||||
mov $aptr,sp
|
||||
sub $rptr,$rptr,r11 @ rewind $rptr
|
||||
mov $nptr,$bptr @ second 3/4th of frame
|
||||
sbcs r10,r10,#0 @ result is carry flag
|
||||
|
||||
.LNEON_copy_n_zap:
|
||||
ldmia $aptr!, {r4-r7}
|
||||
ldmia $rptr, {r8-r11}
|
||||
it cc
|
||||
movcc r8, r4
|
||||
vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
|
||||
itt cc
|
||||
movcc r9, r5
|
||||
movcc r10,r6
|
||||
vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
|
||||
it cc
|
||||
movcc r11,r7
|
||||
ldmia $aptr, {r4-r7}
|
||||
stmia $rptr!, {r8-r11}
|
||||
sub $aptr,$aptr,#16
|
||||
ldmia $rptr, {r8-r11}
|
||||
it cc
|
||||
movcc r8, r4
|
||||
vst1.64 {q0-q1}, [$aptr,:256]! @ wipe
|
||||
itt cc
|
||||
movcc r9, r5
|
||||
movcc r10,r6
|
||||
vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
|
||||
it cc
|
||||
movcc r11,r7
|
||||
teq $aptr,$bptr @ preserves carry
|
||||
stmia $rptr!, {r8-r11}
|
||||
bne .LNEON_copy_n_zap
|
||||
|
||||
mov sp,ip
|
||||
vldmia sp!,{d8-d15}
|
||||
ldmia sp!,{r4-r11}
|
||||
ret @ bx lr
|
||||
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
|
||||
#endif
|
||||
___
|
||||
}
|
||||
$code.=<<___;
|
||||
.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 2
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.comm OPENSSL_armcap_P,4,4
|
||||
#endif
|
||||
___
|
||||
|
||||
# Post-process the accumulated assembly text one line at a time and print it
# (to the file or to the arm-xlate.pl pipe that STDOUT was re-opened on).
foreach (split("\n",$code)) {
|
||||
# Replace every `...` span with the result of eval'ing its contents as Perl.
s/\`([^\`]*)\`/eval $1/ge;
|
||||
|
||||
# The three rewrites below are chained with "or", so evaluation stops at the
# first one that actually changes the line:
#  1. qN#lo / qN#hi   -> d(2N) / d(2N+1), i.e. the low/high half of qN;
#  2. "ret"           -> "bx lr";
#  3. literal "bx lr" -> the same instruction spelled as a raw .word.
s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/ge or
|
||||
s/\bret\b/bx lr/g or
|
||||
s/\bbx\s+lr\b/.word\t0xe12fff1e/g; # make it possible to compile with -march=armv4
|
||||
|
||||
# Emit the (possibly rewritten) line.
print $_,"\n";
|
||||
}
|
||||
|
||||
# STDOUT may be a pipe into arm-xlate.pl; close() then also waits for the
# translator and reports its failure. Abort instead of silently leaving a
# truncated or empty assembly file behind.
close STDOUT or die "error closing STDOUT: $!";
|
1514
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/armv8-mont.pl
vendored
Executable file
1514
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/armv8-mont.pl
vendored
Executable file
File diff suppressed because it is too large
Load diff
785
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/bn-586.pl
vendored
Normal file
785
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/bn-586.pl
vendored
Normal file
|
@ -0,0 +1,785 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
push(@INC,"${dir}","${dir}../../perlasm");
|
||||
require "x86asm.pl";
|
||||
|
||||
$output = pop;
|
||||
open STDOUT,">$output";
|
||||
|
||||
&asm_init($ARGV[0]);
|
||||
|
||||
$sse2=0;
|
||||
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
||||
|
||||
&external_label("OPENSSL_ia32cap_P") if ($sse2);
|
||||
|
||||
&bn_mul_add_words("bn_mul_add_words");
|
||||
&bn_mul_words("bn_mul_words");
|
||||
&bn_sqr_words("bn_sqr_words");
|
||||
&bn_div_words("bn_div_words");
|
||||
&bn_add_words("bn_add_words");
|
||||
&bn_sub_words("bn_sub_words");
|
||||
&bn_sub_part_words("bn_sub_part_words");
|
||||
|
||||
&asm_finish();
|
||||
|
||||
# Flush and close the generated file; fail loudly if the final write/flush
# fails rather than leaving a truncated assembly file behind.
close STDOUT or die "error closing STDOUT: $!";
|
||||
|
||||
# Emit the x86 assembly for
#   BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
# i.e. r[i] += a[i]*w for num 32-bit words, propagating the carry between
# words and returning the final carry in eax.
#
# $name is the symbol to define. Two code paths are generated:
#   * when the script was invoked with -DOPENSSL_IA32_SSE2, a path using
#     pmuludq/paddq on the mm registers, selected at run time by testing
#     bit 26 of OPENSSL_ia32cap_P (the SSE2 feature bit);
#   * an integer mul/adc path (8x unrolled main loop plus a tail of up to
#     7 words), which is also the run-time fallback.
# The order of the emitting calls below is the instruction schedule of the
# generated code, so it must not be rearranged.
# NOTE(review): wparam() is used both before and after the register pushes
# below, so it presumably tracks the perlasm push/pop bookkeeping -- confirm
# in x86asm.pl.
sub bn_mul_add_words
|
||||
{
|
||||
local($name)=@_;
|
||||
|
||||
&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
|
||||
|
||||
# Register roles for the SSE2 path (reassigned further down for the
# integer path).
$r="eax";
|
||||
$a="edx";
|
||||
$c="ecx";
|
||||
|
||||
if ($sse2) {
|
||||
# Run-time dispatch: fall through to the SSE2 code only if bit 26 of the
# capability word is set, otherwise jump to the integer code.
&picmeup("eax","OPENSSL_ia32cap_P");
|
||||
&bt(&DWP(0,"eax"),26);
|
||||
&jnc(&label("maw_non_sse2"));
|
||||
|
||||
&mov($r,&wparam(0));
|
||||
&mov($a,&wparam(1));
|
||||
&mov($c,&wparam(2));
|
||||
&movd("mm0",&wparam(3)); # mm0 = w
|
||||
&pxor("mm1","mm1"); # mm1 = carry_in
|
||||
&jmp(&label("maw_sse2_entry"));
|
||||
|
||||
# 8-word unrolled body; entered only while at least 8 words remain.
&set_label("maw_sse2_unrolled",16);
|
||||
&movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0]
|
||||
&paddq("mm1","mm3"); # mm1 = carry_in + r[0]
|
||||
&movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0]
|
||||
&pmuludq("mm2","mm0"); # mm2 = w*a[0]
|
||||
&movd("mm4",&DWP(4,$a,"",0)); # mm4 = a[1]
|
||||
&pmuludq("mm4","mm0"); # mm4 = w*a[1]
|
||||
&movd("mm6",&DWP(8,$a,"",0)); # mm6 = a[2]
|
||||
&pmuludq("mm6","mm0"); # mm6 = w*a[2]
|
||||
&movd("mm7",&DWP(12,$a,"",0)); # mm7 = a[3]
|
||||
&pmuludq("mm7","mm0"); # mm7 = w*a[3]
|
||||
&paddq("mm1","mm2"); # mm1 = carry_in + r[0] + w*a[0]
|
||||
&movd("mm3",&DWP(4,$r,"",0)); # mm3 = r[1]
|
||||
&paddq("mm3","mm4"); # mm3 = r[1] + w*a[1]
|
||||
&movd("mm5",&DWP(8,$r,"",0)); # mm5 = r[2]
|
||||
&paddq("mm5","mm6"); # mm5 = r[2] + w*a[2]
|
||||
&movd("mm4",&DWP(12,$r,"",0)); # mm4 = r[3]
|
||||
&paddq("mm7","mm4"); # mm7 = r[3] + w*a[3]
|
||||
&movd(&DWP(0,$r,"",0),"mm1"); # r[0] = low 32 bits
|
||||
&movd("mm2",&DWP(16,$a,"",0)); # mm2 = a[4]
|
||||
&pmuludq("mm2","mm0"); # mm2 = w*a[4]
|
||||
&psrlq("mm1",32); # mm1 = carry0
|
||||
&movd("mm4",&DWP(20,$a,"",0)); # mm4 = a[5]
|
||||
&pmuludq("mm4","mm0"); # mm4 = w*a[5]
|
||||
&paddq("mm1","mm3"); # mm1 = carry0 + r[1] + w*a[1]
|
||||
&movd("mm6",&DWP(24,$a,"",0)); # mm6 = a[6]
|
||||
&pmuludq("mm6","mm0"); # mm6 = w*a[6]
|
||||
&movd(&DWP(4,$r,"",0),"mm1"); # r[1] = low 32 bits
|
||||
&psrlq("mm1",32); # mm1 = carry1
|
||||
&movd("mm3",&DWP(28,$a,"",0)); # mm3 = a[7]
|
||||
&add($a,32); # a += 8 words
|
||||
&pmuludq("mm3","mm0"); # mm3 = w*a[7]
|
||||
&paddq("mm1","mm5"); # mm1 = carry1 + r[2] + w*a[2]
|
||||
&movd("mm5",&DWP(16,$r,"",0)); # mm5 = r[4]
|
||||
&paddq("mm2","mm5"); # mm2 = r[4] + w*a[4]
|
||||
&movd(&DWP(8,$r,"",0),"mm1"); # r[2] = low 32 bits
|
||||
&psrlq("mm1",32); # mm1 = carry2
|
||||
&paddq("mm1","mm7"); # mm1 = carry2 + r[3] + w*a[3]
|
||||
&movd("mm5",&DWP(20,$r,"",0)); # mm5 = r[5]
|
||||
&paddq("mm4","mm5"); # mm4 = r[5] + w*a[5]
|
||||
&movd(&DWP(12,$r,"",0),"mm1"); # r[3] = low 32 bits
|
||||
&psrlq("mm1",32); # mm1 = carry3
|
||||
&paddq("mm1","mm2"); # mm1 = carry3 + r[4] + w*a[4]
|
||||
&movd("mm5",&DWP(24,$r,"",0)); # mm5 = r[6]
|
||||
&paddq("mm6","mm5"); # mm6 = r[6] + w*a[6]
|
||||
&movd(&DWP(16,$r,"",0),"mm1"); # r[4] = low 32 bits
|
||||
&psrlq("mm1",32); # mm1 = carry4
|
||||
&paddq("mm1","mm4"); # mm1 = carry4 + r[5] + w*a[5]
|
||||
&movd("mm5",&DWP(28,$r,"",0)); # mm5 = r[7]
|
||||
&paddq("mm3","mm5"); # mm3 = r[7] + w*a[7]
|
||||
&movd(&DWP(20,$r,"",0),"mm1"); # r[5] = low 32 bits
|
||||
&psrlq("mm1",32); # mm1 = carry5
|
||||
&paddq("mm1","mm6"); # mm1 = carry5 + r[6] + w*a[6]
|
||||
&movd(&DWP(24,$r,"",0),"mm1"); # r[6] = low 32 bits
|
||||
&psrlq("mm1",32); # mm1 = carry6
|
||||
&paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7]
|
||||
&movd(&DWP(28,$r,"",0),"mm1"); # r[7] = low 32 bits
|
||||
&lea($r,&DWP(32,$r)); # r += 8 words (lea leaves flags alone)
|
||||
&psrlq("mm1",32); # mm1 = carry_out
|
||||
|
||||
&sub($c,8);
|
||||
&jz(&label("maw_sse2_exit")); # nothing left after this group of 8
|
||||
# Loop head: take the unrolled body while the count is >= 8, otherwise fall
# through to the word-at-a-time loop for the remainder.
# NOTE(review): a count of 0 on entry would also fall through into that loop,
# which processes a word before testing the count -- callers presumably
# guarantee num > 0.
&set_label("maw_sse2_entry");
|
||||
&test($c,0xfffffff8);
|
||||
&jnz(&label("maw_sse2_unrolled"));
|
||||
|
||||
&set_label("maw_sse2_loop",4);
|
||||
&movd("mm2",&DWP(0,$a)); # mm2 = a[i]
|
||||
&movd("mm3",&DWP(0,$r)); # mm3 = r[i]
|
||||
&pmuludq("mm2","mm0"); # a[i] *= w
|
||||
&lea($a,&DWP(4,$a));
|
||||
&paddq("mm1","mm3"); # carry += r[i]
|
||||
&paddq("mm1","mm2"); # carry += a[i]*w
|
||||
&movd(&DWP(0,$r),"mm1"); # r[i] = carry_low
|
||||
&sub($c,1); # sets ZF for the jnz below
|
||||
&psrlq("mm1",32); # carry = carry_high
|
||||
&lea($r,&DWP(4,$r));
|
||||
&jnz(&label("maw_sse2_loop"));
|
||||
&set_label("maw_sse2_exit");
|
||||
&movd("eax","mm1"); # c = carry_out
|
||||
&emms();
|
||||
&ret();
|
||||
|
||||
&set_label("maw_non_sse2",16);
|
||||
}
|
||||
|
||||
# function_begin prologue
&push("ebp");
|
||||
&push("ebx");
|
||||
&push("esi");
|
||||
&push("edi");
|
||||
|
||||
&comment("");
|
||||
# Register roles for the integer path. $Low and $High are not referenced
# again in this sub; the code below names eax/edx directly.
$Low="eax";
|
||||
$High="edx";
|
||||
$a="ebx";
|
||||
$w="ebp";
|
||||
$r="edi";
|
||||
$c="esi";
|
||||
|
||||
&xor($c,$c); # clear carry
|
||||
&mov($r,&wparam(0)); # r
|
||||
|
||||
&mov("ecx",&wparam(2)); # num
|
||||
&mov($a,&wparam(1)); # a
|
||||
|
||||
&and("ecx",0xfffffff8); # num & ~7: words handled by the unrolled loop
|
||||
&mov($w,&wparam(3)); # w
|
||||
|
||||
&push("ecx"); # Up the stack for a tmp variable (only popped again at the end of this sub)
|
||||
|
||||
# mov and push do not modify flags, so this still tests the "and" above:
# skip the unrolled loop when fewer than 8 words were requested.
&jz(&label("maw_finish"));
|
||||
|
||||
&set_label("maw_loop",16);
|
||||
|
||||
# Emit 8 rounds; $i is the byte offset of the word within the group.
for ($i=0; $i<32; $i+=4)
|
||||
{
|
||||
&comment("Round $i");
|
||||
|
||||
&mov("eax",&DWP($i,$a)); # *a
|
||||
&mul($w); # *a * w
|
||||
&add("eax",$c); # L(t)+= c
|
||||
&adc("edx",0); # H(t)+=carry
|
||||
&add("eax",&DWP($i,$r)); # L(t)+= *r
|
||||
&adc("edx",0); # H(t)+=carry
|
||||
&mov(&DWP($i,$r),"eax"); # *r= L(t);
|
||||
&mov($c,"edx"); # c= H(t);
|
||||
}
|
||||
|
||||
&comment("");
|
||||
&sub("ecx",8); # flags consumed by the jnz below (lea leaves them alone)
|
||||
&lea($a,&DWP(32,$a));
|
||||
&lea($r,&DWP(32,$r));
|
||||
&jnz(&label("maw_loop"));
|
||||
|
||||
&set_label("maw_finish",0);
|
||||
&mov("ecx",&wparam(2)); # get num
|
||||
&and("ecx",7); # remaining words: num % 8
|
||||
&jnz(&label("maw_finish2")); # helps branch prediction
|
||||
&jmp(&label("maw_end"));
|
||||
|
||||
&set_label("maw_finish2",1);
|
||||
# Emit up to 7 tail rounds. After each of the first six the remaining count
# is decremented and the code exits once it reaches zero; the dec is emitted
# before the two movs (which leave flags alone) and tested by the jz after.
for ($i=0; $i<7; $i++)
|
||||
{
|
||||
&comment("Tail Round $i");
|
||||
&mov("eax",&DWP($i*4,$a)); # *a
|
||||
&mul($w); # *a * w
|
||||
&add("eax",$c); # L(t)+=c
|
||||
&adc("edx",0); # H(t)+=carry
|
||||
&add("eax",&DWP($i*4,$r)); # L(t)+= *r
|
||||
&adc("edx",0); # H(t)+=carry
|
||||
&dec("ecx") if ($i != 7-1);
|
||||
&mov(&DWP($i*4,$r),"eax"); # *r= L(t);
|
||||
&mov($c,"edx"); # c= H(t);
|
||||
&jz(&label("maw_end")) if ($i != 7-1);
|
||||
}
|
||||
&set_label("maw_end",0);
|
||||
&mov("eax",$c); # return value = final carry
|
||||
|
||||
&pop("ecx"); # drop the tmp variable pushed above from the stack
|
||||
|
||||
&function_end($name);
|
||||
}
|
||||
|
||||
# Emit bn_mul_words(rp, ap, num, w):
#   rp[i] = low word of (ap[i]*w + carry) for i in 0..num-1,
#   and the final carry word is returned in eax.
#
# When $sse2 is set, the generated code first tests bit 26 of the first
# OPENSSL_ia32cap_P word at run time and, if it is set, uses an MMX/SSE2
# (pmuludq) loop; otherwise it falls through to the integer-only code.
# The integer code processes 8 words per pass of an unrolled loop and then
# up to 7 remaining words in an unrolled tail.
sub bn_mul_words
	{
	local($name)=@_;

	&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

	$r="eax";
	$a="edx";
	$c="ecx";

	if ($sse2) {
		&picmeup("eax","OPENSSL_ia32cap_P");
		&bt(&DWP(0,"eax"),26);
		&jnc(&label("mw_non_sse2"));

		&mov($r,&wparam(0));
		&mov($a,&wparam(1));
		&mov($c,&wparam(2));
		&movd("mm0",&wparam(3));	# mm0 = w
		&pxor("mm1","mm1");		# mm1 = carry = 0

		# The loop below tests its counter at the bottom, so a zero
		# word count would wrap the counter and keep reading/writing.
		# Skip the loop instead; mm1 is still 0, so 0 is returned,
		# which is what the integer path below does for num == 0.
		&test($c,$c);
		&jz(&label("mw_sse2_done"));

	&set_label("mw_sse2_loop",16);
		&movd("mm2",&DWP(0,$a));	# mm2 = a[i]
		&pmuludq("mm2","mm0");		# a[i] *= w
		&lea($a,&DWP(4,$a));
		&paddq("mm1","mm2");		# carry += a[i]*w
		&movd(&DWP(0,$r),"mm1");	# r[i] = carry_low
		&sub($c,1);
		&psrlq("mm1",32);		# carry = carry_high
		&lea($r,&DWP(4,$r));
		&jnz(&label("mw_sse2_loop"));

	&set_label("mw_sse2_done",0);
		&movd("eax","mm1");		# return carry
		&emms();
		&ret();
	&set_label("mw_non_sse2",16);
	}

	# function_begin prologue, emitted by hand because function_begin_B
	# was used above so that the SSE2 path could return without it.
	&push("ebp");
	&push("ebx");
	&push("esi");
	&push("edi");

	&comment("");
	$Low="eax";
	$High="edx";
	$a="ebx";
	$w="ecx";
	$r="edi";
	$c="esi";
	$num="ebp";

	&xor($c,$c);		# clear carry
	&mov($r,&wparam(0));	# r
	&mov($a,&wparam(1));	# a
	&mov($num,&wparam(2));	# num
	&mov($w,&wparam(3));	# w

	&and($num,0xfffffff8);	# num & ~7: words done by the 8x unrolled loop
	&jz(&label("mw_finish"));

	&set_label("mw_loop",0);
	for ($i=0; $i<32; $i+=4)	# $i is a byte offset here
		{
		&comment("Round $i");

		&mov("eax",&DWP($i,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i,$r,"",0),"eax");	# *r= L(t);
		&mov($c,"edx");			# c=  H(t);
		}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jz(&label("mw_finish"));
	&jmp(&label("mw_loop"));

	&set_label("mw_finish",0);
	&mov($num,&wparam(2));	# get num
	&and($num,7);		# 0..7 words left for the tail
	&jnz(&label("mw_finish2"));
	&jmp(&label("mw_end"));

	&set_label("mw_finish2",1);
	for ($i=0; $i<7; $i++)		# $i is a word index here
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i*4,$r,"",0),"eax");	# *r= L(t);
		&mov($c,"edx");			# c=  H(t);
		# The 7th tail word is necessarily the last, so it needs no
		# counter update or exit test.
		&dec($num) if ($i != 7-1);
		&jz(&label("mw_end")) if ($i != 7-1);
		}
	&set_label("mw_end",0);
	&mov("eax",$c);		# return carry

	&function_end($name);
	}
|
||||
|
||||
# Emit bn_sqr_words(rp, ap, num):
#   the double-word square of ap[i] is stored at rp[2*i] (low word) and
#   rp[2*i+1] (high word) for i in 0..num-1.  Nothing is returned.
#
# When $sse2 is set, the generated code first tests bit 26 of the first
# OPENSSL_ia32cap_P word at run time and, if it is set, uses an MMX/SSE2
# (pmuludq) loop; otherwise it falls through to the integer-only code,
# which handles 8 words per unrolled pass plus an unrolled tail of up to 7.
sub bn_sqr_words
	{
	local($name)=@_;

	&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

	$r="eax";
	$a="edx";
	$c="ecx";

	if ($sse2) {
		&picmeup("eax","OPENSSL_ia32cap_P");
		&bt(&DWP(0,"eax"),26);
		&jnc(&label("sqr_non_sse2"));

		&mov($r,&wparam(0));
		&mov($a,&wparam(1));
		&mov($c,&wparam(2));

		# The loop below tests its counter at the bottom, so a zero
		# word count would wrap the counter and keep reading/writing.
		# Skip straight to the exit instead, as the integer path
		# below effectively does for num == 0.
		&test($c,$c);
		&jz(&label("sqr_sse2_done"));

	&set_label("sqr_sse2_loop",16);
		&movd("mm0",&DWP(0,$a));	# mm0 = a[i]
		&pmuludq("mm0","mm0");		# a[i] *= a[i]
		&lea($a,&DWP(4,$a));		# a++
		&movq(&QWP(0,$r),"mm0");	# r[2i],r[2i+1] = a[i]*a[i]
		&sub($c,1);
		&lea($r,&DWP(8,$r));		# r += 2
		&jnz(&label("sqr_sse2_loop"));

	&set_label("sqr_sse2_done",0);
		&emms();
		&ret();
	&set_label("sqr_non_sse2",16);
	}

	# function_begin prologue, emitted by hand because function_begin_B
	# was used above so that the SSE2 path could return without it.
	&push("ebp");
	&push("ebx");
	&push("esi");
	&push("edi");

	&comment("");
	$r="esi";
	$a="edi";
	$num="ebx";

	&mov($r,&wparam(0));	# r
	&mov($a,&wparam(1));	# a
	&mov($num,&wparam(2));	# num

	&and($num,0xfffffff8);	# num & ~7: words done by the 8x unrolled loop
	&jz(&label("sw_finish"));

	&set_label("sw_loop",0);
	for ($i=0; $i<32; $i+=4)	# $i is a byte offset into a
		{
		&comment("Round $i");
		&mov("eax",&DWP($i,$a,"",0));		# *a
		&mul("eax");				# *a * *a
		&mov(&DWP($i*2,$r,"",0),"eax");		# low word
		&mov(&DWP($i*2+4,$r,"",0),"edx");	# high word
		}

	&comment("");
	&add($a,32);
	&add($r,64);
	&sub($num,8);
	&jnz(&label("sw_loop"));

	&set_label("sw_finish",0);
	&mov($num,&wparam(2));	# get num
	&and($num,7);		# 0..7 words left for the tail
	&jz(&label("sw_end"));

	for ($i=0; $i<7; $i++)		# $i is a word index into a
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));		# *a
		&mul("eax");				# *a * *a
		&mov(&DWP($i*8,$r,"",0),"eax");		# low word
		# dec sets ZF; the following mov leaves flags alone, so the
		# jz still tests the counter.
		&dec($num) if ($i != 7-1);
		&mov(&DWP($i*8+4,$r,"",0),"edx");	# high word
		&jz(&label("sw_end")) if ($i != 7-1);
		}
	&set_label("sw_end",0);

	&function_end($name);
	}
|
||||
|
||||
# Emit bn_div_words(h, l, d): load the three stack arguments into
# edx, eax and ecx, execute a single "div ecx" and return.
sub bn_div_words
	{
	local $name=$_[0];

	&function_begin_B($name,"");

	# Argument n goes to the n-th register of this list.
	my @target=("edx","eax","ecx");
	foreach my $n (0..$#target)
		{
		&mov($target[$n],&wparam($n));
		}

	&div("ecx");
	&ret();
	&function_end_B($name);
	}
|
||||
|
||||
# Emit bn_add_words(r, a, b, num): r[i] = a[i] + b[i] + carry over num
# words.  The running carry lives in eax, so it is also the value left in
# eax when the function returns.  Words are handled 8 per pass of an
# unrolled loop, then up to 7 more in an unrolled tail.
sub bn_add_words
	{
	local($name)=@_;

	&function_begin($name,"");
	&comment("");

	# Register roles (script-level globals, as in the sibling routines).
	($a,$b,$r,$num)=("esi","edi","ebx","ebp");
	($c,$tmp1,$tmp2)=("eax","ecx","edx");

	# One link of the carry chain for the word at byte offset $off:
	# tmp1 = a + c + b, with both possible carry-outs collected in c.
	my $sum_word = sub
		{
		my ($off)=@_;
		&mov($tmp1,&DWP($off,$a,"",0));	# *a
		&mov($tmp2,&DWP($off,$b,"",0));	# *b
		&add($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&add($tmp1,$tmp2);
		&adc($c,0);
		};

	&mov($r,&wparam(0));	# r
	&mov($a,&wparam(1));	# a
	&mov($b,&wparam(2));	# b
	&mov($num,&wparam(3));	# num
	&xor($c,$c);		# carry = 0
	&and($num,0xfffffff8);	# num & ~7: words for the unrolled loop
	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	foreach my $k (0..7)
		{
		&comment("Round $k");
		$sum_word->($k*4);
		&mov(&DWP($k*4,$r,"",0),$tmp1);	# *r
		}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));	# num again
	&and($num,7);		# 0..7 words left
	&jz(&label("aw_end"));

	foreach my $k (0..6)
		{
		# The 7th tail word is necessarily the last one, so it gets
		# no counter update and no exit test.
		my $more=($k != 6);

		&comment("Tail Round $k");
		$sum_word->($k*4);
		&dec($num) if $more;
		&mov(&DWP($k*4,$r,"",0),$tmp1);	# *r (mov keeps ZF from dec)
		&jz(&label("aw_end")) if $more;
		}
	&set_label("aw_end",0);

	# No final move needed: $c is "eax" already.

	&function_end($name);
	}
|
||||
|
||||
# Emit bn_sub_words(r, a, b, num): r[i] = a[i] - b[i] - borrow over num
# words.  The running borrow lives in eax, so it is also the value left in
# eax when the function returns.  Words are handled 8 per pass of an
# unrolled loop, then up to 7 more in an unrolled tail.
sub bn_sub_words
	{
	local($name)=@_;

	&function_begin($name,"");
	&comment("");

	# Register roles (script-level globals, as in the sibling routines).
	($a,$b,$r,$num)=("esi","edi","ebx","ebp");
	($c,$tmp1,$tmp2)=("eax","ecx","edx");

	# One link of the borrow chain for the word at byte offset $off:
	# tmp1 = a - c - b, with both possible borrows collected in c.
	my $diff_word = sub
		{
		my ($off)=@_;
		&mov($tmp1,&DWP($off,$a,"",0));	# *a
		&mov($tmp2,&DWP($off,$b,"",0));	# *b
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		};

	&mov($r,&wparam(0));	# r
	&mov($a,&wparam(1));	# a
	&mov($b,&wparam(2));	# b
	&mov($num,&wparam(3));	# num
	&xor($c,$c);		# borrow = 0
	&and($num,0xfffffff8);	# num & ~7: words for the unrolled loop
	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	foreach my $k (0..7)
		{
		&comment("Round $k");
		$diff_word->($k*4);
		&mov(&DWP($k*4,$r,"",0),$tmp1);	# *r
		}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));	# num again
	&and($num,7);		# 0..7 words left
	&jz(&label("aw_end"));

	foreach my $k (0..6)
		{
		# The 7th tail word is necessarily the last one, so it gets
		# no counter update and no exit test.
		my $more=($k != 6);

		&comment("Tail Round $k");
		$diff_word->($k*4);
		&dec($num) if $more;
		&mov(&DWP($k*4,$r,"",0),$tmp1);	# *r (mov keeps ZF from dec)
		&jz(&label("aw_end")) if $more;
		}
	&set_label("aw_end",0);

	# No final move needed: $c is "eax" already.

	&function_end($name);
	}
|
||||
|
||||
# Emit bn_sub_part_words.  Stack arguments, in order: r, a, b, a word
# count for the part both inputs share, and a signed extra length "dl".
#
#   1. Over the shared part: r[i] = a[i] - b[i] - borrow (8x unrolled loop
#      plus a tail that advances a, b and r word by word, so all three
#      pointers end up just past the shared part).
#   2. dl == 0: done.
#      dl <  0: for -dl further words, r[i] = 0 - b[i] - borrow.
#      dl >  0: for  dl further words, r[i] = a[i] - borrow; as soon as a
#               subtraction produces no borrow, control jumps into a plain
#               copy loop ("pw_nc*") that finishes the block and clears
#               the borrow.
# The borrow is kept in eax, which is therefore the value left in eax on
# return.
sub bn_sub_part_words
	{
	local($name)=@_;

	&function_begin($name,"");
	&comment("");

	# Register roles (script-level globals, as in the sibling routines).
	($a,$b,$r,$num)=("esi","edi","ebx","ebp");
	($c,$tmp1,$tmp2)=("eax","ecx","edx");

	# One link of the borrow chain: tmp1 = minuend - c - subtrahend,
	# with both possible borrows collected in c.  Either operand may be
	# a memory reference or an immediate.
	my $diff_word = sub
		{
		my ($minuend,$subtrahend)=@_;
		&mov($tmp1,$minuend);
		&mov($tmp2,$subtrahend);
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		};

	&mov($r,&wparam(0));	# r
	&mov($a,&wparam(1));	# a
	&mov($b,&wparam(2));	# b
	&mov($num,&wparam(3));	# shared word count
	&xor($c,$c);		# borrow = 0
	&and($num,0xfffffff8);	# count & ~7: words for the unrolled loop
	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	foreach my $k (0..7)
		{
		my $off=$k*4;
		&comment("Round $k");
		$diff_word->(&DWP($off,$a,"",0),&DWP($off,$b,"",0));
		&mov(&DWP($off,$r,"",0),$tmp1);	# *r
		}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));	# shared word count again
	&and($num,7);		# 0..7 words left
	&jz(&label("aw_end"));

	foreach my $k (0..6)
		{
		&comment("Tail Round $k");
		$diff_word->(&DWP(0,$a,"",0),&DWP(0,$b,"",0));
		&mov(&DWP(0,$r,"",0),$tmp1);	# *r
		# Unlike the other tails, step the pointers here so that
		# they are positioned for the dl part below.
		&add($a, 4);
		&add($b, 4);
		&add($r, 4);
		if ($k != 6)
			{
			&dec($num);
			&jz(&label("aw_end"));
			}
		}
	&set_label("aw_end",0);

	&cmp(&wparam(4),0);
	&je(&label("pw_end"));

	&mov($num,&wparam(4));	# dl
	&cmp($num,0);
	&je(&label("pw_end"));
	&jge(&label("pw_pos"));

	# ---- dl < 0: subtract the remaining words of b from zero ----
	&comment("pw_neg");
	&mov($tmp2,0);
	&sub($tmp2,$num);
	&mov($num,$tmp2);	# num = -dl
	&and($num,0xfffffff8);	# (-dl) & ~7
	&jz(&label("pw_neg_finish"));

	&set_label("pw_neg_loop",0);
	foreach my $k (0..7)
		{
		&comment("dl<0 Round $k");
		$diff_word->(0,&DWP($k*4,$b,"",0));
		&mov(&DWP($k*4,$r,"",0),$tmp1);	# *r
		}

	&comment("");
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("pw_neg_loop"));

	&set_label("pw_neg_finish",0);
	&mov($tmp2,&wparam(4));	# dl
	&mov($num,0);
	&sub($num,$tmp2);	# num = -dl
	&and($num,7);
	&jz(&label("pw_end"));

	foreach my $k (0..6)
		{
		my $more=($k != 6);

		&comment("dl<0 Tail Round $k");
		$diff_word->(0,&DWP($k*4,$b,"",0));
		&dec($num) if $more;
		&mov(&DWP($k*4,$r,"",0),$tmp1);	# *r (mov keeps ZF from dec)
		&jz(&label("pw_end")) if $more;
		}

	&jmp(&label("pw_end"));

	# ---- dl > 0: propagate the borrow through the remaining words of a ----
	&set_label("pw_pos",0);

	&and($num,0xfffffff8);	# dl & ~7
	&jz(&label("pw_pos_finish"));

	&set_label("pw_pos_loop",0);
	foreach my $k (0..7)
		{
		&comment("dl>0 Round $k");
		&mov($tmp1,&DWP($k*4,$a,"",0));	# *a
		&sub($tmp1,$c);
		&mov(&DWP($k*4,$r,"",0),$tmp1);	# *r
		# No borrow out: switch to the copy loop right after the
		# word that was just stored.
		&jnc(&label("pw_nc".$k));
		}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("pw_pos_loop"));

	&set_label("pw_pos_finish",0);
	&mov($num,&wparam(4));	# dl
	&and($num,7);
	&jz(&label("pw_end"));

	foreach my $k (0..6)
		{
		&comment("dl>0 Tail Round $k");
		&mov($tmp1,&DWP($k*4,$a,"",0));	# *a
		&sub($tmp1,$c);
		&mov(&DWP($k*4,$r,"",0),$tmp1);	# *r
		&jnc(&label("pw_tail_nc".$k));
		if ($k != 6)
			{
			&dec($num);
			&jz(&label("pw_end"));
			}
		}
	&mov($c,1);
	&jmp(&label("pw_end"));

	# ---- borrow-free continuation of the dl > 0 case: plain copy ----
	&set_label("pw_nc_loop",0);
	foreach my $k (0..7)
		{
		&mov($tmp1,&DWP($k*4,$a,"",0));	# *a
		&mov(&DWP($k*4,$r,"",0),$tmp1);	# *r
		# Entry point for a jump from word $k of pw_pos_loop.
		&set_label("pw_nc".$k,0);
		}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("pw_nc_loop"));

	&mov($num,&wparam(4));	# dl
	&and($num,7);
	&jz(&label("pw_nc_end"));

	foreach my $k (0..6)
		{
		&mov($tmp1,&DWP($k*4,$a,"",0));	# *a
		&mov(&DWP($k*4,$r,"",0),$tmp1);	# *r
		# Entry point for a jump from word $k of the dl>0 tail.
		&set_label("pw_tail_nc".$k,0);
		if ($k != 6)
			{
			&dec($num);
			&jz(&label("pw_nc_end"));
			}
		}

	&set_label("pw_nc_end",0);
	&mov($c,0);

	&set_label("pw_end",0);

	# No final move needed: $c is "eax" already.

	&function_end($name);
	}
|
382
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/bn-c64xplus.asm
vendored
Normal file
382
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/bn-c64xplus.asm
vendored
Normal file
|
@ -0,0 +1,382 @@
|
|||
;; Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
;;
|
||||
;; Licensed under the OpenSSL license (the "License"). You may not use
|
||||
;; this file except in compliance with the License. You can obtain a copy
|
||||
;; in the file LICENSE in the source distribution or at
|
||||
;; https://www.openssl.org/source/license.html
|
||||
;;
|
||||
;;====================================================================
|
||||
;; Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
;; project.
|
||||
;;
|
||||
;; Rights for redistribution and usage in source and binary forms are
|
||||
;; granted according to the OpenSSL license. Warranty of any kind is
|
||||
;; disclaimed.
|
||||
;;====================================================================
|
||||
;; Compiler-generated multiply-n-add SPLOOP runs at 12*n cycles, n
|
||||
;; being the number of 32-bit words, addition - 8*n. Corresponding 4x
|
||||
;; unrolled SPLOOP-free loops - at ~8*n and ~5*n. Below assembler
|
||||
;; SPLOOPs spin at ... 2*n cycles [plus epilogue].
|
||||
;;====================================================================
|
||||
.text
|
||||
|
||||
.if .ASSEMBLER_VERSION<7000000
|
||||
.asg 0,__TI_EABI__
|
||||
.endif
|
||||
.if __TI_EABI__
|
||||
.asg bn_mul_add_words,_bn_mul_add_words
|
||||
.asg bn_mul_words,_bn_mul_words
|
||||
.asg bn_sqr_words,_bn_sqr_words
|
||||
.asg bn_add_words,_bn_add_words
|
||||
.asg bn_sub_words,_bn_sub_words
|
||||
.asg bn_div_words,_bn_div_words
|
||||
.asg bn_sqr_comba8,_bn_sqr_comba8
|
||||
.asg bn_mul_comba8,_bn_mul_comba8
|
||||
.asg bn_sqr_comba4,_bn_sqr_comba4
|
||||
.asg bn_mul_comba4,_bn_mul_comba4
|
||||
.endif
|
||||
|
||||
.asg B3,RA
|
||||
.asg A4,ARG0
|
||||
.asg B4,ARG1
|
||||
.asg A6,ARG2
|
||||
.asg B6,ARG3
|
||||
.asg A8,ARG4
|
||||
.asg B8,ARG5
|
||||
.asg A4,RET
|
||||
.asg A15,FP
|
||||
.asg B14,DP
|
||||
.asg B15,SP
|
||||
|
||||
.global _bn_mul_add_words
|
||||
_bn_mul_add_words:
|
||||
.asmfunc
|
||||
MV ARG2,B0
|
||||
[!B0] BNOP RA
|
||||
||[!B0] MVK 0,RET
|
||||
[B0] MVC B0,ILC
|
||||
[B0] ZERO A19 ; high part of accumulator
|
||||
|| [B0] MV ARG0,A2
|
||||
|| [B0] MV ARG3,A3
|
||||
NOP 3
|
||||
|
||||
SPLOOP 2 ; 2*n+10
|
||||
;;====================================================================
|
||||
LDW *ARG1++,B7 ; ap[i]
|
||||
NOP 3
|
||||
LDW *ARG0++,A7 ; rp[i]
|
||||
MPY32U B7,A3,A17:A16
|
||||
NOP 3 ; [2,0] in epilogue
|
||||
ADDU A16,A7,A21:A20
|
||||
ADDU A19,A21:A20,A19:A18
|
||||
|| MV.S A17,A23
|
||||
SPKERNEL 2,1 ; leave slot for "return value"
|
||||
|| STW A18,*A2++ ; rp[i]
|
||||
|| ADD A19,A23,A19
|
||||
;;====================================================================
|
||||
BNOP RA,4
|
||||
MV A19,RET ; return value
|
||||
.endasmfunc
|
||||
|
||||
.global _bn_mul_words
|
||||
_bn_mul_words:
|
||||
.asmfunc
|
||||
MV ARG2,B0
|
||||
[!B0] BNOP RA
|
||||
||[!B0] MVK 0,RET
|
||||
[B0] MVC B0,ILC
|
||||
[B0] ZERO A19 ; high part of accumulator
|
||||
NOP 3
|
||||
|
||||
SPLOOP 2 ; 2*n+10
|
||||
;;====================================================================
|
||||
LDW *ARG1++,A7 ; ap[i]
|
||||
NOP 4
|
||||
MPY32U A7,ARG3,A17:A16
|
||||
NOP 4 ; [2,0] in epilogue
|
||||
ADDU A19,A16,A19:A18
|
||||
|| MV.S A17,A21
|
||||
SPKERNEL 2,1 ; leave slot for "return value"
|
||||
|| STW A18,*ARG0++ ; rp[i]
|
||||
|| ADD.L A19,A21,A19
|
||||
;;====================================================================
|
||||
BNOP RA,4
|
||||
MV A19,RET ; return value
|
||||
.endasmfunc
|
||||
|
||||
.global _bn_sqr_words
|
||||
_bn_sqr_words:
|
||||
.asmfunc
|
||||
MV ARG2,B0
|
||||
[!B0] BNOP RA
|
||||
||[!B0] MVK 0,RET
|
||||
[B0] MVC B0,ILC
|
||||
[B0] MV ARG0,B2
|
||||
|| [B0] ADD 4,ARG0,ARG0
|
||||
NOP 3
|
||||
|
||||
SPLOOP 2 ; 2*n+10
|
||||
;;====================================================================
|
||||
LDW *ARG1++,B7 ; ap[i]
|
||||
NOP 4
|
||||
MPY32U B7,B7,B1:B0
|
||||
NOP 3 ; [2,0] in epilogue
|
||||
STW B0,*B2++(8) ; rp[2*i]
|
||||
MV B1,A1
|
||||
SPKERNEL 2,0 ; fully overlap BNOP RA,5
|
||||
|| STW A1,*ARG0++(8) ; rp[2*i+1]
|
||||
;;====================================================================
|
||||
BNOP RA,5
|
||||
.endasmfunc
|
||||
|
||||
.global _bn_add_words
|
||||
_bn_add_words:
|
||||
.asmfunc
|
||||
MV ARG3,B0
|
||||
[!B0] BNOP RA
|
||||
||[!B0] MVK 0,RET
|
||||
[B0] MVC B0,ILC
|
||||
[B0] ZERO A1 ; carry flag
|
||||
|| [B0] MV ARG0,A3
|
||||
NOP 3
|
||||
|
||||
SPLOOP 2 ; 2*n+6
|
||||
;;====================================================================
|
||||
LDW *ARG2++,A7 ; bp[i]
|
||||
|| LDW *ARG1++,B7 ; ap[i]
|
||||
NOP 4
|
||||
ADDU A7,B7,A9:A8
|
||||
ADDU A1,A9:A8,A1:A0
|
||||
SPKERNEL 0,0 ; fully overlap BNOP RA,5
|
||||
|| STW A0,*A3++ ; write result
|
||||
|| MV A1,RET ; keep carry flag in RET
|
||||
;;====================================================================
|
||||
BNOP RA,5
|
||||
.endasmfunc
|
||||
|
||||
.global _bn_sub_words
|
||||
_bn_sub_words:
|
||||
.asmfunc
|
||||
MV ARG3,B0
|
||||
[!B0] BNOP RA
|
||||
||[!B0] MVK 0,RET
|
||||
[B0] MVC B0,ILC
|
||||
[B0] ZERO A2 ; borrow flag
|
||||
|| [B0] MV ARG0,A3
|
||||
NOP 3
|
||||
|
||||
SPLOOP 2 ; 2*n+6
|
||||
;;====================================================================
|
||||
LDW *ARG2++,A7 ; bp[i]
|
||||
|| LDW *ARG1++,B7 ; ap[i]
|
||||
NOP 4
|
||||
SUBU B7,A7,A1:A0
|
||||
[A2] SUB A1:A0,1,A1:A0
|
||||
SPKERNEL 0,1 ; leave slot for "return borrow flag"
|
||||
|| STW A0,*A3++ ; write result
|
||||
|| AND 1,A1,A2 ; pass on borrow flag
|
||||
;;====================================================================
|
||||
BNOP RA,4
|
||||
AND 1,A1,RET ; return borrow flag
|
||||
.endasmfunc
|
||||
|
||||
.global _bn_div_words
|
||||
_bn_div_words:
|
||||
.asmfunc
|
||||
LMBD 1,A6,A0 ; leading zero bits in dv
|
||||
LMBD 1,A4,A1 ; leading zero bits in hi
|
||||
|| MVK 32,B0
|
||||
CMPLTU A1,A0,A2
|
||||
|| ADD A0,B0,B0
|
||||
[ A2] BNOP RA
|
||||
||[ A2] MVK -1,A4 ; return overflow
|
||||
||[!A2] MV A4,A3 ; reassign hi
|
||||
[!A2] MV B4,A4 ; reassign lo, will be quotient
|
||||
||[!A2] MVC B0,ILC
|
||||
[!A2] SHL A6,A0,A6 ; normalize dv
|
||||
|| MVK 1,A1
|
||||
|
||||
[!A2] CMPLTU A3,A6,A1 ; hi<dv?
|
||||
||[!A2] SHL A4,1,A5:A4 ; lo<<1
|
||||
[!A1] SUB A3,A6,A3 ; hi-=dv
|
||||
||[!A1] OR 1,A4,A4
|
||||
[!A2] SHRU A3,31,A1 ; upper bit
|
||||
||[!A2] ADDAH A5,A3,A3 ; hi<<1|lo>>31
|
||||
|
||||
SPLOOP 3
|
||||
[!A1] CMPLTU A3,A6,A1 ; hi<dv?
|
||||
||[ A1] ZERO A1
|
||||
|| SHL A4,1,A5:A4 ; lo<<1
|
||||
[!A1] SUB A3,A6,A3 ; hi-=dv
|
||||
||[!A1] OR 1,A4,A4 ; quotient
|
||||
SHRU A3,31,A1 ; upper bit
|
||||
|| ADDAH A5,A3,A3 ; hi<<1|lo>>31
|
||||
SPKERNEL
|
||||
|
||||
BNOP RA,5
|
||||
.endasmfunc
|
||||
|
||||
;;====================================================================
|
||||
;; Not really Comba algorithm, just straightforward NxM... Dedicated
|
||||
;; fully unrolled real Comba implementations are asymptotically 2x
|
||||
;; faster, but naturally larger undertaking. Purpose of this exercise
|
||||
;; was rather to learn to master nested SPLOOPs...
|
||||
;;====================================================================
|
||||
.global _bn_sqr_comba8
|
||||
.global _bn_mul_comba8
|
||||
_bn_sqr_comba8:
|
||||
MV ARG1,ARG2
|
||||
_bn_mul_comba8:
|
||||
.asmfunc
|
||||
MVK 8,B0 ; N, RILC
|
||||
|| MVK 8,A0 ; M, outer loop counter
|
||||
|| MV ARG1,A5 ; copy ap
|
||||
|| MV ARG0,B4 ; copy rp
|
||||
|| ZERO B19 ; high part of accumulator
|
||||
MVC B0,RILC
|
||||
|| SUB B0,2,B1 ; N-2, initial ILC
|
||||
|| SUB B0,1,B2 ; const B2=N-1
|
||||
|| LDW *A5++,B6 ; ap[0]
|
||||
|| MV A0,A3 ; const A3=M
|
||||
sploopNxM?: ; for best performance arrange M<=N
|
||||
[A0] SPLOOPD 2 ; 2*n+10
|
||||
|| MVC B1,ILC
|
||||
|| ADDAW B4,B0,B5
|
||||
|| ZERO B7
|
||||
|| LDW *A5++,A9 ; pre-fetch ap[1]
|
||||
|| ZERO A1
|
||||
|| SUB A0,1,A0
|
||||
;;====================================================================
|
||||
;; SPLOOP from bn_mul_add_words, but with flipped A<>B register files.
|
||||
;; This is because of Advisory 15 from TI publication SPRZ247I.
|
||||
LDW *ARG2++,A7 ; bp[i]
|
||||
NOP 3
|
||||
[A1] LDW *B5++,B7 ; rp[i]
|
||||
MPY32U A7,B6,B17:B16
|
||||
NOP 3
|
||||
ADDU B16,B7,B21:B20
|
||||
ADDU B19,B21:B20,B19:B18
|
||||
|| MV.S B17,B23
|
||||
SPKERNEL
|
||||
|| STW B18,*B4++ ; rp[i]
|
||||
|| ADD.S B19,B23,B19
|
||||
;;====================================================================
|
||||
outer?: ; m*2*(n+1)+10
|
||||
SUBAW ARG2,A3,ARG2 ; rewind bp to bp[0]
|
||||
SPMASKR
|
||||
|| CMPGT A0,1,A2 ; done pre-fetching ap[i+1]?
|
||||
MVD A9,B6 ; move through .M unit(*)
|
||||
[A2] LDW *A5++,A9 ; pre-fetch ap[i+1]
|
||||
SUBAW B5,B2,B5 ; rewind rp to rp[1]
|
||||
MVK 1,A1
|
||||
[A0] BNOP.S1 outer?,4
|
||||
|| [A0] SUB.L A0,1,A0
|
||||
STW B19,*B4--[B2] ; rewind rp to rp[1]
|
||||
|| ZERO.S B19 ; high part of accumulator
|
||||
;; end of outer?
|
||||
BNOP RA,5 ; return
|
||||
.endasmfunc
|
||||
;; (*) It should be noted that B6 is used as input to MPY32U in
|
||||
;; chronologically next cycle in *preceding* SPLOOP iteration.
|
||||
;; Normally such arrangement would require DINT, but at this
|
||||
;; point SPLOOP is draining and interrupts are disabled
|
||||
;; implicitly.
|
||||
|
||||
.global _bn_sqr_comba4
|
||||
.global _bn_mul_comba4
|
||||
_bn_sqr_comba4:
|
||||
MV ARG1,ARG2
|
||||
_bn_mul_comba4:
|
||||
.asmfunc
|
||||
.if 0
|
||||
BNOP sploopNxM?,3
|
||||
;; Above mentioned m*2*(n+1)+10 does not apply in n=m=4 case,
|
||||
;; because of low-counter effect, when prologue phase finishes
|
||||
;; before SPKERNEL instruction is reached. As result it's 25%
|
||||
;; slower than expected...
|
||||
MVK 4,B0 ; N, RILC
|
||||
|| MVK 4,A0 ; M, outer loop counter
|
||||
|| MV ARG1,A5 ; copy ap
|
||||
|| MV ARG0,B4 ; copy rp
|
||||
|| ZERO B19 ; high part of accumulator
|
||||
MVC B0,RILC
|
||||
|| SUB B0,2,B1 ; first ILC
|
||||
|| SUB B0,1,B2 ; const B2=N-1
|
||||
|| LDW *A5++,B6 ; ap[0]
|
||||
|| MV A0,A3 ; const A3=M
|
||||
.else
|
||||
;; This alternative is an exercise in fully unrolled Comba
|
||||
;; algorithm implementation that operates at n*(n+1)+12, or
|
||||
;; as little as 32 cycles...
|
||||
LDW *ARG1[0],B16 ; a[0]
|
||||
|| LDW *ARG2[0],A16 ; b[0]
|
||||
LDW *ARG1[1],B17 ; a[1]
|
||||
|| LDW *ARG2[1],A17 ; b[1]
|
||||
LDW *ARG1[2],B18 ; a[2]
|
||||
|| LDW *ARG2[2],A18 ; b[2]
|
||||
LDW *ARG1[3],B19 ; a[3]
|
||||
|| LDW *ARG2[3],A19 ; b[3]
|
||||
NOP
|
||||
MPY32U A16,B16,A1:A0 ; a[0]*b[0]
|
||||
MPY32U A17,B16,A23:A22 ; a[0]*b[1]
|
||||
MPY32U A16,B17,A25:A24 ; a[1]*b[0]
|
||||
MPY32U A16,B18,A27:A26 ; a[2]*b[0]
|
||||
STW A0,*ARG0[0]
|
||||
|| MPY32U A17,B17,A29:A28 ; a[1]*b[1]
|
||||
MPY32U A18,B16,A31:A30 ; a[0]*b[2]
|
||||
|| ADDU A22,A1,A1:A0
|
||||
MV A23,B0
|
||||
|| MPY32U A19,B16,A21:A20 ; a[3]*b[0]
|
||||
|| ADDU A24,A1:A0,A1:A0
|
||||
ADDU A25,B0,B1:B0
|
||||
|| STW A0,*ARG0[1]
|
||||
|| MPY32U A18,B17,A23:A22 ; a[2]*b[1]
|
||||
|| ADDU A26,A1,A9:A8
|
||||
ADDU A27,B1,B9:B8
|
||||
|| MPY32U A17,B18,A25:A24 ; a[1]*b[2]
|
||||
|| ADDU A28,A9:A8,A9:A8
|
||||
ADDU A29,B9:B8,B9:B8
|
||||
|| MPY32U A16,B19,A27:A26 ; a[0]*b[3]
|
||||
|| ADDU A30,A9:A8,A9:A8
|
||||
ADDU A31,B9:B8,B9:B8
|
||||
|| ADDU B0,A9:A8,A9:A8
|
||||
STW A8,*ARG0[2]
|
||||
|| ADDU A20,A9,A1:A0
|
||||
ADDU A21,B9,B1:B0
|
||||
|| MPY32U A19,B17,A21:A20 ; a[3]*b[1]
|
||||
|| ADDU A22,A1:A0,A1:A0
|
||||
ADDU A23,B1:B0,B1:B0
|
||||
|| MPY32U A18,B18,A23:A22 ; a[2]*b[2]
|
||||
|| ADDU A24,A1:A0,A1:A0
|
||||
ADDU A25,B1:B0,B1:B0
|
||||
|| MPY32U A17,B19,A25:A24 ; a[1]*b[3]
|
||||
|| ADDU A26,A1:A0,A1:A0
|
||||
ADDU A27,B1:B0,B1:B0
|
||||
|| ADDU B8,A1:A0,A1:A0
|
||||
STW A0,*ARG0[3]
|
||||
|| MPY32U A19,B18,A27:A26 ; a[3]*b[2]
|
||||
|| ADDU A20,A1,A9:A8
|
||||
ADDU A21,B1,B9:B8
|
||||
|| MPY32U A18,B19,A29:A28 ; a[2]*b[3]
|
||||
|| ADDU A22,A9:A8,A9:A8
|
||||
ADDU A23,B9:B8,B9:B8
|
||||
|| MPY32U A19,B19,A31:A30 ; a[3]*b[3]
|
||||
|| ADDU A24,A9:A8,A9:A8
|
||||
ADDU A25,B9:B8,B9:B8
|
||||
|| ADDU B0,A9:A8,A9:A8
|
||||
STW A8,*ARG0[4]
|
||||
|| ADDU A26,A9,A1:A0
|
||||
ADDU A27,B9,B1:B0
|
||||
|| ADDU A28,A1:A0,A1:A0
|
||||
ADDU A29,B1:B0,B1:B0
|
||||
|| BNOP RA
|
||||
|| ADDU B8,A1:A0,A1:A0
|
||||
STW A0,*ARG0[5]
|
||||
|| ADDU A30,A1,A9:A8
|
||||
ADD A31,B1,B8
|
||||
ADDU B0,A9:A8,A9:A8 ; removed || to avoid cross-path stall below
|
||||
ADD B8,A9,A9
|
||||
|| STW A8,*ARG0[6]
|
||||
STW A9,*ARG0[7]
|
||||
.endif
|
||||
.endasmfunc
|
160
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/c64xplus-gf2m.pl
vendored
Normal file
160
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/c64xplus-gf2m.pl
vendored
Normal file
|
@ -0,0 +1,160 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
#
|
||||
# February 2012
|
||||
#
|
||||
# The module implements bn_GF2m_mul_2x2 polynomial multiplication
|
||||
# used in bn_gf2m.c. It's kind of low-hanging mechanical port from
|
||||
# C for the time being... The subroutine runs in 37 cycles, which is
|
||||
# 4.5x faster than compiler-generated code. Though comparison is
|
||||
# totally unfair, because this module utilizes Galois Field Multiply
|
||||
# instruction.
|
||||
|
||||
# Take the first command-line argument that looks like a file name
# ("name.ext") as the output file and send STDOUT there.  A failed open is
# fatal rather than silently ignored; when no such argument is present,
# STDOUT is left untouched.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
if ($output) {
	open STDOUT,">$output" or die "can't open $output: $!";
}
|
||||
|
||||
($rp,$a1,$a0,$b1,$b0)=("A4","B4","A6","B6","A8"); # argument vector
|
||||
|
||||
($Alo,$Alox0,$Alox1,$Alox2,$Alox3)=map("A$_",(16..20));
|
||||
($Ahi,$Ahix0,$Ahix1,$Ahix2,$Ahix3)=map("B$_",(16..20));
|
||||
($B_0,$B_1,$B_2,$B_3)=("B5","A5","A7","B7");
|
||||
($A,$B)=($Alo,$B_1);
|
||||
$xFF="B1";
|
||||
|
||||
sub mul_1x1_upper {
|
||||
my ($A,$B)=@_;
|
||||
$code.=<<___;
|
||||
EXTU $B,8,24,$B_2 ; smash $B to 4 bytes
|
||||
|| AND $B,$xFF,$B_0
|
||||
|| SHRU $B,24,$B_3
|
||||
SHRU $A,16, $Ahi ; smash $A to two halfwords
|
||||
|| EXTU $A,16,16,$Alo
|
||||
|
||||
XORMPY $Alo,$B_2,$Alox2 ; 16x8 bits multiplication
|
||||
|| XORMPY $Ahi,$B_2,$Ahix2
|
||||
|| EXTU $B,16,24,$B_1
|
||||
XORMPY $Alo,$B_0,$Alox0
|
||||
|| XORMPY $Ahi,$B_0,$Ahix0
|
||||
XORMPY $Alo,$B_3,$Alox3
|
||||
|| XORMPY $Ahi,$B_3,$Ahix3
|
||||
XORMPY $Alo,$B_1,$Alox1
|
||||
|| XORMPY $Ahi,$B_1,$Ahix1
|
||||
___
|
||||
}
|
||||
sub mul_1x1_merged {
|
||||
my ($OUTlo,$OUThi,$A,$B)=@_;
|
||||
$code.=<<___;
|
||||
EXTU $B,8,24,$B_2 ; smash $B to 4 bytes
|
||||
|| AND $B,$xFF,$B_0
|
||||
|| SHRU $B,24,$B_3
|
||||
SHRU $A,16, $Ahi ; smash $A to two halfwords
|
||||
|| EXTU $A,16,16,$Alo
|
||||
|
||||
XOR $Ahix0,$Alox2,$Ahix0
|
||||
|| MV $Ahix2,$OUThi
|
||||
|| XORMPY $Alo,$B_2,$Alox2
|
||||
XORMPY $Ahi,$B_2,$Ahix2
|
||||
|| EXTU $B,16,24,$B_1
|
||||
|| XORMPY $Alo,$B_0,A1 ; $Alox0
|
||||
XOR $Ahix1,$Alox3,$Ahix1
|
||||
|| SHL $Ahix0,16,$OUTlo
|
||||
|| SHRU $Ahix0,16,$Ahix0
|
||||
XOR $Alox0,$OUTlo,$OUTlo
|
||||
|| XOR $Ahix0,$OUThi,$OUThi
|
||||
|| XORMPY $Ahi,$B_0,$Ahix0
|
||||
|| XORMPY $Alo,$B_3,$Alox3
|
||||
|| SHL $Alox1,8,$Alox1
|
||||
|| SHL $Ahix3,8,$Ahix3
|
||||
XOR $Alox1,$OUTlo,$OUTlo
|
||||
|| XOR $Ahix3,$OUThi,$OUThi
|
||||
|| XORMPY $Ahi,$B_3,$Ahix3
|
||||
|| SHL $Ahix1,24,$Alox1
|
||||
|| SHRU $Ahix1,8, $Ahix1
|
||||
XOR $Alox1,$OUTlo,$OUTlo
|
||||
|| XOR $Ahix1,$OUThi,$OUThi
|
||||
|| XORMPY $Alo,$B_1,$Alox1
|
||||
|| XORMPY $Ahi,$B_1,$Ahix1
|
||||
|| MV A1,$Alox0
|
||||
___
|
||||
}
|
||||
sub mul_1x1_lower {
|
||||
my ($OUTlo,$OUThi)=@_;
|
||||
$code.=<<___;
|
||||
;NOP
|
||||
XOR $Ahix0,$Alox2,$Ahix0
|
||||
|| MV $Ahix2,$OUThi
|
||||
NOP
|
||||
XOR $Ahix1,$Alox3,$Ahix1
|
||||
|| SHL $Ahix0,16,$OUTlo
|
||||
|| SHRU $Ahix0,16,$Ahix0
|
||||
XOR $Alox0,$OUTlo,$OUTlo
|
||||
|| XOR $Ahix0,$OUThi,$OUThi
|
||||
|| SHL $Alox1,8,$Alox1
|
||||
|| SHL $Ahix3,8,$Ahix3
|
||||
XOR $Alox1,$OUTlo,$OUTlo
|
||||
|| XOR $Ahix3,$OUThi,$OUThi
|
||||
|| SHL $Ahix1,24,$Alox1
|
||||
|| SHRU $Ahix1,8, $Ahix1
|
||||
XOR $Alox1,$OUTlo,$OUTlo
|
||||
|| XOR $Ahix1,$OUThi,$OUThi
|
||||
___
|
||||
}
|
||||
$code.=<<___;
|
||||
.text
|
||||
|
||||
.if .ASSEMBLER_VERSION<7000000
|
||||
.asg 0,__TI_EABI__
|
||||
.endif
|
||||
.if __TI_EABI__
|
||||
.asg bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2
|
||||
.endif
|
||||
|
||||
.global _bn_GF2m_mul_2x2
|
||||
_bn_GF2m_mul_2x2:
|
||||
.asmfunc
|
||||
MVK 0xFF,$xFF
|
||||
___
|
||||
&mul_1x1_upper($a0,$b0); # a0·b0
|
||||
$code.=<<___;
|
||||
|| MV $b1,$B
|
||||
MV $a1,$A
|
||||
___
|
||||
&mul_1x1_merged("A28","B28",$A,$B); # a0·b0/a1·b1
|
||||
$code.=<<___;
|
||||
|| XOR $b0,$b1,$B
|
||||
XOR $a0,$a1,$A
|
||||
___
|
||||
&mul_1x1_merged("A31","B31",$A,$B); # a1·b1/(a0+a1)·(b0+b1)
|
||||
$code.=<<___;
|
||||
XOR A28,A31,A29
|
||||
|| XOR B28,B31,B29 ; a0·b0+a1·b1
|
||||
___
|
||||
&mul_1x1_lower("A30","B30"); # (a0+a1)·(b0+b1)
|
||||
$code.=<<___;
|
||||
|| BNOP B3
|
||||
XOR A29,A30,A30
|
||||
|| XOR B29,B30,B30 ; (a0+a1)·(b0+b1)-a0·b0-a1·b1
|
||||
XOR B28,A30,A30
|
||||
|| STW A28,*${rp}[0]
|
||||
XOR B30,A31,A31
|
||||
|| STW A30,*${rp}[1]
|
||||
STW A31,*${rp}[2]
|
||||
STW B31,*${rp}[3]
|
||||
.endasmfunc
|
||||
___
|
||||
|
||||
print $code;
# close is where buffered write errors surface (e.g. a full disk); fail
# loudly instead of leaving a truncated assembly file behind.
close STDOUT or die "error closing STDOUT: $!";
|
298
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/co-586.pl
vendored
Normal file
298
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/co-586.pl
vendored
Normal file
|
@ -0,0 +1,298 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
# Work out the directory this script was started from so that the shared
# perlasm helpers can be located relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";

# The last command-line argument names the output file; everything printed
# from here on is redirected into it.
$output = pop;
open STDOUT,">$output";

&asm_init($ARGV[0]);

# Generate the 8-word and 4-word multiply and square routines.
&bn_mul_comba("bn_mul_comba8",8);
&bn_mul_comba("bn_mul_comba4",4);
&bn_sqr_comba("bn_sqr_comba8",8);
&bn_sqr_comba("bn_sqr_comba4",4);

&asm_finish();

# Output is buffered, so a write failure may only be reported when the
# handle is closed.  Abort in that case instead of silently leaving a
# truncated assembly file for the build to pick up.
close STDOUT or die "error closing STDOUT: $!";
|
||||
|
||||
# mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emit code that multiplies eax by edx and adds the 64-bit product into the
# three-register accumulator ($c2:$c1:$c0).  eax and edx are expected to be
# loaded by the caller (or by the previous invocation).
#
# $pos selects the loads/stores that are interleaved with the arithmetic:
#   0   load a[$na] into eax and b[$nb] into edx for the next product
#   1   load the r pointer (first argument) into eax, store $c0 to r[$i],
#       then load b[$nb] into edx and a[$na] into eax
#   >1  load the r pointer into eax and store $c0 to r[$i] only
sub mul_add_c
	{
	local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("mul a[$ai]*b[$bi]");

	&mul("edx");
	&add($c0,"eax");
	if ($pos == 0)
		{
		&mov("eax",&DWP(($na)*4,$a,"",0));	# load next a
		}
	if ($pos > 0)
		{
		&mov("eax",&wparam(0));			# load r[]
		}

	&adc($c1,"edx");
	if ($pos == 0 || $pos == 1)
		{
		&mov("edx",&DWP(($nb)*4,$b,"",0));	# load next b
		}

	&adc($c2,0);
	if ($pos > 0)
		{
		# end of this result column: write it out through eax
		&mov(&DWP($i*4,"eax","",0),$c0);	# save r[]
		}
	if ($pos == 1)
		{
		&mov("eax",&DWP(($na)*4,$a,"",0));	# load next a
		}
	}
|
||||
|
||||
# sqr_add_c($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emit code that adds a single a[$ai]*a[$bi] product into the accumulator
# ($c2:$c1:$c0).  eax (and edx when $ai != $bi) must already hold the
# operands.
#
# $pos selects the loads/stores that are interleaved with the arithmetic:
#   0   load a[$na] into eax for the next product
#   1   store $c0 to r[$i], load a[$nb] into edx (only when $na != $nb)
#       and a[$na] into eax
#   >1  store $c0 to r[$i] only
sub sqr_add_c
	{
	local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("sqr a[$ai]*a[$bi]");

	# A true square multiplies eax by itself; otherwise the second
	# operand is in edx.
	&mul(($ai == $bi) ? "eax" : "edx");
	&add($c0,"eax");
	if ($pos == 0)
		{
		&mov("eax",&DWP(($na)*4,$a,"",0));	# load next a
		}

	&adc($c1,"edx");
	if (($pos == 1) && ($na != $nb))
		{
		&mov("edx",&DWP(($nb)*4,$a,"",0));
		}

	&adc($c2,0);
	if ($pos > 0)
		{
		# end of this result column: write it out
		&mov(&DWP($i*4,$r,"",0),$c0);		# save r[]
		}
	if ($pos == 1)
		{
		&mov("eax",&DWP(($na)*4,$a,"",0));	# load next a
		}
	}
|
||||
|
||||
# sqr_add_c2($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emit code that adds twice the a[$ai]*a[$bi] product into the accumulator
# ($c2:$c1:$c0): the edx:eax product is doubled in place, the carry out of
# the doubling goes to $c2, and the doubled value is then added to $c1:$c0.
# eax and edx must already hold the operands.
#
# $pos selects the loads/stores that are interleaved with the arithmetic:
#   0   load a[$na] into eax, and a[$nb] into edx when $na != $nb
#   1   as 0, and additionally store $c0 to r[$i]
#   >1  store $c0 to r[$i] only
sub sqr_add_c2
	{
	local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("sqr a[$ai]*a[$bi]");

	&mul(($ai == $bi) ? "eax" : "edx");

	# double the product, collecting the bit shifted out of edx
	&add("eax","eax");
	&adc("edx","edx");
	&adc($c2,0);

	# accumulate the doubled product
	&add($c0,"eax");
	&adc($c1,"edx");
	if ($pos == 0 || $pos == 1)
		{
		&mov("eax",&DWP(($na)*4,$a,"",0));	# load next a
		}
	&adc($c2,0);

	if ($pos > 0)
		{
		&mov(&DWP($i*4,$r,"",0),$c0);		# save r[]
		}
	if (($pos <= 1) && ($na != $nb))
		{
		&mov("edx",&DWP(($nb)*4,$a,"",0));
		}
	}
|
||||
|
||||
# bn_mul_comba($name,$num)
#
# Emit a fully unrolled function called $name that multiplies two $num-word
# arrays (second and third arguments) and stores the 2*$num-word product
# through the first argument.  The product is built one result word at a
# time: every a[]*b[] term belonging to a word is accumulated with
# mul_add_c, the finished word is stored, and the three accumulator
# registers rotate roles for the next word.
sub bn_mul_comba
	{
	local($name,$num)=@_;
	local($a,$b,$c0,$c1,$c2);
	local($i,$as,$ae,$bs,$be,$ai,$bi);
	local($tot,$end);

	&function_begin_B($name,"");

	# accumulator registers and operand pointers
	($c0,$c1,$c2)=("ebx","ecx","ebp");
	($a,$b)=("esi","edi");

	# index window of a[] and b[] contributing to the current word
	($as,$ae,$bs,$be)=(0,0,0,0);
	$tot=2*$num-1;

	&push("esi");
	 &mov($a,&wparam(1));
	&push("edi");
	 &mov($b,&wparam(2));
	&push("ebp");
	&push("ebx");

	&xor($c0,$c0);
	 &mov("eax",&DWP(0,$a,"",0));	# load the first word of a
	&xor($c1,$c1);
	 &mov("edx",&DWP(0,$b,"",0));	# load the first word of b

	for ($i=0; $i<$tot; $i++)
		{
		$ai=$as;
		$bi=$bs;
		$end=$be+1;

		&comment("################## Calculate word $i");

		for ($j=$bs; $j<$end; $j++)
			{
			# the top accumulator word starts from zero for each column
			&xor($c2,$c2) if ($j == $bs);

			if (($j+1) == $end)
				{
				# last term of this word: the result gets stored,
				# and the operands of the next word are fetched
				# unless this is the very last word ($v == 2)
				$v=(($i+1) == $tot) ? 2 : 1;
				$na=$as+($i < ($num-1));
				$nb=$bs+($i >= ($num-1));
				}
			else
				{
				$v=0;
				$na=($ai-1);
				$nb=($bi+1);
				}
			#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
			&mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
			if ($v)
				{
				&comment("saved r[$i]");
				# rotate accumulator roles for the next word
				($c0,$c1,$c2)=($c1,$c2,$c0);
				}
			$ai--;
			$bi++;
			}

		# slide the index window: it grows along a[] for the first
		# $num-1 words and shrinks along b[] afterwards
		if ($i < ($num-1))
			{
			$as++;
			$be++;
			}
		else
			{
			$ae++;
			$bs++;
			}
		}

	# eax still holds the r pointer loaded by the final mul_add_c call
	&comment("save r[$i]");
	&mov(&DWP($i*4,"eax","",0),$c0);

	&pop("ebx");
	&pop("ebp");
	&pop("edi");
	&pop("esi");
	&ret();
	&function_end_B($name);
	}
|
||||
|
||||
# bn_sqr_comba($name,$num)
#
# Emit a fully unrolled function called $name that squares the $num-word
# array passed as its second argument and stores the 2*$num-word result
# through its first argument.  The result is built one word at a time;
# off-diagonal terms a[i]*a[j] (i != j) are accumulated doubled via
# sqr_add_c2, diagonal terms a[i]*a[i] once via sqr_add_c, and the inner
# loop stops as soon as the indices cross.
sub bn_sqr_comba
	{
	local($name,$num)=@_;
	# These are working registers, not arguments: they are assigned
	# below, so they must not be seeded from @_.
	local($r,$a,$c0,$c1,$c2);
	local($i,$as,$ae,$bs,$be,$ai,$bi);
	local($b,$tot,$end,$half);

	&function_begin_B($name,"");

	$c0="ebx";
	$c1="ecx";
	$c2="ebp";
	$a="esi";
	$r="edi";

	&push("esi");
	&push("edi");
	&push("ebp");
	&push("ebx");
	&mov($r,&wparam(0));
	&mov($a,&wparam(1));
	&xor($c0,$c0);
	&xor($c1,$c1);
	&mov("eax",&DWP(0,$a,"",0));	# load the first word

	# index window of a[] contributing to the current word
	$as=0;
	$ae=0;
	$bs=0;
	$be=0;
	$tot=$num+$num-1;

	for ($i=0; $i<$tot; $i++)
		{
		$ai=$as;
		$bi=$bs;
		$end=$be+1;

		&comment("############### Calculate word $i");
		for ($j=$bs; $j<$end; $j++)
			{
			# the top accumulator word starts from zero for each column
			&xor($c2,$c2) if ($j == $bs);

			if (($ai-1) < ($bi+1))
				{
				# indices cross after this term: it is the last
				# one of the word ($v == 2 on the very last word)
				$v=1;
				$v=2 if ($i+1) == $tot;
				$na=$as+($i < ($num-1));
				$nb=$bs+($i >= ($num-1));
				}
			else
				{
				$v=0;
				$na=$ai-1;
				$nb=$bi+1;
				}

			if ($ai == $bi)
				{
				&sqr_add_c($r,$a,$ai,$bi,
					$c0,$c1,$c2,$v,$i,$na,$nb);
				}
			else
				{
				&sqr_add_c2($r,$a,$ai,$bi,
					$c0,$c1,$c2,$v,$i,$na,$nb);
				}
			if ($v)
				{
				&comment("saved r[$i]");
				#&mov(&DWP($i*4,$r,"",0),$c0);
				# rotate accumulator roles for the next word
				($c0,$c1,$c2)=($c1,$c2,$c0);
				last;
				}
			$ai--;
			$bi++;
			}
		$as++ if ($i < ($num-1));
		$ae++ if ($i >= ($num-1));

		$bs++ if ($i >= ($num-1));
		$be++ if ($i < ($num-1));
		}

	# store the final (top) result word
	&mov(&DWP($i*4,$r,"",0),$c0);
	&pop("ebx");
	&pop("ebp");
	&pop("edi");
	&pop("esi");
	&ret();
	&function_end_B($name);
	}
|
860
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ia64-mont.pl
vendored
Normal file
860
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ia64-mont.pl
vendored
Normal file
|
@ -0,0 +1,860 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
|
||||
# January 2010
|
||||
#
|
||||
# "Teaser" Montgomery multiplication module for IA-64. There are
|
||||
# several possibilities for improvement:
|
||||
#
|
||||
# - modulo-scheduling outer loop would eliminate quite a number of
|
||||
# stalls after ldf8, xma and getf.sig outside inner loop and
|
||||
# improve shorter key performance;
|
||||
# - shorter vector support [with input vectors being fetched only
|
||||
# once] should be added;
|
||||
# - 2x unroll with help of n0[1] would make the code scalable on
|
||||
# "wider" IA-64, "wider" than Itanium 2 that is, which is not of
|
||||
# acute interest, because upcoming Tukwila's individual cores are
|
||||
# reportedly based on Itanium 2 design;
|
||||
# - dedicated squaring procedure(?);
|
||||
#
|
||||
# January 2010
|
||||
#
|
||||
# Shorter vector support is implemented by zero-padding ap and np
|
||||
# vectors up to 8 elements, or 512 bits. This means that 256-bit
|
||||
# inputs will be processed only 2 times faster than 512-bit inputs,
|
||||
# not 4 [as one would expect, because algorithm complexity is n^2].
|
||||
# The reason for padding is that inputs shorter than 512 bits won't
|
||||
# be processed faster anyway, because minimal critical path of the
|
||||
# core loop happens to match 512-bit timing. Either way, it resulted
|
||||
# in >100% improvement of 512-bit RSA sign benchmark and 50% - of
|
||||
# 1024-bit one [in comparison to original version of *this* module].
|
||||
#
|
||||
# So far 'openssl speed rsa dsa' output on 900MHz Itanium 2 *with*
|
||||
# this module is:
|
||||
# sign verify sign/s verify/s
|
||||
# rsa 512 bits 0.000290s 0.000024s 3452.8 42031.4
|
||||
# rsa 1024 bits 0.000793s 0.000058s 1261.7 17172.0
|
||||
# rsa 2048 bits 0.005908s 0.000148s 169.3 6754.0
|
||||
# rsa 4096 bits 0.033456s 0.000469s 29.9 2133.6
|
||||
# dsa 512 bits 0.000253s 0.000198s 3949.9 5057.0
|
||||
# dsa 1024 bits 0.000585s 0.000607s 1708.4 1647.4
|
||||
# dsa 2048 bits 0.001453s 0.001703s 688.1 587.4
|
||||
#
|
||||
# ... and *without* (but still with ia64.S):
|
||||
#
|
||||
# rsa 512 bits 0.000670s 0.000041s 1491.8 24145.5
|
||||
# rsa 1024 bits 0.001988s 0.000080s 502.9 12499.3
|
||||
# rsa 2048 bits 0.008702s 0.000189s 114.9 5293.9
|
||||
# rsa 4096 bits 0.043860s 0.000533s 22.8 1875.9
|
||||
# dsa 512 bits 0.000441s 0.000427s 2265.3 2340.6
|
||||
# dsa 1024 bits 0.000823s 0.000867s 1215.6 1153.2
|
||||
# dsa 2048 bits 0.001894s 0.002179s 528.1 458.9
|
||||
#
|
||||
# As it can be seen, RSA sign performance improves by 130-30%,
|
||||
# hereafter less for longer keys, while verify - by 74-13%.
|
||||
# DSA performance improves by 115-30%.
|
||||
|
||||
# The last command-line argument names the output file.
$output=pop;

# Choose the instruction used to form addresses from pointer arguments.
# On HP-UX the default is "addp4"; plain "add" is used when one of the
# remaining arguments requests a 64-bit build ("+DD64" or "-mlp64").
# Everywhere else "add" is used.
if ($^O eq "hpux") {
    $ADDP="addp4";
    # This must be an alternation: the former bracketed form was a character
    # class and matched any single one of "+D|-mlp" followed by "64".
    for (@ARGV) { $ADDP="add" if (/(?:\+DD|-mlp)64/); }
} else { $ADDP="add"; }
|
||||
|
||||
$code=<<___;
|
||||
.explicit
|
||||
.text
|
||||
|
||||
// int bn_mul_mont (BN_ULONG *rp,const BN_ULONG *ap,
|
||||
// const BN_ULONG *bp,const BN_ULONG *np,
|
||||
// const BN_ULONG *n0p,int num);
|
||||
.align 64
|
||||
.global bn_mul_mont#
|
||||
.proc bn_mul_mont#
|
||||
bn_mul_mont:
|
||||
.prologue
|
||||
.body
|
||||
{ .mmi; cmp4.le p6,p7=2,r37;;
|
||||
(p6) cmp4.lt.unc p8,p9=8,r37
|
||||
mov ret0=r0 };;
|
||||
{ .bbb;
|
||||
(p9) br.cond.dptk.many bn_mul_mont_8
|
||||
(p8) br.cond.dpnt.many bn_mul_mont_general
|
||||
(p7) br.ret.spnt.many b0 };;
|
||||
.endp bn_mul_mont#
|
||||
|
||||
prevfs=r2; prevpr=r3; prevlc=r10; prevsp=r11;
|
||||
|
||||
rptr=r8; aptr=r9; bptr=r14; nptr=r15;
|
||||
tptr=r16; // &tp[0]
|
||||
tp_1=r17; // &tp[-1]
|
||||
num=r18; len=r19; lc=r20;
|
||||
topbit=r21; // carry bit from tmp[num]
|
||||
|
||||
n0=f6;
|
||||
m0=f7;
|
||||
bi=f8;
|
||||
|
||||
.align 64
|
||||
.local bn_mul_mont_general#
|
||||
.proc bn_mul_mont_general#
|
||||
bn_mul_mont_general:
|
||||
.prologue
|
||||
{ .mmi; .save ar.pfs,prevfs
|
||||
alloc prevfs=ar.pfs,6,2,0,8
|
||||
$ADDP aptr=0,in1
|
||||
.save ar.lc,prevlc
|
||||
mov prevlc=ar.lc }
|
||||
{ .mmi; .vframe prevsp
|
||||
mov prevsp=sp
|
||||
$ADDP bptr=0,in2
|
||||
.save pr,prevpr
|
||||
mov prevpr=pr };;
|
||||
|
||||
.body
|
||||
.rotf alo[6],nlo[4],ahi[8],nhi[6]
|
||||
.rotr a[3],n[3],t[2]
|
||||
|
||||
{ .mmi; ldf8 bi=[bptr],8 // (*bp++)
|
||||
ldf8 alo[4]=[aptr],16 // ap[0]
|
||||
$ADDP r30=8,in1 };;
|
||||
{ .mmi; ldf8 alo[3]=[r30],16 // ap[1]
|
||||
ldf8 alo[2]=[aptr],16 // ap[2]
|
||||
$ADDP in4=0,in4 };;
|
||||
{ .mmi; ldf8 alo[1]=[r30] // ap[3]
|
||||
ldf8 n0=[in4] // n0
|
||||
$ADDP rptr=0,in0 }
|
||||
{ .mmi; $ADDP nptr=0,in3
|
||||
mov r31=16
|
||||
zxt4 num=in5 };;
|
||||
{ .mmi; ldf8 nlo[2]=[nptr],8 // np[0]
|
||||
shladd len=num,3,r0
|
||||
shladd r31=num,3,r31 };;
|
||||
{ .mmi; ldf8 nlo[1]=[nptr],8 // np[1]
|
||||
add lc=-5,num
|
||||
sub r31=sp,r31 };;
|
||||
{ .mfb; and sp=-16,r31 // alloca
|
||||
xmpy.hu ahi[2]=alo[4],bi // ap[0]*bp[0]
|
||||
nop.b 0 }
|
||||
{ .mfb; nop.m 0
|
||||
xmpy.lu alo[4]=alo[4],bi
|
||||
brp.loop.imp .L1st_ctop,.L1st_cend-16
|
||||
};;
|
||||
{ .mfi; nop.m 0
|
||||
xma.hu ahi[1]=alo[3],bi,ahi[2] // ap[1]*bp[0]
|
||||
add tp_1=8,sp }
|
||||
{ .mfi; nop.m 0
|
||||
xma.lu alo[3]=alo[3],bi,ahi[2]
|
||||
mov pr.rot=0x20001f<<16
|
||||
// ------^----- (p40) at first (p23)
|
||||
// ----------^^ p[16:20]=1
|
||||
};;
|
||||
{ .mfi; nop.m 0
|
||||
xmpy.lu m0=alo[4],n0 // (ap[0]*bp[0])*n0
|
||||
mov ar.lc=lc }
|
||||
{ .mfi; nop.m 0
|
||||
fcvt.fxu.s1 nhi[1]=f0
|
||||
mov ar.ec=8 };;
|
||||
|
||||
.align 32
|
||||
.L1st_ctop:
|
||||
.pred.rel "mutex",p40,p42
|
||||
{ .mfi; (p16) ldf8 alo[0]=[aptr],8 // *(aptr++)
|
||||
(p18) xma.hu ahi[0]=alo[2],bi,ahi[1]
|
||||
(p40) add n[2]=n[2],a[2] } // (p23) }
|
||||
{ .mfi; (p18) ldf8 nlo[0]=[nptr],8 // *(nptr++)(p16)
|
||||
(p18) xma.lu alo[2]=alo[2],bi,ahi[1]
|
||||
(p42) add n[2]=n[2],a[2],1 };; // (p23)
|
||||
{ .mfi; (p21) getf.sig a[0]=alo[5]
|
||||
(p20) xma.hu nhi[0]=nlo[2],m0,nhi[1]
|
||||
(p42) cmp.leu p41,p39=n[2],a[2] } // (p23)
|
||||
{ .mfi; (p23) st8 [tp_1]=n[2],8
|
||||
(p20) xma.lu nlo[2]=nlo[2],m0,nhi[1]
|
||||
(p40) cmp.ltu p41,p39=n[2],a[2] } // (p23)
|
||||
{ .mmb; (p21) getf.sig n[0]=nlo[3]
|
||||
(p16) nop.m 0
|
||||
br.ctop.sptk .L1st_ctop };;
|
||||
.L1st_cend:
|
||||
|
||||
{ .mmi; getf.sig a[0]=ahi[6] // (p24)
|
||||
getf.sig n[0]=nhi[4]
|
||||
add num=-1,num };; // num--
|
||||
{ .mmi; .pred.rel "mutex",p40,p42
|
||||
(p40) add n[0]=n[0],a[0]
|
||||
(p42) add n[0]=n[0],a[0],1
|
||||
sub aptr=aptr,len };; // rewind
|
||||
{ .mmi; .pred.rel "mutex",p40,p42
|
||||
(p40) cmp.ltu p41,p39=n[0],a[0]
|
||||
(p42) cmp.leu p41,p39=n[0],a[0]
|
||||
sub nptr=nptr,len };;
|
||||
{ .mmi; .pred.rel "mutex",p39,p41
|
||||
(p39) add topbit=r0,r0
|
||||
(p41) add topbit=r0,r0,1
|
||||
nop.i 0 }
|
||||
{ .mmi; st8 [tp_1]=n[0]
|
||||
add tptr=16,sp
|
||||
add tp_1=8,sp };;
|
||||
|
||||
.Louter:
|
||||
{ .mmi; ldf8 bi=[bptr],8 // (*bp++)
|
||||
ldf8 ahi[3]=[tptr] // tp[0]
|
||||
add r30=8,aptr };;
|
||||
{ .mmi; ldf8 alo[4]=[aptr],16 // ap[0]
|
||||
ldf8 alo[3]=[r30],16 // ap[1]
|
||||
add r31=8,nptr };;
|
||||
{ .mfb; ldf8 alo[2]=[aptr],16 // ap[2]
|
||||
xma.hu ahi[2]=alo[4],bi,ahi[3] // ap[0]*bp[i]+tp[0]
|
||||
brp.loop.imp .Linner_ctop,.Linner_cend-16
|
||||
}
|
||||
{ .mfb; ldf8 alo[1]=[r30] // ap[3]
|
||||
xma.lu alo[4]=alo[4],bi,ahi[3]
|
||||
clrrrb.pr };;
|
||||
{ .mfi; ldf8 nlo[2]=[nptr],16 // np[0]
|
||||
xma.hu ahi[1]=alo[3],bi,ahi[2] // ap[1]*bp[i]
|
||||
nop.i 0 }
|
||||
{ .mfi; ldf8 nlo[1]=[r31] // np[1]
|
||||
xma.lu alo[3]=alo[3],bi,ahi[2]
|
||||
mov pr.rot=0x20101f<<16
|
||||
// ------^----- (p40) at first (p23)
|
||||
// --------^--- (p30) at first (p22)
|
||||
// ----------^^ p[16:20]=1
|
||||
};;
|
||||
{ .mfi; st8 [tptr]=r0 // tp[0] is already accounted
|
||||
xmpy.lu m0=alo[4],n0 // (ap[0]*bp[i]+tp[0])*n0
|
||||
mov ar.lc=lc }
|
||||
{ .mfi;
|
||||
fcvt.fxu.s1 nhi[1]=f0
|
||||
mov ar.ec=8 };;
|
||||
|
||||
// This loop spins in 4*(n+7) ticks on Itanium 2 and should spin in
|
||||
// 7*(n+7) ticks on Itanium (the one codenamed Merced). Factor of 7
|
||||
// in latter case accounts for two-tick pipeline stall, which means
|
||||
// that its performance would be ~20% lower than optimal one. No
|
||||
// attempt was made to address this, because original Itanium is
|
||||
// hardly represented out in the wild...
|
||||
.align 32
|
||||
.Linner_ctop:
|
||||
.pred.rel "mutex",p40,p42
|
||||
.pred.rel "mutex",p30,p32
|
||||
{ .mfi; (p16) ldf8 alo[0]=[aptr],8 // *(aptr++)
|
||||
(p18) xma.hu ahi[0]=alo[2],bi,ahi[1]
|
||||
(p40) add n[2]=n[2],a[2] } // (p23)
|
||||
{ .mfi; (p16) nop.m 0
|
||||
(p18) xma.lu alo[2]=alo[2],bi,ahi[1]
|
||||
(p42) add n[2]=n[2],a[2],1 };; // (p23)
|
||||
{ .mfi; (p21) getf.sig a[0]=alo[5]
|
||||
(p16) nop.f 0
|
||||
(p40) cmp.ltu p41,p39=n[2],a[2] } // (p23)
|
||||
{ .mfi; (p21) ld8 t[0]=[tptr],8
|
||||
(p16) nop.f 0
|
||||
(p42) cmp.leu p41,p39=n[2],a[2] };; // (p23)
|
||||
{ .mfi; (p18) ldf8 nlo[0]=[nptr],8 // *(nptr++)
|
||||
(p20) xma.hu nhi[0]=nlo[2],m0,nhi[1]
|
||||
(p30) add a[1]=a[1],t[1] } // (p22)
|
||||
{ .mfi; (p16) nop.m 0
|
||||
(p20) xma.lu nlo[2]=nlo[2],m0,nhi[1]
|
||||
(p32) add a[1]=a[1],t[1],1 };; // (p22)
|
||||
{ .mmi; (p21) getf.sig n[0]=nlo[3]
|
||||
(p16) nop.m 0
|
||||
(p30) cmp.ltu p31,p29=a[1],t[1] } // (p22)
|
||||
{ .mmb; (p23) st8 [tp_1]=n[2],8
|
||||
(p32) cmp.leu p31,p29=a[1],t[1] // (p22)
|
||||
br.ctop.sptk .Linner_ctop };;
|
||||
.Linner_cend:
|
||||
|
||||
{ .mmi; getf.sig a[0]=ahi[6] // (p24)
|
||||
getf.sig n[0]=nhi[4]
|
||||
nop.i 0 };;
|
||||
|
||||
{ .mmi; .pred.rel "mutex",p31,p33
|
||||
(p31) add a[0]=a[0],topbit
|
||||
(p33) add a[0]=a[0],topbit,1
|
||||
mov topbit=r0 };;
|
||||
{ .mfi; .pred.rel "mutex",p31,p33
|
||||
(p31) cmp.ltu p32,p30=a[0],topbit
|
||||
(p33) cmp.leu p32,p30=a[0],topbit
|
||||
}
|
||||
{ .mfi; .pred.rel "mutex",p40,p42
|
||||
(p40) add n[0]=n[0],a[0]
|
||||
(p42) add n[0]=n[0],a[0],1
|
||||
};;
|
||||
{ .mmi; .pred.rel "mutex",p44,p46
|
||||
(p40) cmp.ltu p41,p39=n[0],a[0]
|
||||
(p42) cmp.leu p41,p39=n[0],a[0]
|
||||
(p32) add topbit=r0,r0,1 }
|
||||
|
||||
{ .mmi; st8 [tp_1]=n[0],8
|
||||
cmp4.ne p6,p0=1,num
|
||||
sub aptr=aptr,len };; // rewind
|
||||
{ .mmi; sub nptr=nptr,len
|
||||
(p41) add topbit=r0,r0,1
|
||||
add tptr=16,sp }
|
||||
{ .mmb; add tp_1=8,sp
|
||||
add num=-1,num // num--
|
||||
(p6) br.cond.sptk.many .Louter };;
|
||||
|
||||
{ .mbb; add lc=4,lc
|
||||
brp.loop.imp .Lsub_ctop,.Lsub_cend-16
|
||||
clrrrb.pr };;
|
||||
{ .mii; nop.m 0
|
||||
mov pr.rot=0x10001<<16
|
||||
// ------^---- (p33) at first (p17)
|
||||
mov ar.lc=lc }
|
||||
{ .mii; nop.m 0
|
||||
mov ar.ec=3
|
||||
nop.i 0 };;
|
||||
|
||||
.Lsub_ctop:
|
||||
.pred.rel "mutex",p33,p35
|
||||
{ .mfi; (p16) ld8 t[0]=[tptr],8 // t=*(tp++)
|
||||
(p16) nop.f 0
|
||||
(p33) sub n[1]=t[1],n[1] } // (p17)
|
||||
{ .mfi; (p16) ld8 n[0]=[nptr],8 // n=*(np++)
|
||||
(p16) nop.f 0
|
||||
(p35) sub n[1]=t[1],n[1],1 };; // (p17)
|
||||
{ .mib; (p18) st8 [rptr]=n[2],8 // *(rp++)=r
|
||||
(p33) cmp.gtu p34,p32=n[1],t[1] // (p17)
|
||||
(p18) nop.b 0 }
|
||||
{ .mib; (p18) nop.m 0
|
||||
(p35) cmp.geu p34,p32=n[1],t[1] // (p17)
|
||||
br.ctop.sptk .Lsub_ctop };;
|
||||
.Lsub_cend:
|
||||
|
||||
{ .mmb; .pred.rel "mutex",p34,p36
|
||||
(p34) sub topbit=topbit,r0 // (p19)
|
||||
(p36) sub topbit=topbit,r0,1
|
||||
brp.loop.imp .Lcopy_ctop,.Lcopy_cend-16
|
||||
}
|
||||
{ .mmb; sub rptr=rptr,len // rewind
|
||||
sub tptr=tptr,len
|
||||
clrrrb.pr };;
|
||||
{ .mmi; mov aptr=rptr
|
||||
mov bptr=tptr
|
||||
mov pr.rot=1<<16 };;
|
||||
{ .mii; cmp.eq p0,p6=topbit,r0
|
||||
mov ar.lc=lc
|
||||
mov ar.ec=2 };;
|
||||
|
||||
.Lcopy_ctop:
|
||||
{ .mmi; (p16) ld8 a[0]=[aptr],8
|
||||
(p16) ld8 t[0]=[bptr],8
|
||||
(p6) mov a[1]=t[1] };; // (p17)
|
||||
{ .mmb; (p17) st8 [rptr]=a[1],8
|
||||
(p17) st8 [tptr]=r0,8
|
||||
br.ctop.sptk .Lcopy_ctop };;
|
||||
.Lcopy_cend:
|
||||
|
||||
{ .mmi; mov ret0=1 // signal "handled"
|
||||
rum 1<<5 // clear um.mfh
|
||||
mov ar.lc=prevlc }
|
||||
{ .mib; .restore sp
|
||||
mov sp=prevsp
|
||||
mov pr=prevpr,0x1ffff
|
||||
br.ret.sptk.many b0 };;
|
||||
.endp bn_mul_mont_general#
|
||||
|
||||
a1=r16; a2=r17; a3=r18; a4=r19; a5=r20; a6=r21; a7=r22; a8=r23;
|
||||
n1=r24; n2=r25; n3=r26; n4=r27; n5=r28; n6=r29; n7=r30; n8=r31;
|
||||
t0=r15;
|
||||
|
||||
ai0=f8; ai1=f9; ai2=f10; ai3=f11; ai4=f12; ai5=f13; ai6=f14; ai7=f15;
|
||||
ni0=f16; ni1=f17; ni2=f18; ni3=f19; ni4=f20; ni5=f21; ni6=f22; ni7=f23;
|
||||
|
||||
.align 64
|
||||
.skip 48 // aligns loop body
|
||||
.local bn_mul_mont_8#
|
||||
.proc bn_mul_mont_8#
|
||||
bn_mul_mont_8:
|
||||
.prologue
|
||||
{ .mmi; .save ar.pfs,prevfs
|
||||
alloc prevfs=ar.pfs,6,2,0,8
|
||||
.vframe prevsp
|
||||
mov prevsp=sp
|
||||
.save ar.lc,prevlc
|
||||
mov prevlc=ar.lc }
|
||||
{ .mmi; add r17=-6*16,sp
|
||||
add sp=-7*16,sp
|
||||
.save pr,prevpr
|
||||
mov prevpr=pr };;
|
||||
|
||||
{ .mmi; .save.gf 0,0x10
|
||||
stf.spill [sp]=f16,-16
|
||||
.save.gf 0,0x20
|
||||
stf.spill [r17]=f17,32
|
||||
add r16=-5*16,prevsp};;
|
||||
{ .mmi; .save.gf 0,0x40
|
||||
stf.spill [r16]=f18,32
|
||||
.save.gf 0,0x80
|
||||
stf.spill [r17]=f19,32
|
||||
$ADDP aptr=0,in1 };;
|
||||
{ .mmi; .save.gf 0,0x100
|
||||
stf.spill [r16]=f20,32
|
||||
.save.gf 0,0x200
|
||||
stf.spill [r17]=f21,32
|
||||
$ADDP r29=8,in1 };;
|
||||
{ .mmi; .save.gf 0,0x400
|
||||
stf.spill [r16]=f22
|
||||
.save.gf 0,0x800
|
||||
stf.spill [r17]=f23
|
||||
$ADDP rptr=0,in0 };;
|
||||
|
||||
.body
|
||||
.rotf bj[8],mj[2],tf[2],alo[10],ahi[10],nlo[10],nhi[10]
|
||||
.rotr t[8]
|
||||
|
||||
// load input vectors padding them to 8 elements
|
||||
{ .mmi; ldf8 ai0=[aptr],16 // ap[0]
|
||||
ldf8 ai1=[r29],16 // ap[1]
|
||||
$ADDP bptr=0,in2 }
|
||||
{ .mmi; $ADDP r30=8,in2
|
||||
$ADDP nptr=0,in3
|
||||
$ADDP r31=8,in3 };;
|
||||
{ .mmi; ldf8 bj[7]=[bptr],16 // bp[0]
|
||||
ldf8 bj[6]=[r30],16 // bp[1]
|
||||
cmp4.le p4,p5=3,in5 }
|
||||
{ .mmi; ldf8 ni0=[nptr],16 // np[0]
|
||||
ldf8 ni1=[r31],16 // np[1]
|
||||
cmp4.le p6,p7=4,in5 };;
|
||||
|
||||
{ .mfi; (p4)ldf8 ai2=[aptr],16 // ap[2]
|
||||
(p5)fcvt.fxu ai2=f0
|
||||
cmp4.le p8,p9=5,in5 }
|
||||
{ .mfi; (p6)ldf8 ai3=[r29],16 // ap[3]
|
||||
(p7)fcvt.fxu ai3=f0
|
||||
cmp4.le p10,p11=6,in5 }
|
||||
{ .mfi; (p4)ldf8 bj[5]=[bptr],16 // bp[2]
|
||||
(p5)fcvt.fxu bj[5]=f0
|
||||
cmp4.le p12,p13=7,in5 }
|
||||
{ .mfi; (p6)ldf8 bj[4]=[r30],16 // bp[3]
|
||||
(p7)fcvt.fxu bj[4]=f0
|
||||
cmp4.le p14,p15=8,in5 }
|
||||
{ .mfi; (p4)ldf8 ni2=[nptr],16 // np[2]
|
||||
(p5)fcvt.fxu ni2=f0
|
||||
addp4 r28=-1,in5 }
|
||||
{ .mfi; (p6)ldf8 ni3=[r31],16 // np[3]
|
||||
(p7)fcvt.fxu ni3=f0
|
||||
$ADDP in4=0,in4 };;
|
||||
|
||||
{ .mfi; ldf8 n0=[in4]
|
||||
fcvt.fxu tf[1]=f0
|
||||
nop.i 0 }
|
||||
|
||||
{ .mfi; (p8)ldf8 ai4=[aptr],16 // ap[4]
|
||||
(p9)fcvt.fxu ai4=f0
|
||||
mov t[0]=r0 }
|
||||
{ .mfi; (p10)ldf8 ai5=[r29],16 // ap[5]
|
||||
(p11)fcvt.fxu ai5=f0
|
||||
mov t[1]=r0 }
|
||||
{ .mfi; (p8)ldf8 bj[3]=[bptr],16 // bp[4]
|
||||
(p9)fcvt.fxu bj[3]=f0
|
||||
mov t[2]=r0 }
|
||||
{ .mfi; (p10)ldf8 bj[2]=[r30],16 // bp[5]
|
||||
(p11)fcvt.fxu bj[2]=f0
|
||||
mov t[3]=r0 }
|
||||
{ .mfi; (p8)ldf8 ni4=[nptr],16 // np[4]
|
||||
(p9)fcvt.fxu ni4=f0
|
||||
mov t[4]=r0 }
|
||||
{ .mfi; (p10)ldf8 ni5=[r31],16 // np[5]
|
||||
(p11)fcvt.fxu ni5=f0
|
||||
mov t[5]=r0 };;
|
||||
|
||||
{ .mfi; (p12)ldf8 ai6=[aptr],16 // ap[6]
|
||||
(p13)fcvt.fxu ai6=f0
|
||||
mov t[6]=r0 }
|
||||
{ .mfi; (p14)ldf8 ai7=[r29],16 // ap[7]
|
||||
(p15)fcvt.fxu ai7=f0
|
||||
mov t[7]=r0 }
|
||||
{ .mfi; (p12)ldf8 bj[1]=[bptr],16 // bp[6]
|
||||
(p13)fcvt.fxu bj[1]=f0
|
||||
mov ar.lc=r28 }
|
||||
{ .mfi; (p14)ldf8 bj[0]=[r30],16 // bp[7]
|
||||
(p15)fcvt.fxu bj[0]=f0
|
||||
mov ar.ec=1 }
|
||||
{ .mfi; (p12)ldf8 ni6=[nptr],16 // np[6]
|
||||
(p13)fcvt.fxu ni6=f0
|
||||
mov pr.rot=1<<16 }
|
||||
{ .mfb; (p14)ldf8 ni7=[r31],16 // np[7]
|
||||
(p15)fcvt.fxu ni7=f0
|
||||
brp.loop.imp .Louter_8_ctop,.Louter_8_cend-16
|
||||
};;
|
||||
|
||||
// The loop is scheduled for 32*n ticks on Itanium 2. Actual attempt
|
||||
// to measure with help of Interval Time Counter indicated that the
|
||||
// factor is a tad higher: 33 or 34, if not 35. Exact measurement and
|
||||
// addressing the issue is problematic, because I don't have access
|
||||
// to platform-specific instruction-level profiler. On Itanium it
|
||||
// should run in 56*n ticks, because of higher xma latency...
|
||||
.Louter_8_ctop:
|
||||
.pred.rel "mutex",p40,p42
|
||||
.pred.rel "mutex",p48,p50
|
||||
{ .mfi; (p16) nop.m 0 // 0:
|
||||
(p16) xma.hu ahi[0]=ai0,bj[7],tf[1] // ap[0]*b[i]+t[0]
|
||||
(p40) add a3=a3,n3 } // (p17) a3+=n3
|
||||
{ .mfi; (p42) add a3=a3,n3,1
|
||||
(p16) xma.lu alo[0]=ai0,bj[7],tf[1]
|
||||
(p16) nop.i 0 };;
|
||||
{ .mii; (p17) getf.sig a7=alo[8] // 1:
|
||||
(p48) add t[6]=t[6],a3 // (p17) t[6]+=a3
|
||||
(p50) add t[6]=t[6],a3,1 };;
|
||||
{ .mfi; (p17) getf.sig a8=ahi[8] // 2:
|
||||
(p17) xma.hu nhi[7]=ni6,mj[1],nhi[6] // np[6]*m0
|
||||
(p40) cmp.ltu p43,p41=a3,n3 }
|
||||
{ .mfi; (p42) cmp.leu p43,p41=a3,n3
|
||||
(p17) xma.lu nlo[7]=ni6,mj[1],nhi[6]
|
||||
(p16) nop.i 0 };;
|
||||
{ .mii; (p17) getf.sig n5=nlo[6] // 3:
|
||||
(p48) cmp.ltu p51,p49=t[6],a3
|
||||
(p50) cmp.leu p51,p49=t[6],a3 };;
|
||||
.pred.rel "mutex",p41,p43
|
||||
.pred.rel "mutex",p49,p51
|
||||
{ .mfi; (p16) nop.m 0 // 4:
|
||||
(p16) xma.hu ahi[1]=ai1,bj[7],ahi[0] // ap[1]*b[i]
|
||||
(p41) add a4=a4,n4 } // (p17) a4+=n4
|
||||
{ .mfi; (p43) add a4=a4,n4,1
|
||||
(p16) xma.lu alo[1]=ai1,bj[7],ahi[0]
|
||||
(p16) nop.i 0 };;
|
||||
{ .mfi; (p49) add t[5]=t[5],a4 // 5: (p17) t[5]+=a4
|
||||
(p16) xmpy.lu mj[0]=alo[0],n0 // (ap[0]*b[i]+t[0])*n0
|
||||
(p51) add t[5]=t[5],a4,1 };;
|
||||
{ .mfi; (p16) nop.m 0 // 6:
|
||||
(p17) xma.hu nhi[8]=ni7,mj[1],nhi[7] // np[7]*m0
|
||||
(p41) cmp.ltu p42,p40=a4,n4 }
|
||||
{ .mfi; (p43) cmp.leu p42,p40=a4,n4
|
||||
(p17) xma.lu nlo[8]=ni7,mj[1],nhi[7]
|
||||
(p16) nop.i 0 };;
|
||||
{ .mii; (p17) getf.sig n6=nlo[7] // 7:
|
||||
(p49) cmp.ltu p50,p48=t[5],a4
|
||||
(p51) cmp.leu p50,p48=t[5],a4 };;
|
||||
.pred.rel "mutex",p40,p42
|
||||
.pred.rel "mutex",p48,p50
|
||||
{ .mfi; (p16) nop.m 0 // 8:
|
||||
(p16) xma.hu ahi[2]=ai2,bj[7],ahi[1] // ap[2]*b[i]
|
||||
(p40) add a5=a5,n5 } // (p17) a5+=n5
|
||||
{ .mfi; (p42) add a5=a5,n5,1
|
||||
(p16) xma.lu alo[2]=ai2,bj[7],ahi[1]
|
||||
(p16) nop.i 0 };;
|
||||
{ .mii; (p16) getf.sig a1=alo[1] // 9:
|
||||
(p48) add t[4]=t[4],a5 // p(17) t[4]+=a5
|
||||
(p50) add t[4]=t[4],a5,1 };;
|
||||
{ .mfi; (p16) nop.m 0 // 10:
|
||||
(p16) xma.hu nhi[0]=ni0,mj[0],alo[0] // np[0]*m0
|
||||
(p40) cmp.ltu p43,p41=a5,n5 }
|
||||
{ .mfi; (p42) cmp.leu p43,p41=a5,n5
|
||||
(p16) xma.lu nlo[0]=ni0,mj[0],alo[0]
|
||||
(p16) nop.i 0 };;
|
||||
{ .mii; (p17) getf.sig n7=nlo[8] // 11:
|
||||
(p48) cmp.ltu p51,p49=t[4],a5
|
||||
(p50) cmp.leu p51,p49=t[4],a5 };;
|
||||
.pred.rel "mutex",p41,p43
|
||||
.pred.rel "mutex",p49,p51
|
||||
{ .mfi; (p17) getf.sig n8=nhi[8] // 12:
|
||||
(p16) xma.hu ahi[3]=ai3,bj[7],ahi[2] // ap[3]*b[i]
|
||||
(p41) add a6=a6,n6 } // (p17) a6+=n6
|
||||
{ .mfi; (p43) add a6=a6,n6,1
|
||||
(p16) xma.lu alo[3]=ai3,bj[7],ahi[2]
|
||||
(p16) nop.i 0 };;
|
||||
{ .mii; (p16) getf.sig a2=alo[2] // 13:
|
||||
(p49) add t[3]=t[3],a6 // (p17) t[3]+=a6
|
||||
(p51) add t[3]=t[3],a6,1 };;
|
||||
{ .mfi; (p16) nop.m 0 // 14:
|
||||
(p16) xma.hu nhi[1]=ni1,mj[0],nhi[0] // np[1]*m0
|
||||
(p41) cmp.ltu p42,p40=a6,n6 }
|
||||
{ .mfi; (p43) cmp.leu p42,p40=a6,n6
|
||||
(p16) xma.lu nlo[1]=ni1,mj[0],nhi[0]
|
||||
(p16) nop.i 0 };;
|
||||
{ .mii; (p16) nop.m 0 // 15:
|
||||
(p49) cmp.ltu p50,p48=t[3],a6
|
||||
(p51) cmp.leu p50,p48=t[3],a6 };;
|
||||
.pred.rel "mutex",p40,p42
|
||||
.pred.rel "mutex",p48,p50
|
||||
{ .mfi; (p16) nop.m 0 // 16:
|
||||
(p16) xma.hu ahi[4]=ai4,bj[7],ahi[3] // ap[4]*b[i]
|
||||
(p40) add a7=a7,n7 } // (p17) a7+=n7
|
||||
{ .mfi; (p42) add a7=a7,n7,1
|
||||
(p16) xma.lu alo[4]=ai4,bj[7],ahi[3]
|
||||
(p16) nop.i 0 };;
|
||||
{ .mii; (p16) getf.sig a3=alo[3] // 17:
|
||||
(p48) add t[2]=t[2],a7 // (p17) t[2]+=a7
|
||||
(p50) add t[2]=t[2],a7,1 };;
|
||||
{ .mfi; (p16) nop.m 0 // 18:
|
||||
(p16) xma.hu nhi[2]=ni2,mj[0],nhi[1] // np[2]*m0
|
||||
(p40) cmp.ltu p43,p41=a7,n7 }
|
||||
{ .mfi; (p42) cmp.leu p43,p41=a7,n7
|
||||
(p16) xma.lu nlo[2]=ni2,mj[0],nhi[1]
|
||||
(p16) nop.i 0 };;
|
||||
{ .mii; (p16) getf.sig n1=nlo[1] // 19:
|
||||
(p48) cmp.ltu p51,p49=t[2],a7
|
||||
(p50) cmp.leu p51,p49=t[2],a7 };;
|
||||
.pred.rel "mutex",p41,p43
|
||||
.pred.rel "mutex",p49,p51
|
||||
{ .mfi; (p16) nop.m 0 // 20:
|
||||
(p16) xma.hu ahi[5]=ai5,bj[7],ahi[4] // ap[5]*b[i]
|
||||
(p41) add a8=a8,n8 } // (p17) a8+=n8
|
||||
{ .mfi; (p43) add a8=a8,n8,1
|
||||
(p16) xma.lu alo[5]=ai5,bj[7],ahi[4]
|
||||
(p16) nop.i 0 };;
|
||||
{ .mii; (p16) getf.sig a4=alo[4] // 21:
|
||||
(p49) add t[1]=t[1],a8 // (p17) t[1]+=a8
|
||||
(p51) add t[1]=t[1],a8,1 };;
|
||||
{ .mfi; (p16) nop.m 0 // 22:
|
||||
(p16) xma.hu nhi[3]=ni3,mj[0],nhi[2] // np[3]*m0
|
||||
(p41) cmp.ltu p42,p40=a8,n8 }
|
||||
{ .mfi; (p43) cmp.leu p42,p40=a8,n8
|
||||
(p16) xma.lu nlo[3]=ni3,mj[0],nhi[2]
|
||||
(p16) nop.i 0 };;
|
||||
{ .mii; (p16) getf.sig n2=nlo[2] // 23:
|
||||
(p49) cmp.ltu p50,p48=t[1],a8
|
||||
(p51) cmp.leu p50,p48=t[1],a8 };;
|
||||
{ .mfi; (p16) nop.m 0 // 24:
|
||||
(p16) xma.hu ahi[6]=ai6,bj[7],ahi[5] // ap[6]*b[i]
|
||||
(p16) add a1=a1,n1 } // (p16) a1+=n1
|
||||
{ .mfi; (p16) nop.m 0
|
||||
(p16) xma.lu alo[6]=ai6,bj[7],ahi[5]
|
||||
(p17) mov t[0]=r0 };;
|
||||
{ .mii; (p16) getf.sig a5=alo[5] // 25:
|
||||
(p16) add t0=t[7],a1 // (p16) t[7]+=a1
|
||||
(p42) add t[0]=t[0],r0,1 };;
|
||||
{ .mfi; (p16) setf.sig tf[0]=t0 // 26:
|
||||
(p16) xma.hu nhi[4]=ni4,mj[0],nhi[3] // np[4]*m0
|
||||
(p50) add t[0]=t[0],r0,1 }
|
||||
{ .mfi; (p16) cmp.ltu.unc p42,p40=a1,n1
|
||||
(p16) xma.lu nlo[4]=ni4,mj[0],nhi[3]
|
||||
(p16) nop.i 0 };;
|
||||
{ .mii; (p16) getf.sig n3=nlo[3] // 27:
|
||||
(p16) cmp.ltu.unc p50,p48=t0,a1
|
||||
(p16) nop.i 0 };;
|
||||
.pred.rel "mutex",p40,p42
|
||||
.pred.rel "mutex",p48,p50
|
||||
{ .mfi; (p16) nop.m 0 // 28:
|
||||
(p16) xma.hu ahi[7]=ai7,bj[7],ahi[6] // ap[7]*b[i]
|
||||
(p40) add a2=a2,n2 } // (p16) a2+=n2
|
||||
{ .mfi; (p42) add a2=a2,n2,1
|
||||
(p16) xma.lu alo[7]=ai7,bj[7],ahi[6]
|
||||
(p16) nop.i 0 };;
|
||||
{ .mii; (p16) getf.sig a6=alo[6] // 29:
|
||||
(p48) add t[6]=t[6],a2 // (p16) t[6]+=a2
|
||||
(p50) add t[6]=t[6],a2,1 };;
|
||||
{ .mfi; (p16) nop.m 0 // 30:
|
||||
(p16) xma.hu nhi[5]=ni5,mj[0],nhi[4] // np[5]*m0
|
||||
(p40) cmp.ltu p41,p39=a2,n2 }
|
||||
{ .mfi; (p42) cmp.leu p41,p39=a2,n2
|
||||
(p16) xma.lu nlo[5]=ni5,mj[0],nhi[4]
|
||||
(p16) nop.i 0 };;
|
||||
{ .mfi; (p16) getf.sig n4=nlo[4] // 31:
|
||||
(p16) nop.f 0
|
||||
(p48) cmp.ltu p49,p47=t[6],a2 }
|
||||
{ .mfb; (p50) cmp.leu p49,p47=t[6],a2
|
||||
(p16) nop.f 0
|
||||
br.ctop.sptk.many .Louter_8_ctop };;
|
||||
.Louter_8_cend:
|
||||
|
||||
// above loop has to execute one more time, without (p16), which is
|
||||
// replaced with merged move of np[8] to GPR bank
|
||||
.pred.rel "mutex",p40,p42
|
||||
.pred.rel "mutex",p48,p50
|
||||
{ .mmi; (p0) getf.sig n1=ni0 // 0:
|
||||
(p40) add a3=a3,n3 // (p17) a3+=n3
|
||||
(p42) add a3=a3,n3,1 };;
|
||||
{ .mii; (p17) getf.sig a7=alo[8] // 1:
|
||||
(p48) add t[6]=t[6],a3 // (p17) t[6]+=a3
|
||||
(p50) add t[6]=t[6],a3,1 };;
|
||||
{ .mfi; (p17) getf.sig a8=ahi[8] // 2:
|
||||
(p17) xma.hu nhi[7]=ni6,mj[1],nhi[6] // np[6]*m0
|
||||
(p40) cmp.ltu p43,p41=a3,n3 }
|
||||
{ .mfi; (p42) cmp.leu p43,p41=a3,n3
|
||||
(p17) xma.lu nlo[7]=ni6,mj[1],nhi[6]
|
||||
(p0) nop.i 0 };;
|
||||
{ .mii; (p17) getf.sig n5=nlo[6] // 3:
|
||||
(p48) cmp.ltu p51,p49=t[6],a3
|
||||
(p50) cmp.leu p51,p49=t[6],a3 };;
|
||||
.pred.rel "mutex",p41,p43
|
||||
.pred.rel "mutex",p49,p51
|
||||
{ .mmi; (p0) getf.sig n2=ni1 // 4:
|
||||
(p41) add a4=a4,n4 // (p17) a4+=n4
|
||||
(p43) add a4=a4,n4,1 };;
|
||||
{ .mfi; (p49) add t[5]=t[5],a4 // 5: (p17) t[5]+=a4
|
||||
(p0) nop.f 0
|
||||
(p51) add t[5]=t[5],a4,1 };;
|
||||
{ .mfi; (p0) getf.sig n3=ni2 // 6:
|
||||
(p17) xma.hu nhi[8]=ni7,mj[1],nhi[7] // np[7]*m0
|
||||
(p41) cmp.ltu p42,p40=a4,n4 }
|
||||
{ .mfi; (p43) cmp.leu p42,p40=a4,n4
|
||||
(p17) xma.lu nlo[8]=ni7,mj[1],nhi[7]
|
||||
(p0) nop.i 0 };;
|
||||
{ .mii; (p17) getf.sig n6=nlo[7] // 7:
|
||||
(p49) cmp.ltu p50,p48=t[5],a4
|
||||
(p51) cmp.leu p50,p48=t[5],a4 };;
|
||||
.pred.rel "mutex",p40,p42
|
||||
.pred.rel "mutex",p48,p50
|
||||
{ .mii; (p0) getf.sig n4=ni3 // 8:
|
||||
(p40) add a5=a5,n5 // (p17) a5+=n5
|
||||
(p42) add a5=a5,n5,1 };;
|
||||
{ .mii; (p0) nop.m 0 // 9:
|
||||
(p48) add t[4]=t[4],a5 // p(17) t[4]+=a5
|
||||
(p50) add t[4]=t[4],a5,1 };;
|
||||
{ .mii; (p0) nop.m 0 // 10:
|
||||
(p40) cmp.ltu p43,p41=a5,n5
|
||||
(p42) cmp.leu p43,p41=a5,n5 };;
|
||||
{ .mii; (p17) getf.sig n7=nlo[8] // 11:
|
||||
(p48) cmp.ltu p51,p49=t[4],a5
|
||||
(p50) cmp.leu p51,p49=t[4],a5 };;
|
||||
.pred.rel "mutex",p41,p43
|
||||
.pred.rel "mutex",p49,p51
|
||||
{ .mii; (p17) getf.sig n8=nhi[8] // 12:
|
||||
(p41) add a6=a6,n6 // (p17) a6+=n6
|
||||
(p43) add a6=a6,n6,1 };;
|
||||
{ .mii; (p0) getf.sig n5=ni4 // 13:
|
||||
(p49) add t[3]=t[3],a6 // (p17) t[3]+=a6
|
||||
(p51) add t[3]=t[3],a6,1 };;
|
||||
{ .mii; (p0) nop.m 0 // 14:
|
||||
(p41) cmp.ltu p42,p40=a6,n6
|
||||
(p43) cmp.leu p42,p40=a6,n6 };;
|
||||
{ .mii; (p0) getf.sig n6=ni5 // 15:
|
||||
(p49) cmp.ltu p50,p48=t[3],a6
|
||||
(p51) cmp.leu p50,p48=t[3],a6 };;
|
||||
.pred.rel "mutex",p40,p42
|
||||
.pred.rel "mutex",p48,p50
|
||||
{ .mii; (p0) nop.m 0 // 16:
|
||||
(p40) add a7=a7,n7 // (p17) a7+=n7
|
||||
(p42) add a7=a7,n7,1 };;
|
||||
{ .mii; (p0) nop.m 0 // 17:
|
||||
(p48) add t[2]=t[2],a7 // (p17) t[2]+=a7
|
||||
(p50) add t[2]=t[2],a7,1 };;
|
||||
{ .mii; (p0) nop.m 0 // 18:
|
||||
(p40) cmp.ltu p43,p41=a7,n7
|
||||
(p42) cmp.leu p43,p41=a7,n7 };;
|
||||
{ .mii; (p0) getf.sig n7=ni6 // 19:
|
||||
(p48) cmp.ltu p51,p49=t[2],a7
|
||||
(p50) cmp.leu p51,p49=t[2],a7 };;
|
||||
.pred.rel "mutex",p41,p43
|
||||
.pred.rel "mutex",p49,p51
|
||||
{ .mii; (p0) nop.m 0 // 20:
|
||||
(p41) add a8=a8,n8 // (p17) a8+=n8
|
||||
(p43) add a8=a8,n8,1 };;
|
||||
{ .mmi; (p0) nop.m 0 // 21:
|
||||
(p49) add t[1]=t[1],a8 // (p17) t[1]+=a8
|
||||
(p51) add t[1]=t[1],a8,1 }
|
||||
{ .mmi; (p17) mov t[0]=r0
|
||||
(p41) cmp.ltu p42,p40=a8,n8
|
||||
(p43) cmp.leu p42,p40=a8,n8 };;
|
||||
{ .mmi; (p0) getf.sig n8=ni7 // 22:
|
||||
(p49) cmp.ltu p50,p48=t[1],a8
|
||||
(p51) cmp.leu p50,p48=t[1],a8 }
|
||||
{ .mmi; (p42) add t[0]=t[0],r0,1
|
||||
(p0) add r16=-7*16,prevsp
|
||||
(p0) add r17=-6*16,prevsp };;
|
||||
|
||||
// subtract np[8] from carrybit|tmp[8]
|
||||
// carrybit|tmp[8] layout upon exit from above loop is:
|
||||
// t[0]|t[1]|t[2]|t[3]|t[4]|t[5]|t[6]|t[7]|t0 (least significant)
|
||||
{ .mmi; (p50)add t[0]=t[0],r0,1
|
||||
add r18=-5*16,prevsp
|
||||
sub n1=t0,n1 };;
|
||||
{ .mmi; cmp.gtu p34,p32=n1,t0;;
|
||||
.pred.rel "mutex",p32,p34
|
||||
(p32)sub n2=t[7],n2
|
||||
(p34)sub n2=t[7],n2,1 };;
|
||||
{ .mii; (p32)cmp.gtu p35,p33=n2,t[7]
|
||||
(p34)cmp.geu p35,p33=n2,t[7];;
|
||||
.pred.rel "mutex",p33,p35
|
||||
(p33)sub n3=t[6],n3 }
|
||||
{ .mmi; (p35)sub n3=t[6],n3,1;;
|
||||
(p33)cmp.gtu p34,p32=n3,t[6]
|
||||
(p35)cmp.geu p34,p32=n3,t[6] };;
|
||||
.pred.rel "mutex",p32,p34
|
||||
{ .mii; (p32)sub n4=t[5],n4
|
||||
(p34)sub n4=t[5],n4,1;;
|
||||
(p32)cmp.gtu p35,p33=n4,t[5] }
|
||||
{ .mmi; (p34)cmp.geu p35,p33=n4,t[5];;
|
||||
.pred.rel "mutex",p33,p35
|
||||
(p33)sub n5=t[4],n5
|
||||
(p35)sub n5=t[4],n5,1 };;
|
||||
{ .mii; (p33)cmp.gtu p34,p32=n5,t[4]
|
||||
(p35)cmp.geu p34,p32=n5,t[4];;
|
||||
.pred.rel "mutex",p32,p34
|
||||
(p32)sub n6=t[3],n6 }
|
||||
{ .mmi; (p34)sub n6=t[3],n6,1;;
|
||||
(p32)cmp.gtu p35,p33=n6,t[3]
|
||||
(p34)cmp.geu p35,p33=n6,t[3] };;
|
||||
.pred.rel "mutex",p33,p35
|
||||
{ .mii; (p33)sub n7=t[2],n7
|
||||
(p35)sub n7=t[2],n7,1;;
|
||||
(p33)cmp.gtu p34,p32=n7,t[2] }
|
||||
{ .mmi; (p35)cmp.geu p34,p32=n7,t[2];;
|
||||
.pred.rel "mutex",p32,p34
|
||||
(p32)sub n8=t[1],n8
|
||||
(p34)sub n8=t[1],n8,1 };;
|
||||
{ .mii; (p32)cmp.gtu p35,p33=n8,t[1]
|
||||
(p34)cmp.geu p35,p33=n8,t[1];;
|
||||
.pred.rel "mutex",p33,p35
|
||||
(p33)sub a8=t[0],r0 }
|
||||
{ .mmi; (p35)sub a8=t[0],r0,1;;
|
||||
(p33)cmp.gtu p34,p32=a8,t[0]
|
||||
(p35)cmp.geu p34,p32=a8,t[0] };;
|
||||
|
||||
// save the result, either tmp[num] or tmp[num]-np[num]
|
||||
.pred.rel "mutex",p32,p34
|
||||
{ .mmi; (p32)st8 [rptr]=n1,8
|
||||
(p34)st8 [rptr]=t0,8
|
||||
add r19=-4*16,prevsp};;
|
||||
{ .mmb; (p32)st8 [rptr]=n2,8
|
||||
(p34)st8 [rptr]=t[7],8
|
||||
(p5)br.cond.dpnt.few .Ldone };;
|
||||
{ .mmb; (p32)st8 [rptr]=n3,8
|
||||
(p34)st8 [rptr]=t[6],8
|
||||
(p7)br.cond.dpnt.few .Ldone };;
|
||||
{ .mmb; (p32)st8 [rptr]=n4,8
|
||||
(p34)st8 [rptr]=t[5],8
|
||||
(p9)br.cond.dpnt.few .Ldone };;
|
||||
{ .mmb; (p32)st8 [rptr]=n5,8
|
||||
(p34)st8 [rptr]=t[4],8
|
||||
(p11)br.cond.dpnt.few .Ldone };;
|
||||
{ .mmb; (p32)st8 [rptr]=n6,8
|
||||
(p34)st8 [rptr]=t[3],8
|
||||
(p13)br.cond.dpnt.few .Ldone };;
|
||||
{ .mmb; (p32)st8 [rptr]=n7,8
|
||||
(p34)st8 [rptr]=t[2],8
|
||||
(p15)br.cond.dpnt.few .Ldone };;
|
||||
{ .mmb; (p32)st8 [rptr]=n8,8
|
||||
(p34)st8 [rptr]=t[1],8
|
||||
nop.b 0 };;
|
||||
.Ldone: // epilogue
|
||||
{ .mmi; ldf.fill f16=[r16],64
|
||||
ldf.fill f17=[r17],64
|
||||
nop.i 0 }
|
||||
{ .mmi; ldf.fill f18=[r18],64
|
||||
ldf.fill f19=[r19],64
|
||||
mov pr=prevpr,0x1ffff };;
|
||||
{ .mmi; ldf.fill f20=[r16]
|
||||
ldf.fill f21=[r17]
|
||||
mov ar.lc=prevlc }
|
||||
{ .mmi; ldf.fill f22=[r18]
|
||||
ldf.fill f23=[r19]
|
||||
mov ret0=1 } // signal "handled"
|
||||
{ .mib; rum 1<<5
|
||||
.restore sp
|
||||
mov sp=prevsp
|
||||
br.ret.sptk.many b0 };;
|
||||
.endp bn_mul_mont_8#
|
||||
|
||||
.type copyright#,\@object
|
||||
copyright:
|
||||
stringz "Montgomery multiplication for IA-64, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
___
|
||||
|
||||
# Emit the generated assembly. When an output file was requested, STDOUT is
# redirected to it first; a failure to open the file, or to flush everything
# on close (e.g. disk full), aborts the script with a non-zero exit status
# instead of silently leaving a missing or truncated .s file behind.
open STDOUT,">$output" or die "can't open $output: $!" if $output;
|
||||
print $code;
|
||||
close STDOUT or die "error closing STDOUT: $!";
|
1565
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ia64.S
vendored
Normal file
1565
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ia64.S
vendored
Normal file
File diff suppressed because it is too large
Load diff
433
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/mips-mont.pl
vendored
Normal file
433
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/mips-mont.pl
vendored
Normal file
|
@ -0,0 +1,433 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
|
||||
# This module doesn't present direct interest for OpenSSL, because it
|
||||
# doesn't provide better performance for longer keys, at least not on
|
||||
# in-order-execution cores. While 512-bit RSA sign operations can be
|
||||
# 65% faster in 64-bit mode, 1024-bit ones are only 15% faster, and
|
||||
# 4096-bit ones are up to 15% slower. In 32-bit mode it varies from
|
||||
# 16% improvement for 512-bit RSA sign to -33% for 4096-bit RSA
|
||||
# verify:-( All comparisons are against bn_mul_mont-free assembler.
|
||||
# The module might be of interest to embedded system developers, as
|
||||
# the code is smaller than 1KB, yet offers >3x improvement on MIPS64
|
||||
# and 75-30% [less for longer keys] on MIPS32 over compiler-generated
|
||||
# code.
|
||||
|
||||
######################################################################
|
||||
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
|
||||
# widely used. Then there is a new contender: NUBI. It appears that if
|
||||
# one picks the latter, it's possible to arrange code in ABI neutral
|
||||
# manner. Therefore let's stick to NUBI register layout:
|
||||
#
|
||||
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
|
||||
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
|
||||
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
|
||||
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
|
||||
#
|
||||
# The return value is placed in $a0. Following coding rules facilitate
|
||||
# interoperability:
|
||||
#
|
||||
# - never ever touch $tp, "thread pointer", former $gp;
|
||||
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
|
||||
# old code];
|
||||
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
|
||||
#
|
||||
# For reference here is register layout for N32/64 MIPS ABIs:
|
||||
#
|
||||
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
|
||||
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
|
||||
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
|
||||
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
|
||||
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
|
||||
#
|
||||
$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
|
||||
|
||||
if ($flavour =~ /64|n32/i) {
|
||||
$PTR_ADD="daddu"; # incidentally works even on n32
|
||||
$PTR_SUB="dsubu"; # incidentally works even on n32
|
||||
$REG_S="sd";
|
||||
$REG_L="ld";
|
||||
$SZREG=8;
|
||||
} else {
|
||||
$PTR_ADD="addu";
|
||||
$PTR_SUB="subu";
|
||||
$REG_S="sw";
|
||||
$REG_L="lw";
|
||||
$SZREG=4;
|
||||
}
|
||||
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000;
|
||||
#
|
||||
# <appro@openssl.org>
|
||||
#
|
||||
######################################################################
|
||||
|
||||
# Consume remaining command-line arguments until one looks like a file name
# (word characters, a dot, and an extension); that argument is the output path.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
|
||||
# Redirect STDOUT only when an output file was actually named; otherwise keep
# writing to the inherited STDOUT. Abort if the file cannot be opened rather
# than silently emitting the assembly to the wrong place.
open STDOUT,">$output" or die "can't open $output: $!" if $output;
|
||||
|
||||
if ($flavour =~ /64|n32/i) {
|
||||
$LD="ld";
|
||||
$ST="sd";
|
||||
$MULTU="dmultu";
|
||||
$ADDU="daddu";
|
||||
$SUBU="dsubu";
|
||||
$BNSZ=8;
|
||||
} else {
|
||||
$LD="lw";
|
||||
$ST="sw";
|
||||
$MULTU="multu";
|
||||
$ADDU="addu";
|
||||
$SUBU="subu";
|
||||
$BNSZ=4;
|
||||
}
|
||||
|
||||
# int bn_mul_mont(
|
||||
$rp=$a0; # BN_ULONG *rp,
|
||||
$ap=$a1; # const BN_ULONG *ap,
|
||||
$bp=$a2; # const BN_ULONG *bp,
|
||||
$np=$a3; # const BN_ULONG *np,
|
||||
$n0=$a4; # const BN_ULONG *n0,
|
||||
$num=$a5; # int num);
|
||||
|
||||
$lo0=$a6;
|
||||
$hi0=$a7;
|
||||
$lo1=$t1;
|
||||
$hi1=$t2;
|
||||
$aj=$s0;
|
||||
$bi=$s1;
|
||||
$nj=$s2;
|
||||
$tp=$s3;
|
||||
$alo=$s4;
|
||||
$ahi=$s5;
|
||||
$nlo=$s6;
|
||||
$nhi=$s7;
|
||||
$tj=$s8;
|
||||
$i=$s9;
|
||||
$j=$s10;
|
||||
$m1=$s11;
|
||||
|
||||
$FRAMESIZE=14;
|
||||
|
||||
$code=<<___;
|
||||
#include "mips_arch.h"
|
||||
|
||||
.text
|
||||
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
||||
.align 5
|
||||
.globl bn_mul_mont
|
||||
.ent bn_mul_mont
|
||||
bn_mul_mont:
|
||||
___
|
||||
$code.=<<___ if ($flavour =~ /o32/i);
|
||||
lw $n0,16($sp)
|
||||
lw $num,20($sp)
|
||||
___
|
||||
$code.=<<___;
|
||||
slt $at,$num,4
|
||||
bnez $at,1f
|
||||
li $t0,0
|
||||
slt $at,$num,17 # on in-order CPU
|
||||
bnez $at,bn_mul_mont_internal
|
||||
nop
|
||||
1: jr $ra
|
||||
li $a0,0
|
||||
.end bn_mul_mont
|
||||
|
||||
.align 5
|
||||
.ent bn_mul_mont_internal
|
||||
bn_mul_mont_internal:
|
||||
.frame $fp,$FRAMESIZE*$SZREG,$ra
|
||||
.mask 0x40000000|$SAVED_REGS_MASK,-$SZREG
|
||||
$PTR_SUB $sp,$FRAMESIZE*$SZREG
|
||||
$REG_S $fp,($FRAMESIZE-1)*$SZREG($sp)
|
||||
$REG_S $s11,($FRAMESIZE-2)*$SZREG($sp)
|
||||
$REG_S $s10,($FRAMESIZE-3)*$SZREG($sp)
|
||||
$REG_S $s9,($FRAMESIZE-4)*$SZREG($sp)
|
||||
$REG_S $s8,($FRAMESIZE-5)*$SZREG($sp)
|
||||
$REG_S $s7,($FRAMESIZE-6)*$SZREG($sp)
|
||||
$REG_S $s6,($FRAMESIZE-7)*$SZREG($sp)
|
||||
$REG_S $s5,($FRAMESIZE-8)*$SZREG($sp)
|
||||
$REG_S $s4,($FRAMESIZE-9)*$SZREG($sp)
|
||||
___
|
||||
$code.=<<___ if ($flavour =~ /nubi/i);
|
||||
$REG_S $s3,($FRAMESIZE-10)*$SZREG($sp)
|
||||
$REG_S $s2,($FRAMESIZE-11)*$SZREG($sp)
|
||||
$REG_S $s1,($FRAMESIZE-12)*$SZREG($sp)
|
||||
$REG_S $s0,($FRAMESIZE-13)*$SZREG($sp)
|
||||
___
|
||||
$code.=<<___;
|
||||
move $fp,$sp
|
||||
|
||||
.set reorder
|
||||
$LD $n0,0($n0)
|
||||
$LD $bi,0($bp) # bp[0]
|
||||
$LD $aj,0($ap) # ap[0]
|
||||
$LD $nj,0($np) # np[0]
|
||||
|
||||
$PTR_SUB $sp,2*$BNSZ # place for two extra words
|
||||
sll $num,`log($BNSZ)/log(2)`
|
||||
li $at,-4096
|
||||
$PTR_SUB $sp,$num
|
||||
and $sp,$at
|
||||
|
||||
$MULTU ($aj,$bi)
|
||||
$LD $ahi,$BNSZ($ap)
|
||||
$LD $nhi,$BNSZ($np)
|
||||
mflo ($lo0,$aj,$bi)
|
||||
mfhi ($hi0,$aj,$bi)
|
||||
$MULTU ($lo0,$n0)
|
||||
mflo ($m1,$lo0,$n0)
|
||||
|
||||
$MULTU ($ahi,$bi)
|
||||
mflo ($alo,$ahi,$bi)
|
||||
mfhi ($ahi,$ahi,$bi)
|
||||
|
||||
$MULTU ($nj,$m1)
|
||||
mflo ($lo1,$nj,$m1)
|
||||
mfhi ($hi1,$nj,$m1)
|
||||
$MULTU ($nhi,$m1)
|
||||
$ADDU $lo1,$lo0
|
||||
sltu $at,$lo1,$lo0
|
||||
$ADDU $hi1,$at
|
||||
mflo ($nlo,$nhi,$m1)
|
||||
mfhi ($nhi,$nhi,$m1)
|
||||
|
||||
move $tp,$sp
|
||||
li $j,2*$BNSZ
|
||||
.align 4
|
||||
.L1st:
|
||||
.set noreorder
|
||||
$PTR_ADD $aj,$ap,$j
|
||||
$PTR_ADD $nj,$np,$j
|
||||
$LD $aj,($aj)
|
||||
$LD $nj,($nj)
|
||||
|
||||
$MULTU ($aj,$bi)
|
||||
$ADDU $lo0,$alo,$hi0
|
||||
$ADDU $lo1,$nlo,$hi1
|
||||
sltu $at,$lo0,$hi0
|
||||
sltu $t0,$lo1,$hi1
|
||||
$ADDU $hi0,$ahi,$at
|
||||
$ADDU $hi1,$nhi,$t0
|
||||
mflo ($alo,$aj,$bi)
|
||||
mfhi ($ahi,$aj,$bi)
|
||||
|
||||
$ADDU $lo1,$lo0
|
||||
sltu $at,$lo1,$lo0
|
||||
$MULTU ($nj,$m1)
|
||||
$ADDU $hi1,$at
|
||||
addu $j,$BNSZ
|
||||
$ST $lo1,($tp)
|
||||
sltu $t0,$j,$num
|
||||
mflo ($nlo,$nj,$m1)
|
||||
mfhi ($nhi,$nj,$m1)
|
||||
|
||||
bnez $t0,.L1st
|
||||
$PTR_ADD $tp,$BNSZ
|
||||
.set reorder
|
||||
|
||||
$ADDU $lo0,$alo,$hi0
|
||||
sltu $at,$lo0,$hi0
|
||||
$ADDU $hi0,$ahi,$at
|
||||
|
||||
$ADDU $lo1,$nlo,$hi1
|
||||
sltu $t0,$lo1,$hi1
|
||||
$ADDU $hi1,$nhi,$t0
|
||||
$ADDU $lo1,$lo0
|
||||
sltu $at,$lo1,$lo0
|
||||
$ADDU $hi1,$at
|
||||
|
||||
$ST $lo1,($tp)
|
||||
|
||||
$ADDU $hi1,$hi0
|
||||
sltu $at,$hi1,$hi0
|
||||
$ST $hi1,$BNSZ($tp)
|
||||
$ST $at,2*$BNSZ($tp)
|
||||
|
||||
li $i,$BNSZ
|
||||
.align 4
|
||||
.Louter:
|
||||
$PTR_ADD $bi,$bp,$i
|
||||
$LD $bi,($bi)
|
||||
$LD $aj,($ap)
|
||||
$LD $ahi,$BNSZ($ap)
|
||||
$LD $tj,($sp)
|
||||
|
||||
$MULTU ($aj,$bi)
|
||||
$LD $nj,($np)
|
||||
$LD $nhi,$BNSZ($np)
|
||||
mflo ($lo0,$aj,$bi)
|
||||
mfhi ($hi0,$aj,$bi)
|
||||
$ADDU $lo0,$tj
|
||||
$MULTU ($lo0,$n0)
|
||||
sltu $at,$lo0,$tj
|
||||
$ADDU $hi0,$at
|
||||
mflo ($m1,$lo0,$n0)
|
||||
|
||||
$MULTU ($ahi,$bi)
|
||||
mflo ($alo,$ahi,$bi)
|
||||
mfhi ($ahi,$ahi,$bi)
|
||||
|
||||
$MULTU ($nj,$m1)
|
||||
mflo ($lo1,$nj,$m1)
|
||||
mfhi ($hi1,$nj,$m1)
|
||||
|
||||
$MULTU ($nhi,$m1)
|
||||
$ADDU $lo1,$lo0
|
||||
sltu $at,$lo1,$lo0
|
||||
$ADDU $hi1,$at
|
||||
mflo ($nlo,$nhi,$m1)
|
||||
mfhi ($nhi,$nhi,$m1)
|
||||
|
||||
move $tp,$sp
|
||||
li $j,2*$BNSZ
|
||||
$LD $tj,$BNSZ($tp)
|
||||
.align 4
|
||||
.Linner:
|
||||
.set noreorder
|
||||
$PTR_ADD $aj,$ap,$j
|
||||
$PTR_ADD $nj,$np,$j
|
||||
$LD $aj,($aj)
|
||||
$LD $nj,($nj)
|
||||
|
||||
$MULTU ($aj,$bi)
|
||||
$ADDU $lo0,$alo,$hi0
|
||||
$ADDU $lo1,$nlo,$hi1
|
||||
sltu $at,$lo0,$hi0
|
||||
sltu $t0,$lo1,$hi1
|
||||
$ADDU $hi0,$ahi,$at
|
||||
$ADDU $hi1,$nhi,$t0
|
||||
mflo ($alo,$aj,$bi)
|
||||
mfhi ($ahi,$aj,$bi)
|
||||
|
||||
$ADDU $lo0,$tj
|
||||
addu $j,$BNSZ
|
||||
$MULTU ($nj,$m1)
|
||||
sltu $at,$lo0,$tj
|
||||
$ADDU $lo1,$lo0
|
||||
$ADDU $hi0,$at
|
||||
sltu $t0,$lo1,$lo0
|
||||
$LD $tj,2*$BNSZ($tp)
|
||||
$ADDU $hi1,$t0
|
||||
sltu $at,$j,$num
|
||||
mflo ($nlo,$nj,$m1)
|
||||
mfhi ($nhi,$nj,$m1)
|
||||
$ST $lo1,($tp)
|
||||
bnez $at,.Linner
|
||||
$PTR_ADD $tp,$BNSZ
|
||||
.set reorder
|
||||
|
||||
$ADDU $lo0,$alo,$hi0
|
||||
sltu $at,$lo0,$hi0
|
||||
$ADDU $hi0,$ahi,$at
|
||||
$ADDU $lo0,$tj
|
||||
sltu $t0,$lo0,$tj
|
||||
$ADDU $hi0,$t0
|
||||
|
||||
$LD $tj,2*$BNSZ($tp)
|
||||
$ADDU $lo1,$nlo,$hi1
|
||||
sltu $at,$lo1,$hi1
|
||||
$ADDU $hi1,$nhi,$at
|
||||
$ADDU $lo1,$lo0
|
||||
sltu $t0,$lo1,$lo0
|
||||
$ADDU $hi1,$t0
|
||||
$ST $lo1,($tp)
|
||||
|
||||
$ADDU $lo1,$hi1,$hi0
|
||||
sltu $hi1,$lo1,$hi0
|
||||
$ADDU $lo1,$tj
|
||||
sltu $at,$lo1,$tj
|
||||
$ADDU $hi1,$at
|
||||
$ST $lo1,$BNSZ($tp)
|
||||
$ST $hi1,2*$BNSZ($tp)
|
||||
|
||||
addu $i,$BNSZ
|
||||
sltu $t0,$i,$num
|
||||
bnez $t0,.Louter
|
||||
|
||||
.set noreorder
|
||||
$PTR_ADD $tj,$sp,$num # &tp[num]
|
||||
move $tp,$sp
|
||||
move $ap,$sp
|
||||
li $hi0,0 # clear borrow bit
|
||||
|
||||
.align 4
|
||||
.Lsub: $LD $lo0,($tp)
|
||||
$LD $lo1,($np)
|
||||
$PTR_ADD $tp,$BNSZ
|
||||
$PTR_ADD $np,$BNSZ
|
||||
$SUBU $lo1,$lo0,$lo1 # tp[i]-np[i]
|
||||
sgtu $at,$lo1,$lo0
|
||||
$SUBU $lo0,$lo1,$hi0
|
||||
sgtu $hi0,$lo0,$lo1
|
||||
$ST $lo0,($rp)
|
||||
or $hi0,$at
|
||||
sltu $at,$tp,$tj
|
||||
bnez $at,.Lsub
|
||||
$PTR_ADD $rp,$BNSZ
|
||||
|
||||
$SUBU $hi0,$hi1,$hi0 # handle upmost overflow bit
|
||||
move $tp,$sp
|
||||
$PTR_SUB $rp,$num # restore rp
|
||||
not $hi1,$hi0
|
||||
|
||||
.Lcopy: $LD $nj,($tp) # conditional move
|
||||
$LD $aj,($rp)
|
||||
$ST $zero,($tp)
|
||||
$PTR_ADD $tp,$BNSZ
|
||||
and $nj,$hi0
|
||||
and $aj,$hi1
|
||||
or $aj,$nj
|
||||
sltu $at,$tp,$tj
|
||||
$ST $aj,($rp)
|
||||
bnez $at,.Lcopy
|
||||
$PTR_ADD $rp,$BNSZ
|
||||
|
||||
li $a0,1
|
||||
li $t0,1
|
||||
|
||||
.set noreorder
|
||||
move $sp,$fp
|
||||
$REG_L $fp,($FRAMESIZE-1)*$SZREG($sp)
|
||||
$REG_L $s11,($FRAMESIZE-2)*$SZREG($sp)
|
||||
$REG_L $s10,($FRAMESIZE-3)*$SZREG($sp)
|
||||
$REG_L $s9,($FRAMESIZE-4)*$SZREG($sp)
|
||||
$REG_L $s8,($FRAMESIZE-5)*$SZREG($sp)
|
||||
$REG_L $s7,($FRAMESIZE-6)*$SZREG($sp)
|
||||
$REG_L $s6,($FRAMESIZE-7)*$SZREG($sp)
|
||||
$REG_L $s5,($FRAMESIZE-8)*$SZREG($sp)
|
||||
$REG_L $s4,($FRAMESIZE-9)*$SZREG($sp)
|
||||
___
|
||||
$code.=<<___ if ($flavour =~ /nubi/i);
|
||||
$REG_L $s3,($FRAMESIZE-10)*$SZREG($sp)
|
||||
$REG_L $s2,($FRAMESIZE-11)*$SZREG($sp)
|
||||
$REG_L $s1,($FRAMESIZE-12)*$SZREG($sp)
|
||||
$REG_L $s0,($FRAMESIZE-13)*$SZREG($sp)
|
||||
___
|
||||
$code.=<<___;
|
||||
jr $ra
|
||||
$PTR_ADD $sp,$FRAMESIZE*$SZREG
|
||||
.end bn_mul_mont_internal
|
||||
.rdata
|
||||
.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
___
|
||||
|
||||
# Replace every `...` span in the generated text with the result of
# evaluating it as a Perl expression (e.g. the log($BNSZ)/log(2) shift count).
$code =~ s/\`([^\`]*)\`/eval $1/gem;
|
||||
|
||||
print $code;
|
||||
# A failed close means the assembly was not completely written (e.g. disk
# full); abort with a non-zero status so the build does not use a truncated file.
close STDOUT or die "error closing STDOUT: $!";
|
2263
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/mips.pl
vendored
Normal file
2263
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/mips.pl
vendored
Normal file
File diff suppressed because it is too large
Load diff
1006
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/parisc-mont.pl
vendored
Normal file
1006
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/parisc-mont.pl
vendored
Normal file
File diff suppressed because it is too large
Load diff
1990
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ppc-mont.pl
vendored
Normal file
1990
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ppc-mont.pl
vendored
Normal file
File diff suppressed because it is too large
Load diff
2011
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ppc.pl
vendored
Normal file
2011
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ppc.pl
vendored
Normal file
File diff suppressed because it is too large
Load diff
1652
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ppc64-mont.pl
vendored
Normal file
1652
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ppc64-mont.pl
vendored
Normal file
File diff suppressed because it is too large
Load diff
1982
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/rsaz-avx2.pl
vendored
Executable file
1982
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/rsaz-avx2.pl
vendored
Executable file
File diff suppressed because it is too large
Load diff
2404
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/rsaz-x86_64.pl
vendored
Executable file
2404
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/rsaz-x86_64.pl
vendored
Executable file
File diff suppressed because it is too large
Load diff
228
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/s390x-gf2m.pl
vendored
Normal file
228
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/s390x-gf2m.pl
vendored
Normal file
|
@ -0,0 +1,228 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
#
|
||||
# May 2011
|
||||
#
|
||||
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
|
||||
# in bn_gf2m.c. It's a kind of low-hanging mechanical port from C for
|
||||
# the time being... gcc 4.3 appeared to generate poor code, therefore
|
||||
# the effort. And indeed, the module delivers 55%-90%(*) improvement
|
||||
# on heaviest ECDSA verify and ECDH benchmarks for 163- and 571-bit
|
||||
# key lengths on z990, 30%-55%(*) - on z10, and 70%-110%(*) - on z196.
|
||||
# This is for 64-bit build. In 32-bit "highgprs" case improvement is
|
||||
# even higher, for example on z990 it was measured 80%-150%. ECDSA
|
||||
# sign is modest 9%-12% faster. Keep in mind that these coefficients
|
||||
# are not ones for bn_GF2m_mul_2x2 itself, as not all CPU time is
|
||||
# burnt in it...
|
||||
#
|
||||
# (*) gcc 4.1 was observed to deliver better results than gcc 4.3,
|
||||
# so that improvement coefficients can vary from one specific
|
||||
# setup to another.
|
||||
|
||||
$flavour = shift;
|
||||
|
||||
if ($flavour =~ /3[12]/) {
|
||||
$SIZE_T=4;
|
||||
$g="";
|
||||
} else {
|
||||
$SIZE_T=8;
|
||||
$g="g";
|
||||
}
|
||||
|
||||
# Consume remaining command-line arguments until one looks like a file name
# (word characters, a dot, and an extension); that argument is the output path.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
|
||||
# Redirect STDOUT only when an output file was actually named; otherwise keep
# writing to the inherited STDOUT. Abort if the file cannot be opened rather
# than silently emitting the assembly to the wrong place.
open STDOUT,">$output" or die "can't open $output: $!" if $output;
|
||||
|
||||
$stdframe=16*$SIZE_T+4*8;
|
||||
|
||||
$rp="%r2";
|
||||
$a1="%r3";
|
||||
$a0="%r4";
|
||||
$b1="%r5";
|
||||
$b0="%r6";
|
||||
|
||||
$ra="%r14";
|
||||
$sp="%r15";
|
||||
|
||||
@T=("%r0","%r1");
|
||||
@i=("%r12","%r13");
|
||||
|
||||
($a1,$a2,$a4,$a8,$a12,$a48)=map("%r$_",(6..11));
|
||||
($lo,$hi,$b)=map("%r$_",(3..5)); $a=$lo; $mask=$a8;
|
||||
|
||||
$code.=<<___;
|
||||
.text
|
||||
|
||||
.type _mul_1x1,\@function
|
||||
.align 16
|
||||
_mul_1x1:
|
||||
lgr $a1,$a
|
||||
sllg $a2,$a,1
|
||||
sllg $a4,$a,2
|
||||
sllg $a8,$a,3
|
||||
|
||||
srag $lo,$a1,63 # broadcast 63rd bit
|
||||
nihh $a1,0x1fff
|
||||
srag @i[0],$a2,63 # broadcast 62nd bit
|
||||
nihh $a2,0x3fff
|
||||
srag @i[1],$a4,63 # broadcast 61st bit
|
||||
nihh $a4,0x7fff
|
||||
ngr $lo,$b
|
||||
ngr @i[0],$b
|
||||
ngr @i[1],$b
|
||||
|
||||
lghi @T[0],0
|
||||
lgr $a12,$a1
|
||||
stg @T[0],`$stdframe+0*8`($sp) # tab[0]=0
|
||||
xgr $a12,$a2
|
||||
stg $a1,`$stdframe+1*8`($sp) # tab[1]=a1
|
||||
lgr $a48,$a4
|
||||
stg $a2,`$stdframe+2*8`($sp) # tab[2]=a2
|
||||
xgr $a48,$a8
|
||||
stg $a12,`$stdframe+3*8`($sp) # tab[3]=a1^a2
|
||||
xgr $a1,$a4
|
||||
|
||||
stg $a4,`$stdframe+4*8`($sp) # tab[4]=a4
|
||||
xgr $a2,$a4
|
||||
stg $a1,`$stdframe+5*8`($sp) # tab[5]=a1^a4
|
||||
xgr $a12,$a4
|
||||
stg $a2,`$stdframe+6*8`($sp) # tab[6]=a2^a4
|
||||
xgr $a1,$a48
|
||||
stg $a12,`$stdframe+7*8`($sp) # tab[7]=a1^a2^a4
|
||||
xgr $a2,$a48
|
||||
|
||||
stg $a8,`$stdframe+8*8`($sp) # tab[8]=a8
|
||||
xgr $a12,$a48
|
||||
stg $a1,`$stdframe+9*8`($sp) # tab[9]=a1^a8
|
||||
xgr $a1,$a4
|
||||
stg $a2,`$stdframe+10*8`($sp) # tab[10]=a2^a8
|
||||
xgr $a2,$a4
|
||||
stg $a12,`$stdframe+11*8`($sp) # tab[11]=a1^a2^a8
|
||||
|
||||
xgr $a12,$a4
|
||||
stg $a48,`$stdframe+12*8`($sp) # tab[12]=a4^a8
|
||||
srlg $hi,$lo,1
|
||||
stg $a1,`$stdframe+13*8`($sp) # tab[13]=a1^a4^a8
|
||||
sllg $lo,$lo,63
|
||||
stg $a2,`$stdframe+14*8`($sp) # tab[14]=a2^a4^a8
|
||||
srlg @T[0],@i[0],2
|
||||
stg $a12,`$stdframe+15*8`($sp) # tab[15]=a1^a2^a4^a8
|
||||
|
||||
lghi $mask,`0xf<<3`
|
||||
sllg $a1,@i[0],62
|
||||
sllg @i[0],$b,3
|
||||
srlg @T[1],@i[1],3
|
||||
ngr @i[0],$mask
|
||||
sllg $a2,@i[1],61
|
||||
srlg @i[1],$b,4-3
|
||||
xgr $hi,@T[0]
|
||||
ngr @i[1],$mask
|
||||
xgr $lo,$a1
|
||||
xgr $hi,@T[1]
|
||||
xgr $lo,$a2
|
||||
|
||||
xg $lo,$stdframe(@i[0],$sp)
|
||||
srlg @i[0],$b,8-3
|
||||
ngr @i[0],$mask
|
||||
___
|
||||
# Emit 13 unrolled steps (n = 1..13) of the table-driven multiply. Each step:
#   - loads the table entry at byte offset @i[1] from $stdframe($sp);
#   - derives the next table offset from $b shifted right by (n+2)*4-3 and
#     masked with $mask;
#   - XORs the loaded entry, shifted left by n*4, into $lo and the bits
#     shifted out (entry >> (64-n*4)) into $hi.
for($n=1;$n<14;$n++) {
|
||||
$code.=<<___;
|
||||
lg @T[1],$stdframe(@i[1],$sp)
|
||||
srlg @i[1],$b,`($n+2)*4`-3
|
||||
sllg @T[0],@T[1],`$n*4`
|
||||
ngr @i[1],$mask
|
||||
srlg @T[1],@T[1],`64-$n*4`
|
||||
xgr $lo,@T[0]
|
||||
xgr $hi,@T[1]
|
||||
___
|
||||
# Rotate both two-element register lists so the next step swaps the roles of
# the index registers (@i) and of the temporaries (@T).
push(@i,shift(@i)); push(@T,shift(@T));
|
||||
}
|
||||
$code.=<<___;
|
||||
lg @T[1],$stdframe(@i[1],$sp)
|
||||
sllg @T[0],@T[1],`$n*4`
|
||||
srlg @T[1],@T[1],`64-$n*4`
|
||||
xgr $lo,@T[0]
|
||||
xgr $hi,@T[1]
|
||||
|
||||
lg @T[0],$stdframe(@i[0],$sp)
|
||||
sllg @T[1],@T[0],`($n+1)*4`
|
||||
srlg @T[0],@T[0],`64-($n+1)*4`
|
||||
xgr $lo,@T[1]
|
||||
xgr $hi,@T[0]
|
||||
|
||||
br $ra
|
||||
.size _mul_1x1,.-_mul_1x1
|
||||
|
||||
.globl bn_GF2m_mul_2x2
|
||||
.type bn_GF2m_mul_2x2,\@function
|
||||
.align 16
|
||||
bn_GF2m_mul_2x2:
|
||||
stm${g} %r3,%r15,3*$SIZE_T($sp)
|
||||
|
||||
lghi %r1,-$stdframe-128
|
||||
la %r0,0($sp)
|
||||
la $sp,0(%r1,$sp) # alloca
|
||||
st${g} %r0,0($sp) # back chain
|
||||
___
|
||||
if ($SIZE_T==8) {
|
||||
my @r=map("%r$_",(6..9));
|
||||
$code.=<<___;
|
||||
bras $ra,_mul_1x1 # a1·b1
|
||||
stmg $lo,$hi,16($rp)
|
||||
|
||||
lg $a,`$stdframe+128+4*$SIZE_T`($sp)
|
||||
lg $b,`$stdframe+128+6*$SIZE_T`($sp)
|
||||
bras $ra,_mul_1x1 # a0·b0
|
||||
stmg $lo,$hi,0($rp)
|
||||
|
||||
lg $a,`$stdframe+128+3*$SIZE_T`($sp)
|
||||
lg $b,`$stdframe+128+5*$SIZE_T`($sp)
|
||||
xg $a,`$stdframe+128+4*$SIZE_T`($sp)
|
||||
xg $b,`$stdframe+128+6*$SIZE_T`($sp)
|
||||
bras $ra,_mul_1x1 # (a0+a1)·(b0+b1)
|
||||
lmg @r[0],@r[3],0($rp)
|
||||
|
||||
xgr $lo,$hi
|
||||
xgr $hi,@r[1]
|
||||
xgr $lo,@r[0]
|
||||
xgr $hi,@r[2]
|
||||
xgr $lo,@r[3]
|
||||
xgr $hi,@r[3]
|
||||
xgr $lo,$hi
|
||||
stg $hi,16($rp)
|
||||
stg $lo,8($rp)
|
||||
___
|
||||
} else {
|
||||
$code.=<<___;
|
||||
sllg %r3,%r3,32
|
||||
sllg %r5,%r5,32
|
||||
or %r3,%r4
|
||||
or %r5,%r6
|
||||
bras $ra,_mul_1x1
|
||||
rllg $lo,$lo,32
|
||||
rllg $hi,$hi,32
|
||||
stmg $lo,$hi,0($rp)
|
||||
___
|
||||
}
|
||||
$code.=<<___;
|
||||
lm${g} %r6,%r15,`$stdframe+128+6*$SIZE_T`($sp)
|
||||
br $ra
|
||||
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
|
||||
.string "GF(2^m) Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
___
|
||||
|
||||
# Replace every `...` span in the generated text with the result of
# evaluating it as a Perl expression (frame offsets, shift counts, masks).
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
|
||||
print $code;
|
||||
# A failed close means the assembly was not completely written (e.g. disk
# full); abort with a non-zero status so the build does not use a truncated file.
close STDOUT or die "error closing STDOUT: $!";
|
284
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/s390x-mont.pl
vendored
Normal file
284
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/s390x-mont.pl
vendored
Normal file
|
@ -0,0 +1,284 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
|
||||
# April 2007.
|
||||
#
|
||||
# Performance improvement over vanilla C code varies from 85% to 45%
|
||||
# depending on key length and benchmark. Unfortunately in this context
|
||||
# these are not very impressive results [for code that utilizes "wide"
|
||||
# 64x64=128-bit multiplication, which is not commonly available to C
|
||||
# programmers], at least hand-coded bn_asm.c replacement is known to
|
||||
# provide 30-40% better results for longest keys. Well, on a second
|
||||
# thought it's not very surprising, because z-CPUs are single-issue
|
||||
# and _strictly_ in-order execution, while bn_mul_mont is more or less
|
||||
# dependent on CPU ability to pipe-line instructions and have several
|
||||
# of them "in-flight" at the same time. I mean while other methods,
|
||||
# for example Karatsuba, aim to minimize amount of multiplications at
|
||||
# the cost of other operations increase, bn_mul_mont aim to neatly
|
||||
# "overlap" multiplications and the other operations [and on most
|
||||
# platforms even minimize the amount of the other operations, in
|
||||
# particular references to memory]. But it's possible to improve this
|
||||
# module performance by implementing dedicated squaring code-path and
|
||||
# possibly by unrolling loops...
|
||||
|
||||
# January 2009.
|
||||
#
|
||||
# Reschedule to minimize/avoid Address Generation Interlock hazard,
|
||||
# make inner loops counter-based.
|
||||
|
||||
# November 2010.
|
||||
#
|
||||
# Adapt for -m31 build. If kernel supports what's called "highgprs"
|
||||
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
|
||||
# instructions and achieve "64-bit" performance even in 31-bit legacy
|
||||
# application context. The feature is not specific to any particular
|
||||
# processor, as long as it's "z-CPU". Latter implies that the code
|
||||
# remains z/Architecture specific. Compatibility with 32-bit BN_ULONG
|
||||
# is achieved by swapping words after 64-bit loads, follow _dswap-s.
|
||||
# On z990 it was measured to perform 2.6-2.2 times better than
|
||||
# compiler-generated code, less for longer keys...
|
||||
|
||||
$flavour = shift; # first command-line argument selects the build flavour
|
||||
|
||||
if ($flavour =~ /3[12]/) { # flavour containing "31" or "32": 4-byte SIZE_T
|
||||
$SIZE_T=4;
|
||||
$g=""; # instruction suffix: st${g}/stm${g}/lm${g} become st/stm/lm
|
||||
} else {
|
||||
$SIZE_T=8;
|
||||
$g="g"; # instruction suffix: st${g}/stm${g}/lm${g} become stg/stmg/lmg
|
||||
}
|
||||
|
||||
# Consume remaining arguments until one looks like a file name with an
# extension; that argument becomes the output file.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
|
||||
open STDOUT,">$output"; # everything printed below goes to $output
|
||||
|
||||
# Frame size used by the templates: 16 slots of $SIZE_T bytes plus 4*8 bytes
# (160 for the 8-byte build, matching the 160(%r15) note further down).
$stdframe=16*$SIZE_T+4*8;
|
||||
|
||||
# Symbolic register names interpolated into the assembly templates.
$mn0="%r0";
|
||||
$num="%r1";
|
||||
|
||||
# int bn_mul_mont(
|
||||
$rp="%r2"; # BN_ULONG *rp,
|
||||
$ap="%r3"; # const BN_ULONG *ap,
|
||||
$bp="%r4"; # const BN_ULONG *bp,
|
||||
$np="%r5"; # const BN_ULONG *np,
|
||||
$n0="%r6"; # const BN_ULONG *n0,
|
||||
#$num="160(%r15)" # int num);
|
||||
|
||||
$bi="%r2"; # zaps rp
|
||||
$j="%r7"; # byte offset into ap/np/tp inside the loops
|
||||
|
||||
$ahi="%r8"; # $ahi:$alo form the even/odd pair used by mlgr for ap[j]*bp[i]
|
||||
$alo="%r9";
|
||||
$nhi="%r10"; # $nhi:$nlo form the even/odd pair used by mlgr for np[j]*m1
|
||||
$nlo="%r11";
|
||||
$AHI="%r12"; # carried high word of the ap[] products
|
||||
$NHI="%r13"; # carried high word of the np[] products
|
||||
$count="%r14"; # loop counter for brct
|
||||
$sp="%r15";
|
||||
|
||||
$code.=<<___;
|
||||
.text
|
||||
.globl bn_mul_mont
|
||||
.type bn_mul_mont,\@function
|
||||
bn_mul_mont:
|
||||
lgf $num,`$stdframe+$SIZE_T-4`($sp) # pull $num
|
||||
sla $num,`log($SIZE_T)/log(2)` # $num to enumerate bytes
|
||||
la $bp,0($num,$bp)
|
||||
|
||||
st${g} %r2,2*$SIZE_T($sp)
|
||||
|
||||
cghi $num,16 #
|
||||
lghi %r2,0 #
|
||||
blr %r14 # if($num<16) return 0;
|
||||
___
|
||||
$code.=<<___ if ($flavour =~ /3[12]/);
|
||||
tmll $num,4
|
||||
bnzr %r14 # if ($num&1) return 0;
|
||||
___
|
||||
$code.=<<___ if ($flavour !~ /3[12]/);
|
||||
cghi $num,96 #
|
||||
bhr %r14 # if($num>96) return 0;
|
||||
___
|
||||
$code.=<<___;
|
||||
stm${g} %r3,%r15,3*$SIZE_T($sp)
|
||||
|
||||
lghi $rp,-$stdframe-8 # leave room for carry bit
|
||||
lcgr $j,$num # -$num
|
||||
lgr %r0,$sp
|
||||
la $rp,0($rp,$sp)
|
||||
la $sp,0($j,$rp) # alloca
|
||||
st${g} %r0,0($sp) # back chain
|
||||
|
||||
sra $num,3 # restore $num
|
||||
la $bp,0($j,$bp) # restore $bp
|
||||
ahi $num,-1 # adjust $num for inner loop
|
||||
lg $n0,0($n0) # pull n0
|
||||
_dswap $n0
|
||||
|
||||
lg $bi,0($bp)
|
||||
_dswap $bi
|
||||
lg $alo,0($ap)
|
||||
_dswap $alo
|
||||
mlgr $ahi,$bi # ap[0]*bp[0]
|
||||
lgr $AHI,$ahi
|
||||
|
||||
lgr $mn0,$alo # "tp[0]"*n0
|
||||
msgr $mn0,$n0
|
||||
|
||||
lg $nlo,0($np) #
|
||||
_dswap $nlo
|
||||
mlgr $nhi,$mn0 # np[0]*m1
|
||||
algr $nlo,$alo # +="tp[0]"
|
||||
lghi $NHI,0
|
||||
alcgr $NHI,$nhi
|
||||
|
||||
la $j,8(%r0) # j=1
|
||||
lr $count,$num
|
||||
|
||||
.align 16
|
||||
.L1st:
|
||||
lg $alo,0($j,$ap)
|
||||
_dswap $alo
|
||||
mlgr $ahi,$bi # ap[j]*bp[0]
|
||||
algr $alo,$AHI
|
||||
lghi $AHI,0
|
||||
alcgr $AHI,$ahi
|
||||
|
||||
lg $nlo,0($j,$np)
|
||||
_dswap $nlo
|
||||
mlgr $nhi,$mn0 # np[j]*m1
|
||||
algr $nlo,$NHI
|
||||
lghi $NHI,0
|
||||
alcgr $nhi,$NHI # +="tp[j]"
|
||||
algr $nlo,$alo
|
||||
alcgr $NHI,$nhi
|
||||
|
||||
stg $nlo,$stdframe-8($j,$sp) # tp[j-1]=
|
||||
la $j,8($j) # j++
|
||||
brct $count,.L1st
|
||||
|
||||
algr $NHI,$AHI
|
||||
lghi $AHI,0
|
||||
alcgr $AHI,$AHI # upmost overflow bit
|
||||
stg $NHI,$stdframe-8($j,$sp)
|
||||
stg $AHI,$stdframe($j,$sp)
|
||||
la $bp,8($bp) # bp++
|
||||
|
||||
.Louter:
|
||||
lg $bi,0($bp) # bp[i]
|
||||
_dswap $bi
|
||||
lg $alo,0($ap)
|
||||
_dswap $alo
|
||||
mlgr $ahi,$bi # ap[0]*bp[i]
|
||||
alg $alo,$stdframe($sp) # +=tp[0]
|
||||
lghi $AHI,0
|
||||
alcgr $AHI,$ahi
|
||||
|
||||
lgr $mn0,$alo
|
||||
msgr $mn0,$n0 # tp[0]*n0
|
||||
|
||||
lg $nlo,0($np) # np[0]
|
||||
_dswap $nlo
|
||||
mlgr $nhi,$mn0 # np[0]*m1
|
||||
algr $nlo,$alo # +="tp[0]"
|
||||
lghi $NHI,0
|
||||
alcgr $NHI,$nhi
|
||||
|
||||
la $j,8(%r0) # j=1
|
||||
lr $count,$num
|
||||
|
||||
.align 16
|
||||
.Linner:
|
||||
lg $alo,0($j,$ap)
|
||||
_dswap $alo
|
||||
mlgr $ahi,$bi # ap[j]*bp[i]
|
||||
algr $alo,$AHI
|
||||
lghi $AHI,0
|
||||
alcgr $ahi,$AHI
|
||||
alg $alo,$stdframe($j,$sp)# +=tp[j]
|
||||
alcgr $AHI,$ahi
|
||||
|
||||
lg $nlo,0($j,$np)
|
||||
_dswap $nlo
|
||||
mlgr $nhi,$mn0 # np[j]*m1
|
||||
algr $nlo,$NHI
|
||||
lghi $NHI,0
|
||||
alcgr $nhi,$NHI
|
||||
algr $nlo,$alo # +="tp[j]"
|
||||
alcgr $NHI,$nhi
|
||||
|
||||
stg $nlo,$stdframe-8($j,$sp) # tp[j-1]=
|
||||
la $j,8($j) # j++
|
||||
brct $count,.Linner
|
||||
|
||||
algr $NHI,$AHI
|
||||
lghi $AHI,0
|
||||
alcgr $AHI,$AHI
|
||||
alg $NHI,$stdframe($j,$sp)# accumulate previous upmost overflow bit
|
||||
lghi $ahi,0
|
||||
alcgr $AHI,$ahi # new upmost overflow bit
|
||||
stg $NHI,$stdframe-8($j,$sp)
|
||||
stg $AHI,$stdframe($j,$sp)
|
||||
|
||||
la $bp,8($bp) # bp++
|
||||
cl${g} $bp,`$stdframe+8+4*$SIZE_T`($j,$sp) # compare to &bp[num]
|
||||
jne .Louter
|
||||
|
||||
l${g} $rp,`$stdframe+8+2*$SIZE_T`($j,$sp) # reincarnate rp
|
||||
la $ap,$stdframe($sp)
|
||||
ahi $num,1 # restore $num, incidentally clears "borrow"
|
||||
|
||||
la $j,0(%r0)
|
||||
lr $count,$num
|
||||
.Lsub: lg $alo,0($j,$ap)
|
||||
lg $nlo,0($j,$np)
|
||||
_dswap $nlo
|
||||
slbgr $alo,$nlo
|
||||
stg $alo,0($j,$rp)
|
||||
la $j,8($j)
|
||||
brct $count,.Lsub
|
||||
lghi $ahi,0
|
||||
slbgr $AHI,$ahi # handle upmost carry
|
||||
lghi $NHI,-1
|
||||
xgr $NHI,$AHI
|
||||
|
||||
la $j,0(%r0)
|
||||
lgr $count,$num
|
||||
.Lcopy: lg $ahi,$stdframe($j,$sp) # conditional copy
|
||||
lg $alo,0($j,$rp)
|
||||
ngr $ahi,$AHI
|
||||
ngr $alo,$NHI
|
||||
ogr $alo,$ahi
|
||||
_dswap $alo
|
||||
stg $j,$stdframe($j,$sp) # zap tp
|
||||
stg $alo,0($j,$rp)
|
||||
la $j,8($j)
|
||||
brct $count,.Lcopy
|
||||
|
||||
la %r1,`$stdframe+8+6*$SIZE_T`($j,$sp)
|
||||
lm${g} %r6,%r15,0(%r1)
|
||||
lghi %r2,1 # signal "processed"
|
||||
br %r14
|
||||
.size bn_mul_mont,.-bn_mul_mont
|
||||
.string "Montgomery Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
___
|
||||
|
||||
# Post-process the template line by line and write the result to STDOUT.
foreach (split("\n",$code)) {
|
||||
s/\`([^\`]*)\`/eval $1/ge; # evaluate `...` spans as Perl expressions
|
||||
# _dswap is a pseudo-op: with 4-byte SIZE_T it becomes a 32-bit rotate that
# swaps the two halves of a 64-bit load; otherwise the pseudo-op is removed.
s/_dswap\s+(%r[0-9]+)/sprintf("rllg\t%s,%s,32",$1,$1) if($SIZE_T==4)/e;
|
||||
print $_,"\n";
|
||||
}
|
||||
# A failed close means the assembly output was not completely written;
# stop the build instead of leaving a truncated file behind.
close STDOUT or die "error closing STDOUT: $!";
|
713
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/s390x.S
vendored
Normal file
713
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/s390x.S
vendored
Normal file
|
@ -0,0 +1,713 @@
|
|||
.ident "s390x.S, version 1.1"
|
||||
// ====================================================================
|
||||
// Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the OpenSSL license (the "License"). You may not use
|
||||
// this file except in compliance with the License. You can obtain a copy
|
||||
// in the file LICENSE in the source distribution or at
|
||||
// https://www.openssl.org/source/license.html
|
||||
// ====================================================================
|
||||
|
||||
.text
|
||||
|
||||
#define zero %r0
|
||||
|
||||
// BN_ULONG bn_mul_add_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
// rp[i] += ap[i]*w for i in [0,len); the carry word is passed from word to
// word and the final carry is returned in %r2.
// Register use: %r1 = rp-ap (rp[i] is addressed as 0(%r3,%r1)), %r3 = running
// ap pointer, %r4 = len/4 loop count, %r2 = len%4 tail count, %r5 = w,
// %r12 = carry word, %r6-%r13 = even/odd pairs receiving the products.
|
||||
.globl bn_mul_add_words
|
||||
.type bn_mul_add_words,@function
|
||||
.align 4
|
||||
bn_mul_add_words:
|
||||
lghi zero,0 // zero = 0
|
||||
la %r1,0(%r2) // put rp aside [to give way to]
|
||||
lghi %r2,0 // return value
|
||||
ltgfr %r4,%r4 // sign-extend the 32-bit len and test it
|
||||
bler %r14 // if (len<=0) return 0;
|
||||
|
||||
stmg %r6,%r13,48(%r15) // save %r6-%r13, restored at .Lend_madd
|
||||
lghi %r2,3 // mask for len%4
|
||||
lghi %r12,0 // carry = 0
|
||||
slgr %r1,%r3 // rp-=ap
|
||||
nr %r2,%r4 // len%4
|
||||
sra %r4,2 // cnt=len/4
|
||||
jz .Loop1_madd // carry is incidentally cleared if branch taken
|
||||
algr zero,zero // clear carry
|
||||
|
||||
lg %r7,0(%r3) // ap[0]
|
||||
lg %r9,8(%r3) // ap[1]
|
||||
mlgr %r6,%r5 // *=w
|
||||
brct %r4,.Loop4_madd // --cnt != 0: loop, which also pre-loads the next group
|
||||
j .Loop4_madd_tail // last (or only) group of four words
|
||||
|
||||
.Loop4_madd:
|
||||
mlgr %r8,%r5
|
||||
lg %r11,16(%r3) // ap[i+2]
|
||||
alcgr %r7,%r12 // +=carry
|
||||
alcgr %r6,zero
|
||||
alg %r7,0(%r3,%r1) // +=rp[i]
|
||||
stg %r7,0(%r3,%r1) // rp[i]=
|
||||
|
||||
mlgr %r10,%r5
|
||||
lg %r13,24(%r3) // ap[i+3]
|
||||
alcgr %r9,%r6
|
||||
alcgr %r8,zero
|
||||
alg %r9,8(%r3,%r1)
|
||||
stg %r9,8(%r3,%r1) // rp[i+1]=
|
||||
|
||||
mlgr %r12,%r5
|
||||
lg %r7,32(%r3) // ap[i+4], for the next pass
|
||||
alcgr %r11,%r8
|
||||
alcgr %r10,zero
|
||||
alg %r11,16(%r3,%r1)
|
||||
stg %r11,16(%r3,%r1) // rp[i+2]=
|
||||
|
||||
mlgr %r6,%r5
|
||||
lg %r9,40(%r3) // ap[i+5], for the next pass
|
||||
alcgr %r13,%r10
|
||||
alcgr %r12,zero
|
||||
alg %r13,24(%r3,%r1)
|
||||
stg %r13,24(%r3,%r1) // rp[i+3]=
|
||||
|
||||
la %r3,32(%r3) // i+=4
|
||||
brct %r4,.Loop4_madd
|
||||
|
||||
.Loop4_madd_tail:
|
||||
mlgr %r8,%r5
|
||||
lg %r11,16(%r3)
|
||||
alcgr %r7,%r12 // +=carry
|
||||
alcgr %r6,zero
|
||||
alg %r7,0(%r3,%r1) // +=rp[i]
|
||||
stg %r7,0(%r3,%r1) // rp[i]=
|
||||
|
||||
mlgr %r10,%r5
|
||||
lg %r13,24(%r3)
|
||||
alcgr %r9,%r6
|
||||
alcgr %r8,zero
|
||||
alg %r9,8(%r3,%r1)
|
||||
stg %r9,8(%r3,%r1)
|
||||
|
||||
mlgr %r12,%r5
|
||||
alcgr %r11,%r8
|
||||
alcgr %r10,zero
|
||||
alg %r11,16(%r3,%r1)
|
||||
stg %r11,16(%r3,%r1)
|
||||
|
||||
alcgr %r13,%r10
|
||||
alcgr %r12,zero
|
||||
alg %r13,24(%r3,%r1)
|
||||
stg %r13,24(%r3,%r1)
|
||||
|
||||
la %r3,32(%r3) // i+=4
|
||||
|
||||
la %r2,1(%r2) // see if len%4 is zero ...
|
||||
brct %r2,.Loop1_madd // without touching condition code:-)
|
||||
|
||||
.Lend_madd:
|
||||
lgr %r2,zero // return value
|
||||
alcgr %r2,%r12 // collect even carry bit
|
||||
lmg %r6,%r13,48(%r15) // restore %r6-%r13
|
||||
br %r14
|
||||
|
||||
.Loop1_madd:
|
||||
lg %r7,0(%r3) // ap[i]
|
||||
mlgr %r6,%r5 // *=w
|
||||
alcgr %r7,%r12 // +=carry
|
||||
alcgr %r6,zero
|
||||
alg %r7,0(%r3,%r1) // +=rp[i]
|
||||
stg %r7,0(%r3,%r1) // rp[i]=
|
||||
|
||||
lgr %r12,%r6 // carry = high word of this product
|
||||
la %r3,8(%r3) // i++
|
||||
brct %r2,.Loop1_madd
|
||||
|
||||
j .Lend_madd
|
||||
.size bn_mul_add_words,.-bn_mul_add_words
|
||||
|
||||
// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
// rp[i] = ap[i]*w + carry (low word) for i in [0,len); the high word of each
// product becomes the next carry and the final carry is returned in %r2.
// Register use: %r1 = rp, %r2 = byte offset i (until the exit path),
// %r4 = len/4 loop count, %r10 = len%4 tail count, %r5 = w, %r8 = carry word.
|
||||
.globl bn_mul_words
|
||||
.type bn_mul_words,@function
|
||||
.align 4
|
||||
bn_mul_words:
|
||||
lghi zero,0 // zero = 0
|
||||
la %r1,0(%r2) // put rp aside
|
||||
lghi %r2,0 // i=0;
|
||||
ltgfr %r4,%r4 // sign-extend the 32-bit len and test it
|
||||
bler %r14 // if (len<=0) return 0;
|
||||
|
||||
stmg %r6,%r10,48(%r15) // save %r6-%r10, restored at .Lend_mul
|
||||
lghi %r10,3 // mask for len%4
|
||||
lghi %r8,0 // carry = 0
|
||||
nr %r10,%r4 // len%4
|
||||
sra %r4,2 // cnt=len/4
|
||||
jz .Loop1_mul // carry is incidentally cleared if branch taken
|
||||
algr zero,zero // clear carry
|
||||
|
||||
.Loop4_mul:
|
||||
lg %r7,0(%r2,%r3) // ap[i]
|
||||
mlgr %r6,%r5 // *=w
|
||||
alcgr %r7,%r8 // +=carry
|
||||
stg %r7,0(%r2,%r1) // rp[i]=
|
||||
|
||||
lg %r9,8(%r2,%r3) // ap[i+1]
|
||||
mlgr %r8,%r5
|
||||
alcgr %r9,%r6
|
||||
stg %r9,8(%r2,%r1) // rp[i+1]=
|
||||
|
||||
lg %r7,16(%r2,%r3) // ap[i+2]
|
||||
mlgr %r6,%r5
|
||||
alcgr %r7,%r8
|
||||
stg %r7,16(%r2,%r1) // rp[i+2]=
|
||||
|
||||
lg %r9,24(%r2,%r3) // ap[i+3]
|
||||
mlgr %r8,%r5
|
||||
alcgr %r9,%r6
|
||||
stg %r9,24(%r2,%r1) // rp[i+3]=
|
||||
|
||||
la %r2,32(%r2) // i+=4
|
||||
brct %r4,.Loop4_mul
|
||||
|
||||
la %r10,1(%r10) // see if len%4 is zero ...
|
||||
brct %r10,.Loop1_mul // without touching condition code:-)
|
||||
|
||||
.Lend_mul:
|
||||
alcgr %r8,zero // collect carry bit
|
||||
lgr %r2,%r8 // return the final carry word
|
||||
lmg %r6,%r10,48(%r15) // restore %r6-%r10
|
||||
br %r14
|
||||
|
||||
.Loop1_mul:
|
||||
lg %r7,0(%r2,%r3) // ap[i]
|
||||
mlgr %r6,%r5 // *=w
|
||||
alcgr %r7,%r8 // +=carry
|
||||
stg %r7,0(%r2,%r1) // rp[i]=
|
||||
|
||||
lgr %r8,%r6 // carry = high word of this product
|
||||
la %r2,8(%r2) // i++
|
||||
brct %r10,.Loop1_mul
|
||||
|
||||
j .Lend_mul
|
||||
.size bn_mul_words,.-bn_mul_words
|
||||
|
||||
// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r3,int r4)
// For each of the r4 input words at %r3, stores its 128-bit square at %r2 as
// two words (low word first, high word second), so the output advances two
// words per input word.
|
||||
.globl bn_sqr_words
|
||||
.type bn_sqr_words,@function
|
||||
.align 4
|
||||
bn_sqr_words:
|
||||
ltgfr %r4,%r4 // sign-extend the 32-bit len and test it
|
||||
bler %r14 // if (len<=0) return;
|
||||
|
||||
stmg %r6,%r7,48(%r15) // save %r6-%r7, restored at .Lend_sqr
|
||||
srag %r1,%r4,2 // cnt=len/4
|
||||
jz .Loop1_sqr // fewer than four words: only the word-at-a-time loop
|
||||
|
||||
.Loop4_sqr:
|
||||
lg %r7,0(%r3)
|
||||
mlgr %r6,%r7 // %r6:%r7 = %r7*%r7
|
||||
stg %r7,0(%r2) // low word
|
||||
stg %r6,8(%r2) // high word
|
||||
|
||||
lg %r7,8(%r3)
|
||||
mlgr %r6,%r7
|
||||
stg %r7,16(%r2)
|
||||
stg %r6,24(%r2)
|
||||
|
||||
lg %r7,16(%r3)
|
||||
mlgr %r6,%r7
|
||||
stg %r7,32(%r2)
|
||||
stg %r6,40(%r2)
|
||||
|
||||
lg %r7,24(%r3)
|
||||
mlgr %r6,%r7
|
||||
stg %r7,48(%r2)
|
||||
stg %r6,56(%r2)
|
||||
|
||||
la %r3,32(%r3) // four input words consumed
|
||||
la %r2,64(%r2) // eight output words produced
|
||||
brct %r1,.Loop4_sqr
|
||||
|
||||
lghi %r1,3
|
||||
nr %r4,%r1 // cnt=len%4
|
||||
jz .Lend_sqr
|
||||
|
||||
.Loop1_sqr:
|
||||
lg %r7,0(%r3)
|
||||
mlgr %r6,%r7
|
||||
stg %r7,0(%r2)
|
||||
stg %r6,8(%r2)
|
||||
|
||||
la %r3,8(%r3)
|
||||
la %r2,16(%r2)
|
||||
brct %r4,.Loop1_sqr
|
||||
|
||||
.Lend_sqr:
|
||||
lmg %r6,%r7,48(%r15) // restore %r6-%r7
|
||||
br %r14
|
||||
.size bn_sqr_words,.-bn_sqr_words
|
||||
|
||||
// BN_ULONG bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d);
// Divides the 128-bit value h:l (%r2:%r3) by d (%r4) and returns the quotient.
// No check for d == 0 or for a quotient wider than 64 bits is made here.
|
||||
.globl bn_div_words
|
||||
.type bn_div_words,@function
|
||||
.align 4
|
||||
bn_div_words:
|
||||
dlgr %r2,%r4 // %r2 = remainder, %r3 = quotient
|
||||
lgr %r2,%r3 // return the quotient
|
||||
br %r14
|
||||
.size bn_div_words,.-bn_div_words
|
||||
|
||||
// BN_ULONG bn_add_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
// rp[i] = ap[i] + bp[i] + carry for i in [0,len); returns the final carry in
// %r2.  Register use: %r1 = rp, %r2 = byte offset i, %r3 = ap, %r4 = bp,
// %r5 = len/4 loop count, %r6 = len%4 tail count, %r0 = scratch word.
// The carry is kept in the condition code between iterations, so the loop
// bookkeeping is written with la/brct as noted on the lines below.
|
||||
.globl bn_add_words
|
||||
.type bn_add_words,@function
|
||||
.align 4
|
||||
bn_add_words:
|
||||
la %r1,0(%r2) // put rp aside
|
||||
lghi %r2,0 // i=0
|
||||
ltgfr %r5,%r5 // sign-extend the 32-bit len and test it
|
||||
bler %r14 // if (len<=0) return 0;
|
||||
|
||||
stg %r6,48(%r15) // save %r6, restored at .Lexit_add
|
||||
lghi %r6,3 // mask for len%4
|
||||
nr %r6,%r5 // len%4
|
||||
sra %r5,2 // len/4, use sra because it sets condition code
|
||||
jz .Loop1_add // carry is incidentally cleared if branch taken
|
||||
algr %r2,%r2 // clear carry
|
||||
|
||||
.Loop4_add:
|
||||
lg %r0,0(%r2,%r3)
|
||||
alcg %r0,0(%r2,%r4)
|
||||
stg %r0,0(%r2,%r1)
|
||||
lg %r0,8(%r2,%r3)
|
||||
alcg %r0,8(%r2,%r4)
|
||||
stg %r0,8(%r2,%r1)
|
||||
lg %r0,16(%r2,%r3)
|
||||
alcg %r0,16(%r2,%r4)
|
||||
stg %r0,16(%r2,%r1)
|
||||
lg %r0,24(%r2,%r3)
|
||||
alcg %r0,24(%r2,%r4)
|
||||
stg %r0,24(%r2,%r1)
|
||||
|
||||
la %r2,32(%r2) // i+=4
|
||||
brct %r5,.Loop4_add
|
||||
|
||||
la %r6,1(%r6) // see if len%4 is zero ...
|
||||
brct %r6,.Loop1_add // without touching condition code:-)
|
||||
|
||||
.Lexit_add:
|
||||
lghi %r2,0
|
||||
alcgr %r2,%r2 // %r2 = 0+0+carry, the return value
|
||||
lg %r6,48(%r15) // restore %r6
|
||||
br %r14
|
||||
|
||||
.Loop1_add:
|
||||
lg %r0,0(%r2,%r3)
|
||||
alcg %r0,0(%r2,%r4)
|
||||
stg %r0,0(%r2,%r1)
|
||||
|
||||
la %r2,8(%r2) // i++
|
||||
brct %r6,.Loop1_add
|
||||
|
||||
j .Lexit_add
|
||||
.size bn_add_words,.-bn_add_words
|
||||
|
||||
// BN_ULONG bn_sub_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
// rp[i] = ap[i] - bp[i] - borrow for i in [0,len); returns the final borrow
// in %r2.  Register use: %r1 = rp, %r2 = byte offset i, %r3 = ap, %r4 = bp,
// %r5 = len/4 loop count, %r6 = len%4 tail count, %r0 = scratch word.
// The borrow is kept in the condition code between iterations, so the loop
// bookkeeping is written with la/brct as noted on the lines below.
|
||||
.globl bn_sub_words
|
||||
.type bn_sub_words,@function
|
||||
.align 4
|
||||
bn_sub_words:
|
||||
la %r1,0(%r2) // put rp aside
|
||||
lghi %r2,0 // i=0
|
||||
ltgfr %r5,%r5 // sign-extend the 32-bit len and test it
|
||||
bler %r14 // if (len<=0) return 0;
|
||||
|
||||
stg %r6,48(%r15) // save %r6, restored at .Lexit_sub
|
||||
lghi %r6,3 // mask for len%4
|
||||
nr %r6,%r5 // len%4
|
||||
sra %r5,2 // len/4, use sra because it sets condition code
|
||||
jnz .Loop4_sub // borrow is incidentally cleared if branch taken
|
||||
slgr %r2,%r2 // clear borrow
|
||||
|
||||
.Loop1_sub:
|
||||
lg %r0,0(%r2,%r3)
|
||||
slbg %r0,0(%r2,%r4)
|
||||
stg %r0,0(%r2,%r1)
|
||||
|
||||
la %r2,8(%r2) // i++
|
||||
brct %r6,.Loop1_sub
|
||||
j .Lexit_sub
|
||||
|
||||
.Loop4_sub:
|
||||
lg %r0,0(%r2,%r3)
|
||||
slbg %r0,0(%r2,%r4)
|
||||
stg %r0,0(%r2,%r1)
|
||||
lg %r0,8(%r2,%r3)
|
||||
slbg %r0,8(%r2,%r4)
|
||||
stg %r0,8(%r2,%r1)
|
||||
lg %r0,16(%r2,%r3)
|
||||
slbg %r0,16(%r2,%r4)
|
||||
stg %r0,16(%r2,%r1)
|
||||
lg %r0,24(%r2,%r3)
|
||||
slbg %r0,24(%r2,%r4)
|
||||
stg %r0,24(%r2,%r1)
|
||||
|
||||
la %r2,32(%r2) // i+=4
|
||||
brct %r5,.Loop4_sub
|
||||
|
||||
la %r6,1(%r6) // see if len%4 is zero ...
|
||||
brct %r6,.Loop1_sub // without touching condition code:-)
|
||||
|
||||
.Lexit_sub:
|
||||
lghi %r2,0
|
||||
slbgr %r2,%r2 // %r2 = 0-0-borrow, i.e. 0 or -1
|
||||
lcgr %r2,%r2 // negate so the return value is 0 or 1
|
||||
lg %r6,48(%r15) // restore %r6
|
||||
br %r14
|
||||
.size bn_sub_words,.-bn_sub_words
|
||||
|
||||
#define c1 %r1
|
||||
#define c2 %r5
|
||||
#define c3 %r8
// c1/c2/c3 name the three accumulator registers of the comba routines. The
// macros below take parameters with the same names, so each call site decides
// which register plays the low, middle and high role.
|
||||
|
||||
// mul_add_c(ai,bi,c1,c2,c3): adds the 128-bit product a[ai]*b[bi] (a at %r3,
// b at %r4) into the three-word accumulator c3:c2:c1. Uses %r6:%r7 as the
// product pair and expects zero (%r0) to hold 0.
#define mul_add_c(ai,bi,c1,c2,c3) \
|
||||
lg %r7,ai*8(%r3); \
|
||||
mlg %r6,bi*8(%r4); \
|
||||
algr c1,%r7; \
|
||||
alcgr c2,%r6; \
|
||||
alcgr c3,zero
|
||||
|
||||
// void bn_mul_comba8(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
// r[0..15] = a[0..7] * b[0..7] (r at %r2, a at %r3, b at %r4), computed column
// by column: all products a[i]*b[j] with i+j == k are added into a three-word
// accumulator before result word k is stored; the accumulator registers
// rotate through c1/c2/c3 from one column to the next.
|
||||
.globl bn_mul_comba8
|
||||
.type bn_mul_comba8,@function
|
||||
.align 4
|
||||
bn_mul_comba8:
|
||||
stmg %r6,%r8,48(%r15) // save %r6-%r8, restored before return
|
||||
|
||||
lghi c1,0
|
||||
lghi c2,0
|
||||
lghi c3,0
|
||||
lghi zero,0 // mul_add_c relies on zero == 0
|
||||
|
||||
mul_add_c(0,0,c1,c2,c3);
|
||||
stg c1,0*8(%r2) // r[0]
|
||||
lghi c1,0
|
||||
|
||||
mul_add_c(0,1,c2,c3,c1);
|
||||
mul_add_c(1,0,c2,c3,c1);
|
||||
stg c2,1*8(%r2) // r[1]
|
||||
lghi c2,0
|
||||
|
||||
mul_add_c(2,0,c3,c1,c2);
|
||||
mul_add_c(1,1,c3,c1,c2);
|
||||
mul_add_c(0,2,c3,c1,c2);
|
||||
stg c3,2*8(%r2) // r[2]
|
||||
lghi c3,0
|
||||
|
||||
mul_add_c(0,3,c1,c2,c3);
|
||||
mul_add_c(1,2,c1,c2,c3);
|
||||
mul_add_c(2,1,c1,c2,c3);
|
||||
mul_add_c(3,0,c1,c2,c3);
|
||||
stg c1,3*8(%r2) // r[3]
|
||||
lghi c1,0
|
||||
|
||||
mul_add_c(4,0,c2,c3,c1);
|
||||
mul_add_c(3,1,c2,c3,c1);
|
||||
mul_add_c(2,2,c2,c3,c1);
|
||||
mul_add_c(1,3,c2,c3,c1);
|
||||
mul_add_c(0,4,c2,c3,c1);
|
||||
stg c2,4*8(%r2) // r[4]
|
||||
lghi c2,0
|
||||
|
||||
mul_add_c(0,5,c3,c1,c2);
|
||||
mul_add_c(1,4,c3,c1,c2);
|
||||
mul_add_c(2,3,c3,c1,c2);
|
||||
mul_add_c(3,2,c3,c1,c2);
|
||||
mul_add_c(4,1,c3,c1,c2);
|
||||
mul_add_c(5,0,c3,c1,c2);
|
||||
stg c3,5*8(%r2) // r[5]
|
||||
lghi c3,0
|
||||
|
||||
mul_add_c(6,0,c1,c2,c3);
|
||||
mul_add_c(5,1,c1,c2,c3);
|
||||
mul_add_c(4,2,c1,c2,c3);
|
||||
mul_add_c(3,3,c1,c2,c3);
|
||||
mul_add_c(2,4,c1,c2,c3);
|
||||
mul_add_c(1,5,c1,c2,c3);
|
||||
mul_add_c(0,6,c1,c2,c3);
|
||||
stg c1,6*8(%r2) // r[6]
|
||||
lghi c1,0
|
||||
|
||||
mul_add_c(0,7,c2,c3,c1);
|
||||
mul_add_c(1,6,c2,c3,c1);
|
||||
mul_add_c(2,5,c2,c3,c1);
|
||||
mul_add_c(3,4,c2,c3,c1);
|
||||
mul_add_c(4,3,c2,c3,c1);
|
||||
mul_add_c(5,2,c2,c3,c1);
|
||||
mul_add_c(6,1,c2,c3,c1);
|
||||
mul_add_c(7,0,c2,c3,c1);
|
||||
stg c2,7*8(%r2) // r[7]
|
||||
lghi c2,0
|
||||
|
||||
mul_add_c(7,1,c3,c1,c2);
|
||||
mul_add_c(6,2,c3,c1,c2);
|
||||
mul_add_c(5,3,c3,c1,c2);
|
||||
mul_add_c(4,4,c3,c1,c2);
|
||||
mul_add_c(3,5,c3,c1,c2);
|
||||
mul_add_c(2,6,c3,c1,c2);
|
||||
mul_add_c(1,7,c3,c1,c2);
|
||||
stg c3,8*8(%r2) // r[8]
|
||||
lghi c3,0
|
||||
|
||||
mul_add_c(2,7,c1,c2,c3);
|
||||
mul_add_c(3,6,c1,c2,c3);
|
||||
mul_add_c(4,5,c1,c2,c3);
|
||||
mul_add_c(5,4,c1,c2,c3);
|
||||
mul_add_c(6,3,c1,c2,c3);
|
||||
mul_add_c(7,2,c1,c2,c3);
|
||||
stg c1,9*8(%r2) // r[9]
|
||||
lghi c1,0
|
||||
|
||||
mul_add_c(7,3,c2,c3,c1);
|
||||
mul_add_c(6,4,c2,c3,c1);
|
||||
mul_add_c(5,5,c2,c3,c1);
|
||||
mul_add_c(4,6,c2,c3,c1);
|
||||
mul_add_c(3,7,c2,c3,c1);
|
||||
stg c2,10*8(%r2) // r[10]
|
||||
lghi c2,0
|
||||
|
||||
mul_add_c(4,7,c3,c1,c2);
|
||||
mul_add_c(5,6,c3,c1,c2);
|
||||
mul_add_c(6,5,c3,c1,c2);
|
||||
mul_add_c(7,4,c3,c1,c2);
|
||||
stg c3,11*8(%r2) // r[11]
|
||||
lghi c3,0
|
||||
|
||||
mul_add_c(7,5,c1,c2,c3);
|
||||
mul_add_c(6,6,c1,c2,c3);
|
||||
mul_add_c(5,7,c1,c2,c3);
|
||||
stg c1,12*8(%r2) // r[12]
|
||||
lghi c1,0
|
||||
|
||||
|
||||
mul_add_c(6,7,c2,c3,c1);
|
||||
mul_add_c(7,6,c2,c3,c1);
|
||||
stg c2,13*8(%r2) // r[13]
|
||||
lghi c2,0
|
||||
|
||||
mul_add_c(7,7,c3,c1,c2);
|
||||
stg c3,14*8(%r2) // r[14]
|
||||
stg c1,15*8(%r2) // r[15]
|
||||
|
||||
lmg %r6,%r8,48(%r15) // restore %r6-%r8
|
||||
br %r14
|
||||
.size bn_mul_comba8,.-bn_mul_comba8
|
||||
|
||||
// void bn_mul_comba4(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
// r[0..7] = a[0..3] * b[0..3] (r at %r2, a at %r3, b at %r4), computed column
// by column: all products a[i]*b[j] with i+j == k are added into a three-word
// accumulator before result word k is stored; the accumulator registers
// rotate through c1/c2/c3 from one column to the next.
|
||||
.globl bn_mul_comba4
|
||||
.type bn_mul_comba4,@function
|
||||
.align 4
|
||||
bn_mul_comba4:
|
||||
stmg %r6,%r8,48(%r15) // save %r6-%r8, restored before return
|
||||
|
||||
lghi c1,0
|
||||
lghi c2,0
|
||||
lghi c3,0
|
||||
lghi zero,0 // mul_add_c relies on zero == 0
|
||||
|
||||
mul_add_c(0,0,c1,c2,c3);
|
||||
stg c1,0*8(%r2) // r[0]; must go to the result (%r2), not to a (%r3)
|
||||
lghi c1,0
|
||||
|
||||
mul_add_c(0,1,c2,c3,c1);
|
||||
mul_add_c(1,0,c2,c3,c1);
|
||||
stg c2,1*8(%r2) // r[1]
|
||||
lghi c2,0
|
||||
|
||||
mul_add_c(2,0,c3,c1,c2);
|
||||
mul_add_c(1,1,c3,c1,c2);
|
||||
mul_add_c(0,2,c3,c1,c2);
|
||||
stg c3,2*8(%r2) // r[2]
|
||||
lghi c3,0
|
||||
|
||||
mul_add_c(0,3,c1,c2,c3);
|
||||
mul_add_c(1,2,c1,c2,c3);
|
||||
mul_add_c(2,1,c1,c2,c3);
|
||||
mul_add_c(3,0,c1,c2,c3);
|
||||
stg c1,3*8(%r2) // r[3]
|
||||
lghi c1,0
|
||||
|
||||
mul_add_c(3,1,c2,c3,c1);
|
||||
mul_add_c(2,2,c2,c3,c1);
|
||||
mul_add_c(1,3,c2,c3,c1);
|
||||
stg c2,4*8(%r2) // r[4]
|
||||
lghi c2,0
|
||||
|
||||
mul_add_c(2,3,c3,c1,c2);
|
||||
mul_add_c(3,2,c3,c1,c2);
|
||||
stg c3,5*8(%r2) // r[5]
|
||||
lghi c3,0
|
||||
|
||||
mul_add_c(3,3,c1,c2,c3);
|
||||
stg c1,6*8(%r2) // r[6]
|
||||
stg c2,7*8(%r2) // r[7]
|
||||
|
||||
lmg %r6,%r8,48(%r15) // restore %r6-%r8 (a store here would leave them clobbered)
|
||||
br %r14
|
||||
.size bn_mul_comba4,.-bn_mul_comba4
|
||||
|
||||
// sqr_add_c(ai,c1,c2,c3): adds the 128-bit square a[ai]*a[ai] (a at %r3) into
// the three-word accumulator c3:c2:c1. Uses %r6:%r7 as the product pair and
// expects zero (%r0) to hold 0.
#define sqr_add_c(ai,c1,c2,c3) \
|
||||
lg %r7,ai*8(%r3); \
|
||||
mlgr %r6,%r7; \
|
||||
algr c1,%r7; \
|
||||
alcgr c2,%r6; \
|
||||
alcgr c3,zero
|
||||
|
||||
// sqr_add_c2(ai,aj,c1,c2,c3): adds the 128-bit product a[ai]*a[aj] into the
// accumulator c3:c2:c1 twice (the second add sequence repeats the first),
// covering both the a[ai]*a[aj] and a[aj]*a[ai] terms of a square.
#define sqr_add_c2(ai,aj,c1,c2,c3) \
|
||||
lg %r7,ai*8(%r3); \
|
||||
mlg %r6,aj*8(%r3); \
|
||||
algr c1,%r7; \
|
||||
alcgr c2,%r6; \
|
||||
alcgr c3,zero; \
|
||||
algr c1,%r7; \
|
||||
alcgr c2,%r6; \
|
||||
alcgr c3,zero
|
||||
|
||||
// void bn_sqr_comba8(BN_ULONG *r2,BN_ULONG *r3);
// r[0..15] = a[0..7]^2 (r at %r2, a at %r3), computed column by column.
// Diagonal terms use sqr_add_c; off-diagonal pairs use sqr_add_c2, which adds
// each product twice. The accumulator registers rotate through c1/c2/c3.
|
||||
.globl bn_sqr_comba8
|
||||
.type bn_sqr_comba8,@function
|
||||
.align 4
|
||||
bn_sqr_comba8:
|
||||
stmg %r6,%r8,48(%r15) // save %r6-%r8, restored before return
|
||||
|
||||
lghi c1,0
|
||||
lghi c2,0
|
||||
lghi c3,0
|
||||
lghi zero,0 // the sqr_add_c macros rely on zero == 0
|
||||
|
||||
sqr_add_c(0,c1,c2,c3);
|
||||
stg c1,0*8(%r2) // r[0]
|
||||
lghi c1,0
|
||||
|
||||
sqr_add_c2(1,0,c2,c3,c1);
|
||||
stg c2,1*8(%r2) // r[1]
|
||||
lghi c2,0
|
||||
|
||||
sqr_add_c(1,c3,c1,c2);
|
||||
sqr_add_c2(2,0,c3,c1,c2);
|
||||
stg c3,2*8(%r2) // r[2]
|
||||
lghi c3,0
|
||||
|
||||
sqr_add_c2(3,0,c1,c2,c3);
|
||||
sqr_add_c2(2,1,c1,c2,c3);
|
||||
stg c1,3*8(%r2) // r[3]
|
||||
lghi c1,0
|
||||
|
||||
sqr_add_c(2,c2,c3,c1);
|
||||
sqr_add_c2(3,1,c2,c3,c1);
|
||||
sqr_add_c2(4,0,c2,c3,c1);
|
||||
stg c2,4*8(%r2) // r[4]
|
||||
lghi c2,0
|
||||
|
||||
sqr_add_c2(5,0,c3,c1,c2);
|
||||
sqr_add_c2(4,1,c3,c1,c2);
|
||||
sqr_add_c2(3,2,c3,c1,c2);
|
||||
stg c3,5*8(%r2) // r[5]
|
||||
lghi c3,0
|
||||
|
||||
sqr_add_c(3,c1,c2,c3);
|
||||
sqr_add_c2(4,2,c1,c2,c3);
|
||||
sqr_add_c2(5,1,c1,c2,c3);
|
||||
sqr_add_c2(6,0,c1,c2,c3);
|
||||
stg c1,6*8(%r2) // r[6]
|
||||
lghi c1,0
|
||||
|
||||
sqr_add_c2(7,0,c2,c3,c1);
|
||||
sqr_add_c2(6,1,c2,c3,c1);
|
||||
sqr_add_c2(5,2,c2,c3,c1);
|
||||
sqr_add_c2(4,3,c2,c3,c1);
|
||||
stg c2,7*8(%r2) // r[7]
|
||||
lghi c2,0
|
||||
|
||||
sqr_add_c(4,c3,c1,c2);
|
||||
sqr_add_c2(5,3,c3,c1,c2);
|
||||
sqr_add_c2(6,2,c3,c1,c2);
|
||||
sqr_add_c2(7,1,c3,c1,c2);
|
||||
stg c3,8*8(%r2) // r[8]
|
||||
lghi c3,0
|
||||
|
||||
sqr_add_c2(7,2,c1,c2,c3);
|
||||
sqr_add_c2(6,3,c1,c2,c3);
|
||||
sqr_add_c2(5,4,c1,c2,c3);
|
||||
stg c1,9*8(%r2) // r[9]
|
||||
lghi c1,0
|
||||
|
||||
sqr_add_c(5,c2,c3,c1);
|
||||
sqr_add_c2(6,4,c2,c3,c1);
|
||||
sqr_add_c2(7,3,c2,c3,c1);
|
||||
stg c2,10*8(%r2) // r[10]
|
||||
lghi c2,0
|
||||
|
||||
sqr_add_c2(7,4,c3,c1,c2);
|
||||
sqr_add_c2(6,5,c3,c1,c2);
|
||||
stg c3,11*8(%r2) // r[11]
|
||||
lghi c3,0
|
||||
|
||||
sqr_add_c(6,c1,c2,c3);
|
||||
sqr_add_c2(7,5,c1,c2,c3);
|
||||
stg c1,12*8(%r2) // r[12]
|
||||
lghi c1,0
|
||||
|
||||
sqr_add_c2(7,6,c2,c3,c1);
|
||||
stg c2,13*8(%r2) // r[13]
|
||||
lghi c2,0
|
||||
|
||||
sqr_add_c(7,c3,c1,c2);
|
||||
stg c3,14*8(%r2) // r[14]
|
||||
stg c1,15*8(%r2) // r[15]
|
||||
|
||||
lmg %r6,%r8,48(%r15) // restore %r6-%r8
|
||||
br %r14
|
||||
.size bn_sqr_comba8,.-bn_sqr_comba8
|
||||
|
||||
// void bn_sqr_comba4(BN_ULONG *r2,BN_ULONG *r3);
// r[0..7] = a[0..3]^2 (r at %r2, a at %r3), computed column by column.
// Diagonal terms use sqr_add_c; off-diagonal pairs use sqr_add_c2, which adds
// each product twice. The accumulator registers rotate through c1/c2/c3.
|
||||
.globl bn_sqr_comba4
|
||||
.type bn_sqr_comba4,@function
|
||||
.align 4
|
||||
bn_sqr_comba4:
|
||||
stmg %r6,%r8,48(%r15) // save %r6-%r8, restored before return
|
||||
|
||||
lghi c1,0
|
||||
lghi c2,0
|
||||
lghi c3,0
|
||||
lghi zero,0 // the sqr_add_c macros rely on zero == 0
|
||||
|
||||
sqr_add_c(0,c1,c2,c3);
|
||||
stg c1,0*8(%r2) // r[0]
|
||||
lghi c1,0
|
||||
|
||||
sqr_add_c2(1,0,c2,c3,c1);
|
||||
stg c2,1*8(%r2) // r[1]
|
||||
lghi c2,0
|
||||
|
||||
sqr_add_c(1,c3,c1,c2);
|
||||
sqr_add_c2(2,0,c3,c1,c2);
|
||||
stg c3,2*8(%r2) // r[2]
|
||||
lghi c3,0
|
||||
|
||||
sqr_add_c2(3,0,c1,c2,c3);
|
||||
sqr_add_c2(2,1,c1,c2,c3);
|
||||
stg c1,3*8(%r2) // r[3]
|
||||
lghi c1,0
|
||||
|
||||
sqr_add_c(2,c2,c3,c1);
|
||||
sqr_add_c2(3,1,c2,c3,c1);
|
||||
stg c2,4*8(%r2) // r[4]
|
||||
lghi c2,0
|
||||
|
||||
sqr_add_c2(3,2,c3,c1,c2);
|
||||
stg c3,5*8(%r2) // r[5]
|
||||
lghi c3,0
|
||||
|
||||
sqr_add_c(3,c1,c2,c3);
|
||||
stg c1,6*8(%r2) // r[6]
|
||||
stg c2,7*8(%r2) // r[7]
|
||||
|
||||
lmg %r6,%r8,48(%r15) // restore %r6-%r8
|
||||
br %r14
|
||||
.size bn_sqr_comba4,.-bn_sqr_comba4
|
1228
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparct4-mont.pl
vendored
Executable file
1228
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparct4-mont.pl
vendored
Executable file
File diff suppressed because it is too large
Load diff
1458
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv8.S
vendored
Normal file
1458
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv8.S
vendored
Normal file
File diff suppressed because it is too large
Load diff
1558
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv8plus.S
vendored
Normal file
1558
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv8plus.S
vendored
Normal file
File diff suppressed because it is too large
Load diff
200
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv9-gf2m.pl
vendored
Normal file
200
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv9-gf2m.pl
vendored
Normal file
|
@ -0,0 +1,200 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
#
|
||||
# October 2012
|
||||
#
|
||||
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
|
||||
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
|
||||
# the time being... Except that it has two code paths: one suitable
|
||||
# for all SPARCv9 processors and one for VIS3-capable ones. Former
|
||||
# delivers ~25-45% more, more for longer keys, heaviest DH and DSA
|
||||
# verify operations on venerable UltraSPARC II. On T4 VIS3 code is
|
||||
# ~100-230% faster than gcc-generated code and ~35-90% faster than
|
||||
# the pure SPARCv9 code path.
|
||||
|
||||
$output = pop; # last command-line argument is the output file
|
||||
open STDOUT,">$output"; # everything printed below goes to $output
|
||||
|
||||
$locals=16*8; # stack space for the 16-entry table of 8-byte values (tab[0..15])
|
||||
|
||||
$tab="%l0"; # base address of that table in the software code path
|
||||
|
||||
# Two-element register lists; the unrolling loop rotates them after each step.
@T=("%g2","%g3");
|
||||
@i=("%g4","%g5");
|
||||
|
||||
($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5)); # %o0..%o5
|
||||
($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo; # $hi shares a register with $a8, $a with $lo
|
||||
|
||||
$code.=<<___;
|
||||
#include <sparc_arch.h>
|
||||
|
||||
#ifdef __arch64__
|
||||
.register %g2,#scratch
|
||||
.register %g3,#scratch
|
||||
#endif
|
||||
|
||||
#ifdef __PIC__
|
||||
SPARC_PIC_THUNK(%g1)
|
||||
#endif
|
||||
|
||||
.globl bn_GF2m_mul_2x2
|
||||
.align 16
|
||||
bn_GF2m_mul_2x2:
|
||||
SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
|
||||
ld [%g1+0],%g1 ! OPENSSL_sparcv9cap_P[0]
|
||||
|
||||
andcc %g1, SPARCV9_VIS3, %g0
|
||||
bz,pn %icc,.Lsoftware
|
||||
nop
|
||||
|
||||
sllx %o1, 32, %o1
|
||||
sllx %o3, 32, %o3
|
||||
or %o2, %o1, %o1
|
||||
or %o4, %o3, %o3
|
||||
.word 0x95b262ab ! xmulx %o1, %o3, %o2
|
||||
.word 0x99b262cb ! xmulxhi %o1, %o3, %o4
|
||||
srlx %o2, 32, %o1 ! 13 cycles later
|
||||
st %o2, [%o0+0]
|
||||
st %o1, [%o0+4]
|
||||
srlx %o4, 32, %o3
|
||||
st %o4, [%o0+8]
|
||||
retl
|
||||
st %o3, [%o0+12]
|
||||
|
||||
.align 16
|
||||
.Lsoftware:
|
||||
save %sp,-STACK_FRAME-$locals,%sp
|
||||
|
||||
sllx %i1,32,$a
|
||||
mov -1,$a12
|
||||
sllx %i3,32,$b
|
||||
or %i2,$a,$a
|
||||
srlx $a12,1,$a48 ! 0x7fff...
|
||||
or %i4,$b,$b
|
||||
srlx $a12,2,$a12 ! 0x3fff...
|
||||
add %sp,STACK_BIAS+STACK_FRAME,$tab
|
||||
|
||||
sllx $a,2,$a4
|
||||
mov $a,$a1
|
||||
sllx $a,1,$a2
|
||||
|
||||
srax $a4,63,@i[1] ! broadcast 61st bit
|
||||
and $a48,$a4,$a4 ! (a<<2)&0x7fff...
|
||||
srlx $a48,2,$a48
|
||||
srax $a2,63,@i[0] ! broadcast 62nd bit
|
||||
and $a12,$a2,$a2 ! (a<<1)&0x3fff...
|
||||
srax $a1,63,$lo ! broadcast 63rd bit
|
||||
and $a48,$a1,$a1 ! (a<<0)&0x1fff...
|
||||
|
||||
sllx $a1,3,$a8
|
||||
and $b,$lo,$lo
|
||||
and $b,@i[0],@i[0]
|
||||
and $b,@i[1],@i[1]
|
||||
|
||||
stx %g0,[$tab+0*8] ! tab[0]=0
|
||||
xor $a1,$a2,$a12
|
||||
stx $a1,[$tab+1*8] ! tab[1]=a1
|
||||
stx $a2,[$tab+2*8] ! tab[2]=a2
|
||||
xor $a4,$a8,$a48
|
||||
stx $a12,[$tab+3*8] ! tab[3]=a1^a2
|
||||
xor $a4,$a1,$a1
|
||||
|
||||
stx $a4,[$tab+4*8] ! tab[4]=a4
|
||||
xor $a4,$a2,$a2
|
||||
stx $a1,[$tab+5*8] ! tab[5]=a1^a4
|
||||
xor $a4,$a12,$a12
|
||||
stx $a2,[$tab+6*8] ! tab[6]=a2^a4
|
||||
xor $a48,$a1,$a1
|
||||
stx $a12,[$tab+7*8] ! tab[7]=a1^a2^a4
|
||||
xor $a48,$a2,$a2
|
||||
|
||||
stx $a8,[$tab+8*8] ! tab[8]=a8
|
||||
xor $a48,$a12,$a12
|
||||
stx $a1,[$tab+9*8] ! tab[9]=a1^a8
|
||||
xor $a4,$a1,$a1
|
||||
stx $a2,[$tab+10*8] ! tab[10]=a2^a8
|
||||
xor $a4,$a2,$a2
|
||||
stx $a12,[$tab+11*8] ! tab[11]=a1^a2^a8
|
||||
|
||||
xor $a4,$a12,$a12
|
||||
stx $a48,[$tab+12*8] ! tab[12]=a4^a8
|
||||
srlx $lo,1,$hi
|
||||
stx $a1,[$tab+13*8] ! tab[13]=a1^a4^a8
|
||||
sllx $lo,63,$lo
|
||||
stx $a2,[$tab+14*8] ! tab[14]=a2^a4^a8
|
||||
srlx @i[0],2,@T[0]
|
||||
stx $a12,[$tab+15*8] ! tab[15]=a1^a2^a4^a8
|
||||
|
||||
sllx @i[0],62,$a1
|
||||
sllx $b,3,@i[0]
|
||||
srlx @i[1],3,@T[1]
|
||||
and @i[0],`0xf<<3`,@i[0]
|
||||
sllx @i[1],61,$a2
|
||||
ldx [$tab+@i[0]],@i[0]
|
||||
srlx $b,4-3,@i[1]
|
||||
xor @T[0],$hi,$hi
|
||||
and @i[1],`0xf<<3`,@i[1]
|
||||
xor $a1,$lo,$lo
|
||||
ldx [$tab+@i[1]],@i[1]
|
||||
xor @T[1],$hi,$hi
|
||||
|
||||
xor @i[0],$lo,$lo
|
||||
srlx $b,8-3,@i[0]
|
||||
xor $a2,$lo,$lo
|
||||
and @i[0],`0xf<<3`,@i[0]
|
||||
___
|
||||
# Emit the table-lookup step for n = 1..13; the backtick expressions inside
# the template (shift counts derived from $n) are evaluated when the script
# post-processes $code.
for($n=1;$n<14;$n++) {
|
||||
$code.=<<___;
|
||||
sllx @i[1],`$n*4`,@T[0]
|
||||
ldx [$tab+@i[0]],@i[0]
|
||||
srlx @i[1],`64-$n*4`,@T[1]
|
||||
xor @T[0],$lo,$lo
|
||||
srlx $b,`($n+2)*4`-3,@i[1]
|
||||
xor @T[1],$hi,$hi
|
||||
and @i[1],`0xf<<3`,@i[1]
|
||||
___
|
||||
push(@i,shift(@i)); push(@T,shift(@T)); # swap the two registers in each list for the next step
|
||||
}
|
||||
$code.=<<___;
|
||||
sllx @i[1],`$n*4`,@T[0]
|
||||
ldx [$tab+@i[0]],@i[0]
|
||||
srlx @i[1],`64-$n*4`,@T[1]
|
||||
xor @T[0],$lo,$lo
|
||||
|
||||
sllx @i[0],`($n+1)*4`,@T[0]
|
||||
xor @T[1],$hi,$hi
|
||||
srlx @i[0],`64-($n+1)*4`,@T[1]
|
||||
xor @T[0],$lo,$lo
|
||||
xor @T[1],$hi,$hi
|
||||
|
||||
srlx $lo,32,%i1
|
||||
st $lo,[%i0+0]
|
||||
st %i1,[%i0+4]
|
||||
srlx $hi,32,%i2
|
||||
st $hi,[%i0+8]
|
||||
st %i2,[%i0+12]
|
||||
|
||||
ret
|
||||
restore
|
||||
.type bn_GF2m_mul_2x2,#function
|
||||
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
|
||||
.asciz "GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 4
|
||||
___
|
||||
|
||||
# Expand every `...` span of the generated text by evaluating it as Perl;
# the template uses these spans for computed shift counts and masks.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
|
||||
print $code;
|
||||
# A failed close means the assembly output was not completely written;
# stop the build instead of leaving a truncated file behind.
close STDOUT or die "error closing STDOUT: $!";
|
620
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv9-mont.pl
vendored
Normal file
620
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv9-mont.pl
vendored
Normal file
|
@ -0,0 +1,620 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2005-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
|
||||
# December 2005
|
||||
#
|
||||
# Pure SPARCv9/8+ and IALU-only bn_mul_mont implementation. The reasons
|
||||
# for undertaking the effort are multiple. First of all, UltraSPARC is not
|
||||
# the whole SPARCv9 universe and other VIS-free implementations deserve
|
||||
# optimized code as much. Secondly, newly introduced UltraSPARC T1,
|
||||
# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive paths,
|
||||
# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with
|
||||
# several integrated RSA/DSA accelerator circuits accessible through
|
||||
# kernel driver [only(*)], but having decent user-land software
|
||||
# implementation is important too. Finally, reasons like desire to
|
||||
# experiment with dedicated squaring procedure. Yes, this module
|
||||
# implements one, because it was easiest to draft it in SPARCv9
|
||||
# instructions...
|
||||
|
||||
# (*) Engine accessing the driver in question is on my TODO list.
|
||||
# For reference, accelerator is estimated to give 6 to 10 times
|
||||
# improvement on single-threaded RSA sign. It should be noted
|
||||
# that 6-10x improvement coefficient does not actually mean
|
||||
# something extraordinary in terms of absolute [single-threaded]
|
||||
# performance, as SPARCv9 instruction set is by all means least
|
||||
# suitable for high performance crypto among other 64 bit
|
||||
# platforms. 6-10x factor simply places T1 in same performance
|
||||
# domain as say AMD64 and IA-64. Improvement of RSA verify doesn't
|
||||
# appear impressive at all, but it's the sign operation which is
|
||||
# far more critical/interesting.
|
||||
|
||||
# You might notice that inner loops are modulo-scheduled:-) This has
|
||||
# essentially negligible impact on UltraSPARC performance, it's
|
||||
# Fujitsu SPARC64 V users who should notice and hopefully appreciate
|
||||
# the advantage... Currently this module surpasses sparcv9a-mont.pl
|
||||
# by ~20% on UltraSPARC-III and later cores, but recall that sparcv9a
|
||||
# module still has hidden potential [see TODO list there], which is
|
||||
# estimated to be larger than 20%...
|
||||
|
||||
# The last command-line argument names the file to generate.  STDOUT is
# redirected to it so that the final "print $code" writes the assembly there.
$output = pop;
# Re-opening STDOUT closes the old handle first, so an unchecked failure
# would make every later print vanish silently; stop with a diagnostic instead.
open STDOUT,">$output" or die "can't open $output: $!";
|
||||
|
||||
# Register allocation for
#
#	int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
#			const BN_ULONG *np, const BN_ULONG *n0, int num);
#
# The six arguments map onto %i0-%i5 in prototype order.
($rp,$ap,$bp,$np,$n0,$num)=map("%i$_",(0..5));

# Substituted into the template as plain text (presumably macros supplied
# by sparc_arch.h, which the template includes -- confirm there).
($frame,$bias)=("STACK_FRAME","STACK_BIAS");

# Carries and accumulators live in the "out" registers; $car2 is 1 bit.
($car0,$car1,$car2,$acc0,$acc1)=map("%o$_",(0..4));

# $mask is 32 bits, what a waste...  $tmp0/$tmp1 are temporaries.
($mask,$tmp0,$tmp1)=map("%g$_",(1,4,5));

# Loop counters, multipliers, the tp pointer and the ap[j]/np[j]/tp[j]
# values occupy the "local" registers.
($i,$j,$mul0,$mul1,$tp,$apj,$npj,$tpj)=map("%l$_",(0..7));

# Symbol name of the generated function.
$fname="bn_mul_mont_int";
|
||||
|
||||
$code=<<___;
|
||||
#include "sparc_arch.h"
|
||||
|
||||
.section ".text",#alloc,#execinstr
|
||||
|
||||
.global $fname
|
||||
.align 32
|
||||
$fname:
|
||||
cmp %o5,4 ! 128 bits minimum
|
||||
bge,pt %icc,.Lenter
|
||||
sethi %hi(0xffffffff),$mask
|
||||
retl
|
||||
clr %o0
|
||||
.align 32
|
||||
.Lenter:
|
||||
save %sp,-$frame,%sp
|
||||
sll $num,2,$num ! num*=4
|
||||
or $mask,%lo(0xffffffff),$mask
|
||||
ld [$n0],$n0
|
||||
cmp $ap,$bp
|
||||
and $num,$mask,$num
|
||||
ld [$bp],$mul0 ! bp[0]
|
||||
nop
|
||||
|
||||
add %sp,$bias,%o7 ! real top of stack
|
||||
ld [$ap],$car0 ! ap[0] ! redundant in squaring context
|
||||
sub %o7,$num,%o7
|
||||
ld [$ap+4],$apj ! ap[1]
|
||||
and %o7,-1024,%o7
|
||||
ld [$np],$car1 ! np[0]
|
||||
sub %o7,$bias,%sp ! alloca
|
||||
ld [$np+4],$npj ! np[1]
|
||||
be,pt SIZE_T_CC,.Lbn_sqr_mont
|
||||
mov 12,$j
|
||||
|
||||
mulx $car0,$mul0,$car0 ! ap[0]*bp[0]
|
||||
mulx $apj,$mul0,$tmp0 !prologue! ap[1]*bp[0]
|
||||
and $car0,$mask,$acc0
|
||||
add %sp,$bias+$frame,$tp
|
||||
ld [$ap+8],$apj !prologue!
|
||||
|
||||
mulx $n0,$acc0,$mul1 ! "t[0]"*n0
|
||||
and $mul1,$mask,$mul1
|
||||
|
||||
mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0
|
||||
mulx $npj,$mul1,$acc1 !prologue! np[1]*"t[0]"*n0
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$car1,$car1
|
||||
ld [$np+8],$npj !prologue!
|
||||
srlx $car1,32,$car1
|
||||
mov $tmp0,$acc0 !prologue!
|
||||
|
||||
.L1st:
|
||||
mulx $apj,$mul0,$tmp0
|
||||
mulx $npj,$mul1,$tmp1
|
||||
add $acc0,$car0,$car0
|
||||
ld [$ap+$j],$apj ! ap[j]
|
||||
and $car0,$mask,$acc0
|
||||
add $acc1,$car1,$car1
|
||||
ld [$np+$j],$npj ! np[j]
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$car1,$car1
|
||||
add $j,4,$j ! j++
|
||||
mov $tmp0,$acc0
|
||||
st $car1,[$tp]
|
||||
cmp $j,$num
|
||||
mov $tmp1,$acc1
|
||||
srlx $car1,32,$car1
|
||||
bl %icc,.L1st
|
||||
add $tp,4,$tp ! tp++
|
||||
!.L1st
|
||||
|
||||
mulx $apj,$mul0,$tmp0 !epilogue!
|
||||
mulx $npj,$mul1,$tmp1
|
||||
add $acc0,$car0,$car0
|
||||
and $car0,$mask,$acc0
|
||||
add $acc1,$car1,$car1
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$car1,$car1
|
||||
st $car1,[$tp]
|
||||
srlx $car1,32,$car1
|
||||
|
||||
add $tmp0,$car0,$car0
|
||||
and $car0,$mask,$acc0
|
||||
add $tmp1,$car1,$car1
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$car1,$car1
|
||||
st $car1,[$tp+4]
|
||||
srlx $car1,32,$car1
|
||||
|
||||
add $car0,$car1,$car1
|
||||
st $car1,[$tp+8]
|
||||
srlx $car1,32,$car2
|
||||
|
||||
mov 4,$i ! i++
|
||||
ld [$bp+4],$mul0 ! bp[1]
|
||||
.Louter:
|
||||
add %sp,$bias+$frame,$tp
|
||||
ld [$ap],$car0 ! ap[0]
|
||||
ld [$ap+4],$apj ! ap[1]
|
||||
ld [$np],$car1 ! np[0]
|
||||
ld [$np+4],$npj ! np[1]
|
||||
ld [$tp],$tmp1 ! tp[0]
|
||||
ld [$tp+4],$tpj ! tp[1]
|
||||
mov 12,$j
|
||||
|
||||
mulx $car0,$mul0,$car0
|
||||
mulx $apj,$mul0,$tmp0 !prologue!
|
||||
add $tmp1,$car0,$car0
|
||||
ld [$ap+8],$apj !prologue!
|
||||
and $car0,$mask,$acc0
|
||||
|
||||
mulx $n0,$acc0,$mul1
|
||||
and $mul1,$mask,$mul1
|
||||
|
||||
mulx $car1,$mul1,$car1
|
||||
mulx $npj,$mul1,$acc1 !prologue!
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$car1,$car1
|
||||
ld [$np+8],$npj !prologue!
|
||||
srlx $car1,32,$car1
|
||||
mov $tmp0,$acc0 !prologue!
|
||||
|
||||
.Linner:
|
||||
mulx $apj,$mul0,$tmp0
|
||||
mulx $npj,$mul1,$tmp1
|
||||
add $tpj,$car0,$car0
|
||||
ld [$ap+$j],$apj ! ap[j]
|
||||
add $acc0,$car0,$car0
|
||||
add $acc1,$car1,$car1
|
||||
ld [$np+$j],$npj ! np[j]
|
||||
and $car0,$mask,$acc0
|
||||
ld [$tp+8],$tpj ! tp[j]
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$car1,$car1
|
||||
add $j,4,$j ! j++
|
||||
mov $tmp0,$acc0
|
||||
st $car1,[$tp] ! tp[j-1]
|
||||
srlx $car1,32,$car1
|
||||
mov $tmp1,$acc1
|
||||
cmp $j,$num
|
||||
bl %icc,.Linner
|
||||
add $tp,4,$tp ! tp++
|
||||
!.Linner
|
||||
|
||||
mulx $apj,$mul0,$tmp0 !epilogue!
|
||||
mulx $npj,$mul1,$tmp1
|
||||
add $tpj,$car0,$car0
|
||||
add $acc0,$car0,$car0
|
||||
ld [$tp+8],$tpj ! tp[j]
|
||||
and $car0,$mask,$acc0
|
||||
add $acc1,$car1,$car1
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$car1,$car1
|
||||
st $car1,[$tp] ! tp[j-1]
|
||||
srlx $car1,32,$car1
|
||||
|
||||
add $tpj,$car0,$car0
|
||||
add $tmp0,$car0,$car0
|
||||
and $car0,$mask,$acc0
|
||||
add $tmp1,$car1,$car1
|
||||
add $acc0,$car1,$car1
|
||||
st $car1,[$tp+4] ! tp[j-1]
|
||||
srlx $car0,32,$car0
|
||||
add $i,4,$i ! i++
|
||||
srlx $car1,32,$car1
|
||||
|
||||
add $car0,$car1,$car1
|
||||
cmp $i,$num
|
||||
add $car2,$car1,$car1
|
||||
st $car1,[$tp+8]
|
||||
|
||||
srlx $car1,32,$car2
|
||||
bl,a %icc,.Louter
|
||||
ld [$bp+$i],$mul0 ! bp[i]
|
||||
!.Louter
|
||||
|
||||
add $tp,12,$tp
|
||||
|
||||
.Ltail:
|
||||
add $np,$num,$np
|
||||
add $rp,$num,$rp
|
||||
sub %g0,$num,%o7 ! k=-num
|
||||
ba .Lsub
|
||||
subcc %g0,%g0,%g0 ! clear %icc.c
|
||||
.align 16
|
||||
.Lsub:
|
||||
ld [$tp+%o7],%o0
|
||||
ld [$np+%o7],%o1
|
||||
subccc %o0,%o1,%o1 ! tp[j]-np[j]
|
||||
add $rp,%o7,$i
|
||||
add %o7,4,%o7
|
||||
brnz %o7,.Lsub
|
||||
st %o1,[$i]
|
||||
subccc $car2,0,$car2 ! handle upmost overflow bit
|
||||
sub %g0,$num,%o7
|
||||
|
||||
.Lcopy:
|
||||
ld [$tp+%o7],%o1 ! conditional copy
|
||||
ld [$rp+%o7],%o0
|
||||
st %g0,[$tp+%o7] ! zap tp
|
||||
movcs %icc,%o1,%o0
|
||||
st %o0,[$rp+%o7]
|
||||
add %o7,4,%o7
|
||||
brnz %o7,.Lcopy
|
||||
nop
|
||||
mov 1,%i0
|
||||
ret
|
||||
restore
|
||||
___
|
||||
|
||||
########
|
||||
######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
|
||||
######## code without following dedicated squaring procedure.
|
||||
########
|
||||
$sbit="%o5"; # extra register for the squaring code below; it receives the bits shifted out by the "srlx ...,32,$sbit" steps after a 32-bit word is doubled
|
||||
|
||||
$code.=<<___;
|
||||
.align 32
|
||||
.Lbn_sqr_mont:
|
||||
mulx $mul0,$mul0,$car0 ! ap[0]*ap[0]
|
||||
mulx $apj,$mul0,$tmp0 !prologue!
|
||||
and $car0,$mask,$acc0
|
||||
add %sp,$bias+$frame,$tp
|
||||
ld [$ap+8],$apj !prologue!
|
||||
|
||||
mulx $n0,$acc0,$mul1 ! "t[0]"*n0
|
||||
srlx $car0,32,$car0
|
||||
and $mul1,$mask,$mul1
|
||||
|
||||
mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0
|
||||
mulx $npj,$mul1,$acc1 !prologue!
|
||||
and $car0,1,$sbit
|
||||
ld [$np+8],$npj !prologue!
|
||||
srlx $car0,1,$car0
|
||||
add $acc0,$car1,$car1
|
||||
srlx $car1,32,$car1
|
||||
mov $tmp0,$acc0 !prologue!
|
||||
|
||||
.Lsqr_1st:
|
||||
mulx $apj,$mul0,$tmp0
|
||||
mulx $npj,$mul1,$tmp1
|
||||
add $acc0,$car0,$car0 ! ap[j]*a0+c0
|
||||
add $acc1,$car1,$car1
|
||||
ld [$ap+$j],$apj ! ap[j]
|
||||
and $car0,$mask,$acc0
|
||||
ld [$np+$j],$npj ! np[j]
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$acc0,$acc0
|
||||
or $sbit,$acc0,$acc0
|
||||
mov $tmp1,$acc1
|
||||
srlx $acc0,32,$sbit
|
||||
add $j,4,$j ! j++
|
||||
and $acc0,$mask,$acc0
|
||||
cmp $j,$num
|
||||
add $acc0,$car1,$car1
|
||||
st $car1,[$tp]
|
||||
mov $tmp0,$acc0
|
||||
srlx $car1,32,$car1
|
||||
bl %icc,.Lsqr_1st
|
||||
add $tp,4,$tp ! tp++
|
||||
!.Lsqr_1st
|
||||
|
||||
mulx $apj,$mul0,$tmp0 ! epilogue
|
||||
mulx $npj,$mul1,$tmp1
|
||||
add $acc0,$car0,$car0 ! ap[j]*a0+c0
|
||||
add $acc1,$car1,$car1
|
||||
and $car0,$mask,$acc0
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$acc0,$acc0
|
||||
or $sbit,$acc0,$acc0
|
||||
srlx $acc0,32,$sbit
|
||||
and $acc0,$mask,$acc0
|
||||
add $acc0,$car1,$car1
|
||||
st $car1,[$tp]
|
||||
srlx $car1,32,$car1
|
||||
|
||||
add $tmp0,$car0,$car0 ! ap[j]*a0+c0
|
||||
add $tmp1,$car1,$car1
|
||||
and $car0,$mask,$acc0
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$acc0,$acc0
|
||||
or $sbit,$acc0,$acc0
|
||||
srlx $acc0,32,$sbit
|
||||
and $acc0,$mask,$acc0
|
||||
add $acc0,$car1,$car1
|
||||
st $car1,[$tp+4]
|
||||
srlx $car1,32,$car1
|
||||
|
||||
add $car0,$car0,$car0
|
||||
or $sbit,$car0,$car0
|
||||
add $car0,$car1,$car1
|
||||
st $car1,[$tp+8]
|
||||
srlx $car1,32,$car2
|
||||
|
||||
ld [%sp+$bias+$frame],$tmp0 ! tp[0]
|
||||
ld [%sp+$bias+$frame+4],$tmp1 ! tp[1]
|
||||
ld [%sp+$bias+$frame+8],$tpj ! tp[2]
|
||||
ld [$ap+4],$mul0 ! ap[1]
|
||||
ld [$ap+8],$apj ! ap[2]
|
||||
ld [$np],$car1 ! np[0]
|
||||
ld [$np+4],$npj ! np[1]
|
||||
mulx $n0,$tmp0,$mul1
|
||||
|
||||
mulx $mul0,$mul0,$car0
|
||||
and $mul1,$mask,$mul1
|
||||
|
||||
mulx $car1,$mul1,$car1
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $tmp0,$car1,$car1
|
||||
and $car0,$mask,$acc0
|
||||
ld [$np+8],$npj ! np[2]
|
||||
srlx $car1,32,$car1
|
||||
add $tmp1,$car1,$car1
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$car1,$car1
|
||||
and $car0,1,$sbit
|
||||
add $acc1,$car1,$car1
|
||||
srlx $car0,1,$car0
|
||||
mov 12,$j
|
||||
st $car1,[%sp+$bias+$frame] ! tp[0]=
|
||||
srlx $car1,32,$car1
|
||||
add %sp,$bias+$frame+4,$tp
|
||||
|
||||
.Lsqr_2nd:
|
||||
mulx $apj,$mul0,$acc0
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $acc0,$car0,$car0
|
||||
add $tpj,$sbit,$sbit
|
||||
ld [$ap+$j],$apj ! ap[j]
|
||||
and $car0,$mask,$acc0
|
||||
ld [$np+$j],$npj ! np[j]
|
||||
srlx $car0,32,$car0
|
||||
add $acc1,$car1,$car1
|
||||
ld [$tp+8],$tpj ! tp[j]
|
||||
add $acc0,$acc0,$acc0
|
||||
add $j,4,$j ! j++
|
||||
add $sbit,$acc0,$acc0
|
||||
srlx $acc0,32,$sbit
|
||||
and $acc0,$mask,$acc0
|
||||
cmp $j,$num
|
||||
add $acc0,$car1,$car1
|
||||
st $car1,[$tp] ! tp[j-1]
|
||||
srlx $car1,32,$car1
|
||||
bl %icc,.Lsqr_2nd
|
||||
add $tp,4,$tp ! tp++
|
||||
!.Lsqr_2nd
|
||||
|
||||
mulx $apj,$mul0,$acc0
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $acc0,$car0,$car0
|
||||
add $tpj,$sbit,$sbit
|
||||
and $car0,$mask,$acc0
|
||||
srlx $car0,32,$car0
|
||||
add $acc1,$car1,$car1
|
||||
add $acc0,$acc0,$acc0
|
||||
add $sbit,$acc0,$acc0
|
||||
srlx $acc0,32,$sbit
|
||||
and $acc0,$mask,$acc0
|
||||
add $acc0,$car1,$car1
|
||||
st $car1,[$tp] ! tp[j-1]
|
||||
srlx $car1,32,$car1
|
||||
|
||||
add $car0,$car0,$car0
|
||||
add $sbit,$car0,$car0
|
||||
add $car0,$car1,$car1
|
||||
add $car2,$car1,$car1
|
||||
st $car1,[$tp+4]
|
||||
srlx $car1,32,$car2
|
||||
|
||||
ld [%sp+$bias+$frame],$tmp1 ! tp[0]
|
||||
ld [%sp+$bias+$frame+4],$tpj ! tp[1]
|
||||
ld [$ap+8],$mul0 ! ap[2]
|
||||
ld [$np],$car1 ! np[0]
|
||||
ld [$np+4],$npj ! np[1]
|
||||
mulx $n0,$tmp1,$mul1
|
||||
and $mul1,$mask,$mul1
|
||||
mov 8,$i
|
||||
|
||||
mulx $mul0,$mul0,$car0
|
||||
mulx $car1,$mul1,$car1
|
||||
and $car0,$mask,$acc0
|
||||
add $tmp1,$car1,$car1
|
||||
srlx $car0,32,$car0
|
||||
add %sp,$bias+$frame,$tp
|
||||
srlx $car1,32,$car1
|
||||
and $car0,1,$sbit
|
||||
srlx $car0,1,$car0
|
||||
mov 4,$j
|
||||
|
||||
.Lsqr_outer:
|
||||
.Lsqr_inner1:
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $tpj,$car1,$car1
|
||||
add $j,4,$j
|
||||
ld [$tp+8],$tpj
|
||||
cmp $j,$i
|
||||
add $acc1,$car1,$car1
|
||||
ld [$np+$j],$npj
|
||||
st $car1,[$tp]
|
||||
srlx $car1,32,$car1
|
||||
bl %icc,.Lsqr_inner1
|
||||
add $tp,4,$tp
|
||||
!.Lsqr_inner1
|
||||
|
||||
add $j,4,$j
|
||||
ld [$ap+$j],$apj ! ap[j]
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $tpj,$car1,$car1
|
||||
ld [$np+$j],$npj ! np[j]
|
||||
srlx $car1,32,$tmp0
|
||||
and $car1,$mask,$car1
|
||||
add $tmp0,$sbit,$sbit
|
||||
add $acc0,$car1,$car1
|
||||
ld [$tp+8],$tpj ! tp[j]
|
||||
add $acc1,$car1,$car1
|
||||
st $car1,[$tp]
|
||||
srlx $car1,32,$car1
|
||||
|
||||
add $j,4,$j
|
||||
cmp $j,$num
|
||||
be,pn %icc,.Lsqr_no_inner2
|
||||
add $tp,4,$tp
|
||||
|
||||
.Lsqr_inner2:
|
||||
mulx $apj,$mul0,$acc0
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $tpj,$sbit,$sbit
|
||||
add $acc0,$car0,$car0
|
||||
ld [$ap+$j],$apj ! ap[j]
|
||||
and $car0,$mask,$acc0
|
||||
ld [$np+$j],$npj ! np[j]
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$acc0,$acc0
|
||||
ld [$tp+8],$tpj ! tp[j]
|
||||
add $sbit,$acc0,$acc0
|
||||
add $j,4,$j ! j++
|
||||
srlx $acc0,32,$sbit
|
||||
and $acc0,$mask,$acc0
|
||||
cmp $j,$num
|
||||
add $acc0,$car1,$car1
|
||||
add $acc1,$car1,$car1
|
||||
st $car1,[$tp] ! tp[j-1]
|
||||
srlx $car1,32,$car1
|
||||
bl %icc,.Lsqr_inner2
|
||||
add $tp,4,$tp ! tp++
|
||||
|
||||
.Lsqr_no_inner2:
|
||||
mulx $apj,$mul0,$acc0
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $tpj,$sbit,$sbit
|
||||
add $acc0,$car0,$car0
|
||||
and $car0,$mask,$acc0
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$acc0,$acc0
|
||||
add $sbit,$acc0,$acc0
|
||||
srlx $acc0,32,$sbit
|
||||
and $acc0,$mask,$acc0
|
||||
add $acc0,$car1,$car1
|
||||
add $acc1,$car1,$car1
|
||||
st $car1,[$tp] ! tp[j-1]
|
||||
srlx $car1,32,$car1
|
||||
|
||||
add $car0,$car0,$car0
|
||||
add $sbit,$car0,$car0
|
||||
add $car0,$car1,$car1
|
||||
add $car2,$car1,$car1
|
||||
st $car1,[$tp+4]
|
||||
srlx $car1,32,$car2
|
||||
|
||||
add $i,4,$i ! i++
|
||||
ld [%sp+$bias+$frame],$tmp1 ! tp[0]
|
||||
ld [%sp+$bias+$frame+4],$tpj ! tp[1]
|
||||
ld [$ap+$i],$mul0 ! ap[j]
|
||||
ld [$np],$car1 ! np[0]
|
||||
ld [$np+4],$npj ! np[1]
|
||||
mulx $n0,$tmp1,$mul1
|
||||
and $mul1,$mask,$mul1
|
||||
add $i,4,$tmp0
|
||||
|
||||
mulx $mul0,$mul0,$car0
|
||||
mulx $car1,$mul1,$car1
|
||||
and $car0,$mask,$acc0
|
||||
add $tmp1,$car1,$car1
|
||||
srlx $car0,32,$car0
|
||||
add %sp,$bias+$frame,$tp
|
||||
srlx $car1,32,$car1
|
||||
and $car0,1,$sbit
|
||||
srlx $car0,1,$car0
|
||||
|
||||
cmp $tmp0,$num ! i<num-1
|
||||
bl %icc,.Lsqr_outer
|
||||
mov 4,$j
|
||||
|
||||
.Lsqr_last:
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $tpj,$car1,$car1
|
||||
add $j,4,$j
|
||||
ld [$tp+8],$tpj
|
||||
cmp $j,$i
|
||||
add $acc1,$car1,$car1
|
||||
ld [$np+$j],$npj
|
||||
st $car1,[$tp]
|
||||
srlx $car1,32,$car1
|
||||
bl %icc,.Lsqr_last
|
||||
add $tp,4,$tp
|
||||
!.Lsqr_last
|
||||
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $tpj,$acc0,$acc0
|
||||
srlx $acc0,32,$tmp0
|
||||
and $acc0,$mask,$acc0
|
||||
add $tmp0,$sbit,$sbit
|
||||
add $acc0,$car1,$car1
|
||||
add $acc1,$car1,$car1
|
||||
st $car1,[$tp]
|
||||
srlx $car1,32,$car1
|
||||
|
||||
add $car0,$car0,$car0 ! recover $car0
|
||||
add $sbit,$car0,$car0
|
||||
add $car0,$car1,$car1
|
||||
add $car2,$car1,$car1
|
||||
st $car1,[$tp+4]
|
||||
srlx $car1,32,$car2
|
||||
|
||||
ba .Ltail
|
||||
add $tp,8,$tp
|
||||
.type $fname,#function
|
||||
.size $fname,(.-$fname)
|
||||
.asciz "Montgomery Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 32
|
||||
___
|
||||
# Post-process the template: every `...` span is replaced by the value of
# the Perl expression it contains.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
print $code;
# STDOUT was redirected to the output file at the top of the script, so a
# failed flush/close means a truncated assembly module; report it instead
# of exiting successfully.
close STDOUT or die "error closing STDOUT: $!";
|
887
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv9a-mont.pl
vendored
Executable file
887
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv9a-mont.pl
vendored
Executable file
|
@ -0,0 +1,887 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
|
||||
# October 2005
|
||||
#
|
||||
# "Teaser" Montgomery multiplication module for UltraSPARC. Why FPU?
|
||||
# Because unlike integer multiplier, which simply stalls whole CPU,
|
||||
# FPU is fully pipelined and can effectively emit 48 bit partial
|
||||
# product every cycle. Why not blended SPARC v9? One can argue that
|
||||
# making this module dependent on UltraSPARC VIS extension limits its
|
||||
# binary compatibility. Well yes, it does exclude SPARC64 prior-V(!)
|
||||
# implementations from compatibility matrix. But the rest, whole Sun
|
||||
# UltraSPARC family and brand new Fujitsu's SPARC64 V, all support
|
||||
# VIS extension instructions used in this module. This is considered
|
||||
# good enough to not care about HAL SPARC64 users [if any] who have
|
||||
# integer-only pure SPARCv9 module to "fall down" to.
|
||||
|
||||
# USI&II cores currently exhibit uniform 2x improvement [over pre-
|
||||
# bn_mul_mont codebase] for all key lengths and benchmarks. On USIII
|
||||
# performance improves by a few percent for shorter keys and worsens by a
|
||||
# few percent for longer keys. This is because USIII integer multiplier
|
||||
# is >3x faster than USI&II one, which is harder to match [but see
|
||||
# TODO list below]. It should also be noted that SPARC64 V features
|
||||
# out-of-order execution, which *might* mean that integer multiplier
|
||||
# is pipelined, which in turn *might* be impossible to match... As an
|
||||
# additional note, SPARC64 V implements FP Multiply-Add instruction,
|
||||
# which is perfectly usable in this context... In other words, as far
|
||||
# as Fujitsu SPARC64 V goes, talk to the author:-)
|
||||
|
||||
# The implementation implies following "non-natural" limitations on
|
||||
# input arguments:
|
||||
# - num may not be less than 4;
|
||||
# - num has to be even;
|
||||
# Failure to meet either condition has no fatal effects, simply
|
||||
# doesn't give any performance gain.
|
||||
|
||||
# TODO:
|
||||
# - modulo-schedule inner loop for better performance (on in-order
|
||||
# execution core such as UltraSPARC this shall result in further
|
||||
# noticeable(!) improvement);
|
||||
# - dedicated squaring procedure[?];
|
||||
|
||||
######################################################################
|
||||
# November 2006
|
||||
#
|
||||
# Modulo-scheduled inner loops allow to interleave floating point and
|
||||
# integer instructions and minimize Read-After-Write penalties. This
|
||||
# results in *further* 20-50% performance improvement [depending on
|
||||
# key length, more for longer keys] on USI&II cores and 30-80% - on
|
||||
# USIII&IV.
|
||||
|
||||
# The last command-line argument names the file to generate; STDOUT is
# redirected to it.
$output = pop;
# Re-opening STDOUT closes the old handle first, so an unchecked failure
# would make every later print vanish silently; stop with a diagnostic instead.
open STDOUT,">$output" or die "can't open $output: $!";
|
||||
|
||||
# Symbol name of the generated function.
$fname="bn_mul_mont_fpu";

# $frame and $bias are substituted into the template as plain text;
# $locals is the number of bytes of scratch space added to the frame.
($frame,$bias,$locals)=("STACK_FRAME","STACK_BIAS",64);

# To provide for 32-/64-bit ABI duality, integers wider than 32 bits are
# kept only in %g1-%g4 and %o0-%o5, while %l0-%l7 and %i0-%i5 carry nothing
# but pointers, indexes and other small values.
#
#	int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
#			const BN_ULONG *np, const BN_ULONG *n0, int num);
($rp,$ap,$bp,$np,$n0,$num)=map("%i$_",(0..5));

# $tp addresses t[num].  a[num] and n[num] are smashed to 32-bit words and
# saved as double-precision FP values in the four vectors $ap_l, $ap_h,
# $np_l and $np_h.  This way a bunch of fxtods are eliminated in the second
# loop and L1-cache aliasing is minimized.  $mask is the 16-bit mask 0xffff.
($tp,$ap_l,$ap_h,$np_l,$np_h,$i,$j,$mask)=map("%l$_",(0..7));

# $n0 is reassigned(!) to a "64-bit" register, and %i4 is reused(!) for a
# carry bit.
($n0,$carry)=("%g4","%i4");
|
||||
|
||||
# FP register naming chart
|
||||
#
|
||||
# ..HILO
|
||||
# dcba
|
||||
# --------
|
||||
# LOa
|
||||
# LOb
|
||||
# LOc
|
||||
# LOd
|
||||
# HIa
|
||||
# HIb
|
||||
# HIc
|
||||
# HId
|
||||
# ..a
|
||||
# ..b
|
||||
# %f0-%f14, even registers only: $ba..$bd and $na..$nd are the targets of
# the 16-bit FP loads in the template ("4x16-bit values").
($ba,$bb,$bc,$bd,$na,$nb,$nc,$nd)=map("%f".(2*$_),(0..7));

# %f16-%f23, consecutive registers: each odd "_" register receives a
# 32-bit word of a[j] or n[j], and its even partner is cleared with fzeros.
($alo,$alo_,$ahi,$ahi_,$nlo,$nlo_,$nhi,$nhi_)=map("%f$_",(16..23));

($dota,$dotb)=("%f24","%f26");

# %f32-%f62, even registers only: one register per product of an a/n half
# (lo or hi) with one of the four 16-bit values (a..d).
($aloa,$alob,$aloc,$alod,$ahia,$ahib,$ahic,$ahid,
 $nloa,$nlob,$nloc,$nlod,$nhia,$nhib,$nhic,$nhid)=map("%f".(2*$_),(16..31));

# Magic ASI value to engage 16-bit FP load.
$ASI_FL16_P=0xD2;
|
||||
|
||||
$code=<<___;
|
||||
#include "sparc_arch.h"
|
||||
|
||||
.section ".text",#alloc,#execinstr
|
||||
|
||||
.global $fname
|
||||
.align 32
|
||||
$fname:
|
||||
save %sp,-$frame-$locals,%sp
|
||||
|
||||
cmp $num,4
|
||||
bl,a,pn %icc,.Lret
|
||||
clr %i0
|
||||
andcc $num,1,%g0 ! $num has to be even...
|
||||
bnz,a,pn %icc,.Lret
|
||||
clr %i0 ! signal "unsupported input value"
|
||||
|
||||
srl $num,1,$num
|
||||
sethi %hi(0xffff),$mask
|
||||
ld [%i4+0],$n0 ! $n0 reassigned, remember?
|
||||
or $mask,%lo(0xffff),$mask
|
||||
ld [%i4+4],%o0
|
||||
sllx %o0,32,%o0
|
||||
or %o0,$n0,$n0 ! $n0=n0[1].n0[0]
|
||||
|
||||
sll $num,3,$num ! num*=8
|
||||
|
||||
add %sp,$bias,%o0 ! real top of stack
|
||||
sll $num,2,%o1
|
||||
add %o1,$num,%o1 ! %o1=num*5
|
||||
sub %o0,%o1,%o0
|
||||
and %o0,-2048,%o0 ! optimize TLB utilization
|
||||
sub %o0,$bias,%sp ! alloca(5*num*8)
|
||||
|
||||
rd %asi,%o7 ! save %asi
|
||||
add %sp,$bias+$frame+$locals,$tp
|
||||
add $tp,$num,$ap_l
|
||||
add $ap_l,$num,$ap_l ! [an]p_[lh] point at the vectors' ends !
|
||||
add $ap_l,$num,$ap_h
|
||||
add $ap_h,$num,$np_l
|
||||
add $np_l,$num,$np_h
|
||||
|
||||
wr %g0,$ASI_FL16_P,%asi ! setup %asi for 16-bit FP loads
|
||||
|
||||
add $rp,$num,$rp ! readjust input pointers to point
|
||||
add $ap,$num,$ap ! at the ends too...
|
||||
add $bp,$num,$bp
|
||||
add $np,$num,$np
|
||||
|
||||
stx %o7,[%sp+$bias+$frame+48] ! save %asi
|
||||
|
||||
sub %g0,$num,$i ! i=-num
|
||||
sub %g0,$num,$j ! j=-num
|
||||
|
||||
add $ap,$j,%o3
|
||||
add $bp,$i,%o4
|
||||
|
||||
ld [%o3+4],%g1 ! ap[0]
|
||||
ld [%o3+0],%o0
|
||||
ld [%o4+4],%g5 ! bp[0]
|
||||
sllx %g1,32,%g1
|
||||
ld [%o4+0],%o1
|
||||
sllx %g5,32,%g5
|
||||
or %g1,%o0,%o0
|
||||
or %g5,%o1,%o1
|
||||
|
||||
add $np,$j,%o5
|
||||
|
||||
mulx %o1,%o0,%o0 ! ap[0]*bp[0]
|
||||
mulx $n0,%o0,%o0 ! ap[0]*bp[0]*n0
|
||||
stx %o0,[%sp+$bias+$frame+0]
|
||||
|
||||
ld [%o3+0],$alo_ ! load a[j] as pair of 32-bit words
|
||||
fzeros $alo
|
||||
ld [%o3+4],$ahi_
|
||||
fzeros $ahi
|
||||
ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
|
||||
fzeros $nlo
|
||||
ld [%o5+4],$nhi_
|
||||
fzeros $nhi
|
||||
|
||||
! transfer b[i] to FPU as 4x16-bit values
|
||||
ldda [%o4+2]%asi,$ba
|
||||
fxtod $alo,$alo
|
||||
ldda [%o4+0]%asi,$bb
|
||||
fxtod $ahi,$ahi
|
||||
ldda [%o4+6]%asi,$bc
|
||||
fxtod $nlo,$nlo
|
||||
ldda [%o4+4]%asi,$bd
|
||||
fxtod $nhi,$nhi
|
||||
|
||||
! transfer ap[0]*b[0]*n0 to FPU as 4x16-bit values
|
||||
ldda [%sp+$bias+$frame+6]%asi,$na
|
||||
fxtod $ba,$ba
|
||||
ldda [%sp+$bias+$frame+4]%asi,$nb
|
||||
fxtod $bb,$bb
|
||||
ldda [%sp+$bias+$frame+2]%asi,$nc
|
||||
fxtod $bc,$bc
|
||||
ldda [%sp+$bias+$frame+0]%asi,$nd
|
||||
fxtod $bd,$bd
|
||||
|
||||
std $alo,[$ap_l+$j] ! save smashed ap[j] in double format
|
||||
fxtod $na,$na
|
||||
std $ahi,[$ap_h+$j]
|
||||
fxtod $nb,$nb
|
||||
std $nlo,[$np_l+$j] ! save smashed np[j] in double format
|
||||
fxtod $nc,$nc
|
||||
std $nhi,[$np_h+$j]
|
||||
fxtod $nd,$nd
|
||||
|
||||
fmuld $alo,$ba,$aloa
|
||||
fmuld $nlo,$na,$nloa
|
||||
fmuld $alo,$bb,$alob
|
||||
fmuld $nlo,$nb,$nlob
|
||||
fmuld $alo,$bc,$aloc
|
||||
faddd $aloa,$nloa,$nloa
|
||||
fmuld $nlo,$nc,$nloc
|
||||
fmuld $alo,$bd,$alod
|
||||
faddd $alob,$nlob,$nlob
|
||||
fmuld $nlo,$nd,$nlod
|
||||
fmuld $ahi,$ba,$ahia
|
||||
faddd $aloc,$nloc,$nloc
|
||||
fmuld $nhi,$na,$nhia
|
||||
fmuld $ahi,$bb,$ahib
|
||||
faddd $alod,$nlod,$nlod
|
||||
fmuld $nhi,$nb,$nhib
|
||||
fmuld $ahi,$bc,$ahic
|
||||
faddd $ahia,$nhia,$nhia
|
||||
fmuld $nhi,$nc,$nhic
|
||||
fmuld $ahi,$bd,$ahid
|
||||
faddd $ahib,$nhib,$nhib
|
||||
fmuld $nhi,$nd,$nhid
|
||||
|
||||
faddd $ahic,$nhic,$dota ! $nhic
|
||||
faddd $ahid,$nhid,$dotb ! $nhid
|
||||
|
||||
faddd $nloc,$nhia,$nloc
|
||||
faddd $nlod,$nhib,$nlod
|
||||
|
||||
fdtox $nloa,$nloa
|
||||
fdtox $nlob,$nlob
|
||||
fdtox $nloc,$nloc
|
||||
fdtox $nlod,$nlod
|
||||
|
||||
std $nloa,[%sp+$bias+$frame+0]
|
||||
add $j,8,$j
|
||||
std $nlob,[%sp+$bias+$frame+8]
|
||||
add $ap,$j,%o4
|
||||
std $nloc,[%sp+$bias+$frame+16]
|
||||
add $np,$j,%o5
|
||||
std $nlod,[%sp+$bias+$frame+24]
|
||||
|
||||
ld [%o4+0],$alo_ ! load a[j] as pair of 32-bit words
|
||||
fzeros $alo
|
||||
ld [%o4+4],$ahi_
|
||||
fzeros $ahi
|
||||
ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
|
||||
fzeros $nlo
|
||||
ld [%o5+4],$nhi_
|
||||
fzeros $nhi
|
||||
|
||||
fxtod $alo,$alo
|
||||
fxtod $ahi,$ahi
|
||||
fxtod $nlo,$nlo
|
||||
fxtod $nhi,$nhi
|
||||
|
||||
ldx [%sp+$bias+$frame+0],%o0
|
||||
fmuld $alo,$ba,$aloa
|
||||
ldx [%sp+$bias+$frame+8],%o1
|
||||
fmuld $nlo,$na,$nloa
|
||||
ldx [%sp+$bias+$frame+16],%o2
|
||||
fmuld $alo,$bb,$alob
|
||||
ldx [%sp+$bias+$frame+24],%o3
|
||||
fmuld $nlo,$nb,$nlob
|
||||
|
||||
srlx %o0,16,%o7
|
||||
std $alo,[$ap_l+$j] ! save smashed ap[j] in double format
|
||||
fmuld $alo,$bc,$aloc
|
||||
add %o7,%o1,%o1
|
||||
std $ahi,[$ap_h+$j]
|
||||
faddd $aloa,$nloa,$nloa
|
||||
fmuld $nlo,$nc,$nloc
|
||||
srlx %o1,16,%o7
|
||||
std $nlo,[$np_l+$j] ! save smashed np[j] in double format
|
||||
fmuld $alo,$bd,$alod
|
||||
add %o7,%o2,%o2
|
||||
std $nhi,[$np_h+$j]
|
||||
faddd $alob,$nlob,$nlob
|
||||
fmuld $nlo,$nd,$nlod
|
||||
srlx %o2,16,%o7
|
||||
fmuld $ahi,$ba,$ahia
|
||||
add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
|
||||
faddd $aloc,$nloc,$nloc
|
||||
fmuld $nhi,$na,$nhia
|
||||
!and %o0,$mask,%o0
|
||||
!and %o1,$mask,%o1
|
||||
!and %o2,$mask,%o2
|
||||
!sllx %o1,16,%o1
|
||||
!sllx %o2,32,%o2
|
||||
!sllx %o3,48,%o7
|
||||
!or %o1,%o0,%o0
|
||||
!or %o2,%o0,%o0
|
||||
!or %o7,%o0,%o0 ! 64-bit result
|
||||
srlx %o3,16,%g1 ! 34-bit carry
|
||||
fmuld $ahi,$bb,$ahib
|
||||
|
||||
faddd $alod,$nlod,$nlod
|
||||
fmuld $nhi,$nb,$nhib
|
||||
fmuld $ahi,$bc,$ahic
|
||||
faddd $ahia,$nhia,$nhia
|
||||
fmuld $nhi,$nc,$nhic
|
||||
fmuld $ahi,$bd,$ahid
|
||||
faddd $ahib,$nhib,$nhib
|
||||
fmuld $nhi,$nd,$nhid
|
||||
|
||||
faddd $dota,$nloa,$nloa
|
||||
faddd $dotb,$nlob,$nlob
|
||||
faddd $ahic,$nhic,$dota ! $nhic
|
||||
faddd $ahid,$nhid,$dotb ! $nhid
|
||||
|
||||
faddd $nloc,$nhia,$nloc
|
||||
faddd $nlod,$nhib,$nlod
|
||||
|
||||
fdtox $nloa,$nloa
|
||||
fdtox $nlob,$nlob
|
||||
fdtox $nloc,$nloc
|
||||
fdtox $nlod,$nlod
|
||||
|
||||
std $nloa,[%sp+$bias+$frame+0]
|
||||
std $nlob,[%sp+$bias+$frame+8]
|
||||
addcc $j,8,$j
|
||||
std $nloc,[%sp+$bias+$frame+16]
|
||||
bz,pn %icc,.L1stskip
|
||||
std $nlod,[%sp+$bias+$frame+24]
|
||||
|
||||
.align 32 ! incidentally already aligned !
|
||||
.L1st:
|
||||
add $ap,$j,%o4
|
||||
add $np,$j,%o5
|
||||
ld [%o4+0],$alo_ ! load a[j] as pair of 32-bit words
|
||||
fzeros $alo
|
||||
ld [%o4+4],$ahi_
|
||||
fzeros $ahi
|
||||
ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
|
||||
fzeros $nlo
|
||||
ld [%o5+4],$nhi_
|
||||
fzeros $nhi
|
||||
|
||||
fxtod $alo,$alo
|
||||
fxtod $ahi,$ahi
|
||||
fxtod $nlo,$nlo
|
||||
fxtod $nhi,$nhi
|
||||
|
||||
ldx [%sp+$bias+$frame+0],%o0
|
||||
fmuld $alo,$ba,$aloa
|
||||
ldx [%sp+$bias+$frame+8],%o1
|
||||
fmuld $nlo,$na,$nloa
|
||||
ldx [%sp+$bias+$frame+16],%o2
|
||||
fmuld $alo,$bb,$alob
|
||||
ldx [%sp+$bias+$frame+24],%o3
|
||||
fmuld $nlo,$nb,$nlob
|
||||
|
||||
srlx %o0,16,%o7
|
||||
std $alo,[$ap_l+$j] ! save smashed ap[j] in double format
|
||||
fmuld $alo,$bc,$aloc
|
||||
add %o7,%o1,%o1
|
||||
std $ahi,[$ap_h+$j]
|
||||
faddd $aloa,$nloa,$nloa
|
||||
fmuld $nlo,$nc,$nloc
|
||||
srlx %o1,16,%o7
|
||||
std $nlo,[$np_l+$j] ! save smashed np[j] in double format
|
||||
fmuld $alo,$bd,$alod
|
||||
add %o7,%o2,%o2
|
||||
std $nhi,[$np_h+$j]
|
||||
faddd $alob,$nlob,$nlob
|
||||
fmuld $nlo,$nd,$nlod
|
||||
srlx %o2,16,%o7
|
||||
fmuld $ahi,$ba,$ahia
|
||||
add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
|
||||
and %o0,$mask,%o0
|
||||
faddd $aloc,$nloc,$nloc
|
||||
fmuld $nhi,$na,$nhia
|
||||
and %o1,$mask,%o1
|
||||
and %o2,$mask,%o2
|
||||
fmuld $ahi,$bb,$ahib
|
||||
sllx %o1,16,%o1
|
||||
faddd $alod,$nlod,$nlod
|
||||
fmuld $nhi,$nb,$nhib
|
||||
sllx %o2,32,%o2
|
||||
fmuld $ahi,$bc,$ahic
|
||||
sllx %o3,48,%o7
|
||||
or %o1,%o0,%o0
|
||||
faddd $ahia,$nhia,$nhia
|
||||
fmuld $nhi,$nc,$nhic
|
||||
or %o2,%o0,%o0
|
||||
fmuld $ahi,$bd,$ahid
|
||||
or %o7,%o0,%o0 ! 64-bit result
|
||||
faddd $ahib,$nhib,$nhib
|
||||
fmuld $nhi,$nd,$nhid
|
||||
addcc %g1,%o0,%o0
|
||||
faddd $dota,$nloa,$nloa
|
||||
srlx %o3,16,%g1 ! 34-bit carry
|
||||
faddd $dotb,$nlob,$nlob
|
||||
bcs,a %xcc,.+8
|
||||
add %g1,1,%g1
|
||||
|
||||
stx %o0,[$tp] ! tp[j-1]=
|
||||
|
||||
faddd $ahic,$nhic,$dota ! $nhic
|
||||
faddd $ahid,$nhid,$dotb ! $nhid
|
||||
|
||||
faddd $nloc,$nhia,$nloc
|
||||
faddd $nlod,$nhib,$nlod
|
||||
|
||||
fdtox $nloa,$nloa
|
||||
fdtox $nlob,$nlob
|
||||
fdtox $nloc,$nloc
|
||||
fdtox $nlod,$nlod
|
||||
|
||||
std $nloa,[%sp+$bias+$frame+0]
|
||||
std $nlob,[%sp+$bias+$frame+8]
|
||||
std $nloc,[%sp+$bias+$frame+16]
|
||||
std $nlod,[%sp+$bias+$frame+24]
|
||||
|
||||
addcc $j,8,$j
|
||||
bnz,pt %icc,.L1st
|
||||
add $tp,8,$tp
|
||||
|
||||
.L1stskip:
|
||||
fdtox $dota,$dota
|
||||
fdtox $dotb,$dotb
|
||||
|
||||
ldx [%sp+$bias+$frame+0],%o0
|
||||
ldx [%sp+$bias+$frame+8],%o1
|
||||
ldx [%sp+$bias+$frame+16],%o2
|
||||
ldx [%sp+$bias+$frame+24],%o3
|
||||
|
||||
srlx %o0,16,%o7
|
||||
std $dota,[%sp+$bias+$frame+32]
|
||||
add %o7,%o1,%o1
|
||||
std $dotb,[%sp+$bias+$frame+40]
|
||||
srlx %o1,16,%o7
|
||||
add %o7,%o2,%o2
|
||||
srlx %o2,16,%o7
|
||||
add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
|
||||
and %o0,$mask,%o0
|
||||
and %o1,$mask,%o1
|
||||
and %o2,$mask,%o2
|
||||
sllx %o1,16,%o1
|
||||
sllx %o2,32,%o2
|
||||
sllx %o3,48,%o7
|
||||
or %o1,%o0,%o0
|
||||
or %o2,%o0,%o0
|
||||
or %o7,%o0,%o0 ! 64-bit result
|
||||
ldx [%sp+$bias+$frame+32],%o4
|
||||
addcc %g1,%o0,%o0
|
||||
ldx [%sp+$bias+$frame+40],%o5
|
||||
srlx %o3,16,%g1 ! 34-bit carry
|
||||
bcs,a %xcc,.+8
|
||||
add %g1,1,%g1
|
||||
|
||||
stx %o0,[$tp] ! tp[j-1]=
|
||||
add $tp,8,$tp
|
||||
|
||||
srlx %o4,16,%o7
|
||||
add %o7,%o5,%o5
|
||||
and %o4,$mask,%o4
|
||||
sllx %o5,16,%o7
|
||||
or %o7,%o4,%o4
|
||||
addcc %g1,%o4,%o4
|
||||
srlx %o5,48,%g1
|
||||
bcs,a %xcc,.+8
|
||||
add %g1,1,%g1
|
||||
|
||||
mov %g1,$carry
|
||||
stx %o4,[$tp] ! tp[num-1]=
|
||||
|
||||
ba .Louter
|
||||
add $i,8,$i
|
||||
.align 32
|
||||
.Louter:
|
||||
sub %g0,$num,$j ! j=-num
|
||||
add %sp,$bias+$frame+$locals,$tp
|
||||
|
||||
add $ap,$j,%o3
|
||||
add $bp,$i,%o4
|
||||
|
||||
ld [%o3+4],%g1 ! bp[i]
|
||||
ld [%o3+0],%o0
|
||||
ld [%o4+4],%g5 ! ap[0]
|
||||
sllx %g1,32,%g1
|
||||
ld [%o4+0],%o1
|
||||
sllx %g5,32,%g5
|
||||
or %g1,%o0,%o0
|
||||
or %g5,%o1,%o1
|
||||
|
||||
ldx [$tp],%o2 ! tp[0]
|
||||
mulx %o1,%o0,%o0
|
||||
addcc %o2,%o0,%o0
|
||||
mulx $n0,%o0,%o0 ! (ap[0]*bp[i]+t[0])*n0
|
||||
stx %o0,[%sp+$bias+$frame+0]
|
||||
|
||||
! transfer b[i] to FPU as 4x16-bit values
|
||||
ldda [%o4+2]%asi,$ba
|
||||
ldda [%o4+0]%asi,$bb
|
||||
ldda [%o4+6]%asi,$bc
|
||||
ldda [%o4+4]%asi,$bd
|
||||
|
||||
! transfer (ap[0]*b[i]+t[0])*n0 to FPU as 4x16-bit values
|
||||
ldda [%sp+$bias+$frame+6]%asi,$na
|
||||
fxtod $ba,$ba
|
||||
ldda [%sp+$bias+$frame+4]%asi,$nb
|
||||
fxtod $bb,$bb
|
||||
ldda [%sp+$bias+$frame+2]%asi,$nc
|
||||
fxtod $bc,$bc
|
||||
ldda [%sp+$bias+$frame+0]%asi,$nd
|
||||
fxtod $bd,$bd
|
||||
ldd [$ap_l+$j],$alo ! load a[j] in double format
|
||||
fxtod $na,$na
|
||||
ldd [$ap_h+$j],$ahi
|
||||
fxtod $nb,$nb
|
||||
ldd [$np_l+$j],$nlo ! load n[j] in double format
|
||||
fxtod $nc,$nc
|
||||
ldd [$np_h+$j],$nhi
|
||||
fxtod $nd,$nd
|
||||
|
||||
fmuld $alo,$ba,$aloa
|
||||
fmuld $nlo,$na,$nloa
|
||||
fmuld $alo,$bb,$alob
|
||||
fmuld $nlo,$nb,$nlob
|
||||
fmuld $alo,$bc,$aloc
|
||||
faddd $aloa,$nloa,$nloa
|
||||
fmuld $nlo,$nc,$nloc
|
||||
fmuld $alo,$bd,$alod
|
||||
faddd $alob,$nlob,$nlob
|
||||
fmuld $nlo,$nd,$nlod
|
||||
fmuld $ahi,$ba,$ahia
|
||||
faddd $aloc,$nloc,$nloc
|
||||
fmuld $nhi,$na,$nhia
|
||||
fmuld $ahi,$bb,$ahib
|
||||
faddd $alod,$nlod,$nlod
|
||||
fmuld $nhi,$nb,$nhib
|
||||
fmuld $ahi,$bc,$ahic
|
||||
faddd $ahia,$nhia,$nhia
|
||||
fmuld $nhi,$nc,$nhic
|
||||
fmuld $ahi,$bd,$ahid
|
||||
faddd $ahib,$nhib,$nhib
|
||||
fmuld $nhi,$nd,$nhid
|
||||
|
||||
faddd $ahic,$nhic,$dota ! $nhic
|
||||
faddd $ahid,$nhid,$dotb ! $nhid
|
||||
|
||||
faddd $nloc,$nhia,$nloc
|
||||
faddd $nlod,$nhib,$nlod
|
||||
|
||||
fdtox $nloa,$nloa
|
||||
fdtox $nlob,$nlob
|
||||
fdtox $nloc,$nloc
|
||||
fdtox $nlod,$nlod
|
||||
|
||||
std $nloa,[%sp+$bias+$frame+0]
|
||||
std $nlob,[%sp+$bias+$frame+8]
|
||||
std $nloc,[%sp+$bias+$frame+16]
|
||||
add $j,8,$j
|
||||
std $nlod,[%sp+$bias+$frame+24]
|
||||
|
||||
ldd [$ap_l+$j],$alo ! load a[j] in double format
|
||||
ldd [$ap_h+$j],$ahi
|
||||
ldd [$np_l+$j],$nlo ! load n[j] in double format
|
||||
ldd [$np_h+$j],$nhi
|
||||
|
||||
fmuld $alo,$ba,$aloa
|
||||
fmuld $nlo,$na,$nloa
|
||||
fmuld $alo,$bb,$alob
|
||||
fmuld $nlo,$nb,$nlob
|
||||
fmuld $alo,$bc,$aloc
|
||||
ldx [%sp+$bias+$frame+0],%o0
|
||||
faddd $aloa,$nloa,$nloa
|
||||
fmuld $nlo,$nc,$nloc
|
||||
ldx [%sp+$bias+$frame+8],%o1
|
||||
fmuld $alo,$bd,$alod
|
||||
ldx [%sp+$bias+$frame+16],%o2
|
||||
faddd $alob,$nlob,$nlob
|
||||
fmuld $nlo,$nd,$nlod
|
||||
ldx [%sp+$bias+$frame+24],%o3
|
||||
fmuld $ahi,$ba,$ahia
|
||||
|
||||
srlx %o0,16,%o7
|
||||
faddd $aloc,$nloc,$nloc
|
||||
fmuld $nhi,$na,$nhia
|
||||
add %o7,%o1,%o1
|
||||
fmuld $ahi,$bb,$ahib
|
||||
srlx %o1,16,%o7
|
||||
faddd $alod,$nlod,$nlod
|
||||
fmuld $nhi,$nb,$nhib
|
||||
add %o7,%o2,%o2
|
||||
fmuld $ahi,$bc,$ahic
|
||||
srlx %o2,16,%o7
|
||||
faddd $ahia,$nhia,$nhia
|
||||
fmuld $nhi,$nc,$nhic
|
||||
add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
|
||||
! why?
|
||||
and %o0,$mask,%o0
|
||||
fmuld $ahi,$bd,$ahid
|
||||
and %o1,$mask,%o1
|
||||
and %o2,$mask,%o2
|
||||
faddd $ahib,$nhib,$nhib
|
||||
fmuld $nhi,$nd,$nhid
|
||||
sllx %o1,16,%o1
|
||||
faddd $dota,$nloa,$nloa
|
||||
sllx %o2,32,%o2
|
||||
faddd $dotb,$nlob,$nlob
|
||||
sllx %o3,48,%o7
|
||||
or %o1,%o0,%o0
|
||||
faddd $ahic,$nhic,$dota ! $nhic
|
||||
or %o2,%o0,%o0
|
||||
faddd $ahid,$nhid,$dotb ! $nhid
|
||||
or %o7,%o0,%o0 ! 64-bit result
|
||||
ldx [$tp],%o7
|
||||
faddd $nloc,$nhia,$nloc
|
||||
addcc %o7,%o0,%o0
|
||||
! end-of-why?
|
||||
faddd $nlod,$nhib,$nlod
|
||||
srlx %o3,16,%g1 ! 34-bit carry
|
||||
fdtox $nloa,$nloa
|
||||
bcs,a %xcc,.+8
|
||||
add %g1,1,%g1
|
||||
|
||||
fdtox $nlob,$nlob
|
||||
fdtox $nloc,$nloc
|
||||
fdtox $nlod,$nlod
|
||||
|
||||
std $nloa,[%sp+$bias+$frame+0]
|
||||
std $nlob,[%sp+$bias+$frame+8]
|
||||
addcc $j,8,$j
|
||||
std $nloc,[%sp+$bias+$frame+16]
|
||||
bz,pn %icc,.Linnerskip
|
||||
std $nlod,[%sp+$bias+$frame+24]
|
||||
|
||||
ba .Linner
|
||||
nop
|
||||
.align 32
|
||||
.Linner:
|
||||
ldd [$ap_l+$j],$alo ! load a[j] in double format
|
||||
ldd [$ap_h+$j],$ahi
|
||||
ldd [$np_l+$j],$nlo ! load n[j] in double format
|
||||
ldd [$np_h+$j],$nhi
|
||||
|
||||
fmuld $alo,$ba,$aloa
|
||||
fmuld $nlo,$na,$nloa
|
||||
fmuld $alo,$bb,$alob
|
||||
fmuld $nlo,$nb,$nlob
|
||||
fmuld $alo,$bc,$aloc
|
||||
ldx [%sp+$bias+$frame+0],%o0
|
||||
faddd $aloa,$nloa,$nloa
|
||||
fmuld $nlo,$nc,$nloc
|
||||
ldx [%sp+$bias+$frame+8],%o1
|
||||
fmuld $alo,$bd,$alod
|
||||
ldx [%sp+$bias+$frame+16],%o2
|
||||
faddd $alob,$nlob,$nlob
|
||||
fmuld $nlo,$nd,$nlod
|
||||
ldx [%sp+$bias+$frame+24],%o3
|
||||
fmuld $ahi,$ba,$ahia
|
||||
|
||||
srlx %o0,16,%o7
|
||||
faddd $aloc,$nloc,$nloc
|
||||
fmuld $nhi,$na,$nhia
|
||||
add %o7,%o1,%o1
|
||||
fmuld $ahi,$bb,$ahib
|
||||
srlx %o1,16,%o7
|
||||
faddd $alod,$nlod,$nlod
|
||||
fmuld $nhi,$nb,$nhib
|
||||
add %o7,%o2,%o2
|
||||
fmuld $ahi,$bc,$ahic
|
||||
srlx %o2,16,%o7
|
||||
faddd $ahia,$nhia,$nhia
|
||||
fmuld $nhi,$nc,$nhic
|
||||
add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
|
||||
and %o0,$mask,%o0
|
||||
fmuld $ahi,$bd,$ahid
|
||||
and %o1,$mask,%o1
|
||||
and %o2,$mask,%o2
|
||||
faddd $ahib,$nhib,$nhib
|
||||
fmuld $nhi,$nd,$nhid
|
||||
sllx %o1,16,%o1
|
||||
faddd $dota,$nloa,$nloa
|
||||
sllx %o2,32,%o2
|
||||
faddd $dotb,$nlob,$nlob
|
||||
sllx %o3,48,%o7
|
||||
or %o1,%o0,%o0
|
||||
faddd $ahic,$nhic,$dota ! $nhic
|
||||
or %o2,%o0,%o0
|
||||
faddd $ahid,$nhid,$dotb ! $nhid
|
||||
or %o7,%o0,%o0 ! 64-bit result
|
||||
faddd $nloc,$nhia,$nloc
|
||||
addcc %g1,%o0,%o0
|
||||
ldx [$tp+8],%o7 ! tp[j]
|
||||
faddd $nlod,$nhib,$nlod
|
||||
srlx %o3,16,%g1 ! 34-bit carry
|
||||
fdtox $nloa,$nloa
|
||||
bcs,a %xcc,.+8
|
||||
add %g1,1,%g1
|
||||
fdtox $nlob,$nlob
|
||||
addcc %o7,%o0,%o0
|
||||
fdtox $nloc,$nloc
|
||||
bcs,a %xcc,.+8
|
||||
add %g1,1,%g1
|
||||
|
||||
stx %o0,[$tp] ! tp[j-1]
|
||||
fdtox $nlod,$nlod
|
||||
|
||||
std $nloa,[%sp+$bias+$frame+0]
|
||||
std $nlob,[%sp+$bias+$frame+8]
|
||||
std $nloc,[%sp+$bias+$frame+16]
|
||||
addcc $j,8,$j
|
||||
std $nlod,[%sp+$bias+$frame+24]
|
||||
bnz,pt %icc,.Linner
|
||||
add $tp,8,$tp
|
||||
|
||||
.Linnerskip:
|
||||
fdtox $dota,$dota
|
||||
fdtox $dotb,$dotb
|
||||
|
||||
ldx [%sp+$bias+$frame+0],%o0
|
||||
ldx [%sp+$bias+$frame+8],%o1
|
||||
ldx [%sp+$bias+$frame+16],%o2
|
||||
ldx [%sp+$bias+$frame+24],%o3
|
||||
|
||||
srlx %o0,16,%o7
|
||||
std $dota,[%sp+$bias+$frame+32]
|
||||
add %o7,%o1,%o1
|
||||
std $dotb,[%sp+$bias+$frame+40]
|
||||
srlx %o1,16,%o7
|
||||
add %o7,%o2,%o2
|
||||
srlx %o2,16,%o7
|
||||
add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
|
||||
and %o0,$mask,%o0
|
||||
and %o1,$mask,%o1
|
||||
and %o2,$mask,%o2
|
||||
sllx %o1,16,%o1
|
||||
sllx %o2,32,%o2
|
||||
sllx %o3,48,%o7
|
||||
or %o1,%o0,%o0
|
||||
or %o2,%o0,%o0
|
||||
ldx [%sp+$bias+$frame+32],%o4
|
||||
or %o7,%o0,%o0 ! 64-bit result
|
||||
ldx [%sp+$bias+$frame+40],%o5
|
||||
addcc %g1,%o0,%o0
|
||||
ldx [$tp+8],%o7 ! tp[j]
|
||||
srlx %o3,16,%g1 ! 34-bit carry
|
||||
bcs,a %xcc,.+8
|
||||
add %g1,1,%g1
|
||||
|
||||
addcc %o7,%o0,%o0
|
||||
bcs,a %xcc,.+8
|
||||
add %g1,1,%g1
|
||||
|
||||
stx %o0,[$tp] ! tp[j-1]
|
||||
add $tp,8,$tp
|
||||
|
||||
srlx %o4,16,%o7
|
||||
add %o7,%o5,%o5
|
||||
and %o4,$mask,%o4
|
||||
sllx %o5,16,%o7
|
||||
or %o7,%o4,%o4
|
||||
addcc %g1,%o4,%o4
|
||||
srlx %o5,48,%g1
|
||||
bcs,a %xcc,.+8
|
||||
add %g1,1,%g1
|
||||
|
||||
addcc $carry,%o4,%o4
|
||||
stx %o4,[$tp] ! tp[num-1]
|
||||
mov %g1,$carry
|
||||
bcs,a %xcc,.+8
|
||||
add $carry,1,$carry
|
||||
|
||||
addcc $i,8,$i
|
||||
bnz %icc,.Louter
|
||||
nop
|
||||
|
||||
add $tp,8,$tp ! adjust tp to point at the end
|
||||
orn %g0,%g0,%g4
|
||||
sub %g0,$num,%o7 ! n=-num
|
||||
ba .Lsub
|
||||
subcc %g0,%g0,%g0 ! clear %icc.c
|
||||
|
||||
.align 32
|
||||
.Lsub:
|
||||
ldx [$tp+%o7],%o0
|
||||
add $np,%o7,%g1
|
||||
ld [%g1+0],%o2
|
||||
ld [%g1+4],%o3
|
||||
srlx %o0,32,%o1
|
||||
subccc %o0,%o2,%o2
|
||||
add $rp,%o7,%g1
|
||||
subccc %o1,%o3,%o3
|
||||
st %o2,[%g1+0]
|
||||
add %o7,8,%o7
|
||||
brnz,pt %o7,.Lsub
|
||||
st %o3,[%g1+4]
|
||||
subc $carry,0,%g4
|
||||
sub %g0,$num,%o7 ! n=-num
|
||||
ba .Lcopy
|
||||
nop
|
||||
|
||||
.align 32
|
||||
.Lcopy:
|
||||
ldx [$tp+%o7],%o0
|
||||
add $rp,%o7,%g1
|
||||
ld [%g1+0],%o2
|
||||
ld [%g1+4],%o3
|
||||
stx %g0,[$tp+%o7]
|
||||
and %o0,%g4,%o0
|
||||
srlx %o0,32,%o1
|
||||
andn %o2,%g4,%o2
|
||||
andn %o3,%g4,%o3
|
||||
or %o2,%o0,%o0
|
||||
or %o3,%o1,%o1
|
||||
st %o0,[%g1+0]
|
||||
add %o7,8,%o7
|
||||
brnz,pt %o7,.Lcopy
|
||||
st %o1,[%g1+4]
|
||||
sub %g0,$num,%o7 ! n=-num
|
||||
|
||||
.Lzap:
|
||||
stx %g0,[$ap_l+%o7]
|
||||
stx %g0,[$ap_h+%o7]
|
||||
stx %g0,[$np_l+%o7]
|
||||
stx %g0,[$np_h+%o7]
|
||||
add %o7,8,%o7
|
||||
brnz,pt %o7,.Lzap
|
||||
nop
|
||||
|
||||
ldx [%sp+$bias+$frame+48],%o7
|
||||
wr %g0,%o7,%asi ! restore %asi
|
||||
|
||||
mov 1,%i0
|
||||
.Lret:
|
||||
ret
|
||||
restore
|
||||
.type $fname,#function
|
||||
.size $fname,(.-$fname)
|
||||
.asciz "Montgomery Multiplication for UltraSPARC, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 32
|
||||
___
|
||||
|
||||
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
|
||||
|
||||
# Below substitution makes it possible to compile without demanding
|
||||
# VIS extensions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I
|
||||
# dare to do this, because VIS capability is detected at run-time now
|
||||
# and this routine is not called on CPUs incapable of executing it. Do
|
||||
# note that fzeros is not the only VIS dependency! Another dependency
|
||||
# is implicit and is just _a_ numerical value loaded to %asi register,
|
||||
# which assembler can't recognize as VIS specific...
|
||||
$code =~ s/fzeros\s+%f([0-9]+)/
|
||||
sprintf(".word\t0x%x\t! fzeros %%f%d",0x81b00c20|($1<<25),$1)
|
||||
/gem;
|
||||
|
||||
print $code;
|
||||
# flush
|
||||
# Closing STDOUT flushes the generated assembly. Check the result so that a
# failed write (e.g. disk full) aborts the build instead of silently leaving
# a truncated output file behind.
close STDOUT or die "error closing STDOUT: $!";
|
251
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/via-mont.pl
vendored
Normal file
251
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/via-mont.pl
vendored
Normal file
|
@ -0,0 +1,251 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2006-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
#
|
||||
# Wrapper around 'rep montmul', VIA-specific instruction accessing
|
||||
# PadLock Montgomery Multiplier. The wrapper is designed as drop-in
|
||||
# replacement for OpenSSL bn_mul_mont [first implemented in 0.9.9].
|
||||
#
|
||||
# Below are interleaved outputs from 'openssl speed rsa dsa' for 4
|
||||
# different software configurations on 1.5GHz VIA Esther processor.
|
||||
# Lines marked with "software integer" denote performance of hand-
|
||||
# coded integer-only assembler found in OpenSSL 0.9.7. "Software SSE2"
|
||||
# refers to hand-coded SSE2 Montgomery multiplication procedure found
|
||||
# in OpenSSL 0.9.9. "Hardware VIA SDK" refers to padlock_pmm routine from
|
||||
# Padlock SDK 2.0.1 available for download from VIA, which naturally
|
||||
# utilizes the magic 'repz montmul' instruction. And finally "hardware
|
||||
# this" refers to *this* implementation which also uses 'repz montmul'
|
||||
#
|
||||
# sign verify sign/s verify/s
|
||||
# rsa 512 bits 0.001720s 0.000140s 581.4 7149.7 software integer
|
||||
# rsa 512 bits 0.000690s 0.000086s 1450.3 11606.0 software SSE2
|
||||
# rsa 512 bits 0.006136s 0.000201s 163.0 4974.5 hardware VIA SDK
|
||||
# rsa 512 bits 0.000712s 0.000050s 1404.9 19858.5 hardware this
|
||||
#
|
||||
# rsa 1024 bits 0.008518s 0.000413s 117.4 2420.8 software integer
|
||||
# rsa 1024 bits 0.004275s 0.000277s 233.9 3609.7 software SSE2
|
||||
# rsa 1024 bits 0.012136s 0.000260s 82.4 3844.5 hardware VIA SDK
|
||||
# rsa 1024 bits 0.002522s 0.000116s 396.5 8650.9 hardware this
|
||||
#
|
||||
# rsa 2048 bits 0.050101s 0.001371s 20.0 729.6 software integer
|
||||
# rsa 2048 bits 0.030273s 0.001008s 33.0 991.9 software SSE2
|
||||
# rsa 2048 bits 0.030833s 0.000976s 32.4 1025.1 hardware VIA SDK
|
||||
# rsa 2048 bits 0.011879s 0.000342s 84.2 2921.7 hardware this
|
||||
#
|
||||
# rsa 4096 bits 0.327097s 0.004859s 3.1 205.8 software integer
|
||||
# rsa 4096 bits 0.229318s 0.003859s 4.4 259.2 software SSE2
|
||||
# rsa 4096 bits 0.233953s 0.003274s 4.3 305.4 hardware VIA SDK
|
||||
# rsa 4096 bits 0.070493s 0.001166s 14.2 857.6 hardware this
|
||||
#
|
||||
# dsa 512 bits 0.001342s 0.001651s 745.2 605.7 software integer
|
||||
# dsa 512 bits 0.000844s 0.000987s 1185.3 1013.1 software SSE2
|
||||
# dsa 512 bits 0.001902s 0.002247s 525.6 444.9 hardware VIA SDK
|
||||
# dsa 512 bits 0.000458s 0.000524s 2182.2 1909.1 hardware this
|
||||
#
|
||||
# dsa 1024 bits 0.003964s 0.004926s 252.3 203.0 software integer
|
||||
# dsa 1024 bits 0.002686s 0.003166s 372.3 315.8 software SSE2
|
||||
# dsa 1024 bits 0.002397s 0.002823s 417.1 354.3 hardware VIA SDK
|
||||
# dsa 1024 bits 0.000978s 0.001170s 1022.2 855.0 hardware this
|
||||
#
|
||||
# dsa 2048 bits 0.013280s 0.016518s 75.3 60.5 software integer
|
||||
# dsa 2048 bits 0.009911s 0.011522s 100.9 86.8 software SSE2
|
||||
# dsa 2048 bits 0.009542s 0.011763s 104.8 85.0 hardware VIA SDK
|
||||
# dsa 2048 bits 0.002884s 0.003352s 346.8 298.3 hardware this
|
||||
#
|
||||
# To give you some other reference point here is output for 2.4GHz P4
|
||||
# running hand-coded SSE2 bn_mul_mont found in 0.9.9, i.e. "software
|
||||
# SSE2" in above terms.
|
||||
#
|
||||
# rsa 512 bits 0.000407s 0.000047s 2454.2 21137.0
|
||||
# rsa 1024 bits 0.002426s 0.000141s 412.1 7100.0
|
||||
# rsa 2048 bits 0.015046s 0.000491s 66.5 2034.9
|
||||
# rsa 4096 bits 0.109770s 0.002379s 9.1 420.3
|
||||
# dsa 512 bits 0.000438s 0.000525s 2281.1 1904.1
|
||||
# dsa 1024 bits 0.001346s 0.001595s 742.7 627.0
|
||||
# dsa 2048 bits 0.004745s 0.005582s 210.7 179.1
|
||||
#
|
||||
# Conclusions:
|
||||
# - VIA SDK leaves a *lot* of room for improvement (which this
|
||||
# implementation successfully fills:-);
|
||||
# - 'rep montmul' gives up to >3x performance improvement depending on
|
||||
# key length;
|
||||
# - in terms of absolute performance it delivers approximately as much
|
||||
# as modern out-of-order 32-bit cores [again, for longer keys].
|
||||
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
push(@INC,"${dir}","${dir}../../perlasm");
|
||||
require "x86asm.pl";
|
||||
|
||||
$output = pop;
|
||||
open STDOUT,">$output";
|
||||
|
||||
&asm_init($ARGV[0]);
|
||||
|
||||
# int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
|
||||
$func="bn_mul_mont_padlock";
|
||||
|
||||
$pad=16*1; # amount of reserved bytes on top of every vector
|
||||
|
||||
# stack layout
|
||||
$mZeroPrime=&DWP(0,"esp"); # these are specified by VIA
|
||||
$A=&DWP(4,"esp");
|
||||
$B=&DWP(8,"esp");
|
||||
$T=&DWP(12,"esp");
|
||||
$M=&DWP(16,"esp");
|
||||
$scratch=&DWP(20,"esp");
|
||||
$rp=&DWP(24,"esp"); # these are mine
|
||||
$sp=&DWP(28,"esp");
|
||||
# &DWP(32,"esp") # 32 byte scratch area
|
||||
# &DWP(64+(4*$num+$pad)*0,"esp") # padded tp[num]
|
||||
# &DWP(64+(4*$num+$pad)*1,"esp") # padded copy of ap[num]
|
||||
# &DWP(64+(4*$num+$pad)*2,"esp") # padded copy of bp[num]
|
||||
# &DWP(64+(4*$num+$pad)*3,"esp") # padded copy of np[num]
|
||||
# Note that SDK suggests to unconditionally allocate 2K per vector. This
|
||||
# has quite an impact on performance. It naturally depends on key length,
|
||||
# but to give an example 1024 bit private RSA key operations suffer >30%
|
||||
# penalty. I allocate only as much as actually required...
|
||||
|
||||
# Emit the entry of $func and reject unsupported sizes. eax is cleared
# before the checks; the success path at the end of the function increments
# it to 1, so the early exits to "leave" presumably report 0 to the caller
# (TODO confirm how function_end returns eax).
&function_begin($func);
|
||||
&xor ("eax","eax");
|
||||
&mov ("ecx",&wparam(5)); # num
|
||||
# meet VIA's limitations for num [note that the specification
|
||||
# expresses them in bits, while we work with amount of 32-bit words]
|
||||
# Accepted values: num is a multiple of 4 with 8 <= num <= 1024.
&test ("ecx",3);
|
||||
&jnz (&label("leave")); # num % 4 != 0
|
||||
&cmp ("ecx",8);
|
||||
&jb (&label("leave")); # num < 8
|
||||
&cmp ("ecx",1024);
|
||||
&ja (&label("leave")); # num > 1024
|
||||
|
||||
# Load the arguments, carve a 64-byte-aligned frame out of the stack and
# fill in the fixed slots at its bottom.
# pushf saves the caller's flags (popf on the success path restores them);
# cld clears the direction flag ahead of the rep stos/movs sequences below.
&pushf ();
|
||||
&cld ();
|
||||
|
||||
&mov ("edi",&wparam(0)); # rp
|
||||
&mov ("eax",&wparam(1)); # ap
|
||||
&mov ("ebx",&wparam(2)); # bp
|
||||
&mov ("edx",&wparam(3)); # np
|
||||
&mov ("esi",&wparam(4)); # n0
|
||||
&mov ("esi",&DWP(0,"esi")); # *n0
|
||||
|
||||
# ecx = 4*num + $pad (one padded vector); ebp = 4*ecx + 64 (whole frame).
&lea ("ecx",&DWP($pad,"","ecx",4)); # ecx becomes vector size in bytes
|
||||
&lea ("ebp",&DWP(64,"","ecx",4)); # allocate 4 vectors + 64 bytes
|
||||
&neg ("ebp");
|
||||
&add ("ebp","esp");
|
||||
&and ("ebp",-64); # align to cache-line
|
||||
# After the exchange esp is the new frame and ebp holds the caller's esp.
&xchg ("ebp","esp"); # alloca
|
||||
|
||||
&mov ($rp,"edi"); # save rp
|
||||
&mov ($sp,"ebp"); # save esp
|
||||
|
||||
# Slots defined by the $mZeroPrime/$T/$scratch DWP offsets declared earlier.
&mov ($mZeroPrime,"esi");
|
||||
&lea ("esi",&DWP(64,"esp")); # tp
|
||||
&mov ($T,"esi");
|
||||
&lea ("edi",&DWP(32,"esp")); # scratch area
|
||||
&mov ($scratch,"edi");
|
||||
# esi = ap: source pointer for the first rep movsl that follows.
&mov ("esi","eax");
|
||||
|
||||
# (ecx - $pad) / 4 undoes the vector-size computation above.
&lea ("ebp",&DWP(-$pad,"ecx"));
|
||||
&shr ("ebp",2); # restore original num value in ebp
|
||||
|
||||
# eax = 0 is the fill value for the rep stosl sequences that follow.
&xor ("eax","eax");
|
||||
|
||||
# Zero the scratch area and padded tp, then lay out padded copies of ap, bp
# and np back to back in the frame. On entry edi points at the scratch
# area (esp+32), esi at ap, eax is 0 and ebp holds num; ebx and edx still
# hold bp and np.
# Count is num + (32+$pad)/4 dwords: 32 bytes of scratch, tp and its pad.
&mov ("ecx","ebp");
|
||||
&lea ("ecx",&DWP((32+$pad)/4,"ecx"));# padded tp + scratch
|
||||
&data_byte(0xf3,0xab); # rep stosl, bzero
|
||||
|
||||
# ap: copy num dwords from esi, then append $pad/4 zero dwords.
&mov ("ecx","ebp");
|
||||
&lea ("edi",&DWP(64+$pad,"esp","ecx",4));# pointer to ap copy
|
||||
&mov ($A,"edi");
|
||||
&data_byte(0xf3,0xa5); # rep movsl, memcpy
|
||||
&mov ("ecx",$pad/4);
|
||||
&data_byte(0xf3,0xab); # rep stosl, bzero pad
|
||||
# edi points at the end of padded ap copy...
|
||||
|
||||
# bp: same pattern, source taken from ebx; the copy starts where ap's ended.
&mov ("ecx","ebp");
|
||||
&mov ("esi","ebx");
|
||||
&mov ($B,"edi");
|
||||
&data_byte(0xf3,0xa5); # rep movsl, memcpy
|
||||
&mov ("ecx",$pad/4);
|
||||
&data_byte(0xf3,0xab); # rep stosl, bzero pad
|
||||
# edi points at the end of padded bp copy...
|
||||
|
||||
# np: same pattern, source taken from edx.
&mov ("ecx","ebp");
|
||||
&mov ("esi","edx");
|
||||
&mov ($M,"edi");
|
||||
&data_byte(0xf3,0xa5); # rep movsl, memcpy
|
||||
&mov ("ecx",$pad/4);
|
||||
&data_byte(0xf3,0xab); # rep stosl, bzero pad
|
||||
# edi points at the end of padded np copy...
|
||||
|
||||
# Run the PadLock instruction, then set up registers for the final
# subtraction. esi is pointed at the bottom of the frame, where the
# mZeroPrime/A/B/T/M/scratch slots were stored; ecx carries the operand
# size as num*32. The instruction is emitted as raw bytes.
# let magic happen...
|
||||
&mov ("ecx","ebp");
|
||||
&mov ("esi","esp");
|
||||
&shl ("ecx",5); # convert word counter to bit counter
|
||||
&align (4);
|
||||
&data_byte(0xf3,0x0f,0xa6,0xc0);# rep montmul
|
||||
|
||||
# Registers for the loops that follow: ecx = num (loop count), esi = tp,
# ebp = edi - $pad - 4*num (start of the np copy), edi = rp, edx = index 0.
&mov ("ecx","ebp");
|
||||
&lea ("esi",&DWP(64,"esp")); # tp
|
||||
# edi still points at the end of padded np copy...
|
||||
&neg ("ebp");
|
||||
&lea ("ebp",&DWP(-$pad,"edi","ebp",4)); # so just "rewind"
|
||||
&mov ("edi",$rp); # restore rp
|
||||
&xor ("edx","edx"); # i=0 and clear CF
|
||||
|
||||
# rp[] = tp[] - np[], word by word (esi = tp, ebp = np copy, edi = rp,
# edx = index, ecx = remaining count). The borrow travels between
# iterations in CF, which is why the index is advanced with lea and the
# count with loop: neither of them modifies CF.
&set_label("sub",8);
|
||||
&mov ("eax",&DWP(0,"esi","edx",4));
|
||||
&sbb ("eax",&DWP(0,"ebp","edx",4));
|
||||
&mov (&DWP(0,"edi","edx",4),"eax"); # rp[i]=tp[i]-np[i]
|
||||
&lea ("edx",&DWP(1,"edx")); # i++
|
||||
&loop (&label("sub")); # doesn't affect CF!
|
||||
|
||||
# edx == num here. Subtracting the pending borrow from tp[num] leaves CF
# set exactly when the whole (num+1)-word subtraction borrowed, i.e. tp < np.
&mov ("eax",&DWP(0,"esi","edx",4)); # upmost overflow bit
|
||||
&sbb ("eax",0);
|
||||
|
||||
# Branch-free selection of the result, driven by CF from the preceding sbb:
# CF set   -> rp[i] = tp[i]  (the subtraction borrowed, keep tp)
# CF clear -> rp[i] stays as the difference already stored there
# edx is reset with mov rather than xor so that CF is preserved; mov,
# cmovc, lea and loop all leave CF untouched inside the loop.
&mov ("ecx","edx"); # num
|
||||
&mov ("edx",0); # i=0
|
||||
|
||||
&set_label("copy",8);
|
||||
&mov ("ebx",&DWP(0,"esi","edx",4));
|
||||
&mov ("eax",&DWP(0,"edi","edx",4));
|
||||
# tp[i] is overwritten with the current loop counter (ecx), not with zero;
# either way the intermediate value is destroyed.
&mov (&DWP(0,"esi","edx",4),"ecx"); # zap tp
|
||||
&cmovc ("eax","ebx");
|
||||
&mov (&DWP(0,"edi","edx",4),"eax");
|
||||
&lea ("edx",&DWP(1,"edx")); # i++
|
||||
&loop (&label("copy"));
|
||||
|
||||
# Wipe the frame, restore the stack and return 1.
# The caller's esp is fetched from its slot first, because the frame zap
# below overwrites that slot.
&mov ("ebp",$sp);
|
||||
&xor ("eax","eax");
|
||||
|
||||
# First 64 bytes of the frame: parameter slots, saved rp/esp, scratch area.
&mov ("ecx",64/4);
|
||||
&mov ("edi","esp"); # zap frame including scratch area
|
||||
&data_byte(0xf3,0xab); # rep stosl, bzero
|
||||
|
||||
# zap copies of ap, bp and np
|
||||
# edx == num after the copy loop, so edi = esp+64+$pad+4*num (start of the
# ap copy) and ecx = 3*$pad/4 + 3*num dwords, i.e. three padded vectors.
# NOTE(review): the $pad bytes that follow tp (including tp[num], read
# earlier as the overflow word) are not rewritten here or in the copy
# loop -- confirm whether that is intentional.
&lea ("edi",&DWP(64+$pad,"esp","edx",4));# pointer to ap
|
||||
&lea ("ecx",&DWP(3*$pad/4,"edx","edx",2));
|
||||
&data_byte(0xf3,0xab); # rep stosl, bzero
|
||||
|
||||
# eax was zeroed above, so the increment yields 1; popf pairs with the pushf
# executed after the size checks. The early exits jump straight to "leave",
# bypassing both.
&mov ("esp","ebp");
|
||||
&inc ("eax"); # signal "done"
|
||||
&popf ();
|
||||
&set_label("leave");
|
||||
&function_end($func);
|
||||
|
||||
&asciz("Padlock Montgomery Multiplication, CRYPTOGAMS by <appro\@openssl.org>");
|
||||
|
||||
&asm_finish();
|
||||
|
||||
# Closing STDOUT flushes the generated assembly. Check the result so that a
# failed write (e.g. disk full) aborts the build instead of silently leaving
# a truncated output file behind.
close STDOUT or die "error closing STDOUT: $!";
|
384
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/vis3-mont.pl
vendored
Normal file
384
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/vis3-mont.pl
vendored
Normal file
|
@ -0,0 +1,384 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2012-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
|
||||
# October 2012.
|
||||
#
|
||||
# SPARCv9 VIS3 Montgomery multiplication procedure suitable for T3 and
|
||||
# onward. There are three new instructions used here: umulxhi,
|
||||
# addxc[cc] and initializing store. On T3 RSA private key operations
|
||||
# are 1.54/1.87/2.11/2.26 times faster for 512/1024/2048/4096-bit key
|
||||
# lengths. This is without dedicated squaring procedure. On T4
|
||||
# corresponding coefficients are 1.47/2.10/2.80/2.90x, which is mostly
|
||||
# for reference purposes, because T4 has dedicated Montgomery
|
||||
# multiplication and squaring *instructions* that deliver even more.
|
||||
|
||||
$output = pop;
|
||||
open STDOUT,">$output";
|
||||
|
||||
$frame = "STACK_FRAME";
|
||||
$bias = "STACK_BIAS";
|
||||
|
||||
$code.=<<___;
|
||||
#include "sparc_arch.h"
|
||||
|
||||
#ifdef __arch64__
|
||||
.register %g2,#scratch
|
||||
.register %g3,#scratch
|
||||
#endif
|
||||
|
||||
.section ".text",#alloc,#execinstr
|
||||
___
|
||||
|
||||
($n0,$m0,$m1,$lo0,$hi0, $lo1,$hi1,$aj,$alo,$nj,$nlo,$tj)=
|
||||
(map("%g$_",(1..5)),map("%o$_",(0..5,7)));
|
||||
|
||||
# int bn_mul_mont(
|
||||
$rp="%o0"; # BN_ULONG *rp,
|
||||
$ap="%o1"; # const BN_ULONG *ap,
|
||||
$bp="%o2"; # const BN_ULONG *bp,
|
||||
$np="%o3"; # const BN_ULONG *np,
|
||||
$n0p="%o4"; # const BN_ULONG *n0,
|
||||
$num="%o5"; # int num); # caller ensures that num is even
|
||||
# and >=6
|
||||
$code.=<<___;
|
||||
.globl bn_mul_mont_vis3
|
||||
.align 32
|
||||
bn_mul_mont_vis3:
|
||||
add %sp, $bias, %g4 ! real top of stack
|
||||
sll $num, 2, $num ! size in bytes
|
||||
add $num, 63, %g5
|
||||
andn %g5, 63, %g5 ! buffer size rounded up to 64 bytes
|
||||
add %g5, %g5, %g1
|
||||
add %g5, %g1, %g1 ! 3*buffer size
|
||||
sub %g4, %g1, %g1
|
||||
andn %g1, 63, %g1 ! align at 64 byte
|
||||
sub %g1, $frame, %g1 ! new top of stack
|
||||
sub %g1, %g4, %g1
|
||||
|
||||
save %sp, %g1, %sp
|
||||
___
|
||||
|
||||
# +-------------------------------+<----- %sp
|
||||
# . .
|
||||
# +-------------------------------+<----- aligned at 64 bytes
|
||||
# | __int64 tmp[0] |
|
||||
# +-------------------------------+
|
||||
# . .
|
||||
# . .
|
||||
# +-------------------------------+<----- aligned at 64 bytes
|
||||
# | __int64 ap[1..0] | converted ap[]
|
||||
# +-------------------------------+
|
||||
# | __int64 np[1..0] | converted np[]
|
||||
# +-------------------------------+
|
||||
# | __int64 ap[3..2] |
|
||||
# . .
|
||||
# . .
|
||||
# +-------------------------------+
|
||||
($rp,$ap,$bp,$np,$n0p,$num)=map("%i$_",(0..5));
|
||||
($t0,$t1,$t2,$t3,$cnt,$tp,$bufsz,$anp)=map("%l$_",(0..7));
|
||||
($ovf,$i)=($t0,$t1);
|
||||
$code.=<<___;
|
||||
ld [$n0p+0], $t0 ! pull n0[0..1] value
|
||||
add %sp, $bias+$frame, $tp
|
||||
ld [$n0p+4], $t1
|
||||
add $tp, %g5, $anp
|
||||
ld [$bp+0], $t2 ! m0=bp[0]
|
||||
sllx $t1, 32, $n0
|
||||
ld [$bp+4], $t3
|
||||
or $t0, $n0, $n0
|
||||
add $bp, 8, $bp
|
||||
|
||||
ld [$ap+0], $t0 ! ap[0]
|
||||
sllx $t3, 32, $m0
|
||||
ld [$ap+4], $t1
|
||||
or $t2, $m0, $m0
|
||||
|
||||
ld [$ap+8], $t2 ! ap[1]
|
||||
sllx $t1, 32, $aj
|
||||
ld [$ap+12], $t3
|
||||
or $t0, $aj, $aj
|
||||
add $ap, 16, $ap
|
||||
stx $aj, [$anp] ! converted ap[0]
|
||||
|
||||
mulx $aj, $m0, $lo0 ! ap[0]*bp[0]
|
||||
umulxhi $aj, $m0, $hi0
|
||||
|
||||
ld [$np+0], $t0 ! np[0]
|
||||
sllx $t3, 32, $aj
|
||||
ld [$np+4], $t1
|
||||
or $t2, $aj, $aj
|
||||
|
||||
ld [$np+8], $t2 ! np[1]
|
||||
sllx $t1, 32, $nj
|
||||
ld [$np+12], $t3
|
||||
or $t0, $nj, $nj
|
||||
add $np, 16, $np
|
||||
stx $nj, [$anp+8] ! converted np[0]
|
||||
|
||||
mulx $lo0, $n0, $m1 ! "tp[0]"*n0
|
||||
stx $aj, [$anp+16] ! converted ap[1]
|
||||
|
||||
mulx $aj, $m0, $alo ! ap[1]*bp[0]
|
||||
umulxhi $aj, $m0, $aj ! ahi=aj
|
||||
|
||||
mulx $nj, $m1, $lo1 ! np[0]*m1
|
||||
umulxhi $nj, $m1, $hi1
|
||||
|
||||
sllx $t3, 32, $nj
|
||||
or $t2, $nj, $nj
|
||||
stx $nj, [$anp+24] ! converted np[1]
|
||||
add $anp, 32, $anp
|
||||
|
||||
addcc $lo0, $lo1, $lo1
|
||||
addxc %g0, $hi1, $hi1
|
||||
|
||||
mulx $nj, $m1, $nlo ! np[1]*m1
|
||||
umulxhi $nj, $m1, $nj ! nhi=nj
|
||||
|
||||
ba .L1st
|
||||
sub $num, 24, $cnt ! cnt=num-3
|
||||
|
||||
.align 16
|
||||
.L1st:
|
||||
ld [$ap+0], $t0 ! ap[j]
|
||||
addcc $alo, $hi0, $lo0
|
||||
ld [$ap+4], $t1
|
||||
addxc $aj, %g0, $hi0
|
||||
|
||||
sllx $t1, 32, $aj
|
||||
add $ap, 8, $ap
|
||||
or $t0, $aj, $aj
|
||||
stx $aj, [$anp] ! converted ap[j]
|
||||
|
||||
ld [$np+0], $t2 ! np[j]
|
||||
addcc $nlo, $hi1, $lo1
|
||||
ld [$np+4], $t3
|
||||
addxc $nj, %g0, $hi1 ! nhi=nj
|
||||
|
||||
sllx $t3, 32, $nj
|
||||
add $np, 8, $np
|
||||
mulx $aj, $m0, $alo ! ap[j]*bp[0]
|
||||
or $t2, $nj, $nj
|
||||
umulxhi $aj, $m0, $aj ! ahi=aj
|
||||
stx $nj, [$anp+8] ! converted np[j]
|
||||
add $anp, 16, $anp ! anp++
|
||||
|
||||
mulx $nj, $m1, $nlo ! np[j]*m1
|
||||
addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0]
|
||||
umulxhi $nj, $m1, $nj ! nhi=nj
|
||||
addxc %g0, $hi1, $hi1
|
||||
stx $lo1, [$tp] ! tp[j-1]
|
||||
add $tp, 8, $tp ! tp++
|
||||
|
||||
brnz,pt $cnt, .L1st
|
||||
sub $cnt, 8, $cnt ! j--
|
||||
!.L1st
|
||||
addcc $alo, $hi0, $lo0
|
||||
addxc $aj, %g0, $hi0 ! ahi=aj
|
||||
|
||||
addcc $nlo, $hi1, $lo1
|
||||
addxc $nj, %g0, $hi1
|
||||
addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0]
|
||||
addxc %g0, $hi1, $hi1
|
||||
stx $lo1, [$tp] ! tp[j-1]
|
||||
add $tp, 8, $tp
|
||||
|
||||
addcc $hi0, $hi1, $hi1
|
||||
addxc %g0, %g0, $ovf ! upmost overflow bit
|
||||
stx $hi1, [$tp]
|
||||
add $tp, 8, $tp
|
||||
|
||||
ba .Louter
|
||||
sub $num, 16, $i ! i=num-2
|
||||
|
||||
.align 16
|
||||
.Louter:
|
||||
ld [$bp+0], $t2 ! m0=bp[i]
|
||||
ld [$bp+4], $t3
|
||||
|
||||
sub $anp, $num, $anp ! rewind
|
||||
sub $tp, $num, $tp
|
||||
sub $anp, $num, $anp
|
||||
|
||||
add $bp, 8, $bp
|
||||
sllx $t3, 32, $m0
|
||||
ldx [$anp+0], $aj ! ap[0]
|
||||
or $t2, $m0, $m0
|
||||
ldx [$anp+8], $nj ! np[0]
|
||||
|
||||
mulx $aj, $m0, $lo0 ! ap[0]*bp[i]
|
||||
ldx [$tp], $tj ! tp[0]
|
||||
umulxhi $aj, $m0, $hi0
|
||||
ldx [$anp+16], $aj ! ap[1]
|
||||
addcc $lo0, $tj, $lo0 ! ap[0]*bp[i]+tp[0]
|
||||
mulx $aj, $m0, $alo ! ap[1]*bp[i]
|
||||
addxc %g0, $hi0, $hi0
|
||||
mulx $lo0, $n0, $m1 ! tp[0]*n0
|
||||
umulxhi $aj, $m0, $aj ! ahi=aj
|
||||
mulx $nj, $m1, $lo1 ! np[0]*m1
|
||||
umulxhi $nj, $m1, $hi1
|
||||
ldx [$anp+24], $nj ! np[1]
|
||||
add $anp, 32, $anp
|
||||
addcc $lo1, $lo0, $lo1
|
||||
mulx $nj, $m1, $nlo ! np[1]*m1
|
||||
addxc %g0, $hi1, $hi1
|
||||
umulxhi $nj, $m1, $nj ! nhi=nj
|
||||
|
||||
ba .Linner
|
||||
sub $num, 24, $cnt ! cnt=num-3
|
||||
.align 16
|
||||
.Linner:
|
||||
addcc $alo, $hi0, $lo0
|
||||
ldx [$tp+8], $tj ! tp[j]
|
||||
addxc $aj, %g0, $hi0 ! ahi=aj
|
||||
ldx [$anp+0], $aj ! ap[j]
|
||||
addcc $nlo, $hi1, $lo1
|
||||
mulx $aj, $m0, $alo ! ap[j]*bp[i]
|
||||
addxc $nj, %g0, $hi1 ! nhi=nj
|
||||
ldx [$anp+8], $nj ! np[j]
|
||||
add $anp, 16, $anp
|
||||
umulxhi $aj, $m0, $aj ! ahi=aj
|
||||
addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j]
|
||||
mulx $nj, $m1, $nlo ! np[j]*m1
|
||||
addxc %g0, $hi0, $hi0
|
||||
umulxhi $nj, $m1, $nj ! nhi=nj
|
||||
addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j]
|
||||
addxc %g0, $hi1, $hi1
|
||||
stx $lo1, [$tp] ! tp[j-1]
|
||||
add $tp, 8, $tp
|
||||
brnz,pt $cnt, .Linner
|
||||
sub $cnt, 8, $cnt
|
||||
!.Linner
|
||||
ldx [$tp+8], $tj ! tp[j]
|
||||
addcc $alo, $hi0, $lo0
|
||||
addxc $aj, %g0, $hi0 ! ahi=aj
|
||||
addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j]
|
||||
addxc %g0, $hi0, $hi0
|
||||
|
||||
addcc $nlo, $hi1, $lo1
|
||||
addxc $nj, %g0, $hi1 ! nhi=nj
|
||||
addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j]
|
||||
addxc %g0, $hi1, $hi1
|
||||
stx $lo1, [$tp] ! tp[j-1]
|
||||
|
||||
subcc %g0, $ovf, %g0 ! move upmost overflow to CCR.xcc
|
||||
addxccc $hi1, $hi0, $hi1
|
||||
addxc %g0, %g0, $ovf
|
||||
stx $hi1, [$tp+8]
|
||||
add $tp, 16, $tp
|
||||
|
||||
brnz,pt $i, .Louter
|
||||
sub $i, 8, $i
|
||||
|
||||
sub $anp, $num, $anp ! rewind
|
||||
sub $tp, $num, $tp
|
||||
sub $anp, $num, $anp
|
||||
ba .Lsub
|
||||
subcc $num, 8, $cnt ! cnt=num-1 and clear CCR.xcc
|
||||
|
||||
.align 16
|
||||
.Lsub:
|
||||
ldx [$tp], $tj
|
||||
add $tp, 8, $tp
|
||||
ldx [$anp+8], $nj
|
||||
add $anp, 16, $anp
|
||||
subccc $tj, $nj, $t2 ! tp[j]-np[j]
|
||||
srlx $tj, 32, $tj
|
||||
srlx $nj, 32, $nj
|
||||
subccc $tj, $nj, $t3
|
||||
add $rp, 8, $rp
|
||||
st $t2, [$rp-4] ! reverse order
|
||||
st $t3, [$rp-8]
|
||||
brnz,pt $cnt, .Lsub
|
||||
sub $cnt, 8, $cnt
|
||||
|
||||
sub $anp, $num, $anp ! rewind
|
||||
sub $tp, $num, $tp
|
||||
sub $anp, $num, $anp
|
||||
sub $rp, $num, $rp
|
||||
|
||||
subccc $ovf, %g0, $ovf ! handle upmost overflow bit
|
||||
ba .Lcopy
|
||||
sub $num, 8, $cnt
|
||||
|
||||
.align 16
|
||||
.Lcopy: ! conditional copy
|
||||
ld [$tp+0], $t0
|
||||
ld [$tp+4], $t1
|
||||
ld [$rp+0], $t2
|
||||
ld [$rp+4], $t3
|
||||
stx %g0, [$tp] ! zap
|
||||
add $tp, 8, $tp
|
||||
stx %g0, [$anp] ! zap
|
||||
stx %g0, [$anp+8]
|
||||
add $anp, 16, $anp
|
||||
movcs %icc, $t0, $t2
|
||||
movcs %icc, $t1, $t3
|
||||
st $t3, [$rp+0] ! flip order
|
||||
st $t2, [$rp+4]
|
||||
add $rp, 8, $rp
|
||||
brnz $cnt, .Lcopy
|
||||
sub $cnt, 8, $cnt
|
||||
|
||||
mov 1, %o0
|
||||
ret
|
||||
restore
|
||||
.type bn_mul_mont_vis3, #function
|
||||
.size bn_mul_mont_vis3, .-bn_mul_mont_vis3
|
||||
.asciz "Montgomery Multiplication for SPARCv9 VIS3, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 4
|
||||
___
|
||||
|
||||
# Purpose of these subroutines is to explicitly encode VIS instructions,
|
||||
# so that one can compile the module without having to specify VIS
|
||||
# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
|
||||
# The idea is to keep open the option of producing a "universal" binary and
|
||||
# let the programmer detect at run-time whether the current CPU is VIS capable.
|
||||
# unvis3($mnemonic,$rs1,$rs2,$rd)
# Turns one three-register instruction into a raw ".word" directive so the
# assembler does not have to know the mnemonic.
# Returns the ".word" line, with the original instruction text appended as
# a "!" comment, when $mnemonic is addxc, addxccc or umulxhi and all three
# operands look like %g/%o/%l/%i registers. Otherwise returns the
# instruction text unchanged. Any other mnemonic (for example "addxcc",
# which the caller's pattern also matches) therefore passes through as
# plain text.
sub unvis3 {
|
||||
my ($mnemonic,$rs1,$rs2,$rd)=@_;
|
||||
# Number of the first register in each bank: %g0=0, %o0=8, %l0=16, %i0=24.
my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
|
||||
my ($ref,$opf);
|
||||
# opf field value for each supported mnemonic.
my %visopf = ( "addxc" => 0x011,
|
||||
"addxccc" => 0x013,
|
||||
"umulxhi" => 0x016 );
|
||||
|
||||
# Textual form: returned as-is on fallback, used as the comment otherwise.
$ref = "$mnemonic\t$rs1,$rs2,$rd";
|
||||
|
||||
if ($opf=$visopf{$mnemonic}) {
|
||||
# foreach aliases $_ to each operand, so the assignment below replaces the
# register name with its number in place.
foreach ($rs1,$rs2,$rd) {
|
||||
return $ref if (!/%([goli])([0-9])/);
|
||||
$_=$bias{$1}+$2;
|
||||
}
|
||||
|
||||
# Fields are OR-ed into the base word 0x81b00000: rd at bit 25, rs1 at
# bit 14, opf at bit 5, rs2 at bit 0 (<< binds tighter than |).
return sprintf ".word\t0x%08x !%s",
|
||||
0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
|
||||
$ref;
|
||||
} else {
|
||||
return $ref;
|
||||
}
|
||||
}
|
||||
|
||||
# Post-process the accumulated $code line by line and print the result.
foreach (split("\n",$code)) {
|
||||
# Replace every `...` span with the result of evaluating it as Perl.
s/\`([^\`]*)\`/eval $1/ge;
|
||||
|
||||
# Hand umulxhi/addxc/addxcc/addxccc instructions with three register
# operands to unvis3, which returns either a .word encoding or the original
# text.
s/\b(umulxhi|addxc[c]{0,2})\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
|
||||
&unvis3($1,$2,$3,$4)
|
||||
/ge;
|
||||
|
||||
print $_,"\n";
|
||||
}
|
||||
|
||||
# Closing STDOUT flushes the generated assembly. Check the result so that a
# failed write (e.g. disk full) aborts the build instead of silently leaving
# a truncated output file behind.
close STDOUT or die "error closing STDOUT: $!";
|
325
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86-gf2m.pl
vendored
Normal file
325
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86-gf2m.pl
vendored
Normal file
|
@ -0,0 +1,325 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
#
|
||||
# May 2011
|
||||
#
|
||||
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
|
||||
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
|
||||
# the time being... Except that it has three code paths: pure integer
|
||||
# code suitable for any x86 CPU, MMX code suitable for PIII and later
|
||||
# and PCLMULQDQ suitable for Westmere and later. Improvement varies
|
||||
# from one benchmark and µ-arch to another. Below are interval values
|
||||
# for 163- and 571-bit ECDH benchmarks relative to compiler-generated
|
||||
# code:
|
||||
#
|
||||
# PIII 16%-30%
|
||||
# P4 12%-12%
|
||||
# Opteron 18%-40%
|
||||
# Core2 19%-44%
|
||||
# Atom 38%-64%
|
||||
# Westmere 53%-121%(PCLMULQDQ)/20%-32%(MMX)
|
||||
# Sandy Bridge 72%-127%(PCLMULQDQ)/27%-23%(MMX)
|
||||
#
|
||||
# Note that above improvement coefficients are not coefficients for
|
||||
# bn_GF2m_mul_2x2 itself. For example 120% ECDH improvement is result
|
||||
# of bn_GF2m_mul_2x2 being >4x faster. As it gets faster, benchmark
|
||||
# is more and more dominated by other subroutines, most notably by
|
||||
# BN_GF2m_mod[_mul]_arr...
|
||||
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
push(@INC,"${dir}","${dir}../../perlasm");
|
||||
require "x86asm.pl";
|
||||
|
||||
$output = pop;
|
||||
# Stop immediately if the output file cannot be created; otherwise every
# subsequent print would be discarded without any diagnostic.
open STDOUT,">$output" or die "can't open $output: $!";
|
||||
|
||||
&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386");
|
||||
|
||||
$sse2=0;
|
||||
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
||||
|
||||
&external_label("OPENSSL_ia32cap_P") if ($sse2);
|
||||
|
||||
# Register allocation used by the _mul_1x1_* helpers below.
# $a and $b carry the two 32-bit inputs; bn_GF2m_mul_2x2 loads them before
# each call.
$a="eax";
|
||||
$b="ebx";
|
||||
# Scratch registers for the a1/a2/a4 values that are combined and stored
# into the 8-entry lookup table on the stack.
($a1,$a2,$a4)=("ecx","edx","ebp");
|
||||
|
||||
# MMX path: $R receives the result of _mul_1x1_mmx.
$R="mm0";
|
||||
# Temporaries rotated inside the unrolled loop.
@T=("mm1","mm2");
|
||||
# $A/$B are MMX copies of $a/$b; $B30/$B31 hold the "broadcast 30th/31st
# bit" masks. NOTE: $A and $T[1] both name mm2.
($A,$B,$B30,$B31)=("mm2","mm3","mm4","mm5");
|
||||
# Index registers for the stack-table lookups; rotated inside the loops.
@i=("esi","edi");
|
||||
|
||||
if (!$x86only) {
|
||||
&function_begin_B("_mul_1x1_mmx");
|
||||
&sub ("esp",32+4);
|
||||
&mov ($a1,$a);
|
||||
&lea ($a2,&DWP(0,$a,$a));
|
||||
&and ($a1,0x3fffffff);
|
||||
&lea ($a4,&DWP(0,$a2,$a2));
|
||||
&mov (&DWP(0*4,"esp"),0);
|
||||
&and ($a2,0x7fffffff);
|
||||
&movd ($A,$a);
|
||||
&movd ($B,$b);
|
||||
&mov (&DWP(1*4,"esp"),$a1); # a1
|
||||
&xor ($a1,$a2); # a1^a2
|
||||
&pxor ($B31,$B31);
|
||||
&pxor ($B30,$B30);
|
||||
&mov (&DWP(2*4,"esp"),$a2); # a2
|
||||
&xor ($a2,$a4); # a2^a4
|
||||
&mov (&DWP(3*4,"esp"),$a1); # a1^a2
|
||||
&pcmpgtd($B31,$A); # broadcast 31st bit
|
||||
&paddd ($A,$A); # $A<<=1
|
||||
&xor ($a1,$a2); # a1^a4=a1^a2^a2^a4
|
||||
&mov (&DWP(4*4,"esp"),$a4); # a4
|
||||
&xor ($a4,$a2); # a2=a4^a2^a4
|
||||
&pand ($B31,$B);
|
||||
&pcmpgtd($B30,$A); # broadcast 30th bit
|
||||
&mov (&DWP(5*4,"esp"),$a1); # a1^a4
|
||||
&xor ($a4,$a1); # a1^a2^a4
|
||||
&psllq ($B31,31);
|
||||
&pand ($B30,$B);
|
||||
&mov (&DWP(6*4,"esp"),$a2); # a2^a4
|
||||
&mov (@i[0],0x7);
|
||||
&mov (&DWP(7*4,"esp"),$a4); # a1^a2^a4
|
||||
&mov ($a4,@i[0]);
|
||||
&and (@i[0],$b);
|
||||
&shr ($b,3);
|
||||
&mov (@i[1],$a4);
|
||||
&psllq ($B30,30);
|
||||
&and (@i[1],$b);
|
||||
&shr ($b,3);
|
||||
&movd ($R,&DWP(0,"esp",@i[0],4));
|
||||
&mov (@i[0],$a4);
|
||||
&and (@i[0],$b);
|
||||
&shr ($b,3);
|
||||
for($n=1;$n<9;$n++) {
|
||||
&movd (@T[1],&DWP(0,"esp",@i[1],4));
|
||||
&mov (@i[1],$a4);
|
||||
&psllq (@T[1],3*$n);
|
||||
&and (@i[1],$b);
|
||||
&shr ($b,3);
|
||||
&pxor ($R,@T[1]);
|
||||
|
||||
push(@i,shift(@i)); push(@T,shift(@T));
|
||||
}
|
||||
&movd (@T[1],&DWP(0,"esp",@i[1],4));
|
||||
&pxor ($R,$B30);
|
||||
&psllq (@T[1],3*$n++);
|
||||
&pxor ($R,@T[1]);
|
||||
|
||||
&movd (@T[0],&DWP(0,"esp",@i[0],4));
|
||||
&pxor ($R,$B31);
|
||||
&psllq (@T[0],3*$n);
|
||||
&add ("esp",32+4);
|
||||
&pxor ($R,@T[0]);
|
||||
&ret ();
|
||||
&function_end_B("_mul_1x1_mmx");
|
||||
}
|
||||
|
||||
($lo,$hi)=("eax","edx");
|
||||
@T=("ecx","ebp");
|
||||
|
||||
&function_begin_B("_mul_1x1_ialu");
|
||||
&sub ("esp",32+4);
|
||||
&mov ($a1,$a);
|
||||
&lea ($a2,&DWP(0,$a,$a));
|
||||
&lea ($a4,&DWP(0,"",$a,4));
|
||||
&and ($a1,0x3fffffff);
|
||||
&lea (@i[1],&DWP(0,$lo,$lo));
|
||||
&sar ($lo,31); # broadcast 31st bit
|
||||
&mov (&DWP(0*4,"esp"),0);
|
||||
&and ($a2,0x7fffffff);
|
||||
&mov (&DWP(1*4,"esp"),$a1); # a1
|
||||
&xor ($a1,$a2); # a1^a2
|
||||
&mov (&DWP(2*4,"esp"),$a2); # a2
|
||||
&xor ($a2,$a4); # a2^a4
|
||||
&mov (&DWP(3*4,"esp"),$a1); # a1^a2
|
||||
&xor ($a1,$a2); # a1^a4=a1^a2^a2^a4
|
||||
&mov (&DWP(4*4,"esp"),$a4); # a4
|
||||
&xor ($a4,$a2); # a2=a4^a2^a4
|
||||
&mov (&DWP(5*4,"esp"),$a1); # a1^a4
|
||||
&xor ($a4,$a1); # a1^a2^a4
|
||||
&sar (@i[1],31); # broadcast 30th bit
|
||||
&and ($lo,$b);
|
||||
&mov (&DWP(6*4,"esp"),$a2); # a2^a4
|
||||
&and (@i[1],$b);
|
||||
&mov (&DWP(7*4,"esp"),$a4); # a1^a2^a4
|
||||
&mov ($hi,$lo);
|
||||
&shl ($lo,31);
|
||||
&mov (@T[0],@i[1]);
|
||||
&shr ($hi,1);
|
||||
|
||||
&mov (@i[0],0x7);
|
||||
&shl (@i[1],30);
|
||||
&and (@i[0],$b);
|
||||
&shr (@T[0],2);
|
||||
&xor ($lo,@i[1]);
|
||||
|
||||
&shr ($b,3);
|
||||
&mov (@i[1],0x7); # 5-byte instruction!?
|
||||
&and (@i[1],$b);
|
||||
&shr ($b,3);
|
||||
&xor ($hi,@T[0]);
|
||||
&xor ($lo,&DWP(0,"esp",@i[0],4));
|
||||
&mov (@i[0],0x7);
|
||||
&and (@i[0],$b);
|
||||
&shr ($b,3);
|
||||
for($n=1;$n<9;$n++) {
|
||||
&mov (@T[1],&DWP(0,"esp",@i[1],4));
|
||||
&mov (@i[1],0x7);
|
||||
&mov (@T[0],@T[1]);
|
||||
&shl (@T[1],3*$n);
|
||||
&and (@i[1],$b);
|
||||
&shr (@T[0],32-3*$n);
|
||||
&xor ($lo,@T[1]);
|
||||
&shr ($b,3);
|
||||
&xor ($hi,@T[0]);
|
||||
|
||||
push(@i,shift(@i)); push(@T,shift(@T));
|
||||
}
|
||||
&mov (@T[1],&DWP(0,"esp",@i[1],4));
|
||||
&mov (@T[0],@T[1]);
|
||||
&shl (@T[1],3*$n);
|
||||
&mov (@i[1],&DWP(0,"esp",@i[0],4));
|
||||
&shr (@T[0],32-3*$n); $n++;
|
||||
&mov (@i[0],@i[1]);
|
||||
&xor ($lo,@T[1]);
|
||||
&shl (@i[1],3*$n);
|
||||
&xor ($hi,@T[0]);
|
||||
&shr (@i[0],32-3*$n);
|
||||
&xor ($lo,@i[1]);
|
||||
&xor ($hi,@i[0]);
|
||||
|
||||
&add ("esp",32+4);
|
||||
&ret ();
|
||||
&function_end_B("_mul_1x1_ialu");
|
||||
|
||||
# void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0);
|
||||
&function_begin_B("bn_GF2m_mul_2x2");
|
||||
if (!$x86only) {
|
||||
&picmeup("edx","OPENSSL_ia32cap_P");
|
||||
&mov ("eax",&DWP(0,"edx"));
|
||||
&mov ("edx",&DWP(4,"edx"));
|
||||
&test ("eax",1<<23); # check MMX bit
|
||||
&jz (&label("ialu"));
|
||||
if ($sse2) {
|
||||
&test ("eax",1<<24); # check FXSR bit
|
||||
&jz (&label("mmx"));
|
||||
&test ("edx",1<<1); # check PCLMULQDQ bit
|
||||
&jz (&label("mmx"));
|
||||
|
||||
&movups ("xmm0",&QWP(8,"esp"));
|
||||
&shufps ("xmm0","xmm0",0b10110001);
|
||||
&pclmulqdq ("xmm0","xmm0",1);
|
||||
&mov ("eax",&DWP(4,"esp"));
|
||||
&movups (&QWP(0,"eax"),"xmm0");
|
||||
&ret ();
|
||||
|
||||
&set_label("mmx",16);
|
||||
}
|
||||
&push ("ebp");
|
||||
&push ("ebx");
|
||||
&push ("esi");
|
||||
&push ("edi");
|
||||
&mov ($a,&wparam(1));
|
||||
&mov ($b,&wparam(3));
|
||||
&call ("_mul_1x1_mmx"); # a1·b1
|
||||
&movq ("mm7",$R);
|
||||
|
||||
&mov ($a,&wparam(2));
|
||||
&mov ($b,&wparam(4));
|
||||
&call ("_mul_1x1_mmx"); # a0·b0
|
||||
&movq ("mm6",$R);
|
||||
|
||||
&mov ($a,&wparam(1));
|
||||
&mov ($b,&wparam(3));
|
||||
&xor ($a,&wparam(2));
|
||||
&xor ($b,&wparam(4));
|
||||
&call ("_mul_1x1_mmx"); # (a0+a1)·(b0+b1)
|
||||
&pxor ($R,"mm7");
|
||||
&mov ($a,&wparam(0));
|
||||
&pxor ($R,"mm6"); # (a0+a1)·(b0+b1)-a1·b1-a0·b0
|
||||
|
||||
&movq ($A,$R);
|
||||
&psllq ($R,32);
|
||||
&pop ("edi");
|
||||
&psrlq ($A,32);
|
||||
&pop ("esi");
|
||||
&pxor ($R,"mm6");
|
||||
&pop ("ebx");
|
||||
&pxor ($A,"mm7");
|
||||
&movq (&QWP(0,$a),$R);
|
||||
&pop ("ebp");
|
||||
&movq (&QWP(8,$a),$A);
|
||||
&emms ();
|
||||
&ret ();
|
||||
&set_label("ialu",16);
|
||||
}
|
||||
&push ("ebp");
|
||||
&push ("ebx");
|
||||
&push ("esi");
|
||||
&push ("edi");
|
||||
&stack_push(4+1);
|
||||
|
||||
&mov ($a,&wparam(1));
|
||||
&mov ($b,&wparam(3));
|
||||
&call ("_mul_1x1_ialu"); # a1·b1
|
||||
&mov (&DWP(8,"esp"),$lo);
|
||||
&mov (&DWP(12,"esp"),$hi);
|
||||
|
||||
&mov ($a,&wparam(2));
|
||||
&mov ($b,&wparam(4));
|
||||
&call ("_mul_1x1_ialu"); # a0·b0
|
||||
&mov (&DWP(0,"esp"),$lo);
|
||||
&mov (&DWP(4,"esp"),$hi);
|
||||
|
||||
&mov ($a,&wparam(1));
|
||||
&mov ($b,&wparam(3));
|
||||
&xor ($a,&wparam(2));
|
||||
&xor ($b,&wparam(4));
|
||||
&call ("_mul_1x1_ialu"); # (a0+a1)·(b0+b1)
|
||||
|
||||
&mov ("ebp",&wparam(0));
|
||||
@r=("ebx","ecx","edi","esi");
|
||||
&mov (@r[0],&DWP(0,"esp"));
|
||||
&mov (@r[1],&DWP(4,"esp"));
|
||||
&mov (@r[2],&DWP(8,"esp"));
|
||||
&mov (@r[3],&DWP(12,"esp"));
|
||||
|
||||
&xor ($lo,$hi);
|
||||
&xor ($hi,@r[1]);
|
||||
&xor ($lo,@r[0]);
|
||||
&mov (&DWP(0,"ebp"),@r[0]);
|
||||
&xor ($hi,@r[2]);
|
||||
&mov (&DWP(12,"ebp"),@r[3]);
|
||||
&xor ($lo,@r[3]);
|
||||
&stack_pop(4+1);
|
||||
&xor ($hi,@r[3]);
|
||||
&pop ("edi");
|
||||
&xor ($lo,$hi);
|
||||
&pop ("esi");
|
||||
&mov (&DWP(8,"ebp"),$hi);
|
||||
&pop ("ebx");
|
||||
&mov (&DWP(4,"ebp"),$lo);
|
||||
&pop ("ebp");
|
||||
&ret ();
|
||||
&function_end_B("bn_GF2m_mul_2x2");
|
||||
|
||||
&asciz ("GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
|
||||
|
||||
&asm_finish();
|
||||
|
||||
# A failed close means the generated assembler was not fully written;
# abort so the build does not continue with a truncated file.
close STDOUT or die "error closing STDOUT: $!";
|
631
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86-mont.pl
vendored
Executable file
631
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86-mont.pl
vendored
Executable file
|
@ -0,0 +1,631 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2005-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
|
||||
# October 2005
|
||||
#
|
||||
# This is a "teaser" code, as it can be improved in several ways...
|
||||
# First of all non-SSE2 path should be implemented (yes, for now it
|
||||
# performs Montgomery multiplication/convolution only on SSE2-capable
|
||||
# CPUs such as P4, others fall down to original code). Then inner loop
|
||||
# can be unrolled and modulo-scheduled to improve ILP and possibly
|
||||
# moved to 128-bit XMM register bank (though it would require input
|
||||
# rearrangement and/or increase bus bandwidth utilization). Dedicated
|
||||
# squaring procedure should give further performance improvement...
|
||||
# Yet, for being draft, the code improves rsa512 *sign* benchmark by
|
||||
# 110%(!), rsa1024 one - by 70% and rsa4096 - by 20%:-)
|
||||
|
||||
# December 2006
|
||||
#
|
||||
# Modulo-scheduling SSE2 loops results in further 15-20% improvement.
|
||||
# Integer-only code [being equipped with dedicated squaring procedure]
|
||||
# gives ~40% on rsa512 sign benchmark...
|
||||
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
push(@INC,"${dir}","${dir}../../perlasm");
|
||||
require "x86asm.pl";
|
||||
|
||||
$output = pop;
|
||||
# Stop immediately if the output file cannot be created; otherwise every
# subsequent print would be discarded without any diagnostic.
open STDOUT,">$output" or die "can't open $output: $!";
|
||||
|
||||
&asm_init($ARGV[0]);
|
||||
|
||||
$sse2=0;
|
||||
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
||||
|
||||
&external_label("OPENSSL_ia32cap_P") if ($sse2);
|
||||
|
||||
&function_begin("bn_mul_mont");
|
||||
|
||||
$i="edx";
|
||||
$j="ecx";
|
||||
$ap="esi"; $tp="esi"; # overlapping variables!!!
|
||||
$rp="edi"; $bp="edi"; # overlapping variables!!!
|
||||
$np="ebp";
|
||||
$num="ebx";
|
||||
|
||||
$_num=&DWP(4*0,"esp"); # stack top layout
|
||||
$_rp=&DWP(4*1,"esp");
|
||||
$_ap=&DWP(4*2,"esp");
|
||||
$_bp=&DWP(4*3,"esp");
|
||||
$_np=&DWP(4*4,"esp");
|
||||
$_n0=&DWP(4*5,"esp"); $_n0q=&QWP(4*5,"esp");
|
||||
$_sp=&DWP(4*6,"esp");
|
||||
$_bpend=&DWP(4*7,"esp");
|
||||
$frame=32; # size of above frame rounded up to 16n
|
||||
|
||||
&xor ("eax","eax");
|
||||
&mov ("edi",&wparam(5)); # int num
|
||||
&cmp ("edi",4);
|
||||
&jl (&label("just_leave"));
|
||||
|
||||
&lea ("esi",&wparam(0)); # put aside pointer to argument block
|
||||
&lea ("edx",&wparam(1)); # load ap
|
||||
&add ("edi",2); # extra two words on top of tp
|
||||
&neg ("edi");
|
||||
&lea ("ebp",&DWP(-$frame,"esp","edi",4)); # future alloca($frame+4*(num+2))
|
||||
&neg ("edi");
|
||||
|
||||
# minimize cache contention by arranging 2K window between stack
|
||||
# pointer and ap argument [np is also position sensitive vector,
|
||||
# but it's assumed to be near ap, as it's allocated at ~same
|
||||
# time].
|
||||
&mov ("eax","ebp");
|
||||
&sub ("eax","edx");
|
||||
&and ("eax",2047);
|
||||
&sub ("ebp","eax"); # this aligns sp and ap modulo 2048
|
||||
|
||||
&xor ("edx","ebp");
|
||||
&and ("edx",2048);
|
||||
&xor ("edx",2048);
|
||||
&sub ("ebp","edx"); # this splits them apart modulo 4096
|
||||
|
||||
&and ("ebp",-64); # align to cache line
|
||||
|
||||
# An OS-agnostic version of __chkstk.
|
||||
#
|
||||
# Some OSes (Windows) insist on stack being "wired" to
|
||||
# physical memory in strictly sequential manner, i.e. if stack
|
||||
# allocation spans two pages, then reference to farthest one can
|
||||
# be punishable by SEGV. But page walking can do good even on
|
||||
# other OSes, because it guarantees that a villain thread hits
|
||||
# the guard page before it can do damage to an innocent one...
|
||||
&mov ("eax","esp");
|
||||
&sub ("eax","ebp");
|
||||
&and ("eax",-4096);
|
||||
&mov ("edx","esp"); # saved stack pointer!
|
||||
&lea ("esp",&DWP(0,"ebp","eax"));
|
||||
&mov ("eax",&DWP(0,"esp"));
|
||||
&cmp ("esp","ebp");
|
||||
&ja (&label("page_walk"));
|
||||
&jmp (&label("page_walk_done"));
|
||||
|
||||
&set_label("page_walk",16);
|
||||
&lea ("esp",&DWP(-4096,"esp"));
|
||||
&mov ("eax",&DWP(0,"esp"));
|
||||
&cmp ("esp","ebp");
|
||||
&ja (&label("page_walk"));
|
||||
&set_label("page_walk_done");
|
||||
|
||||
################################# load argument block...
|
||||
&mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
|
||||
&mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
|
||||
&mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp
|
||||
&mov ("ebp",&DWP(3*4,"esi"));# const BN_ULONG *np
|
||||
&mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0
|
||||
#&mov ("edi",&DWP(5*4,"esi"));# int num
|
||||
|
||||
&mov ("esi",&DWP(0,"esi")); # pull n0[0]
|
||||
&mov ($_rp,"eax"); # ... save a copy of argument block
|
||||
&mov ($_ap,"ebx");
|
||||
&mov ($_bp,"ecx");
|
||||
&mov ($_np,"ebp");
|
||||
&mov ($_n0,"esi");
|
||||
&lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling
|
||||
#&mov ($_num,$num); # redundant as $num is not reused
|
||||
&mov ($_sp,"edx"); # saved stack pointer!
|
||||
|
||||
if($sse2) {
|
||||
$acc0="mm0"; # mmx register bank layout
|
||||
$acc1="mm1";
|
||||
$car0="mm2";
|
||||
$car1="mm3";
|
||||
$mul0="mm4";
|
||||
$mul1="mm5";
|
||||
$temp="mm6";
|
||||
$mask="mm7";
|
||||
|
||||
&picmeup("eax","OPENSSL_ia32cap_P");
|
||||
&bt (&DWP(0,"eax"),26);
|
||||
&jnc (&label("non_sse2"));
|
||||
|
||||
&mov ("eax",-1);
|
||||
&movd ($mask,"eax"); # mask 32 lower bits
|
||||
|
||||
&mov ($ap,$_ap); # load input pointers
|
||||
&mov ($bp,$_bp);
|
||||
&mov ($np,$_np);
|
||||
|
||||
&xor ($i,$i); # i=0
|
||||
&xor ($j,$j); # j=0
|
||||
|
||||
&movd ($mul0,&DWP(0,$bp)); # bp[0]
|
||||
&movd ($mul1,&DWP(0,$ap)); # ap[0]
|
||||
&movd ($car1,&DWP(0,$np)); # np[0]
|
||||
|
||||
&pmuludq($mul1,$mul0); # ap[0]*bp[0]
|
||||
&movq ($car0,$mul1);
|
||||
&movq ($acc0,$mul1); # I wish movd worked for
|
||||
&pand ($acc0,$mask); # inter-register transfers
|
||||
|
||||
&pmuludq($mul1,$_n0q); # *=n0
|
||||
|
||||
&pmuludq($car1,$mul1); # "t[0]"*np[0]*n0
|
||||
&paddq ($car1,$acc0);
|
||||
|
||||
&movd ($acc1,&DWP(4,$np)); # np[1]
|
||||
&movd ($acc0,&DWP(4,$ap)); # ap[1]
|
||||
|
||||
&psrlq ($car0,32);
|
||||
&psrlq ($car1,32);
|
||||
|
||||
&inc ($j); # j++
|
||||
&set_label("1st",16);
|
||||
&pmuludq($acc0,$mul0); # ap[j]*bp[0]
|
||||
&pmuludq($acc1,$mul1); # np[j]*m1
|
||||
&paddq ($car0,$acc0); # +=c0
|
||||
&paddq ($car1,$acc1); # +=c1
|
||||
|
||||
&movq ($acc0,$car0);
|
||||
&pand ($acc0,$mask);
|
||||
&movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
|
||||
&paddq ($car1,$acc0); # +=ap[j]*bp[0];
|
||||
&movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
|
||||
&psrlq ($car0,32);
|
||||
&movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[j-1]=
|
||||
&psrlq ($car1,32);
|
||||
|
||||
&lea ($j,&DWP(1,$j));
|
||||
&cmp ($j,$num);
|
||||
&jl (&label("1st"));
|
||||
|
||||
&pmuludq($acc0,$mul0); # ap[num-1]*bp[0]
|
||||
&pmuludq($acc1,$mul1); # np[num-1]*m1
|
||||
&paddq ($car0,$acc0); # +=c0
|
||||
&paddq ($car1,$acc1); # +=c1
|
||||
|
||||
&movq ($acc0,$car0);
|
||||
&pand ($acc0,$mask);
|
||||
&paddq ($car1,$acc0); # +=ap[num-1]*bp[0];
|
||||
&movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
|
||||
|
||||
&psrlq ($car0,32);
|
||||
&psrlq ($car1,32);
|
||||
|
||||
&paddq ($car1,$car0);
|
||||
&movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
|
||||
|
||||
&inc ($i); # i++
|
||||
&set_label("outer");
|
||||
&xor ($j,$j); # j=0
|
||||
|
||||
&movd ($mul0,&DWP(0,$bp,$i,4)); # bp[i]
|
||||
&movd ($mul1,&DWP(0,$ap)); # ap[0]
|
||||
&movd ($temp,&DWP($frame,"esp")); # tp[0]
|
||||
&movd ($car1,&DWP(0,$np)); # np[0]
|
||||
&pmuludq($mul1,$mul0); # ap[0]*bp[i]
|
||||
|
||||
&paddq ($mul1,$temp); # +=tp[0]
|
||||
&movq ($acc0,$mul1);
|
||||
&movq ($car0,$mul1);
|
||||
&pand ($acc0,$mask);
|
||||
|
||||
&pmuludq($mul1,$_n0q); # *=n0
|
||||
|
||||
&pmuludq($car1,$mul1);
|
||||
&paddq ($car1,$acc0);
|
||||
|
||||
&movd ($temp,&DWP($frame+4,"esp")); # tp[1]
|
||||
&movd ($acc1,&DWP(4,$np)); # np[1]
|
||||
&movd ($acc0,&DWP(4,$ap)); # ap[1]
|
||||
|
||||
&psrlq ($car0,32);
|
||||
&psrlq ($car1,32);
|
||||
&paddq ($car0,$temp); # +=tp[1]
|
||||
|
||||
&inc ($j); # j++
|
||||
&dec ($num);
|
||||
&set_label("inner");
|
||||
&pmuludq($acc0,$mul0); # ap[j]*bp[i]
|
||||
&pmuludq($acc1,$mul1); # np[j]*m1
|
||||
&paddq ($car0,$acc0); # +=c0
|
||||
&paddq ($car1,$acc1); # +=c1
|
||||
|
||||
&movq ($acc0,$car0);
|
||||
&movd ($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1]
|
||||
&pand ($acc0,$mask);
|
||||
&movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
|
||||
&paddq ($car1,$acc0); # +=ap[j]*bp[i]+tp[j]
|
||||
&movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
|
||||
&psrlq ($car0,32);
|
||||
&movd (&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]=
|
||||
&psrlq ($car1,32);
|
||||
&paddq ($car0,$temp); # +=tp[j+1]
|
||||
|
||||
&dec ($num);
|
||||
&lea ($j,&DWP(1,$j)); # j++
|
||||
&jnz (&label("inner"));
|
||||
|
||||
&mov ($num,$j);
|
||||
&pmuludq($acc0,$mul0); # ap[num-1]*bp[i]
|
||||
&pmuludq($acc1,$mul1); # np[num-1]*m1
|
||||
&paddq ($car0,$acc0); # +=c0
|
||||
&paddq ($car1,$acc1); # +=c1
|
||||
|
||||
&movq ($acc0,$car0);
|
||||
&pand ($acc0,$mask);
|
||||
&paddq ($car1,$acc0); # +=ap[num-1]*bp[i]+tp[num-1]
|
||||
&movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
|
||||
&psrlq ($car0,32);
|
||||
&psrlq ($car1,32);
|
||||
|
||||
&movd ($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num]
|
||||
&paddq ($car1,$car0);
|
||||
&paddq ($car1,$temp);
|
||||
&movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
|
||||
|
||||
&lea ($i,&DWP(1,$i)); # i++
|
||||
&cmp ($i,$num);
|
||||
&jle (&label("outer"));
|
||||
|
||||
&emms (); # done with mmx bank
|
||||
&jmp (&label("common_tail"));
|
||||
|
||||
&set_label("non_sse2",16);
|
||||
}
|
||||
|
||||
if (0) {
|
||||
&mov ("esp",$_sp);
|
||||
&xor ("eax","eax"); # signal "not fast enough [yet]"
|
||||
&jmp (&label("just_leave"));
|
||||
# While the below code provides competitive performance for
|
||||
# all key lengths on modern Intel cores, it's still more
|
||||
# than 10% slower for 4096-bit key elsewhere:-( "Competitive"
|
||||
# means compared to the original integer-only assembler.
|
||||
# 512-bit RSA sign is better by ~40%, but that's about all
|
||||
# one can say about all CPUs...
|
||||
} else {
|
||||
$inp="esi"; # integer path uses these registers differently
|
||||
$word="edi";
|
||||
$carry="ebp";
|
||||
|
||||
&mov ($inp,$_ap);
|
||||
&lea ($carry,&DWP(1,$num));
|
||||
&mov ($word,$_bp);
|
||||
&xor ($j,$j); # j=0
|
||||
&mov ("edx",$inp);
|
||||
&and ($carry,1); # see if num is even
|
||||
&sub ("edx",$word); # see if ap==bp
|
||||
&lea ("eax",&DWP(4,$word,$num,4)); # &bp[num]
|
||||
&or ($carry,"edx");
|
||||
&mov ($word,&DWP(0,$word)); # bp[0]
|
||||
&jz (&label("bn_sqr_mont"));
|
||||
&mov ($_bpend,"eax");
|
||||
&mov ("eax",&DWP(0,$inp));
|
||||
&xor ("edx","edx");
|
||||
|
||||
&set_label("mull",16);
|
||||
&mov ($carry,"edx");
|
||||
&mul ($word); # ap[j]*bp[0]
|
||||
&add ($carry,"eax");
|
||||
&lea ($j,&DWP(1,$j));
|
||||
&adc ("edx",0);
|
||||
&mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
|
||||
&cmp ($j,$num);
|
||||
&mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
|
||||
&jl (&label("mull"));
|
||||
|
||||
&mov ($carry,"edx");
|
||||
&mul ($word); # ap[num-1]*bp[0]
|
||||
&mov ($word,$_n0);
|
||||
&add ("eax",$carry);
|
||||
&mov ($inp,$_np);
|
||||
&adc ("edx",0);
|
||||
&imul ($word,&DWP($frame,"esp")); # n0*tp[0]
|
||||
|
||||
&mov (&DWP($frame,"esp",$num,4),"eax"); # tp[num-1]=
|
||||
&xor ($j,$j);
|
||||
&mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
|
||||
&mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
|
||||
|
||||
&mov ("eax",&DWP(0,$inp)); # np[0]
|
||||
&mul ($word); # np[0]*m
|
||||
&add ("eax",&DWP($frame,"esp")); # +=tp[0]
|
||||
&mov ("eax",&DWP(4,$inp)); # np[1]
|
||||
&adc ("edx",0);
|
||||
&inc ($j);
|
||||
|
||||
&jmp (&label("2ndmadd"));
|
||||
|
||||
&set_label("1stmadd",16);
|
||||
&mov ($carry,"edx");
|
||||
&mul ($word); # ap[j]*bp[i]
|
||||
&add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
|
||||
&lea ($j,&DWP(1,$j));
|
||||
&adc ("edx",0);
|
||||
&add ($carry,"eax");
|
||||
&mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
|
||||
&adc ("edx",0);
|
||||
&cmp ($j,$num);
|
||||
&mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
|
||||
&jl (&label("1stmadd"));
|
||||
|
||||
&mov ($carry,"edx");
|
||||
&mul ($word); # ap[num-1]*bp[i]
|
||||
&add ("eax",&DWP($frame,"esp",$num,4)); # +=tp[num-1]
|
||||
&mov ($word,$_n0);
|
||||
&adc ("edx",0);
|
||||
&mov ($inp,$_np);
|
||||
&add ($carry,"eax");
|
||||
&adc ("edx",0);
|
||||
&imul ($word,&DWP($frame,"esp")); # n0*tp[0]
|
||||
|
||||
&xor ($j,$j);
|
||||
&add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
|
||||
&mov (&DWP($frame,"esp",$num,4),$carry); # tp[num-1]=
|
||||
&adc ($j,0);
|
||||
&mov ("eax",&DWP(0,$inp)); # np[0]
|
||||
&mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
|
||||
&mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
|
||||
|
||||
&mul ($word); # np[0]*m
|
||||
&add ("eax",&DWP($frame,"esp")); # +=tp[0]
|
||||
&mov ("eax",&DWP(4,$inp)); # np[1]
|
||||
&adc ("edx",0);
|
||||
&mov ($j,1);
|
||||
|
||||
&set_label("2ndmadd",16);
|
||||
&mov ($carry,"edx");
|
||||
&mul ($word); # np[j]*m
|
||||
&add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
|
||||
&lea ($j,&DWP(1,$j));
|
||||
&adc ("edx",0);
|
||||
&add ($carry,"eax");
|
||||
&mov ("eax",&DWP(0,$inp,$j,4)); # np[j+1]
|
||||
&adc ("edx",0);
|
||||
&cmp ($j,$num);
|
||||
&mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j-1]=
|
||||
&jl (&label("2ndmadd"));
|
||||
|
||||
&mov ($carry,"edx");
|
||||
&mul ($word); # np[j]*m
|
||||
&add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
|
||||
&adc ("edx",0);
|
||||
&add ($carry,"eax");
|
||||
&adc ("edx",0);
|
||||
&mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
|
||||
|
||||
&xor ("eax","eax");
|
||||
&mov ($j,$_bp); # &bp[i]
|
||||
&add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
|
||||
&adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
|
||||
&lea ($j,&DWP(4,$j));
|
||||
&mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
|
||||
&cmp ($j,$_bpend);
|
||||
&mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
|
||||
&je (&label("common_tail"));
|
||||
|
||||
&mov ($word,&DWP(0,$j)); # bp[i+1]
|
||||
&mov ($inp,$_ap);
|
||||
&mov ($_bp,$j); # &bp[++i]
|
||||
&xor ($j,$j);
|
||||
&xor ("edx","edx");
|
||||
&mov ("eax",&DWP(0,$inp));
|
||||
&jmp (&label("1stmadd"));
|
||||
|
||||
&set_label("bn_sqr_mont",16);
|
||||
$sbit=$num;
|
||||
&mov ($_num,$num);
|
||||
&mov ($_bp,$j); # i=0
|
||||
|
||||
&mov ("eax",$word); # ap[0]
|
||||
&mul ($word); # ap[0]*ap[0]
|
||||
&mov (&DWP($frame,"esp"),"eax"); # tp[0]=
|
||||
&mov ($sbit,"edx");
|
||||
&shr ("edx",1);
|
||||
&and ($sbit,1);
|
||||
&inc ($j);
|
||||
&set_label("sqr",16);
|
||||
&mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
|
||||
&mov ($carry,"edx");
|
||||
&mul ($word); # ap[j]*ap[0]
|
||||
&add ("eax",$carry);
|
||||
&lea ($j,&DWP(1,$j));
|
||||
&adc ("edx",0);
|
||||
&lea ($carry,&DWP(0,$sbit,"eax",2));
|
||||
&shr ("eax",31);
|
||||
&cmp ($j,$_num);
|
||||
&mov ($sbit,"eax");
|
||||
&mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
|
||||
&jl (&label("sqr"));
|
||||
|
||||
&mov ("eax",&DWP(0,$inp,$j,4)); # ap[num-1]
|
||||
&mov ($carry,"edx");
|
||||
&mul ($word); # ap[num-1]*ap[0]
|
||||
&add ("eax",$carry);
|
||||
&mov ($word,$_n0);
|
||||
&adc ("edx",0);
|
||||
&mov ($inp,$_np);
|
||||
&lea ($carry,&DWP(0,$sbit,"eax",2));
|
||||
&imul ($word,&DWP($frame,"esp")); # n0*tp[0]
|
||||
&shr ("eax",31);
|
||||
&mov (&DWP($frame,"esp",$j,4),$carry); # tp[num-1]=
|
||||
|
||||
&lea ($carry,&DWP(0,"eax","edx",2));
|
||||
&mov ("eax",&DWP(0,$inp)); # np[0]
|
||||
&shr ("edx",31);
|
||||
&mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num]=
|
||||
&mov (&DWP($frame+8,"esp",$j,4),"edx"); # tp[num+1]=
|
||||
|
||||
&mul ($word); # np[0]*m
|
||||
&add ("eax",&DWP($frame,"esp")); # +=tp[0]
|
||||
&mov ($num,$j);
|
||||
&adc ("edx",0);
|
||||
&mov ("eax",&DWP(4,$inp)); # np[1]
|
||||
&mov ($j,1);
|
||||
|
||||
&set_label("3rdmadd",16);
|
||||
&mov ($carry,"edx");
|
||||
&mul ($word); # np[j]*m
|
||||
&add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
|
||||
&adc ("edx",0);
|
||||
&add ($carry,"eax");
|
||||
&mov ("eax",&DWP(4,$inp,$j,4)); # np[j+1]
|
||||
&adc ("edx",0);
|
||||
&mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j-1]=
|
||||
|
||||
&mov ($carry,"edx");
|
||||
&mul ($word); # np[j+1]*m
|
||||
&add ($carry,&DWP($frame+4,"esp",$j,4)); # +=tp[j+1]
|
||||
&lea ($j,&DWP(2,$j));
|
||||
&adc ("edx",0);
|
||||
&add ($carry,"eax");
|
||||
&mov ("eax",&DWP(0,$inp,$j,4)); # np[j+2]
|
||||
&adc ("edx",0);
|
||||
&cmp ($j,$num);
|
||||
&mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j]=
|
||||
&jl (&label("3rdmadd"));
|
||||
|
||||
&mov ($carry,"edx");
|
||||
&mul ($word); # np[j]*m
|
||||
&add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
|
||||
&adc ("edx",0);
|
||||
&add ($carry,"eax");
|
||||
&adc ("edx",0);
|
||||
&mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
|
||||
|
||||
&mov ($j,$_bp); # i
|
||||
&xor ("eax","eax");
|
||||
&mov ($inp,$_ap);
|
||||
&add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
|
||||
&adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
|
||||
&mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
|
||||
&cmp ($j,$num);
|
||||
&mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
|
||||
&je (&label("common_tail"));
|
||||
|
||||
&mov ($word,&DWP(4,$inp,$j,4)); # ap[i]
|
||||
&lea ($j,&DWP(1,$j));
|
||||
&mov ("eax",$word);
|
||||
&mov ($_bp,$j); # ++i
|
||||
&mul ($word); # ap[i]*ap[i]
|
||||
&add ("eax",&DWP($frame,"esp",$j,4)); # +=tp[i]
|
||||
&adc ("edx",0);
|
||||
&mov (&DWP($frame,"esp",$j,4),"eax"); # tp[i]=
|
||||
&xor ($carry,$carry);
|
||||
&cmp ($j,$num);
|
||||
&lea ($j,&DWP(1,$j));
|
||||
&je (&label("sqrlast"));
|
||||
|
||||
&mov ($sbit,"edx"); # zaps $num
|
||||
&shr ("edx",1);
|
||||
&and ($sbit,1);
|
||||
&set_label("sqradd",16);
|
||||
&mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
|
||||
&mov ($carry,"edx");
|
||||
&mul ($word); # ap[j]*ap[i]
|
||||
&add ("eax",$carry);
|
||||
&lea ($carry,&DWP(0,"eax","eax"));
|
||||
&adc ("edx",0);
|
||||
&shr ("eax",31);
|
||||
&add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
|
||||
&lea ($j,&DWP(1,$j));
|
||||
&adc ("eax",0);
|
||||
&add ($carry,$sbit);
|
||||
&adc ("eax",0);
|
||||
&cmp ($j,$_num);
|
||||
&mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
|
||||
&mov ($sbit,"eax");
|
||||
&jle (&label("sqradd"));
|
||||
|
||||
&mov ($carry,"edx");
|
||||
&add ("edx","edx");
|
||||
&shr ($carry,31);
|
||||
&add ("edx",$sbit);
|
||||
&adc ($carry,0);
|
||||
&set_label("sqrlast");
|
||||
&mov ($word,$_n0);
|
||||
&mov ($inp,$_np);
|
||||
&imul ($word,&DWP($frame,"esp")); # n0*tp[0]
|
||||
|
||||
&add ("edx",&DWP($frame,"esp",$j,4)); # +=tp[num]
|
||||
&mov ("eax",&DWP(0,$inp)); # np[0]
|
||||
&adc ($carry,0);
|
||||
&mov (&DWP($frame,"esp",$j,4),"edx"); # tp[num]=
|
||||
&mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num+1]=
|
||||
|
||||
&mul ($word); # np[0]*m
|
||||
&add ("eax",&DWP($frame,"esp")); # +=tp[0]
|
||||
&lea ($num,&DWP(-1,$j));
|
||||
&adc ("edx",0);
|
||||
&mov ($j,1);
|
||||
&mov ("eax",&DWP(4,$inp)); # np[1]
|
||||
|
||||
&jmp (&label("3rdmadd"));
|
||||
}
|
||||
|
||||
&set_label("common_tail",16);
|
||||
&mov ($np,$_np); # load modulus pointer
|
||||
&mov ($rp,$_rp); # load result pointer
|
||||
&lea ($tp,&DWP($frame,"esp")); # [$ap and $bp are zapped]
|
||||
|
||||
&mov ("eax",&DWP(0,$tp)); # tp[0]
|
||||
&mov ($j,$num); # j=num-1
|
||||
&xor ($i,$i); # i=0 and clear CF!
|
||||
|
||||
&set_label("sub",16);
|
||||
&sbb ("eax",&DWP(0,$np,$i,4));
|
||||
&mov (&DWP(0,$rp,$i,4),"eax"); # rp[i]=tp[i]-np[i]
|
||||
&dec ($j); # doesn't affect CF!
|
||||
&mov ("eax",&DWP(4,$tp,$i,4)); # tp[i+1]
|
||||
&lea ($i,&DWP(1,$i)); # i++
|
||||
&jge (&label("sub"));
|
||||
|
||||
&sbb ("eax",0); # handle upmost overflow bit
|
||||
&mov ("edx",-1);
|
||||
&xor ("edx","eax");
|
||||
&jmp (&label("copy"));
|
||||
|
||||
&set_label("copy",16); # conditional copy
|
||||
&mov ($tp,&DWP($frame,"esp",$num,4));
|
||||
&mov ($np,&DWP(0,$rp,$num,4));
|
||||
&mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector
|
||||
&and ($tp,"eax");
|
||||
&and ($np,"edx");
|
||||
&or ($np,$tp);
|
||||
&mov (&DWP(0,$rp,$num,4),$np);
|
||||
&dec ($num);
|
||||
&jge (&label("copy"));
|
||||
|
||||
&mov ("esp",$_sp); # pull saved stack pointer
|
||||
&mov ("eax",1);
|
||||
&set_label("just_leave");
|
||||
&function_end("bn_mul_mont");
|
||||
|
||||
&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
|
||||
|
||||
&asm_finish();
|
||||
|
||||
# A failed close means the generated assembler was not fully written;
# abort so the build does not continue with a truncated file.
close STDOUT or die "error closing STDOUT: $!";
|
643
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-gcc.c
vendored
Normal file
643
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-gcc.c
vendored
Normal file
|
@ -0,0 +1,643 @@
|
|||
/*
|
||||
* Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include "../bn_lcl.h"
|
||||
#if !(defined(__GNUC__) && __GNUC__>=2)
|
||||
# include "../bn_asm.c" /* kind of dirty hack for Sun Studio */
|
||||
#else
|
||||
/*-
|
||||
* x86_64 BIGNUM accelerator version 0.1, December 2002.
|
||||
*
|
||||
* Implemented by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
* project.
|
||||
*
|
||||
* Rights for redistribution and usage in source and binary forms are
|
||||
* granted according to the OpenSSL license. Warranty of any kind is
|
||||
* disclaimed.
|
||||
*
|
||||
* Q. Version 0.1? It doesn't sound like Andy, he used to assign real
|
||||
* versions, like 1.0...
|
||||
* A. Well, that's because this code is basically a quick-n-dirty
|
||||
* proof-of-concept hack. As you can see it's implemented with
|
||||
* inline assembler, which means that you're bound to GCC and that
|
||||
* there might be enough room for further improvement.
|
||||
*
|
||||
* Q. Why inline assembler?
|
||||
* A. x86_64 features own ABI which I'm not familiar with. This is
|
||||
* why I decided to let the compiler take care of subroutine
|
||||
* prologue/epilogue as well as register allocation. For reference,
|
||||
* Win64 implements a different ABI for AMD64 than Linux does.
|
||||
*
|
||||
* Q. How much faster does it get?
|
||||
* A. 'apps/openssl speed rsa dsa' output with no-asm:
|
||||
*
|
||||
* sign verify sign/s verify/s
|
||||
* rsa 512 bits 0.0006s 0.0001s 1683.8 18456.2
|
||||
* rsa 1024 bits 0.0028s 0.0002s 356.0 6407.0
|
||||
* rsa 2048 bits 0.0172s 0.0005s 58.0 1957.8
|
||||
* rsa 4096 bits 0.1155s 0.0018s 8.7 555.6
|
||||
* sign verify sign/s verify/s
|
||||
* dsa 512 bits 0.0005s 0.0006s 2100.8 1768.3
|
||||
* dsa 1024 bits 0.0014s 0.0018s 692.3 559.2
|
||||
* dsa 2048 bits 0.0049s 0.0061s 204.7 165.0
|
||||
*
|
||||
* 'apps/openssl speed rsa dsa' output with this module:
|
||||
*
|
||||
* sign verify sign/s verify/s
|
||||
* rsa 512 bits 0.0004s 0.0000s 2767.1 33297.9
|
||||
* rsa 1024 bits 0.0012s 0.0001s 867.4 14674.7
|
||||
* rsa 2048 bits 0.0061s 0.0002s 164.0 5270.0
|
||||
* rsa 4096 bits 0.0384s 0.0006s 26.1 1650.8
|
||||
* sign verify sign/s verify/s
|
||||
* dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3
|
||||
* dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4
|
||||
* dsa 2048 bits 0.0016s 0.0020s 620.4 504.6
|
||||
*
|
||||
* For reference, the IA-32 assembler implementation performs
|
||||
* very much like 64-bit code compiled with no-asm on the same
|
||||
* machine.
|
||||
*/
|
||||
|
||||
# undef mul
|
||||
# undef mul_add
|
||||
|
||||
/*-
|
||||
* "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
|
||||
* "g"(0) lets the compiler decide where it
|
||||
* wants to keep the value of zero;
|
||||
*/
|
||||
/*
 * mul_add(r,a,word,carry): r += a*word + carry, keeping the low word in r
 * and leaving the high word in carry.  r is accessed as a read-write memory
 * operand and a as a memory operand; carry must be a plain variable.
 */
# define mul_add(r,a,word,carry) do {           \
        register BN_ULONG hi,lo;                \
        /* (hi,lo) = word * a */                \
        asm ("mulq %3"                          \
                : "=a"(lo),"=d"(hi)             \
                : "a"(word),"m"(a)              \
                : "cc");                        \
        /* carry += lo, overflow goes to hi */  \
        asm ("addq %2,%0; adcq %3,%1"           \
                : "+r"(carry),"+d"(hi)          \
                : "a"(lo),"g"(0)                \
                : "cc");                        \
        /* r += carry, overflow goes to hi */   \
        asm ("addq %2,%0; adcq %3,%1"           \
                : "+m"(r),"+d"(hi)              \
                : "r"(carry),"g"(0)             \
                : "cc");                        \
        carry=hi;                               \
        } while (0)
|
||||
|
||||
/*
 * mul(r,a,word,carry): r = low word of (a*word + carry); carry receives the
 * high word.
 *
 * The multiplicand uses the "rm" constraint rather than "g": "g" also
 * permits an immediate operand, and mulq has no immediate form, so a
 * constant-folded argument would produce invalid assembly.
 */
# define mul(r,a,word,carry) do {               \
        register BN_ULONG high,low;             \
        /* (high,low) = word * a */             \
        asm ("mulq %3"                          \
                : "=a"(low),"=d"(high)          \
                : "a"(word),"rm"(a)             \
                : "cc");                        \
        /* carry += low, overflow goes to high */ \
        asm ("addq %2,%0; adcq %3,%1"           \
                : "+r"(carry),"+d"(high)        \
                : "a"(low),"g"(0)               \
                : "cc");                        \
        (r)=carry, carry=high;                  \
        } while (0)
|
||||
# undef sqr
|
||||
/*
 * sqr(r0,r1,a): (r1,r0) = a*a, low word in r0 and high word in r1.
 *
 * Wrapped in do { } while (0) so that "sqr(...);" is exactly one statement
 * and stays safe inside an unbraced if/else, like the other macros here.
 */
# define sqr(r0,r1,a) do {              \
        asm ("mulq %2"                  \
                : "=a"(r0),"=d"(r1)     \
                : "a"(a)                \
                : "cc");                \
        } while (0)
|
||||
|
||||
/*
 * rp[i] += ap[i] * w for every i in [0, num), chaining the carry word from
 * one limb to the next.  Returns the carry left after the last limb, or 0
 * (without touching rp) when num <= 0.
 */
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
                          BN_ULONG w)
{
    BN_ULONG carry = 0;

    if (num <= 0)
        return carry;

    /* Bulk of the work: four limbs per iteration. */
    for (; num >= 4; num -= 4, ap += 4, rp += 4) {
        mul_add(rp[0], ap[0], w, carry);
        mul_add(rp[1], ap[1], w, carry);
        mul_add(rp[2], ap[2], w, carry);
        mul_add(rp[3], ap[3], w, carry);
    }

    /* Zero to three limbs remain. */
    if (num == 0)
        return carry;
    mul_add(rp[0], ap[0], w, carry);
    if (num == 1)
        return carry;
    mul_add(rp[1], ap[1], w, carry);
    if (num == 2)
        return carry;
    mul_add(rp[2], ap[2], w, carry);
    return carry;
}
|
||||
|
||||
/*
 * rp[i] = low word of (ap[i] * w + carry) for every i in [0, num), chaining
 * the carry word from one limb to the next.  Returns the final carry, or 0
 * (without touching rp) when num <= 0.
 */
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG carry = 0;

    if (num <= 0)
        return carry;

    /* Bulk of the work: four limbs per iteration. */
    for (; num >= 4; num -= 4, ap += 4, rp += 4) {
        mul(rp[0], ap[0], w, carry);
        mul(rp[1], ap[1], w, carry);
        mul(rp[2], ap[2], w, carry);
        mul(rp[3], ap[3], w, carry);
    }

    /* Zero to three limbs remain. */
    if (num == 0)
        return carry;
    mul(rp[0], ap[0], w, carry);
    if (num == 1)
        return carry;
    mul(rp[1], ap[1], w, carry);
    if (num == 2)
        return carry;
    mul(rp[2], ap[2], w, carry);
    return carry;
}
|
||||
|
||||
/*
 * Square each of the n input words: (r[2*i+1], r[2*i]) = a[i] * a[i].
 * r therefore receives 2*n words.  Does nothing when n <= 0.
 */
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
{
    if (n <= 0)
        return;

    /* Four input words (eight output words) per iteration. */
    for (; n >= 4; n -= 4, a += 4, r += 8) {
        sqr(r[0], r[1], a[0]);
        sqr(r[2], r[3], a[1]);
        sqr(r[4], r[5], a[2]);
        sqr(r[6], r[7], a[3]);
    }

    /* Zero to three input words remain. */
    if (n == 0)
        return;
    sqr(r[0], r[1], a[0]);
    if (n == 1)
        return;
    sqr(r[2], r[3], a[1]);
    if (n == 2)
        return;
    sqr(r[4], r[5], a[2]);
}
|
||||
|
||||
/*
 * Divide the two-word value (h,l) by d with a single divq and return the
 * quotient; the remainder is computed but discarded.
 *
 * NOTE: no range check is made here.  divq faults when the quotient does
 * not fit in one word, so the caller has to guarantee h < d (and d != 0).
 */
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
{
    BN_ULONG quot, rem;

    asm("divq %4"
        : "=a"(quot), "=d"(rem)
        : "a"(l), "d"(h), "r"(d)
        : "cc");

    return quot;
}
|
||||
|
||||
/*
 * rp[i] = ap[i] + bp[i] with carry propagation over n words.
 * Returns the carry out of the top word (0 or 1); returns 0 when n <= 0.
 */
BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                      int n)
{
    BN_ULONG cf;
    size_t idx = 0;

    if (n <= 0)
        return 0;

    /*
     * The loop is stepped with lea/dec, which leave CF alone, so the carry
     * produced by one adcq is consumed by the next.  The closing sbbq turns
     * the final CF into 0 or all-ones in cf.
     */
    asm volatile (" subq %0,%0 \n" /* clear carry */
                  " jmp 1f \n"
                  ".p2align 4 \n"
                  "1: movq (%4,%2,8),%0 \n"
                  " adcq (%5,%2,8),%0 \n"
                  " movq %0,(%3,%2,8) \n"
                  " lea 1(%2),%2 \n"
                  " dec %1 \n"
                  " jnz 1b \n"
                  " sbbq %0,%0 \n"
                  :"=&r" (cf), "+c"(n), "+r"(idx)
                  :"r"(rp), "r"(ap), "r"(bp)
                  :"cc", "memory");

    return cf & 1;
}
|
||||
|
||||
# ifndef SIMICS
|
||||
/*
 * rp[i] = ap[i] - bp[i] with borrow propagation over n words.
 * Returns the borrow out of the top word (0 or 1); returns 0 when n <= 0.
 */
BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                      int n)
{
    BN_ULONG bw;
    size_t idx = 0;

    if (n <= 0)
        return 0;

    /*
     * The loop is stepped with lea/dec, which leave CF alone, so the borrow
     * produced by one sbbq is consumed by the next.  The closing sbbq turns
     * the final CF into 0 or all-ones in bw.
     */
    asm volatile (" subq %0,%0 \n" /* clear borrow */
                  " jmp 1f \n"
                  ".p2align 4 \n"
                  "1: movq (%4,%2,8),%0 \n"
                  " sbbq (%5,%2,8),%0 \n"
                  " movq %0,(%3,%2,8) \n"
                  " lea 1(%2),%2 \n"
                  " dec %1 \n"
                  " jnz 1b \n"
                  " sbbq %0,%0 \n"
                  :"=&r" (bw), "+c"(n), "+r"(idx)
                  :"r"(rp), "r"(ap), "r"(bp)
                  :"cc", "memory");

    return bw & 1;
}
|
||||
# else
|
||||
/* Simics 1.4<7 has buggy sbbq:-( */
|
||||
# define BN_MASK2 0xffffffffffffffffL
|
||||
/*
 * Portable C fallback used when SIMICS is defined (its sbbq is buggy):
 * r[i] = a[i] - b[i] with borrow propagation over n words.
 * Returns the borrow out of the top word (0 or 1); returns 0 when n <= 0.
 *
 * The inputs are const-qualified so that this definition has the same
 * signature as the inline-asm variant above.
 */
BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                      int n)
{
    BN_ULONG t1, t2;
    int c = 0;
    int i;

    if (n <= 0)
        return (BN_ULONG)0;

    for (i = 0; i < n; i++) {
        t1 = a[i];
        t2 = b[i];
        r[i] = (t1 - t2 - c) & BN_MASK2;
        /* When the words are equal the incoming borrow just passes through. */
        if (t1 != t2)
            c = (t1 < t2);
    }
    return c;
}
|
||||
# endif
|
||||
|
||||
/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
|
||||
/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
|
||||
/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
|
||||
/*
|
||||
* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number
|
||||
* c=(c2,c1,c0)
|
||||
*/
|
||||
|
||||
/*
|
||||
* Keep in mind that carrying into high part of multiplication result
|
||||
* can not overflow, because it cannot be all-ones.
|
||||
*/
|
||||
# if 0
|
||||
/* original macros are kept for reference purposes */
|
||||
# define mul_add_c(a,b,c0,c1,c2) do { \
|
||||
BN_ULONG ta = (a), tb = (b); \
|
||||
BN_ULONG lo, hi; \
|
||||
BN_UMULT_LOHI(lo,hi,ta,tb); \
|
||||
c0 += lo; hi += (c0<lo)?1:0; \
|
||||
c1 += hi; c2 += (c1<hi)?1:0; \
|
||||
} while(0)
|
||||
|
||||
# define mul_add_c2(a,b,c0,c1,c2) do { \
|
||||
BN_ULONG ta = (a), tb = (b); \
|
||||
BN_ULONG lo, hi, tt; \
|
||||
BN_UMULT_LOHI(lo,hi,ta,tb); \
|
||||
c0 += lo; tt = hi+((c0<lo)?1:0); \
|
||||
c1 += tt; c2 += (c1<tt)?1:0; \
|
||||
c0 += lo; hi += (c0<lo)?1:0; \
|
||||
c1 += hi; c2 += (c1<hi)?1:0; \
|
||||
} while(0)
|
||||
|
||||
# define sqr_add_c(a,i,c0,c1,c2) do { \
|
||||
BN_ULONG ta = (a)[i]; \
|
||||
BN_ULONG lo, hi; \
|
||||
BN_UMULT_LOHI(lo,hi,ta,ta); \
|
||||
c0 += lo; hi += (c0<lo)?1:0; \
|
||||
c1 += hi; c2 += (c1<hi)?1:0; \
|
||||
} while(0)
|
||||
# else
|
||||
/*
 * mul_add_c(a,b,c0,c1,c2): (c2,c1,c0) += a*b.
 * b is taken as a memory operand; c0..c2 must be plain variables.
 */
# define mul_add_c(a,b,c0,c1,c2) do {                   \
        BN_ULONG lo,hi;                                 \
        /* (hi,lo) = a * b */                           \
        asm ("mulq %3"                                  \
                : "=a"(lo),"=d"(hi)                     \
                : "a"(a),"m"(b)                         \
                : "cc");                                \
        /* three-word accumulate */                     \
        asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
                : "+r"(c0),"+r"(c1),"+r"(c2)            \
                : "r"(lo),"r"(hi),"g"(0)                \
                : "cc");                                \
        } while (0)

/*
 * sqr_add_c(a,i,c0,c1,c2): (c2,c1,c0) += a[i]*a[i].
 */
# define sqr_add_c(a,i,c0,c1,c2) do {                   \
        BN_ULONG lo,hi;                                 \
        /* (hi,lo) = a[i] * a[i] */                     \
        asm ("mulq %2"                                  \
                : "=a"(lo),"=d"(hi)                     \
                : "a"(a[i])                             \
                : "cc");                                \
        /* three-word accumulate */                     \
        asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
                : "+r"(c0),"+r"(c1),"+r"(c2)            \
                : "r"(lo),"r"(hi),"g"(0)                \
                : "cc");                                \
        } while (0)

/*
 * mul_add_c2(a,b,c0,c1,c2): (c2,c1,c0) += 2*a*b.
 * The product is formed once and accumulated twice.
 */
# define mul_add_c2(a,b,c0,c1,c2) do {                  \
        BN_ULONG lo,hi;                                 \
        /* (hi,lo) = a * b */                           \
        asm ("mulq %3"                                  \
                : "=a"(lo),"=d"(hi)                     \
                : "a"(a),"m"(b)                         \
                : "cc");                                \
        /* first accumulate */                          \
        asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
                : "+r"(c0),"+r"(c1),"+r"(c2)            \
                : "r"(lo),"r"(hi),"g"(0)                \
                : "cc");                                \
        /* second accumulate */                         \
        asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
                : "+r"(c0),"+r"(c1),"+r"(c2)            \
                : "r"(lo),"r"(hi),"g"(0)                \
                : "cc");                                \
        } while (0)
|
||||
# endif
|
||||
|
||||
/* sqr_add_c2(a,i,j,c0,c1,c2): forwards a[i] and a[j] to mul_add_c2. */
# define sqr_add_c2(a,i,j,c0,c1,c2) \
|
||||
mul_add_c2((a)[i],(a)[j],c0,c1,c2)
|
||||
|
||||
/*
 * 8x8-word comba multiplication: r[0..15] = a[0..7] * b[0..7].
 *
 * The product is built one output word ("column") at a time.  Three
 * accumulator words rotate roles: for each column the lowest one is stored
 * to r[] and cleared, then becomes the top word of the next column.
 */
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
    BN_ULONG c1 = 0, c2 = 0, c3 = 0;

    /* column 0 */
    mul_add_c(a[0], b[0], c1, c2, c3);
    r[0] = c1;
    c1 = 0;

    /* column 1 */
    mul_add_c(a[0], b[1], c2, c3, c1);
    mul_add_c(a[1], b[0], c2, c3, c1);
    r[1] = c2;
    c2 = 0;

    /* column 2 */
    mul_add_c(a[2], b[0], c3, c1, c2);
    mul_add_c(a[1], b[1], c3, c1, c2);
    mul_add_c(a[0], b[2], c3, c1, c2);
    r[2] = c3;
    c3 = 0;

    /* column 3 */
    mul_add_c(a[0], b[3], c1, c2, c3);
    mul_add_c(a[1], b[2], c1, c2, c3);
    mul_add_c(a[2], b[1], c1, c2, c3);
    mul_add_c(a[3], b[0], c1, c2, c3);
    r[3] = c1;
    c1 = 0;

    /* column 4 */
    mul_add_c(a[4], b[0], c2, c3, c1);
    mul_add_c(a[3], b[1], c2, c3, c1);
    mul_add_c(a[2], b[2], c2, c3, c1);
    mul_add_c(a[1], b[3], c2, c3, c1);
    mul_add_c(a[0], b[4], c2, c3, c1);
    r[4] = c2;
    c2 = 0;

    /* column 5 */
    mul_add_c(a[0], b[5], c3, c1, c2);
    mul_add_c(a[1], b[4], c3, c1, c2);
    mul_add_c(a[2], b[3], c3, c1, c2);
    mul_add_c(a[3], b[2], c3, c1, c2);
    mul_add_c(a[4], b[1], c3, c1, c2);
    mul_add_c(a[5], b[0], c3, c1, c2);
    r[5] = c3;
    c3 = 0;

    /* column 6 */
    mul_add_c(a[6], b[0], c1, c2, c3);
    mul_add_c(a[5], b[1], c1, c2, c3);
    mul_add_c(a[4], b[2], c1, c2, c3);
    mul_add_c(a[3], b[3], c1, c2, c3);
    mul_add_c(a[2], b[4], c1, c2, c3);
    mul_add_c(a[1], b[5], c1, c2, c3);
    mul_add_c(a[0], b[6], c1, c2, c3);
    r[6] = c1;
    c1 = 0;

    /* column 7 */
    mul_add_c(a[0], b[7], c2, c3, c1);
    mul_add_c(a[1], b[6], c2, c3, c1);
    mul_add_c(a[2], b[5], c2, c3, c1);
    mul_add_c(a[3], b[4], c2, c3, c1);
    mul_add_c(a[4], b[3], c2, c3, c1);
    mul_add_c(a[5], b[2], c2, c3, c1);
    mul_add_c(a[6], b[1], c2, c3, c1);
    mul_add_c(a[7], b[0], c2, c3, c1);
    r[7] = c2;
    c2 = 0;

    /* column 8 */
    mul_add_c(a[7], b[1], c3, c1, c2);
    mul_add_c(a[6], b[2], c3, c1, c2);
    mul_add_c(a[5], b[3], c3, c1, c2);
    mul_add_c(a[4], b[4], c3, c1, c2);
    mul_add_c(a[3], b[5], c3, c1, c2);
    mul_add_c(a[2], b[6], c3, c1, c2);
    mul_add_c(a[1], b[7], c3, c1, c2);
    r[8] = c3;
    c3 = 0;

    /* column 9 */
    mul_add_c(a[2], b[7], c1, c2, c3);
    mul_add_c(a[3], b[6], c1, c2, c3);
    mul_add_c(a[4], b[5], c1, c2, c3);
    mul_add_c(a[5], b[4], c1, c2, c3);
    mul_add_c(a[6], b[3], c1, c2, c3);
    mul_add_c(a[7], b[2], c1, c2, c3);
    r[9] = c1;
    c1 = 0;

    /* column 10 */
    mul_add_c(a[7], b[3], c2, c3, c1);
    mul_add_c(a[6], b[4], c2, c3, c1);
    mul_add_c(a[5], b[5], c2, c3, c1);
    mul_add_c(a[4], b[6], c2, c3, c1);
    mul_add_c(a[3], b[7], c2, c3, c1);
    r[10] = c2;
    c2 = 0;

    /* column 11 */
    mul_add_c(a[4], b[7], c3, c1, c2);
    mul_add_c(a[5], b[6], c3, c1, c2);
    mul_add_c(a[6], b[5], c3, c1, c2);
    mul_add_c(a[7], b[4], c3, c1, c2);
    r[11] = c3;
    c3 = 0;

    /* column 12 */
    mul_add_c(a[7], b[5], c1, c2, c3);
    mul_add_c(a[6], b[6], c1, c2, c3);
    mul_add_c(a[5], b[7], c1, c2, c3);
    r[12] = c1;
    c1 = 0;

    /* column 13 */
    mul_add_c(a[6], b[7], c2, c3, c1);
    mul_add_c(a[7], b[6], c2, c3, c1);
    r[13] = c2;
    c2 = 0;

    /* column 14, whose high word is the final output word */
    mul_add_c(a[7], b[7], c3, c1, c2);
    r[14] = c3;
    r[15] = c1;
}
|
||||
|
||||
/*
 * 4x4-word comba multiplication: r[0..7] = a[0..3] * b[0..3].
 *
 * The product is built one output word ("column") at a time.  Three
 * accumulator words rotate roles: for each column the lowest one is stored
 * to r[] and cleared, then becomes the top word of the next column.
 */
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
    BN_ULONG c1 = 0, c2 = 0, c3 = 0;

    /* column 0 */
    mul_add_c(a[0], b[0], c1, c2, c3);
    r[0] = c1;
    c1 = 0;

    /* column 1 */
    mul_add_c(a[0], b[1], c2, c3, c1);
    mul_add_c(a[1], b[0], c2, c3, c1);
    r[1] = c2;
    c2 = 0;

    /* column 2 */
    mul_add_c(a[2], b[0], c3, c1, c2);
    mul_add_c(a[1], b[1], c3, c1, c2);
    mul_add_c(a[0], b[2], c3, c1, c2);
    r[2] = c3;
    c3 = 0;

    /* column 3 */
    mul_add_c(a[0], b[3], c1, c2, c3);
    mul_add_c(a[1], b[2], c1, c2, c3);
    mul_add_c(a[2], b[1], c1, c2, c3);
    mul_add_c(a[3], b[0], c1, c2, c3);
    r[3] = c1;
    c1 = 0;

    /* column 4 */
    mul_add_c(a[3], b[1], c2, c3, c1);
    mul_add_c(a[2], b[2], c2, c3, c1);
    mul_add_c(a[1], b[3], c2, c3, c1);
    r[4] = c2;
    c2 = 0;

    /* column 5 */
    mul_add_c(a[2], b[3], c3, c1, c2);
    mul_add_c(a[3], b[2], c3, c1, c2);
    r[5] = c3;
    c3 = 0;

    /* column 6, whose high word is the final output word */
    mul_add_c(a[3], b[3], c1, c2, c3);
    r[6] = c1;
    r[7] = c2;
}
|
||||
|
||||
/*
 * 8-word comba squaring: r[0..15] = a[0..7]^2.
 *
 * Same column-by-column scheme as the comba multiply, except that each
 * cross product a[i]*a[j] (i != j) is formed once and added twice via
 * sqr_add_c2, while the diagonal terms a[i]^2 use sqr_add_c.
 */
void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
{
    BN_ULONG c1 = 0, c2 = 0, c3 = 0;

    /* column 0 */
    sqr_add_c(a, 0, c1, c2, c3);
    r[0] = c1;
    c1 = 0;

    /* column 1 */
    sqr_add_c2(a, 1, 0, c2, c3, c1);
    r[1] = c2;
    c2 = 0;

    /* column 2 */
    sqr_add_c(a, 1, c3, c1, c2);
    sqr_add_c2(a, 2, 0, c3, c1, c2);
    r[2] = c3;
    c3 = 0;

    /* column 3 */
    sqr_add_c2(a, 3, 0, c1, c2, c3);
    sqr_add_c2(a, 2, 1, c1, c2, c3);
    r[3] = c1;
    c1 = 0;

    /* column 4 */
    sqr_add_c(a, 2, c2, c3, c1);
    sqr_add_c2(a, 3, 1, c2, c3, c1);
    sqr_add_c2(a, 4, 0, c2, c3, c1);
    r[4] = c2;
    c2 = 0;

    /* column 5 */
    sqr_add_c2(a, 5, 0, c3, c1, c2);
    sqr_add_c2(a, 4, 1, c3, c1, c2);
    sqr_add_c2(a, 3, 2, c3, c1, c2);
    r[5] = c3;
    c3 = 0;

    /* column 6 */
    sqr_add_c(a, 3, c1, c2, c3);
    sqr_add_c2(a, 4, 2, c1, c2, c3);
    sqr_add_c2(a, 5, 1, c1, c2, c3);
    sqr_add_c2(a, 6, 0, c1, c2, c3);
    r[6] = c1;
    c1 = 0;

    /* column 7 */
    sqr_add_c2(a, 7, 0, c2, c3, c1);
    sqr_add_c2(a, 6, 1, c2, c3, c1);
    sqr_add_c2(a, 5, 2, c2, c3, c1);
    sqr_add_c2(a, 4, 3, c2, c3, c1);
    r[7] = c2;
    c2 = 0;

    /* column 8 */
    sqr_add_c(a, 4, c3, c1, c2);
    sqr_add_c2(a, 5, 3, c3, c1, c2);
    sqr_add_c2(a, 6, 2, c3, c1, c2);
    sqr_add_c2(a, 7, 1, c3, c1, c2);
    r[8] = c3;
    c3 = 0;

    /* column 9 */
    sqr_add_c2(a, 7, 2, c1, c2, c3);
    sqr_add_c2(a, 6, 3, c1, c2, c3);
    sqr_add_c2(a, 5, 4, c1, c2, c3);
    r[9] = c1;
    c1 = 0;

    /* column 10 */
    sqr_add_c(a, 5, c2, c3, c1);
    sqr_add_c2(a, 6, 4, c2, c3, c1);
    sqr_add_c2(a, 7, 3, c2, c3, c1);
    r[10] = c2;
    c2 = 0;

    /* column 11 */
    sqr_add_c2(a, 7, 4, c3, c1, c2);
    sqr_add_c2(a, 6, 5, c3, c1, c2);
    r[11] = c3;
    c3 = 0;

    /* column 12 */
    sqr_add_c(a, 6, c1, c2, c3);
    sqr_add_c2(a, 7, 5, c1, c2, c3);
    r[12] = c1;
    c1 = 0;

    /* column 13 */
    sqr_add_c2(a, 7, 6, c2, c3, c1);
    r[13] = c2;
    c2 = 0;

    /* column 14, whose high word is the final output word */
    sqr_add_c(a, 7, c3, c1, c2);
    r[14] = c3;
    r[15] = c1;
}
|
||||
|
||||
/*
 * 4-word comba squaring: r[0..7] = a[0..3]^2.
 *
 * Same column-by-column scheme as the comba multiply, except that each
 * cross product a[i]*a[j] (i != j) is formed once and added twice via
 * sqr_add_c2, while the diagonal terms a[i]^2 use sqr_add_c.
 */
void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
{
    BN_ULONG c1 = 0, c2 = 0, c3 = 0;

    /* column 0 */
    sqr_add_c(a, 0, c1, c2, c3);
    r[0] = c1;
    c1 = 0;

    /* column 1 */
    sqr_add_c2(a, 1, 0, c2, c3, c1);
    r[1] = c2;
    c2 = 0;

    /* column 2 */
    sqr_add_c(a, 1, c3, c1, c2);
    sqr_add_c2(a, 2, 0, c3, c1, c2);
    r[2] = c3;
    c3 = 0;

    /* column 3 */
    sqr_add_c2(a, 3, 0, c1, c2, c3);
    sqr_add_c2(a, 2, 1, c1, c2, c3);
    r[3] = c1;
    c1 = 0;

    /* column 4 */
    sqr_add_c(a, 2, c2, c3, c1);
    sqr_add_c2(a, 3, 1, c2, c3, c1);
    r[4] = c2;
    c2 = 0;

    /* column 5 */
    sqr_add_c2(a, 3, 2, c3, c1, c2);
    r[5] = c3;
    c3 = 0;

    /* column 6, whose high word is the final output word */
    sqr_add_c(a, 3, c1, c2, c3);
    r[6] = c1;
    r[7] = c2;
}
|
||||
#endif
|
424
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-gf2m.pl
vendored
Normal file
424
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-gf2m.pl
vendored
Normal file
|
@ -0,0 +1,424 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
#
|
||||
# May 2011
|
||||
#
|
||||
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
|
||||
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
|
||||
# the time being... Except that it has two code paths: code suitable
|
||||
# for any x86_64 CPU and PCLMULQDQ one suitable for Westmere and
|
||||
# later. Improvement varies from one benchmark and µ-arch to another.
|
||||
# Vanilla code path is at most 20% faster than compiler-generated code
|
||||
# [not very impressive], while PCLMULQDQ - whole 85%-160% better on
|
||||
# 163- and 571-bit ECDH benchmarks on Intel CPUs. Keep in mind that
|
||||
# these coefficients are not ones for bn_GF2m_mul_2x2 itself, as not
|
||||
# all CPU time is burnt in it...
|
||||
|
||||
$flavour = shift;
|
||||
$output = shift;
|
||||
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
|
||||
|
||||
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
|
||||
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
|
||||
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
|
||||
die "can't locate x86_64-xlate.pl";
|
||||
|
||||
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
($lo,$hi)=("%rax","%rdx"); $a=$lo;
|
||||
($i0,$i1)=("%rsi","%rdi");
|
||||
($t0,$t1)=("%rbx","%rcx");
|
||||
($b,$mask)=("%rbp","%r8");
|
||||
($a1,$a2,$a4,$a8,$a12,$a48)=map("%r$_",(9..15));
|
||||
($R,$Tx)=("%xmm0","%xmm1");
|
||||
|
||||
$code.=<<___;
|
||||
.text
|
||||
|
||||
.type _mul_1x1,\@abi-omnipotent
|
||||
.align 16
|
||||
_mul_1x1:
|
||||
.cfi_startproc
|
||||
sub \$128+8,%rsp
|
||||
.cfi_adjust_cfa_offset 128+8
|
||||
mov \$-1,$a1
|
||||
lea ($a,$a),$i0
|
||||
shr \$3,$a1
|
||||
lea (,$a,4),$i1
|
||||
and $a,$a1 # a1=a&0x1fffffffffffffff
|
||||
lea (,$a,8),$a8
|
||||
sar \$63,$a # broadcast 63rd bit
|
||||
lea ($a1,$a1),$a2
|
||||
sar \$63,$i0 # broadcast 62nd bit
|
||||
lea (,$a1,4),$a4
|
||||
and $b,$a
|
||||
sar \$63,$i1 # broadcast 61st bit
|
||||
mov $a,$hi # $a is $lo
|
||||
shl \$63,$lo
|
||||
and $b,$i0
|
||||
shr \$1,$hi
|
||||
mov $i0,$t1
|
||||
shl \$62,$i0
|
||||
and $b,$i1
|
||||
shr \$2,$t1
|
||||
xor $i0,$lo
|
||||
mov $i1,$t0
|
||||
shl \$61,$i1
|
||||
xor $t1,$hi
|
||||
shr \$3,$t0
|
||||
xor $i1,$lo
|
||||
xor $t0,$hi
|
||||
|
||||
mov $a1,$a12
|
||||
movq \$0,0(%rsp) # tab[0]=0
|
||||
xor $a2,$a12 # a1^a2
|
||||
mov $a1,8(%rsp) # tab[1]=a1
|
||||
mov $a4,$a48
|
||||
mov $a2,16(%rsp) # tab[2]=a2
|
||||
xor $a8,$a48 # a4^a8
|
||||
mov $a12,24(%rsp) # tab[3]=a1^a2
|
||||
|
||||
xor $a4,$a1
|
||||
mov $a4,32(%rsp) # tab[4]=a4
|
||||
xor $a4,$a2
|
||||
mov $a1,40(%rsp) # tab[5]=a1^a4
|
||||
xor $a4,$a12
|
||||
mov $a2,48(%rsp) # tab[6]=a2^a4
|
||||
xor $a48,$a1 # a1^a4^a4^a8=a1^a8
|
||||
mov $a12,56(%rsp) # tab[7]=a1^a2^a4
|
||||
xor $a48,$a2 # a2^a4^a4^a8=a2^a8
|
||||
|
||||
mov $a8,64(%rsp) # tab[8]=a8
|
||||
xor $a48,$a12 # a1^a2^a4^a4^a8=a1^a2^a8
|
||||
mov $a1,72(%rsp) # tab[9]=a1^a8
|
||||
xor $a4,$a1 # a1^a8^a4
|
||||
mov $a2,80(%rsp) # tab[10]=a2^a8
|
||||
xor $a4,$a2 # a2^a8^a4
|
||||
mov $a12,88(%rsp) # tab[11]=a1^a2^a8
|
||||
|
||||
xor $a4,$a12 # a1^a2^a8^a4
|
||||
mov $a48,96(%rsp) # tab[12]=a4^a8
|
||||
mov $mask,$i0
|
||||
mov $a1,104(%rsp) # tab[13]=a1^a4^a8
|
||||
and $b,$i0
|
||||
mov $a2,112(%rsp) # tab[14]=a2^a4^a8
|
||||
shr \$4,$b
|
||||
mov $a12,120(%rsp) # tab[15]=a1^a2^a4^a8
|
||||
mov $mask,$i1
|
||||
and $b,$i1
|
||||
shr \$4,$b
|
||||
|
||||
movq (%rsp,$i0,8),$R # half of calculations is done in SSE2
|
||||
mov $mask,$i0
|
||||
and $b,$i0
|
||||
shr \$4,$b
|
||||
___
|
||||
for ($n=1;$n<8;$n++) {
|
||||
$code.=<<___;
|
||||
mov (%rsp,$i1,8),$t1
|
||||
mov $mask,$i1
|
||||
mov $t1,$t0
|
||||
shl \$`8*$n-4`,$t1
|
||||
and $b,$i1
|
||||
movq (%rsp,$i0,8),$Tx
|
||||
shr \$`64-(8*$n-4)`,$t0
|
||||
xor $t1,$lo
|
||||
pslldq \$$n,$Tx
|
||||
mov $mask,$i0
|
||||
shr \$4,$b
|
||||
xor $t0,$hi
|
||||
and $b,$i0
|
||||
shr \$4,$b
|
||||
pxor $Tx,$R
|
||||
___
|
||||
}
|
||||
$code.=<<___;
|
||||
mov (%rsp,$i1,8),$t1
|
||||
mov $t1,$t0
|
||||
shl \$`8*$n-4`,$t1
|
||||
movq $R,$i0
|
||||
shr \$`64-(8*$n-4)`,$t0
|
||||
xor $t1,$lo
|
||||
psrldq \$8,$R
|
||||
xor $t0,$hi
|
||||
movq $R,$i1
|
||||
xor $i0,$lo
|
||||
xor $i1,$hi
|
||||
|
||||
add \$128+8,%rsp
|
||||
.cfi_adjust_cfa_offset -128-8
|
||||
ret
|
||||
.Lend_mul_1x1:
|
||||
.cfi_endproc
|
||||
.size _mul_1x1,.-_mul_1x1
|
||||
___
|
||||
|
||||
($rp,$a1,$a0,$b1,$b0) = $win64? ("%rcx","%rdx","%r8", "%r9","%r10") : # Win64 order
|
||||
("%rdi","%rsi","%rdx","%rcx","%r8"); # Unix order
|
||||
|
||||
$code.=<<___;
|
||||
.extern OPENSSL_ia32cap_P
|
||||
.globl bn_GF2m_mul_2x2
|
||||
.type bn_GF2m_mul_2x2,\@abi-omnipotent
|
||||
.align 16
|
||||
bn_GF2m_mul_2x2:
|
||||
.cfi_startproc
|
||||
mov %rsp,%rax
|
||||
mov OPENSSL_ia32cap_P(%rip),%r10
|
||||
bt \$33,%r10
|
||||
jnc .Lvanilla_mul_2x2
|
||||
|
||||
movq $a1,%xmm0
|
||||
movq $b1,%xmm1
|
||||
movq $a0,%xmm2
|
||||
___
|
||||
$code.=<<___ if ($win64);
|
||||
movq 40(%rsp),%xmm3
|
||||
___
|
||||
$code.=<<___ if (!$win64);
|
||||
movq $b0,%xmm3
|
||||
___
|
||||
$code.=<<___;
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm1,%xmm5
|
||||
pclmulqdq \$0,%xmm1,%xmm0 # a1·b1
|
||||
pxor %xmm2,%xmm4
|
||||
pxor %xmm3,%xmm5
|
||||
pclmulqdq \$0,%xmm3,%xmm2 # a0·b0
|
||||
pclmulqdq \$0,%xmm5,%xmm4 # (a0+a1)·(b0+b1)
|
||||
xorps %xmm0,%xmm4
|
||||
xorps %xmm2,%xmm4 # (a0+a1)·(b0+b1)-a0·b0-a1·b1
|
||||
movdqa %xmm4,%xmm5
|
||||
pslldq \$8,%xmm4
|
||||
psrldq \$8,%xmm5
|
||||
pxor %xmm4,%xmm2
|
||||
pxor %xmm5,%xmm0
|
||||
movdqu %xmm2,0($rp)
|
||||
movdqu %xmm0,16($rp)
|
||||
ret
|
||||
|
||||
.align 16
|
||||
.Lvanilla_mul_2x2:
|
||||
lea -8*17(%rsp),%rsp
|
||||
.cfi_adjust_cfa_offset 8*17
|
||||
___
|
||||
$code.=<<___ if ($win64);
|
||||
mov `8*17+40`(%rsp),$b0
|
||||
mov %rdi,8*15(%rsp)
|
||||
mov %rsi,8*16(%rsp)
|
||||
___
|
||||
$code.=<<___;
|
||||
mov %r14,8*10(%rsp)
|
||||
.cfi_rel_offset %r14,8*10
|
||||
mov %r13,8*11(%rsp)
|
||||
.cfi_rel_offset %r13,8*11
|
||||
mov %r12,8*12(%rsp)
|
||||
.cfi_rel_offset %r12,8*12
|
||||
mov %rbp,8*13(%rsp)
|
||||
.cfi_rel_offset %rbp,8*13
|
||||
mov %rbx,8*14(%rsp)
|
||||
.cfi_rel_offset %rbx,8*14
|
||||
.Lbody_mul_2x2:
|
||||
mov $rp,32(%rsp) # save the arguments
|
||||
mov $a1,40(%rsp)
|
||||
mov $a0,48(%rsp)
|
||||
mov $b1,56(%rsp)
|
||||
mov $b0,64(%rsp)
|
||||
|
||||
mov \$0xf,$mask
|
||||
mov $a1,$a
|
||||
mov $b1,$b
|
||||
call _mul_1x1 # a1·b1
|
||||
mov $lo,16(%rsp)
|
||||
mov $hi,24(%rsp)
|
||||
|
||||
mov 48(%rsp),$a
|
||||
mov 64(%rsp),$b
|
||||
call _mul_1x1 # a0·b0
|
||||
mov $lo,0(%rsp)
|
||||
mov $hi,8(%rsp)
|
||||
|
||||
mov 40(%rsp),$a
|
||||
mov 56(%rsp),$b
|
||||
xor 48(%rsp),$a
|
||||
xor 64(%rsp),$b
|
||||
call _mul_1x1 # (a0+a1)·(b0+b1)
|
||||
___
|
||||
@r=("%rbx","%rcx","%rdi","%rsi");
|
||||
$code.=<<___;
|
||||
mov 0(%rsp),@r[0]
|
||||
mov 8(%rsp),@r[1]
|
||||
mov 16(%rsp),@r[2]
|
||||
mov 24(%rsp),@r[3]
|
||||
mov 32(%rsp),%rbp
|
||||
|
||||
xor $hi,$lo
|
||||
xor @r[1],$hi
|
||||
xor @r[0],$lo
|
||||
mov @r[0],0(%rbp)
|
||||
xor @r[2],$hi
|
||||
mov @r[3],24(%rbp)
|
||||
xor @r[3],$lo
|
||||
xor @r[3],$hi
|
||||
xor $hi,$lo
|
||||
mov $hi,16(%rbp)
|
||||
mov $lo,8(%rbp)
|
||||
|
||||
mov 8*10(%rsp),%r14
|
||||
.cfi_restore %r14
|
||||
mov 8*11(%rsp),%r13
|
||||
.cfi_restore %r13
|
||||
mov 8*12(%rsp),%r12
|
||||
.cfi_restore %r12
|
||||
mov 8*13(%rsp),%rbp
|
||||
.cfi_restore %rbp
|
||||
mov 8*14(%rsp),%rbx
|
||||
.cfi_restore %rbx
|
||||
___
|
||||
$code.=<<___ if ($win64);
|
||||
mov 8*15(%rsp),%rdi
|
||||
mov 8*16(%rsp),%rsi
|
||||
___
|
||||
$code.=<<___;
|
||||
lea 8*17(%rsp),%rsp
|
||||
.cfi_adjust_cfa_offset -8*17
|
||||
.Lepilogue_mul_2x2:
|
||||
ret
|
||||
.Lend_mul_2x2:
|
||||
.cfi_endproc
|
||||
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
|
||||
.asciz "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 16
|
||||
___
|
||||
|
||||
# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
|
||||
# CONTEXT *context,DISPATCHER_CONTEXT *disp)
|
||||
if ($win64) {
|
||||
$rec="%rcx";
|
||||
$frame="%rdx";
|
||||
$context="%r8";
|
||||
$disp="%r9";
|
||||
|
||||
$code.=<<___;
|
||||
.extern __imp_RtlVirtualUnwind
|
||||
|
||||
.type se_handler,\@abi-omnipotent
|
||||
.align 16
|
||||
se_handler:
|
||||
push %rsi
|
||||
push %rdi
|
||||
push %rbx
|
||||
push %rbp
|
||||
push %r12
|
||||
push %r13
|
||||
push %r14
|
||||
push %r15
|
||||
pushfq
|
||||
sub \$64,%rsp
|
||||
|
||||
mov 120($context),%rax # pull context->Rax
|
||||
mov 248($context),%rbx # pull context->Rip
|
||||
|
||||
lea .Lbody_mul_2x2(%rip),%r10
|
||||
cmp %r10,%rbx # context->Rip<"prologue" label
|
||||
jb .Lin_prologue
|
||||
|
||||
mov 152($context),%rax # pull context->Rsp
|
||||
|
||||
lea .Lepilogue_mul_2x2(%rip),%r10
|
||||
cmp %r10,%rbx # context->Rip>="epilogue" label
|
||||
jae .Lin_prologue
|
||||
|
||||
mov 8*10(%rax),%r14 # mimic epilogue
|
||||
mov 8*11(%rax),%r13
|
||||
mov 8*12(%rax),%r12
|
||||
mov 8*13(%rax),%rbp
|
||||
mov 8*14(%rax),%rbx
|
||||
mov 8*15(%rax),%rdi
|
||||
mov 8*16(%rax),%rsi
|
||||
|
||||
mov %rbx,144($context) # restore context->Rbx
|
||||
mov %rbp,160($context) # restore context->Rbp
|
||||
mov %rsi,168($context) # restore context->Rsi
|
||||
mov %rdi,176($context) # restore context->Rdi
|
||||
mov %r12,216($context) # restore context->R12
|
||||
mov %r13,224($context) # restore context->R13
|
||||
mov %r14,232($context) # restore context->R14
|
||||
|
||||
lea 8*17(%rax),%rax
|
||||
|
||||
.Lin_prologue:
|
||||
mov %rax,152($context) # restore context->Rsp
|
||||
|
||||
mov 40($disp),%rdi # disp->ContextRecord
|
||||
mov $context,%rsi # context
|
||||
mov \$154,%ecx # sizeof(CONTEXT) in quadwords, the count for rep movsq
|
||||
.long 0xa548f3fc # cld; rep movsq
|
||||
|
||||
mov $disp,%rsi
|
||||
xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
|
||||
mov 8(%rsi),%rdx # arg2, disp->ImageBase
|
||||
mov 0(%rsi),%r8 # arg3, disp->ControlPc
|
||||
mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
|
||||
mov 40(%rsi),%r10 # disp->ContextRecord
|
||||
lea 56(%rsi),%r11 # &disp->HandlerData
|
||||
lea 24(%rsi),%r12 # &disp->EstablisherFrame
|
||||
mov %r10,32(%rsp) # arg5
|
||||
mov %r11,40(%rsp) # arg6
|
||||
mov %r12,48(%rsp) # arg7
|
||||
mov %rcx,56(%rsp) # arg8, (NULL)
|
||||
call *__imp_RtlVirtualUnwind(%rip)
|
||||
|
||||
mov \$1,%eax # ExceptionContinueSearch
|
||||
add \$64,%rsp
|
||||
popfq
|
||||
pop %r15
|
||||
pop %r14
|
||||
pop %r13
|
||||
pop %r12
|
||||
pop %rbp
|
||||
pop %rbx
|
||||
pop %rdi
|
||||
pop %rsi
|
||||
ret
|
||||
.size se_handler,.-se_handler
|
||||
|
||||
.section .pdata
|
||||
.align 4
|
||||
.rva _mul_1x1
|
||||
.rva .Lend_mul_1x1
|
||||
.rva .LSEH_info_1x1
|
||||
|
||||
.rva .Lvanilla_mul_2x2
|
||||
.rva .Lend_mul_2x2
|
||||
.rva .LSEH_info_2x2
|
||||
.section .xdata
|
||||
.align 8
|
||||
.LSEH_info_1x1:
|
||||
.byte 0x01,0x07,0x02,0x00
|
||||
.byte 0x07,0x01,0x11,0x00 # sub rsp,128+8
|
||||
.LSEH_info_2x2:
|
||||
.byte 9,0,0,0
|
||||
.rva se_handler
|
||||
___
|
||||
}
|
||||
|
||||
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
|
||||
print $code;
|
||||
close STDOUT;
|
1592
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-mont.pl
vendored
Executable file
1592
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-mont.pl
vendored
Executable file
File diff suppressed because it is too large
Load diff
3945
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-mont5.pl
vendored
Executable file
3945
trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-mont5.pl
vendored
Executable file
File diff suppressed because it is too large
Load diff
171
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_add.c
vendored
Normal file
171
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_add.c
vendored
Normal file
|
@ -0,0 +1,171 @@
|
|||
/*
|
||||
* Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
/* signed add of b to a. */
|
||||
/*
 * Signed addition: r = a + b.
 *
 * Equal signs add the magnitudes and keep that sign.  Opposite signs
 * subtract the smaller magnitude from the larger and take the sign of the
 * larger operand; equal magnitudes give zero.  Returns what BN_uadd or
 * BN_usub returned, or 1 for the zero case.
 */
int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
{
    int ok, sign, order;

    bn_check_top(a);
    bn_check_top(b);

    /*
     * The result sign is captured before the magnitude routine runs and is
     * applied to r only afterwards.
     */
    if (a->neg != b->neg) {
        order = BN_ucmp(a, b);
        if (order == 0) {
            /* |a| == |b| with opposite signs: the sum is zero */
            sign = 0;
            BN_zero(r);
            ok = 1;
        } else if (order > 0) {
            sign = a->neg;
            ok = BN_usub(r, a, b);
        } else {
            sign = b->neg;
            ok = BN_usub(r, b, a);
        }
    } else {
        sign = a->neg;
        ok = BN_uadd(r, a, b);
    }

    r->neg = sign;
    bn_check_top(r);
    return ok;
}
|
||||
|
||||
/* signed sub of b from a. */
|
||||
int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
|
||||
{
|
||||
int ret, r_neg, cmp_res;
|
||||
|
||||
bn_check_top(a);
|
||||
bn_check_top(b);
|
||||
|
||||
if (a->neg != b->neg) {
|
||||
r_neg = a->neg;
|
||||
ret = BN_uadd(r, a, b);
|
||||
} else {
|
||||
cmp_res = BN_ucmp(a, b);
|
||||
if (cmp_res > 0) {
|
||||
r_neg = a->neg;
|
||||
ret = BN_usub(r, a, b);
|
||||
} else if (cmp_res < 0) {
|
||||
r_neg = !b->neg;
|
||||
ret = BN_usub(r, b, a);
|
||||
} else {
|
||||
r_neg = 0;
|
||||
BN_zero(r);
|
||||
ret = 1;
|
||||
}
|
||||
}
|
||||
|
||||
r->neg = r_neg;
|
||||
bn_check_top(r);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* unsigned add of b to a, r can be equal to a or b. */
|
||||
int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
|
||||
{
|
||||
int max, min, dif;
|
||||
const BN_ULONG *ap, *bp;
|
||||
BN_ULONG *rp, carry, t1, t2;
|
||||
|
||||
bn_check_top(a);
|
||||
bn_check_top(b);
|
||||
|
||||
if (a->top < b->top) {
|
||||
const BIGNUM *tmp;
|
||||
|
||||
tmp = a;
|
||||
a = b;
|
||||
b = tmp;
|
||||
}
|
||||
max = a->top;
|
||||
min = b->top;
|
||||
dif = max - min;
|
||||
|
||||
if (bn_wexpand(r, max + 1) == NULL)
|
||||
return 0;
|
||||
|
||||
r->top = max;
|
||||
|
||||
ap = a->d;
|
||||
bp = b->d;
|
||||
rp = r->d;
|
||||
|
||||
carry = bn_add_words(rp, ap, bp, min);
|
||||
rp += min;
|
||||
ap += min;
|
||||
|
||||
while (dif) {
|
||||
dif--;
|
||||
t1 = *(ap++);
|
||||
t2 = (t1 + carry) & BN_MASK2;
|
||||
*(rp++) = t2;
|
||||
carry &= (t2 == 0);
|
||||
}
|
||||
*rp = carry;
|
||||
r->top += carry;
|
||||
|
||||
r->neg = 0;
|
||||
bn_check_top(r);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* unsigned subtraction of b from a, a must be larger than b. */
|
||||
int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
|
||||
{
|
||||
int max, min, dif;
|
||||
BN_ULONG t1, t2, borrow, *rp;
|
||||
const BN_ULONG *ap, *bp;
|
||||
|
||||
bn_check_top(a);
|
||||
bn_check_top(b);
|
||||
|
||||
max = a->top;
|
||||
min = b->top;
|
||||
dif = max - min;
|
||||
|
||||
if (dif < 0) { /* hmm... should not be happening */
|
||||
BNerr(BN_F_BN_USUB, BN_R_ARG2_LT_ARG3);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (bn_wexpand(r, max) == NULL)
|
||||
return 0;
|
||||
|
||||
ap = a->d;
|
||||
bp = b->d;
|
||||
rp = r->d;
|
||||
|
||||
borrow = bn_sub_words(rp, ap, bp, min);
|
||||
ap += min;
|
||||
rp += min;
|
||||
|
||||
while (dif) {
|
||||
dif--;
|
||||
t1 = *(ap++);
|
||||
t2 = (t1 - borrow) & BN_MASK2;
|
||||
*(rp++) = t2;
|
||||
borrow &= (t1 == 0);
|
||||
}
|
||||
|
||||
while (max && *--rp == 0)
|
||||
max--;
|
||||
|
||||
r->top = max;
|
||||
r->neg = 0;
|
||||
bn_pollute(r);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
1039
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_asm.c
vendored
Normal file
1039
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_asm.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
312
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_blind.c
vendored
Normal file
312
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_blind.c
vendored
Normal file
|
@ -0,0 +1,312 @@
|
|||
/*
|
||||
* Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <openssl/opensslconf.h>
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
#define BN_BLINDING_COUNTER 32
|
||||
|
||||
struct bn_blinding_st {
|
||||
BIGNUM *A;
|
||||
BIGNUM *Ai;
|
||||
BIGNUM *e;
|
||||
BIGNUM *mod; /* just a reference */
|
||||
CRYPTO_THREAD_ID tid;
|
||||
int counter;
|
||||
unsigned long flags;
|
||||
BN_MONT_CTX *m_ctx;
|
||||
int (*bn_mod_exp) (BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
|
||||
const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
|
||||
CRYPTO_RWLOCK *lock;
|
||||
};
|
||||
|
||||
/*
 * Allocate a BN_BLINDING for modulus mod.  A and Ai are optional; when given
 * they are duplicated, as is mod.  The new object is bound to the calling
 * thread and marked as fresh (counter == -1).
 *
 * Returns the new object, or NULL on allocation failure.
 */
BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod)
{
    BN_BLINDING *blinding;

    bn_check_top(mod);

    blinding = OPENSSL_zalloc(sizeof(*blinding));
    if (blinding == NULL) {
        BNerr(BN_F_BN_BLINDING_NEW, ERR_R_MALLOC_FAILURE);
        return NULL;
    }

    blinding->lock = CRYPTO_THREAD_lock_new();
    if (blinding->lock == NULL) {
        BNerr(BN_F_BN_BLINDING_NEW, ERR_R_MALLOC_FAILURE);
        OPENSSL_free(blinding);
        return NULL;
    }

    BN_BLINDING_set_current_thread(blinding);

    if (A != NULL && (blinding->A = BN_dup(A)) == NULL)
        goto err;

    if (Ai != NULL && (blinding->Ai = BN_dup(Ai)) == NULL)
        goto err;

    /* keep a private copy of the modulus inside the structure */
    blinding->mod = BN_dup(mod);
    if (blinding->mod == NULL)
        goto err;

    /* carry the constant-time flag over to the copy */
    if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
        BN_set_flags(blinding->mod, BN_FLG_CONSTTIME);

    /*
     * -1 is a special counter value: the blinding is fresh, has never been
     * used and therefore needs no update before its first use.
     */
    blinding->counter = -1;

    return blinding;

 err:
    BN_BLINDING_free(blinding);
    return NULL;
}
|
||||
|
||||
/*
 * Release a BN_BLINDING together with its A, Ai, e and mod values and its
 * lock.  The Montgomery context is left untouched.  NULL is accepted.
 */
void BN_BLINDING_free(BN_BLINDING *r)
{
    if (r != NULL) {
        BN_free(r->A);
        BN_free(r->Ai);
        BN_free(r->e);
        BN_free(r->mod);
        CRYPTO_THREAD_lock_free(r->lock);
        OPENSSL_free(r);
    }
}
|
||||
|
||||
/*
 * Advance the blinding to its next state.
 *
 * Every BN_BLINDING_COUNTER-th call re-creates the parameters from scratch,
 * provided e is known and BN_BLINDING_NO_RECREATE is not set.  Otherwise A
 * and Ai are squared in place unless BN_BLINDING_NO_UPDATE is set.
 *
 * Returns 1 on success, 0 on error.
 */
int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx)
{
    int ok = 0;
    int recreate;

    if (b->A == NULL || b->Ai == NULL) {
        BNerr(BN_F_BN_BLINDING_UPDATE, BN_R_NOT_INITIALIZED);
        goto err;
    }

    /* a fresh blinding starts counting from zero */
    if (b->counter == -1)
        b->counter = 0;

    b->counter++;
    recreate = b->counter == BN_BLINDING_COUNTER
               && b->e != NULL
               && (b->flags & BN_BLINDING_NO_RECREATE) == 0;

    if (recreate) {
        /* re-create blinding parameters */
        if (!BN_BLINDING_create_param(b, NULL, NULL, ctx, NULL, NULL))
            goto err;
    } else if ((b->flags & BN_BLINDING_NO_UPDATE) == 0) {
        int squared;

        if (b->m_ctx != NULL)
            squared = bn_mul_mont_fixed_top(b->Ai, b->Ai, b->Ai, b->m_ctx, ctx)
                      && bn_mul_mont_fixed_top(b->A, b->A, b->A, b->m_ctx, ctx);
        else
            squared = BN_mod_mul(b->Ai, b->Ai, b->Ai, b->mod, ctx)
                      && BN_mod_mul(b->A, b->A, b->A, b->mod, ctx);

        if (!squared)
            goto err;
    }

    ok = 1;

 err:
    /* wrap the counter round, on success and on failure alike */
    if (b->counter == BN_BLINDING_COUNTER)
        b->counter = 0;
    return ok;
}
|
||||
|
||||
/*
 * Blind n in place.  Same as BN_BLINDING_convert_ex() without a copy of the
 * unblinding factor being requested.
 */
int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
{
    BIGNUM *no_factor_copy = NULL;

    return BN_BLINDING_convert_ex(n, no_factor_copy, b, ctx);
}
|
||||
|
||||
/*
 * Blind n in place by multiplying it with A.  Unless the blinding is fresh,
 * it is updated first.  When r is not NULL it receives a copy of Ai, the
 * factor needed to undo the blinding later on.
 *
 * Returns the status of the modular multiplication, or 0 on an earlier error.
 */
int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *ctx)
{
    bn_check_top(n);

    if (b->A == NULL || b->Ai == NULL) {
        BNerr(BN_F_BN_BLINDING_CONVERT_EX, BN_R_NOT_INITIALIZED);
        return 0;
    }

    if (b->counter != -1) {
        if (!BN_BLINDING_update(b, ctx))
            return 0;
    } else {
        /* Fresh blinding, doesn't need updating. */
        b->counter = 0;
    }

    if (r != NULL && BN_copy(r, b->Ai) == NULL)
        return 0;

    if (b->m_ctx == NULL)
        return BN_mod_mul(n, n, b->A, b->mod, ctx);

    return BN_mod_mul_montgomery(n, n, b->A, b->m_ctx, ctx);
}
|
||||
|
||||
/*
 * Remove the blinding from n in place, using the Ai stored in b.  Same as
 * BN_BLINDING_invert_ex() without an explicit unblinding factor.
 */
int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
{
    const BIGNUM *stored_factor = NULL;

    return BN_BLINDING_invert_ex(n, stored_factor, b, ctx);
}
|
||||
|
||||
int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b,
|
||||
BN_CTX *ctx)
|
||||
{
|
||||
int ret;
|
||||
|
||||
bn_check_top(n);
|
||||
|
||||
if (r == NULL && (r = b->Ai) == NULL) {
|
||||
BNerr(BN_F_BN_BLINDING_INVERT_EX, BN_R_NOT_INITIALIZED);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (b->m_ctx != NULL) {
|
||||
/* ensure that BN_mod_mul_montgomery takes pre-defined path */
|
||||
if (n->dmax >= r->top) {
|
||||
size_t i, rtop = r->top, ntop = n->top;
|
||||
BN_ULONG mask;
|
||||
|
||||
for (i = 0; i < rtop; i++) {
|
||||
mask = (BN_ULONG)0 - ((i - ntop) >> (8 * sizeof(i) - 1));
|
||||
n->d[i] &= mask;
|
||||
}
|
||||
mask = (BN_ULONG)0 - ((rtop - ntop) >> (8 * sizeof(ntop) - 1));
|
||||
/* always true, if (rtop >= ntop) n->top = r->top; */
|
||||
n->top = (int)(rtop & ~mask) | (ntop & mask);
|
||||
n->flags |= (BN_FLG_FIXED_TOP & ~mask);
|
||||
}
|
||||
ret = BN_mod_mul_montgomery(n, n, r, b->m_ctx, ctx);
|
||||
} else {
|
||||
ret = BN_mod_mul(n, n, r, b->mod, ctx);
|
||||
}
|
||||
|
||||
bn_check_top(n);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Compare the calling thread's id with the one recorded in b and return the
 * result of CRYPTO_THREAD_compare_id().
 */
int BN_BLINDING_is_current_thread(BN_BLINDING *b)
{
    CRYPTO_THREAD_ID caller = CRYPTO_THREAD_get_current_id();

    return CRYPTO_THREAD_compare_id(caller, b->tid);
}
|
||||
|
||||
/* Record the calling thread's id in b. */
void BN_BLINDING_set_current_thread(BN_BLINDING *b)
{
    CRYPTO_THREAD_ID caller = CRYPTO_THREAD_get_current_id();

    b->tid = caller;
}
|
||||
|
||||
/* Take b's lock for writing; returns the status of the lock call. */
int BN_BLINDING_lock(BN_BLINDING *b)
{
    CRYPTO_RWLOCK *guard = b->lock;

    return CRYPTO_THREAD_write_lock(guard);
}
|
||||
|
||||
/* Release b's lock; returns the status of the unlock call. */
int BN_BLINDING_unlock(BN_BLINDING *b)
{
    CRYPTO_RWLOCK *guard = b->lock;

    return CRYPTO_THREAD_unlock(guard);
}
|
||||
|
||||
unsigned long BN_BLINDING_get_flags(const BN_BLINDING *b)
|
||||
{
|
||||
return b->flags;
|
||||
}
|
||||
|
||||
/* Replace (not merge) the flag bits stored in b with flags. */
void BN_BLINDING_set_flags(BN_BLINDING *b, unsigned long flags)
{
    unsigned long replacement = flags;

    b->flags = replacement;
}
|
||||
|
||||
BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b,
|
||||
const BIGNUM *e, BIGNUM *m, BN_CTX *ctx,
|
||||
int (*bn_mod_exp) (BIGNUM *r,
|
||||
const BIGNUM *a,
|
||||
const BIGNUM *p,
|
||||
const BIGNUM *m,
|
||||
BN_CTX *ctx,
|
||||
BN_MONT_CTX *m_ctx),
|
||||
BN_MONT_CTX *m_ctx)
|
||||
{
|
||||
int retry_counter = 32;
|
||||
BN_BLINDING *ret = NULL;
|
||||
|
||||
if (b == NULL)
|
||||
ret = BN_BLINDING_new(NULL, NULL, m);
|
||||
else
|
||||
ret = b;
|
||||
|
||||
if (ret == NULL)
|
||||
goto err;
|
||||
|
||||
if (ret->A == NULL && (ret->A = BN_new()) == NULL)
|
||||
goto err;
|
||||
if (ret->Ai == NULL && (ret->Ai = BN_new()) == NULL)
|
||||
goto err;
|
||||
|
||||
if (e != NULL) {
|
||||
BN_free(ret->e);
|
||||
ret->e = BN_dup(e);
|
||||
}
|
||||
if (ret->e == NULL)
|
||||
goto err;
|
||||
|
||||
if (bn_mod_exp != NULL)
|
||||
ret->bn_mod_exp = bn_mod_exp;
|
||||
if (m_ctx != NULL)
|
||||
ret->m_ctx = m_ctx;
|
||||
|
||||
do {
|
||||
int rv;
|
||||
if (!BN_priv_rand_range(ret->A, ret->mod))
|
||||
goto err;
|
||||
if (int_bn_mod_inverse(ret->Ai, ret->A, ret->mod, ctx, &rv))
|
||||
break;
|
||||
|
||||
/*
|
||||
* this should almost never happen for good RSA keys
|
||||
*/
|
||||
if (!rv)
|
||||
goto err;
|
||||
|
||||
if (retry_counter-- == 0) {
|
||||
BNerr(BN_F_BN_BLINDING_CREATE_PARAM, BN_R_TOO_MANY_ITERATIONS);
|
||||
goto err;
|
||||
}
|
||||
} while (1);
|
||||
|
||||
if (ret->bn_mod_exp != NULL && ret->m_ctx != NULL) {
|
||||
if (!ret->bn_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx, ret->m_ctx))
|
||||
goto err;
|
||||
} else {
|
||||
if (!BN_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx))
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (ret->m_ctx != NULL) {
|
||||
if (!bn_to_mont_fixed_top(ret->Ai, ret->Ai, ret->m_ctx, ctx)
|
||||
|| !bn_to_mont_fixed_top(ret->A, ret->A, ret->m_ctx, ctx))
|
||||
goto err;
|
||||
}
|
||||
|
||||
return ret;
|
||||
err:
|
||||
if (b == NULL) {
|
||||
BN_BLINDING_free(ret);
|
||||
ret = NULL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
553
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_const.c
vendored
Normal file
553
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_const.c
vendored
Normal file
|
@ -0,0 +1,553 @@
|
|||
/*
|
||||
* Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <openssl/bn.h>
|
||||
|
||||
/*-
|
||||
* "First Oakley Default Group" from RFC2409, section 6.1.
|
||||
*
|
||||
* The prime is: 2^768 - 2^704 - 1 + 2^64 * { [2^638 pi] + 149686 }
|
||||
*
|
||||
* RFC2409 specifies a generator of 2.
|
||||
* RFC2412 specifies a generator of 22.
|
||||
*/
|
||||
|
||||
BIGNUM *BN_get_rfc2409_prime_768(BIGNUM *bn)
{
    /* The 768-bit prime as a 96-byte string, handed to BN_bin2bn() as is. */
    static const unsigned char prime_bytes[] = {
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x3A, 0x36, 0x20,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    };

    /* bn is passed straight through as the destination argument */
    return BN_bin2bn(prime_bytes, sizeof(prime_bytes), bn);
}
|
||||
|
||||
/*-
|
||||
* "Second Oakley Default Group" from RFC2409, section 6.2.
|
||||
*
|
||||
* The prime is: 2^1024 - 2^960 - 1 + 2^64 * { [2^894 pi] + 129093 }.
|
||||
*
|
||||
* RFC2409 specifies a generator of 2.
|
||||
* RFC2412 specifies a generator of 22.
|
||||
*/
|
||||
|
||||
BIGNUM *BN_get_rfc2409_prime_1024(BIGNUM *bn)
{
    /* The 1024-bit prime as a 128-byte string, handed to BN_bin2bn() as is. */
    static const unsigned char prime_bytes[] = {
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
        0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
        0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
        0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
        0x49, 0x28, 0x66, 0x51, 0xEC, 0xE6, 0x53, 0x81,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    };

    /* bn is passed straight through as the destination argument */
    return BN_bin2bn(prime_bytes, sizeof(prime_bytes), bn);
}
|
||||
|
||||
/*-
|
||||
* "1536-bit MODP Group" from RFC3526, Section 2.
|
||||
*
|
||||
* The prime is: 2^1536 - 2^1472 - 1 + 2^64 * { [2^1406 pi] + 741804 }
|
||||
*
|
||||
* RFC3526 specifies a generator of 2.
|
||||
* RFC2412 specifies a generator of 22.
|
||||
*/
|
||||
|
||||
BIGNUM *BN_get_rfc3526_prime_1536(BIGNUM *bn)
{
    /* The 1536-bit prime as a 192-byte string, handed to BN_bin2bn() as is. */
    static const unsigned char prime_bytes[] = {
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
        0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
        0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
        0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
        0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
        0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
        0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
        0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
        0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
        0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
        0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
        0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
        0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x23, 0x73, 0x27,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    };

    /* bn is passed straight through as the destination argument */
    return BN_bin2bn(prime_bytes, sizeof(prime_bytes), bn);
}
|
||||
|
||||
/*-
|
||||
* "2048-bit MODP Group" from RFC3526, Section 3.
|
||||
*
|
||||
* The prime is: 2^2048 - 2^1984 - 1 + 2^64 * { [2^1918 pi] + 124476 }
|
||||
*
|
||||
* RFC3526 specifies a generator of 2.
|
||||
*/
|
||||
|
||||
BIGNUM *BN_get_rfc3526_prime_2048(BIGNUM *bn)
{
    /* The 2048-bit prime as a 256-byte string, handed to BN_bin2bn() as is. */
    static const unsigned char prime_bytes[] = {
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
        0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
        0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
        0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
        0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
        0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
        0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
        0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
        0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
        0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
        0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
        0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
        0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C,
        0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
        0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03,
        0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F,
        0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
        0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18,
        0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5,
        0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
        0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAC, 0xAA, 0x68,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    };

    /* bn is passed straight through as the destination argument */
    return BN_bin2bn(prime_bytes, sizeof(prime_bytes), bn);
}
|
||||
|
||||
/*-
|
||||
* "3072-bit MODP Group" from RFC3526, Section 4.
|
||||
*
|
||||
* The prime is: 2^3072 - 2^3008 - 1 + 2^64 * { [2^2942 pi] + 1690314 }
|
||||
*
|
||||
* RFC3526 specifies a generator of 2.
|
||||
*/
|
||||
|
||||
BIGNUM *BN_get_rfc3526_prime_3072(BIGNUM *bn)
{
    /* The 3072-bit prime as a 384-byte string, handed to BN_bin2bn() as is. */
    static const unsigned char prime_bytes[] = {
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
        0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
        0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
        0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
        0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
        0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
        0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
        0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
        0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
        0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
        0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
        0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
        0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C,
        0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
        0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03,
        0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F,
        0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
        0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18,
        0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5,
        0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
        0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D,
        0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33,
        0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
        0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A,
        0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D,
        0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
        0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7,
        0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D,
        0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
        0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64,
        0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64,
        0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
        0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C,
        0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2,
        0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
        0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E,
        0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x3A, 0xD2, 0xCA,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    };

    /* bn is passed straight through as the destination argument */
    return BN_bin2bn(prime_bytes, sizeof(prime_bytes), bn);
}
|
||||
|
||||
/*-
|
||||
* "4096-bit MODP Group" from RFC3526, Section 5.
|
||||
*
|
||||
* The prime is: 2^4096 - 2^4032 - 1 + 2^64 * { [2^3966 pi] + 240904 }
|
||||
*
|
||||
* RFC3526 specifies a generator of 2.
|
||||
*/
|
||||
|
||||
BIGNUM *BN_get_rfc3526_prime_4096(BIGNUM *bn)
{
    /* The 4096-bit prime as a 512-byte string, handed to BN_bin2bn() as is. */
    static const unsigned char prime_bytes[] = {
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
        0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
        0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
        0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
        0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
        0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
        0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
        0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
        0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
        0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
        0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
        0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
        0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C,
        0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
        0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03,
        0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F,
        0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
        0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18,
        0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5,
        0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
        0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D,
        0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33,
        0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
        0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A,
        0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D,
        0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
        0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7,
        0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D,
        0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
        0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64,
        0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64,
        0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
        0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C,
        0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2,
        0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
        0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E,
        0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x21, 0x08, 0x01,
        0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7,
        0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26,
        0x99, 0xC3, 0x27, 0x18, 0x6A, 0xF4, 0xE2, 0x3C,
        0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA,
        0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8,
        0xDB, 0xBB, 0xC2, 0xDB, 0x04, 0xDE, 0x8E, 0xF9,
        0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6,
        0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D,
        0x99, 0xB2, 0x96, 0x4F, 0xA0, 0x90, 0xC3, 0xA2,
        0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED,
        0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF,
        0xB8, 0x1B, 0xDD, 0x76, 0x21, 0x70, 0x48, 0x1C,
        0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9,
        0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1,
        0x86, 0xFF, 0xB7, 0xDC, 0x90, 0xA6, 0xC0, 0x8F,
        0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x06, 0x31, 0x99,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    };

    /* bn is passed straight through as the destination argument */
    return BN_bin2bn(prime_bytes, sizeof(prime_bytes), bn);
}
|
||||
|
||||
/*-
|
||||
* "6144-bit MODP Group" from RFC3526, Section 6.
|
||||
*
|
||||
* The prime is: 2^6144 - 2^6080 - 1 + 2^64 * { [2^6014 pi] + 929484 }
|
||||
*
|
||||
* RFC3526 specifies a generator of 2.
|
||||
*/
|
||||
|
||||
BIGNUM *BN_get_rfc3526_prime_6144(BIGNUM *bn)
{
    /* The 6144-bit prime as a 768-byte string, handed to BN_bin2bn() as is. */
    static const unsigned char prime_bytes[] = {
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
        0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
        0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
        0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
        0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
        0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
        0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
        0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
        0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
        0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
        0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
        0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
        0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C,
        0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
        0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03,
        0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F,
        0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
        0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18,
        0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5,
        0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
        0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D,
        0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33,
        0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
        0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A,
        0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D,
        0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
        0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7,
        0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D,
        0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
        0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64,
        0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64,
        0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
        0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C,
        0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2,
        0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
        0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E,
        0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x21, 0x08, 0x01,
        0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7,
        0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26,
        0x99, 0xC3, 0x27, 0x18, 0x6A, 0xF4, 0xE2, 0x3C,
        0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA,
        0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8,
        0xDB, 0xBB, 0xC2, 0xDB, 0x04, 0xDE, 0x8E, 0xF9,
        0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6,
        0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D,
        0x99, 0xB2, 0x96, 0x4F, 0xA0, 0x90, 0xC3, 0xA2,
        0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED,
        0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF,
        0xB8, 0x1B, 0xDD, 0x76, 0x21, 0x70, 0x48, 0x1C,
        0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9,
        0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1,
        0x86, 0xFF, 0xB7, 0xDC, 0x90, 0xA6, 0xC0, 0x8F,
        0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x02, 0x84, 0x92,
        0x36, 0xC3, 0xFA, 0xB4, 0xD2, 0x7C, 0x70, 0x26,
        0xC1, 0xD4, 0xDC, 0xB2, 0x60, 0x26, 0x46, 0xDE,
        0xC9, 0x75, 0x1E, 0x76, 0x3D, 0xBA, 0x37, 0xBD,
        0xF8, 0xFF, 0x94, 0x06, 0xAD, 0x9E, 0x53, 0x0E,
        0xE5, 0xDB, 0x38, 0x2F, 0x41, 0x30, 0x01, 0xAE,
        0xB0, 0x6A, 0x53, 0xED, 0x90, 0x27, 0xD8, 0x31,
        0x17, 0x97, 0x27, 0xB0, 0x86, 0x5A, 0x89, 0x18,
        0xDA, 0x3E, 0xDB, 0xEB, 0xCF, 0x9B, 0x14, 0xED,
        0x44, 0xCE, 0x6C, 0xBA, 0xCE, 0xD4, 0xBB, 0x1B,
        0xDB, 0x7F, 0x14, 0x47, 0xE6, 0xCC, 0x25, 0x4B,
        0x33, 0x20, 0x51, 0x51, 0x2B, 0xD7, 0xAF, 0x42,
        0x6F, 0xB8, 0xF4, 0x01, 0x37, 0x8C, 0xD2, 0xBF,
        0x59, 0x83, 0xCA, 0x01, 0xC6, 0x4B, 0x92, 0xEC,
        0xF0, 0x32, 0xEA, 0x15, 0xD1, 0x72, 0x1D, 0x03,
        0xF4, 0x82, 0xD7, 0xCE, 0x6E, 0x74, 0xFE, 0xF6,
        0xD5, 0x5E, 0x70, 0x2F, 0x46, 0x98, 0x0C, 0x82,
        0xB5, 0xA8, 0x40, 0x31, 0x90, 0x0B, 0x1C, 0x9E,
        0x59, 0xE7, 0xC9, 0x7F, 0xBE, 0xC7, 0xE8, 0xF3,
        0x23, 0xA9, 0x7A, 0x7E, 0x36, 0xCC, 0x88, 0xBE,
        0x0F, 0x1D, 0x45, 0xB7, 0xFF, 0x58, 0x5A, 0xC5,
        0x4B, 0xD4, 0x07, 0xB2, 0x2B, 0x41, 0x54, 0xAA,
        0xCC, 0x8F, 0x6D, 0x7E, 0xBF, 0x48, 0xE1, 0xD8,
        0x14, 0xCC, 0x5E, 0xD2, 0x0F, 0x80, 0x37, 0xE0,
        0xA7, 0x97, 0x15, 0xEE, 0xF2, 0x9B, 0xE3, 0x28,
        0x06, 0xA1, 0xD5, 0x8B, 0xB7, 0xC5, 0xDA, 0x76,
        0xF5, 0x50, 0xAA, 0x3D, 0x8A, 0x1F, 0xBF, 0xF0,
        0xEB, 0x19, 0xCC, 0xB1, 0xA3, 0x13, 0xD5, 0x5C,
        0xDA, 0x56, 0xC9, 0xEC, 0x2E, 0xF2, 0x96, 0x32,
        0x38, 0x7F, 0xE8, 0xD7, 0x6E, 0x3C, 0x04, 0x68,
        0x04, 0x3E, 0x8F, 0x66, 0x3F, 0x48, 0x60, 0xEE,
        0x12, 0xBF, 0x2D, 0x5B, 0x0B, 0x74, 0x74, 0xD6,
        0xE6, 0x94, 0xF9, 0x1E, 0x6D, 0xCC, 0x40, 0x24,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    };

    /* bn is passed straight through as the destination argument */
    return BN_bin2bn(prime_bytes, sizeof(prime_bytes), bn);
}
|
||||
|
||||
/*-
|
||||
* "8192-bit MODP Group" from RFC3526, Section 7.
|
||||
*
|
||||
* The prime is: 2^8192 - 2^8128 - 1 + 2^64 * { [2^8062 pi] + 4743158 }
|
||||
*
|
||||
* RFC3526 specifies a generator of 2.
|
||||
*/
|
||||
|
||||
/*
 * Convert the 1024-byte table of the RFC 3526 8192-bit MODP prime into a
 * BIGNUM by passing it, its size and |bn| to BN_bin2bn(); that call's result
 * is returned as-is.
 */
BIGNUM *BN_get_rfc3526_prime_8192(BIGNUM *bn)
{
    static const unsigned char prime_bytes[] = {
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
        0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
        0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
        0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
        0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
        0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
        0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
        0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
        0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
        0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
        0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
        0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
        0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C,
        0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
        0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03,
        0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F,
        0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
        0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18,
        0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5,
        0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
        0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D,
        0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33,
        0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
        0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A,
        0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D,
        0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
        0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7,
        0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D,
        0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
        0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64,
        0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64,
        0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
        0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C,
        0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2,
        0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
        0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E,
        0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x21, 0x08, 0x01,
        0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7,
        0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26,
        0x99, 0xC3, 0x27, 0x18, 0x6A, 0xF4, 0xE2, 0x3C,
        0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA,
        0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8,
        0xDB, 0xBB, 0xC2, 0xDB, 0x04, 0xDE, 0x8E, 0xF9,
        0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6,
        0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D,
        0x99, 0xB2, 0x96, 0x4F, 0xA0, 0x90, 0xC3, 0xA2,
        0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED,
        0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF,
        0xB8, 0x1B, 0xDD, 0x76, 0x21, 0x70, 0x48, 0x1C,
        0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9,
        0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1,
        0x86, 0xFF, 0xB7, 0xDC, 0x90, 0xA6, 0xC0, 0x8F,
        0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x02, 0x84, 0x92,
        0x36, 0xC3, 0xFA, 0xB4, 0xD2, 0x7C, 0x70, 0x26,
        0xC1, 0xD4, 0xDC, 0xB2, 0x60, 0x26, 0x46, 0xDE,
        0xC9, 0x75, 0x1E, 0x76, 0x3D, 0xBA, 0x37, 0xBD,
        0xF8, 0xFF, 0x94, 0x06, 0xAD, 0x9E, 0x53, 0x0E,
        0xE5, 0xDB, 0x38, 0x2F, 0x41, 0x30, 0x01, 0xAE,
        0xB0, 0x6A, 0x53, 0xED, 0x90, 0x27, 0xD8, 0x31,
        0x17, 0x97, 0x27, 0xB0, 0x86, 0x5A, 0x89, 0x18,
        0xDA, 0x3E, 0xDB, 0xEB, 0xCF, 0x9B, 0x14, 0xED,
        0x44, 0xCE, 0x6C, 0xBA, 0xCE, 0xD4, 0xBB, 0x1B,
        0xDB, 0x7F, 0x14, 0x47, 0xE6, 0xCC, 0x25, 0x4B,
        0x33, 0x20, 0x51, 0x51, 0x2B, 0xD7, 0xAF, 0x42,
        0x6F, 0xB8, 0xF4, 0x01, 0x37, 0x8C, 0xD2, 0xBF,
        0x59, 0x83, 0xCA, 0x01, 0xC6, 0x4B, 0x92, 0xEC,
        0xF0, 0x32, 0xEA, 0x15, 0xD1, 0x72, 0x1D, 0x03,
        0xF4, 0x82, 0xD7, 0xCE, 0x6E, 0x74, 0xFE, 0xF6,
        0xD5, 0x5E, 0x70, 0x2F, 0x46, 0x98, 0x0C, 0x82,
        0xB5, 0xA8, 0x40, 0x31, 0x90, 0x0B, 0x1C, 0x9E,
        0x59, 0xE7, 0xC9, 0x7F, 0xBE, 0xC7, 0xE8, 0xF3,
        0x23, 0xA9, 0x7A, 0x7E, 0x36, 0xCC, 0x88, 0xBE,
        0x0F, 0x1D, 0x45, 0xB7, 0xFF, 0x58, 0x5A, 0xC5,
        0x4B, 0xD4, 0x07, 0xB2, 0x2B, 0x41, 0x54, 0xAA,
        0xCC, 0x8F, 0x6D, 0x7E, 0xBF, 0x48, 0xE1, 0xD8,
        0x14, 0xCC, 0x5E, 0xD2, 0x0F, 0x80, 0x37, 0xE0,
        0xA7, 0x97, 0x15, 0xEE, 0xF2, 0x9B, 0xE3, 0x28,
        0x06, 0xA1, 0xD5, 0x8B, 0xB7, 0xC5, 0xDA, 0x76,
        0xF5, 0x50, 0xAA, 0x3D, 0x8A, 0x1F, 0xBF, 0xF0,
        0xEB, 0x19, 0xCC, 0xB1, 0xA3, 0x13, 0xD5, 0x5C,
        0xDA, 0x56, 0xC9, 0xEC, 0x2E, 0xF2, 0x96, 0x32,
        0x38, 0x7F, 0xE8, 0xD7, 0x6E, 0x3C, 0x04, 0x68,
        0x04, 0x3E, 0x8F, 0x66, 0x3F, 0x48, 0x60, 0xEE,
        0x12, 0xBF, 0x2D, 0x5B, 0x0B, 0x74, 0x74, 0xD6,
        0xE6, 0x94, 0xF9, 0x1E, 0x6D, 0xBE, 0x11, 0x59,
        0x74, 0xA3, 0x92, 0x6F, 0x12, 0xFE, 0xE5, 0xE4,
        0x38, 0x77, 0x7C, 0xB6, 0xA9, 0x32, 0xDF, 0x8C,
        0xD8, 0xBE, 0xC4, 0xD0, 0x73, 0xB9, 0x31, 0xBA,
        0x3B, 0xC8, 0x32, 0xB6, 0x8D, 0x9D, 0xD3, 0x00,
        0x74, 0x1F, 0xA7, 0xBF, 0x8A, 0xFC, 0x47, 0xED,
        0x25, 0x76, 0xF6, 0x93, 0x6B, 0xA4, 0x24, 0x66,
        0x3A, 0xAB, 0x63, 0x9C, 0x5A, 0xE4, 0xF5, 0x68,
        0x34, 0x23, 0xB4, 0x74, 0x2B, 0xF1, 0xC9, 0x78,
        0x23, 0x8F, 0x16, 0xCB, 0xE3, 0x9D, 0x65, 0x2D,
        0xE3, 0xFD, 0xB8, 0xBE, 0xFC, 0x84, 0x8A, 0xD9,
        0x22, 0x22, 0x2E, 0x04, 0xA4, 0x03, 0x7C, 0x07,
        0x13, 0xEB, 0x57, 0xA8, 0x1A, 0x23, 0xF0, 0xC7,
        0x34, 0x73, 0xFC, 0x64, 0x6C, 0xEA, 0x30, 0x6B,
        0x4B, 0xCB, 0xC8, 0x86, 0x2F, 0x83, 0x85, 0xDD,
        0xFA, 0x9D, 0x4B, 0x7F, 0xA2, 0xC0, 0x87, 0xE8,
        0x79, 0x68, 0x33, 0x03, 0xED, 0x5B, 0xDD, 0x3A,
        0x06, 0x2B, 0x3C, 0xF5, 0xB3, 0xA2, 0x78, 0xA6,
        0x6D, 0x2A, 0x13, 0xF8, 0x3F, 0x44, 0xF8, 0x2D,
        0xDF, 0x31, 0x0E, 0xE0, 0x74, 0xAB, 0x6A, 0x36,
        0x45, 0x97, 0xE8, 0x99, 0xA0, 0x25, 0x5D, 0xC1,
        0x64, 0xF3, 0x1C, 0xC5, 0x08, 0x46, 0x85, 0x1D,
        0xF9, 0xAB, 0x48, 0x19, 0x5D, 0xED, 0x7E, 0xA1,
        0xB1, 0xD5, 0x10, 0xBD, 0x7E, 0xE7, 0x4D, 0x73,
        0xFA, 0xF3, 0x6B, 0xC3, 0x1E, 0xCF, 0xA2, 0x68,
        0x35, 0x90, 0x46, 0xF4, 0xEB, 0x87, 0x9F, 0x92,
        0x40, 0x09, 0x43, 0x8B, 0x48, 0x1C, 0x6C, 0xD7,
        0x88, 0x9A, 0x00, 0x2E, 0xD5, 0xEE, 0x38, 0x2B,
        0xC9, 0x19, 0x0D, 0xA6, 0xFC, 0x02, 0x6E, 0x47,
        0x95, 0x58, 0xE4, 0x47, 0x56, 0x77, 0xE9, 0xAA,
        0x9E, 0x30, 0x50, 0xE2, 0x76, 0x56, 0x94, 0xDF,
        0xC8, 0x1F, 0x56, 0xE8, 0x80, 0xB9, 0x6E, 0x71,
        0x60, 0xC9, 0x80, 0xDD, 0x98, 0xED, 0xD3, 0xDF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    };

    return BN_bin2bn(prime_bytes, sizeof(prime_bytes), bn);
}
|
361
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_ctx.c
vendored
Normal file
361
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_ctx.c
vendored
Normal file
|
@ -0,0 +1,361 @@
|
|||
/*
|
||||
* Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
/*-
|
||||
* TODO list
|
||||
*
|
||||
* 1. Check a bunch of "(words+1)" type hacks in various bignum functions and
|
||||
* check they can be safely removed.
|
||||
* - Check +1 and other ugliness in BN_from_montgomery()
|
||||
*
|
||||
* 2. Consider allowing a BN_new_ex() that, at least, lets you specify an
|
||||
* appropriate 'block' size that will be honoured by bn_expand_internal() to
|
||||
* prevent piddly little reallocations. OTOH, profiling bignum expansions in
|
||||
* BN_CTX doesn't show this to be a big issue.
|
||||
*/
|
||||
|
||||
/* How many bignums are in each "pool item"; */
|
||||
#define BN_CTX_POOL_SIZE 16
|
||||
/* The stack frame array is resizable; this is its first-time expansion size */
|
||||
#define BN_CTX_START_FRAMES 32
|
||||
|
||||
/***********/
|
||||
/* BN_POOL */
|
||||
/***********/
|
||||
|
||||
/* A bundle of bignums that can be linked with other bundles */
|
||||
typedef struct bignum_pool_item {
|
||||
/* The bignum values */
|
||||
BIGNUM vals[BN_CTX_POOL_SIZE];
|
||||
/* Linked-list admin */
|
||||
struct bignum_pool_item *prev, *next;
|
||||
} BN_POOL_ITEM;
|
||||
/* A linked-list of bignums grouped in bundles */
|
||||
typedef struct bignum_pool {
|
||||
/* Linked-list admin */
|
||||
BN_POOL_ITEM *head, *current, *tail;
|
||||
/* Stack depth and allocation size */
|
||||
unsigned used, size;
|
||||
} BN_POOL;
|
||||
static void BN_POOL_init(BN_POOL *);
|
||||
static void BN_POOL_finish(BN_POOL *);
|
||||
static BIGNUM *BN_POOL_get(BN_POOL *, int);
|
||||
static void BN_POOL_release(BN_POOL *, unsigned int);
|
||||
|
||||
/************/
|
||||
/* BN_STACK */
|
||||
/************/
|
||||
|
||||
/* A wrapper to manage the "stack frames" */
|
||||
/* Bookkeeping for the "stack frames" opened by BN_CTX_start() */
typedef struct bignum_ctx_stack {
    /* One saved bignum index per open frame */
    unsigned int *indexes;
    /* Number of frames currently stored in |indexes| */
    unsigned int depth;
    /* Number of entries |indexes| has room for */
    unsigned int size;
} BN_STACK;
|
||||
/* BN_STACK helpers; the definitions follow the BN_CTX functions */
static void BN_STACK_init(BN_STACK *st);
static void BN_STACK_finish(BN_STACK *st);
static int BN_STACK_push(BN_STACK *st, unsigned int idx);
static unsigned int BN_STACK_pop(BN_STACK *st);
|
||||
|
||||
/**********/
|
||||
/* BN_CTX */
|
||||
/**********/
|
||||
|
||||
/* The opaque BN_CTX type */
|
||||
struct bignum_ctx {
|
||||
/* The bignum bundles */
|
||||
BN_POOL pool;
|
||||
/* The "stack frames", if you will */
|
||||
BN_STACK stack;
|
||||
/* The number of bignums currently assigned */
|
||||
unsigned int used;
|
||||
/* Depth of stack overflow */
|
||||
int err_stack;
|
||||
/* Block "gets" until an "end" (compatibility behaviour) */
|
||||
int too_many;
|
||||
/* Flags. */
|
||||
int flags;
|
||||
};
|
||||
|
||||
/* Enable this to find BN_CTX bugs */
|
||||
#ifdef BN_CTX_DEBUG
/* Label stored by CTXDBG_ENTRY and printed again by CTXDBG_EXIT */
static const char *ctxdbg_cur = NULL;

/*
 * Dump the state of |ctx| to stderr: one line with the dmax of every bignum
 * currently in use, then one line with a "^^^" marker per stack frame.
 */
static void ctxdbg(BN_CTX *ctx)
{
    unsigned int bnidx = 0, fpidx = 0;
    BN_POOL_ITEM *item = ctx->pool.head;
    BN_STACK *stack = &ctx->stack;

    /* The "%p" conversion requires a pointer to void, hence the cast */
    fprintf(stderr, "(%16p): ", (void *)ctx);
    while (bnidx < ctx->used) {
        fprintf(stderr, "%03x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax);
        /* Crossed a bundle boundary: continue in the next pool item */
        if (!(bnidx % BN_CTX_POOL_SIZE))
            item = item->next;
    }
    fprintf(stderr, "\n");
    bnidx = 0;
    fprintf(stderr, " : ");
    while (fpidx < stack->depth) {
        /* Pad up to the bignum index at which this frame starts */
        while (bnidx++ < stack->indexes[fpidx])
            fprintf(stderr, " ");
        fprintf(stderr, "^^^ ");
        bnidx++;
        fpidx++;
    }
    fprintf(stderr, "\n");
}

/* Record |str| as the current label, announce it and dump |ctx| */
# define CTXDBG_ENTRY(str, ctx)  do { \
                                ctxdbg_cur = (str); \
                                fprintf(stderr,"Starting %s\n", ctxdbg_cur); \
                                ctxdbg(ctx); \
                                } while(0)
/* Announce the end of the current label and dump |ctx| again */
# define CTXDBG_EXIT(ctx)        do { \
                                fprintf(stderr,"Ending %s\n", ctxdbg_cur); \
                                ctxdbg(ctx); \
                                } while(0)
# define CTXDBG_RET(ctx,ret)
#else
/* Without BN_CTX_DEBUG all tracing hooks expand to nothing */
# define CTXDBG_ENTRY(str, ctx)
# define CTXDBG_EXIT(ctx)
# define CTXDBG_RET(ctx,ret)
#endif
|
||||
|
||||
|
||||
BN_CTX *BN_CTX_new(void)
|
||||
{
|
||||
BN_CTX *ret;
|
||||
|
||||
if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) {
|
||||
BNerr(BN_F_BN_CTX_NEW, ERR_R_MALLOC_FAILURE);
|
||||
return NULL;
|
||||
}
|
||||
/* Initialise the structure */
|
||||
BN_POOL_init(&ret->pool);
|
||||
BN_STACK_init(&ret->stack);
|
||||
return ret;
|
||||
}
|
||||
|
||||
BN_CTX *BN_CTX_secure_new(void)
|
||||
{
|
||||
BN_CTX *ret = BN_CTX_new();
|
||||
|
||||
if (ret != NULL)
|
||||
ret->flags = BN_FLG_SECURE;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Release |ctx| together with its frame stack and bignum pool.
 * A NULL |ctx| is ignored.
 */
void BN_CTX_free(BN_CTX *ctx)
{
    if (ctx == NULL)
        return;
#ifdef BN_CTX_DEBUG
    {
        BN_POOL_ITEM *item;
        unsigned idx;

        fprintf(stderr, "BN_CTX_free, stack-size=%d, pool-bignums=%d\n",
                ctx->stack.size, ctx->pool.size);
        fprintf(stderr, "dmaxs: ");
        /* Print dmax of every slot in every bundle, used or not */
        for (item = ctx->pool.head; item != NULL; item = item->next)
            for (idx = 0; idx < BN_CTX_POOL_SIZE; idx++)
                fprintf(stderr, "%02x ", item->vals[idx].dmax);
        fprintf(stderr, "\n");
    }
#endif
    BN_STACK_finish(&ctx->stack);
    BN_POOL_finish(&ctx->pool);
    OPENSSL_free(ctx);
}
|
||||
|
||||
/*
 * Open a new frame on |ctx|. The current number of used bignums is pushed
 * onto the frame stack. If the context is already in an error state, or the
 * push fails, only the err_stack counter is incremented.
 */
void BN_CTX_start(BN_CTX *ctx)
{
    CTXDBG_ENTRY("BN_CTX_start", ctx);
    if (ctx->err_stack != 0 || ctx->too_many != 0) {
        /* Already overflowing: just count the extra nesting level */
        ctx->err_stack++;
    } else if (BN_STACK_push(&ctx->stack, ctx->used) == 0) {
        /* Could not record a new frame pointer */
        BNerr(BN_F_BN_CTX_START, BN_R_TOO_MANY_TEMPORARY_VARIABLES);
        ctx->err_stack++;
    }
    CTXDBG_EXIT(ctx);
}
|
||||
|
||||
/*
 * Close the frame most recently opened with BN_CTX_start().
 *
 * If frames were opened while the context was in an error state, this only
 * decrements the err_stack counter. Otherwise the bignums obtained since the
 * matching BN_CTX_start() are released back to the pool and the too_many
 * flag is cleared.
 *
 * A NULL |ctx| is ignored, so a cleanup path may call this unconditionally
 * even when creating the context failed.
 */
void BN_CTX_end(BN_CTX *ctx)
{
    if (ctx == NULL)
        return;
    CTXDBG_ENTRY("BN_CTX_end", ctx);
    if (ctx->err_stack)
        ctx->err_stack--;
    else {
        unsigned int fp = BN_STACK_pop(&ctx->stack);
        /* Does this stack frame have anything to release? */
        if (fp < ctx->used)
            BN_POOL_release(&ctx->pool, ctx->used - fp);
        ctx->used = fp;
        /* Unjam "too_many" in case "get" had failed */
        ctx->too_many = 0;
    }
    CTXDBG_EXIT(ctx);
}
|
||||
|
||||
/*
 * Hand out the next temporary bignum of |ctx|, set to zero and with
 * BN_FLG_CONSTTIME cleared. Returns NULL if the context is in an error state
 * or the pool cannot supply another bignum; in the latter case too_many is
 * set so that later calls fail without raising further errors.
 */
BIGNUM *BN_CTX_get(BN_CTX *ctx)
{
    BIGNUM *bn;

    CTXDBG_ENTRY("BN_CTX_get", ctx);
    if (ctx->err_stack != 0 || ctx->too_many != 0)
        return NULL;

    bn = BN_POOL_get(&ctx->pool, ctx->flags);
    if (bn == NULL) {
        /* Remember the failure so repeated "gets" stay quiet */
        ctx->too_many = 1;
        BNerr(BN_F_BN_CTX_GET, BN_R_TOO_MANY_TEMPORARY_VARIABLES);
        return NULL;
    }

    /* A recycled bignum may hold an old value and an old flag */
    BN_zero(bn);
    bn->flags &= ~BN_FLG_CONSTTIME;
    ctx->used++;
    CTXDBG_RET(ctx, bn);
    return bn;
}
|
||||
|
||||
/************/
|
||||
/* BN_STACK */
|
||||
/************/
|
||||
|
||||
/* Put |st| into the empty state: no array, no frames, no capacity */
static void BN_STACK_init(BN_STACK *st)
{
    st->indexes = NULL;
    st->size = 0;
    st->depth = 0;
}
|
||||
|
||||
/* Free the frame array of |st| and leave its pointer NULL */
static void BN_STACK_finish(BN_STACK *st)
{
    unsigned int *frames = st->indexes;

    st->indexes = NULL;
    OPENSSL_free(frames);
}
|
||||
|
||||
|
||||
/*
 * Append |idx| as a new frame on |st|. A full array is first replaced by a
 * larger one: BN_CTX_START_FRAMES entries the first time, one and a half
 * times the old size afterwards. Returns 1 on success, or 0 after raising
 * ERR_R_MALLOC_FAILURE, in which case |st| is left unchanged.
 */
static int BN_STACK_push(BN_STACK *st, unsigned int idx)
{
    if (st->depth == st->size) {
        unsigned int grown;
        unsigned int *frames;

        if (st->size == 0)
            grown = BN_CTX_START_FRAMES;
        else
            grown = st->size * 3 / 2;

        frames = OPENSSL_malloc(sizeof(*frames) * grown);
        if (frames == NULL) {
            BNerr(BN_F_BN_STACK_PUSH, ERR_R_MALLOC_FAILURE);
            return 0;
        }
        /* Carry the existing frames over before dropping the old array */
        if (st->depth != 0)
            memcpy(frames, st->indexes, sizeof(*frames) * st->depth);
        OPENSSL_free(st->indexes);
        st->indexes = frames;
        st->size = grown;
    }
    st->indexes[st->depth] = idx;
    st->depth++;
    return 1;
}
|
||||
|
||||
/* Remove the newest frame from |st| and return the index stored in it */
static unsigned int BN_STACK_pop(BN_STACK *st)
{
    st->depth--;
    return st->indexes[st->depth];
}
|
||||
|
||||
/***********/
|
||||
/* BN_POOL */
|
||||
/***********/
|
||||
|
||||
/* Put |p| into the empty state: no bundles, nothing used or allocated */
static void BN_POOL_init(BN_POOL *p)
{
    p->head = NULL;
    p->current = NULL;
    p->tail = NULL;
    p->used = 0;
    p->size = 0;
}
|
||||
|
||||
/*
 * Destroy every bundle of |p|, starting at the head. Each bignum in a bundle
 * that has a data buffer is passed to BN_clear_free() before the bundle
 * itself is freed.
 */
static void BN_POOL_finish(BN_POOL *p)
{
    while (p->head != NULL) {
        BN_POOL_ITEM *item = p->head;
        unsigned int i;

        for (i = 0; i < BN_CTX_POOL_SIZE; i++) {
            if (item->vals[i].d != NULL)
                BN_clear_free(&item->vals[i]);
        }
        /* Step to the next bundle before releasing this one */
        p->current = item->next;
        OPENSSL_free(item);
        p->head = p->current;
    }
}
|
||||
|
||||
|
||||
/*
 * Return the next unused bignum of |p| and count it as used.
 *
 * When every allocated slot is in use, a new bundle is allocated, its bignums
 * are initialised (and flagged BN_FLG_SECURE if |flag| has that bit set), the
 * bundle is appended to the list and its first bignum is returned.
 * Returns NULL, after raising ERR_R_MALLOC_FAILURE, if that allocation fails.
 */
static BIGNUM *BN_POOL_get(BN_POOL *p, int flag)
{
    BN_POOL_ITEM *item;
    unsigned int i;

    if (p->used != p->size) {
        /* Spare slots exist: move to the bundle holding slot number |used| */
        if (p->used == 0)
            p->current = p->head;
        else if (p->used % BN_CTX_POOL_SIZE == 0)
            p->current = p->current->next;
        return &p->current->vals[p->used++ % BN_CTX_POOL_SIZE];
    }

    /* Full; allocate a new pool item and link it in. */
    item = OPENSSL_malloc(sizeof(*item));
    if (item == NULL) {
        BNerr(BN_F_BN_POOL_GET, ERR_R_MALLOC_FAILURE);
        return NULL;
    }
    for (i = 0; i < BN_CTX_POOL_SIZE; i++) {
        bn_init(&item->vals[i]);
        if ((flag & BN_FLG_SECURE) != 0)
            BN_set_flags(&item->vals[i], BN_FLG_SECURE);
    }
    item->prev = p->tail;
    item->next = NULL;

    if (p->head != NULL) {
        p->tail->next = item;
        p->tail = item;
        p->current = item;
    } else {
        /* Very first bundle of this pool */
        p->head = p->current = p->tail = item;
    }
    p->size += BN_CTX_POOL_SIZE;
    p->used++;
    /* Return the first bignum from the new pool */
    return item->vals;
}
|
||||
|
||||
/*
 * Return the |num| most recently handed-out bignums to |p|. The used count
 * drops by |num| and the current-bundle pointer is walked backwards, one
 * slot at a time, moving to the previous bundle after slot 0.
 */
static void BN_POOL_release(BN_POOL *p, unsigned int num)
{
    /* Slot of the most recently handed-out bignum within its bundle */
    unsigned int slot = (p->used - 1) % BN_CTX_POOL_SIZE;
    unsigned int remaining;

    p->used -= num;
    for (remaining = num; remaining > 0; remaining--) {
        bn_check_top(p->current->vals + slot);
        if (slot != 0) {
            slot--;
        } else {
            slot = BN_CTX_POOL_SIZE - 1;
            p->current = p->current->prev;
        }
    }
}
|
68
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_depr.c
vendored
Normal file
68
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_depr.c
vendored
Normal file
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Copyright 2002-2019 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
/*
|
||||
* Support for deprecated functions goes here - static linkage will only
|
||||
* slurp this code if applications are using them directly.
|
||||
*/
|
||||
|
||||
#include <openssl/opensslconf.h>
|
||||
#if OPENSSL_API_COMPAT >= 0x00908000L
|
||||
NON_EMPTY_TRANSLATION_UNIT
|
||||
#else
|
||||
|
||||
# include <stdio.h>
|
||||
# include <time.h>
|
||||
# include "internal/cryptlib.h"
|
||||
# include "bn_lcl.h"
|
||||
|
||||
/*
 * Deprecated wrapper around BN_generate_prime_ex() that takes an old-style
 * callback.
 *
 * |ret| is used to hold the result when it is not NULL; otherwise a new
 * BIGNUM is allocated. |bits|, |safe|, |add| and |rem| are passed through
 * unchanged, and |callback| / |cb_arg| are wrapped in a BN_GENCB.
 *
 * Returns the BIGNUM holding the prime, or NULL on failure. On failure only
 * a BIGNUM allocated by this function is freed; a caller-supplied |ret|
 * remains owned by the caller.
 */
BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe,
                          const BIGNUM *add, const BIGNUM *rem,
                          void (*callback) (int, int, void *), void *cb_arg)
{
    BN_GENCB cb;
    BIGNUM *rnd = NULL;

    BN_GENCB_set_old(&cb, callback, cb_arg);

    if (ret == NULL) {
        if ((rnd = BN_new()) == NULL)
            goto err;
    } else
        rnd = ret;
    if (!BN_generate_prime_ex(rnd, bits, safe, add, rem, &cb))
        goto err;

    /* we have a prime :-) */
    return rnd;
 err:
    /* Never free the caller's own BIGNUM: that would leave it dangling */
    if (rnd != ret)
        BN_free(rnd);
    return NULL;
}
|
||||
|
||||
/*
 * Deprecated wrapper: wraps |callback| / |cb_arg| in a BN_GENCB and returns
 * the result of BN_is_prime_ex() for |a|, |checks| and |ctx_passed|.
 */
int BN_is_prime(const BIGNUM *a, int checks,
                void (*callback) (int, int, void *), BN_CTX *ctx_passed,
                void *cb_arg)
{
    BN_GENCB gencb;

    BN_GENCB_set_old(&gencb, callback, cb_arg);

    return BN_is_prime_ex(a, checks, ctx_passed, &gencb);
}
|
||||
|
||||
int BN_is_prime_fasttest(const BIGNUM *a, int checks,
|
||||
void (*callback) (int, int, void *),
|
||||
BN_CTX *ctx_passed, void *cb_arg,
|
||||
int do_trial_division)
|
||||
{
|
||||
BN_GENCB cb;
|
||||
BN_GENCB_set_old(&cb, callback, cb_arg);
|
||||
return BN_is_prime_fasttest_ex(a, checks, ctx_passed,
|
||||
do_trial_division, &cb);
|
||||
}
|
||||
#endif
|
512
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_dh.c
vendored
Normal file
512
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_dh.c
vendored
Normal file
|
@ -0,0 +1,512 @@
|
|||
/*
|
||||
* Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include "bn_lcl.h"
|
||||
#include "internal/nelem.h"
|
||||
|
||||
#ifndef OPENSSL_NO_DH
|
||||
#include <openssl/dh.h>
|
||||
#include "internal/bn_dh.h"
|
||||
/* DH parameters from RFC5114 */
|
||||
|
||||
# if BN_BITS2 == 64
|
||||
static const BN_ULONG dh1024_160_p[] = {
|
||||
0xDF1FB2BC2E4A4371ULL, 0xE68CFDA76D4DA708ULL, 0x45BF37DF365C1A65ULL,
|
||||
0xA151AF5F0DC8B4BDULL, 0xFAA31A4FF55BCCC0ULL, 0x4EFFD6FAE5644738ULL,
|
||||
0x98488E9C219A7372ULL, 0xACCBDD7D90C4BD70ULL, 0x24975C3CD49B83BFULL,
|
||||
0x13ECB4AEA9061123ULL, 0x9838EF1E2EE652C0ULL, 0x6073E28675A23D18ULL,
|
||||
0x9A6A9DCA52D23B61ULL, 0x52C99FBCFB06A3C6ULL, 0xDE92DE5EAE5D54ECULL,
|
||||
0xB10B8F96A080E01DULL
|
||||
};
|
||||
|
||||
static const BN_ULONG dh1024_160_g[] = {
|
||||
0x855E6EEB22B3B2E5ULL, 0x858F4DCEF97C2A24ULL, 0x2D779D5918D08BC8ULL,
|
||||
0xD662A4D18E73AFA3ULL, 0x1DBF0A0169B6A28AULL, 0xA6A24C087A091F53ULL,
|
||||
0x909D0D2263F80A76ULL, 0xD7FBD7D3B9A92EE1ULL, 0x5E91547F9E2749F4ULL,
|
||||
0x160217B4B01B886AULL, 0x777E690F5504F213ULL, 0x266FEA1E5C41564BULL,
|
||||
0xD6406CFF14266D31ULL, 0xF8104DD258AC507FULL, 0x6765A442EFB99905ULL,
|
||||
0xA4D1CBD5C3FD3412ULL
|
||||
};
|
||||
|
||||
static const BN_ULONG dh1024_160_q[] = {
|
||||
0x64B7CB9D49462353ULL, 0x81A8DF278ABA4E7DULL, 0x00000000F518AA87ULL
|
||||
};
|
||||
|
||||
static const BN_ULONG dh2048_224_p[] = {
|
||||
0x0AC4DFFE0C10E64FULL, 0xCF9DE5384E71B81CULL, 0x7EF363E2FFA31F71ULL,
|
||||
0xE3FB73C16B8E75B9ULL, 0xC9B53DCF4BA80A29ULL, 0x23F10B0E16E79763ULL,
|
||||
0xC52172E413042E9BULL, 0xBE60E69CC928B2B9ULL, 0x80CD86A1B9E587E8ULL,
|
||||
0x315D75E198C641A4ULL, 0xCDF93ACC44328387ULL, 0x15987D9ADC0A486DULL,
|
||||
0x7310F7121FD5A074ULL, 0x278273C7DE31EFDCULL, 0x1602E714415D9330ULL,
|
||||
0x81286130BC8985DBULL, 0xB3BF8A3170918836ULL, 0x6A00E0A0B9C49708ULL,
|
||||
0xC6BA0B2C8BBC27BEULL, 0xC9F98D11ED34DBF6ULL, 0x7AD5B7D0B6C12207ULL,
|
||||
0xD91E8FEF55B7394BULL, 0x9037C9EDEFDA4DF8ULL, 0x6D3F8152AD6AC212ULL,
|
||||
0x1DE6B85A1274A0A6ULL, 0xEB3D688A309C180EULL, 0xAF9A3C407BA1DF15ULL,
|
||||
0xE6FA141DF95A56DBULL, 0xB54B1597B61D0A75ULL, 0xA20D64E5683B9FD1ULL,
|
||||
0xD660FAA79559C51FULL, 0xAD107E1E9123A9D0ULL
|
||||
};
|
||||
|
||||
static const BN_ULONG dh2048_224_g[] = {
|
||||
0x84B890D3191F2BFAULL, 0x81BC087F2A7065B3ULL, 0x19C418E1F6EC0179ULL,
|
||||
0x7B5A0F1C71CFFF4CULL, 0xEDFE72FE9B6AA4BDULL, 0x81E1BCFE94B30269ULL,
|
||||
0x566AFBB48D6C0191ULL, 0xB539CCE3409D13CDULL, 0x6AA21E7F5F2FF381ULL,
|
||||
0xD9E263E4770589EFULL, 0x10E183EDD19963DDULL, 0xB70A8137150B8EEBULL,
|
||||
0x051AE3D428C8F8ACULL, 0xBB77A86F0C1AB15BULL, 0x6E3025E316A330EFULL,
|
||||
0x19529A45D6F83456ULL, 0xF180EB34118E98D1ULL, 0xB5F6C6B250717CBEULL,
|
||||
0x09939D54DA7460CDULL, 0xE247150422EA1ED4ULL, 0xB8A762D0521BC98AULL,
|
||||
0xF4D027275AC1348BULL, 0xC17669101999024AULL, 0xBE5E9001A8D66AD7ULL,
|
||||
0xC57DB17C620A8652ULL, 0xAB739D7700C29F52ULL, 0xDD921F01A70C4AFAULL,
|
||||
0xA6824A4E10B9A6F0ULL, 0x74866A08CFE4FFE3ULL, 0x6CDEBE7B89998CAFULL,
|
||||
0x9DF30B5C8FFDAC50ULL, 0xAC4032EF4F2D9AE3ULL
|
||||
};
|
||||
|
||||
static const BN_ULONG dh2048_224_q[] = {
|
||||
0xBF389A99B36371EBULL, 0x1F80535A4738CEBCULL, 0xC58D93FE99717710ULL,
|
||||
0x00000000801C0D34ULL
|
||||
};
|
||||
|
||||
static const BN_ULONG dh2048_256_p[] = {
|
||||
0xDB094AE91E1A1597ULL, 0x693877FAD7EF09CAULL, 0x6116D2276E11715FULL,
|
||||
0xA4B54330C198AF12ULL, 0x75F26375D7014103ULL, 0xC3A3960A54E710C3ULL,
|
||||
0xDED4010ABD0BE621ULL, 0xC0B857F689962856ULL, 0xB3CA3F7971506026ULL,
|
||||
0x1CCACB83E6B486F6ULL, 0x67E144E514056425ULL, 0xF6A167B5A41825D9ULL,
|
||||
0x3AD8347796524D8EULL, 0xF13C6D9A51BFA4ABULL, 0x2D52526735488A0EULL,
|
||||
0xB63ACAE1CAA6B790ULL, 0x4FDB70C581B23F76ULL, 0xBC39A0BF12307F5CULL,
|
||||
0xB941F54EB1E59BB8ULL, 0x6C5BFC11D45F9088ULL, 0x22E0B1EF4275BF7BULL,
|
||||
0x91F9E6725B4758C0ULL, 0x5A8A9D306BCF67EDULL, 0x209E0C6497517ABDULL,
|
||||
0x3BF4296D830E9A7CULL, 0x16C3D91134096FAAULL, 0xFAF7DF4561B2AA30ULL,
|
||||
0xE00DF8F1D61957D4ULL, 0x5D2CEED4435E3B00ULL, 0x8CEEF608660DD0F2ULL,
|
||||
0xFFBBD19C65195999ULL, 0x87A8E61DB4B6663CULL
|
||||
};
|
||||
|
||||
static const BN_ULONG dh2048_256_g[] = {
|
||||
0x664B4C0F6CC41659ULL, 0x5E2327CFEF98C582ULL, 0xD647D148D4795451ULL,
|
||||
0x2F63078490F00EF8ULL, 0x184B523D1DB246C3ULL, 0xC7891428CDC67EB6ULL,
|
||||
0x7FD028370DF92B52ULL, 0xB3353BBB64E0EC37ULL, 0xECD06E1557CD0915ULL,
|
||||
0xB7D2BBD2DF016199ULL, 0xC8484B1E052588B9ULL, 0xDB2A3B7313D3FE14ULL,
|
||||
0xD052B985D182EA0AULL, 0xA4BD1BFFE83B9C80ULL, 0xDFC967C1FB3F2E55ULL,
|
||||
0xB5045AF2767164E1ULL, 0x1D14348F6F2F9193ULL, 0x64E67982428EBC83ULL,
|
||||
0x8AC376D282D6ED38ULL, 0x777DE62AAAB8A862ULL, 0xDDF463E5E9EC144BULL,
|
||||
0x0196F931C77A57F2ULL, 0xA55AE31341000A65ULL, 0x901228F8C28CBB18ULL,
|
||||
0xBC3773BF7E8C6F62ULL, 0xBE3A6C1B0C6B47B1ULL, 0xFF4FED4AAC0BB555ULL,
|
||||
0x10DBC15077BE463FULL, 0x07F4793A1A0BA125ULL, 0x4CA7B18F21EF2054ULL,
|
||||
0x2E77506660EDBD48ULL, 0x3FB32C9B73134D0BULL
|
||||
};
|
||||
|
||||
static const BN_ULONG dh2048_256_q[] = {
|
||||
0xA308B0FE64F5FBD3ULL, 0x99B1A47D1EB3750BULL, 0xB447997640129DA2ULL,
|
||||
0x8CF83642A709A097ULL
|
||||
};
|
||||
|
||||
/* Primes from RFC 7919 */
|
||||
static const BN_ULONG ffdhe2048_p[] = {
|
||||
0xFFFFFFFFFFFFFFFFULL, 0x886B423861285C97ULL, 0xC6F34A26C1B2EFFAULL,
|
||||
0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL,
|
||||
0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL,
|
||||
0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL,
|
||||
0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL,
|
||||
0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL,
|
||||
0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL,
|
||||
0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL,
|
||||
0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL,
|
||||
0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL,
|
||||
0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
|
||||
};
|
||||
|
||||
static const BN_ULONG ffdhe3072_p[] = {
|
||||
0xFFFFFFFFFFFFFFFFULL, 0x25E41D2B66C62E37ULL, 0x3C1B20EE3FD59D7CULL,
|
||||
0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL,
|
||||
0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL,
|
||||
0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL, 0xAEFE130985139270ULL,
|
||||
0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL,
|
||||
0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL,
|
||||
0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL,
|
||||
0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL,
|
||||
0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL,
|
||||
0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL,
|
||||
0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL,
|
||||
0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL,
|
||||
0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL,
|
||||
0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL,
|
||||
0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL,
|
||||
0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
|
||||
};
|
||||
|
||||
static const BN_ULONG ffdhe4096_p[] = {
|
||||
0xFFFFFFFFFFFFFFFFULL, 0xC68A007E5E655F6AULL, 0x4DB5A851F44182E1ULL,
|
||||
0x8EC9B55A7F88A46BULL, 0x0A8291CDCEC97DCFULL, 0x2A4ECEA9F98D0ACCULL,
|
||||
0x1A1DB93D7140003CULL, 0x092999A333CB8B7AULL, 0x6DC778F971AD0038ULL,
|
||||
0xA907600A918130C4ULL, 0xED6A1E012D9E6832ULL, 0x7135C886EFB4318AULL,
|
||||
0x87F55BA57E31CC7AULL, 0x7763CF1D55034004ULL, 0xAC7D5F42D69F6D18ULL,
|
||||
0x7930E9E4E58857B6ULL, 0x6E6F52C3164DF4FBULL, 0x25E41D2B669E1EF1ULL,
|
||||
0x3C1B20EE3FD59D7CULL, 0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL,
|
||||
0xABC521979B0DEADAULL, 0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL,
|
||||
0x64F2E21E71F54BFFULL, 0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL,
|
||||
0xAEFE130985139270ULL, 0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL,
|
||||
0x61B46FC9D6E6C907ULL, 0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL,
|
||||
0x886B4238611FCFDCULL, 0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL,
|
||||
0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL,
|
||||
0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL,
|
||||
0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL,
|
||||
0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL,
|
||||
0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL,
|
||||
0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL,
|
||||
0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL,
|
||||
0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL,
|
||||
0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL,
|
||||
0xFFFFFFFFFFFFFFFFULL
|
||||
};
|
||||
|
||||
static const BN_ULONG ffdhe6144_p[] = {
|
||||
0xFFFFFFFFFFFFFFFFULL, 0xA40E329CD0E40E65ULL, 0xA41D570D7938DAD4ULL,
|
||||
0x62A69526D43161C1ULL, 0x3FDD4A8E9ADB1E69ULL, 0x5B3B71F9DC6B80D6ULL,
|
||||
0xEC9D1810C6272B04ULL, 0x8CCF2DD5CACEF403ULL, 0xE49F5235C95B9117ULL,
|
||||
0x505DC82DB854338AULL, 0x62292C311562A846ULL, 0xD72B03746AE77F5EULL,
|
||||
0xF9C9091B462D538CULL, 0x0AE8DB5847A67CBEULL, 0xB3A739C122611682ULL,
|
||||
0xEEAAC0232A281BF6ULL, 0x94C6651E77CAF992ULL, 0x763E4E4B94B2BBC1ULL,
|
||||
0x587E38DA0077D9B4ULL, 0x7FB29F8C183023C3ULL, 0x0ABEC1FFF9E3A26EULL,
|
||||
0xA00EF092350511E3ULL, 0xB855322EDB6340D8ULL, 0xA52471F7A9A96910ULL,
|
||||
0x388147FB4CFDB477ULL, 0x9B1F5C3E4E46041FULL, 0xCDAD0657FCCFEC71ULL,
|
||||
0xB38E8C334C701C3AULL, 0x917BDD64B1C0FD4CULL, 0x3BB454329B7624C8ULL,
|
||||
0x23BA4442CAF53EA6ULL, 0x4E677D2C38532A3AULL, 0x0BFD64B645036C7AULL,
|
||||
0xC68A007E5E0DD902ULL, 0x4DB5A851F44182E1ULL, 0x8EC9B55A7F88A46BULL,
|
||||
0x0A8291CDCEC97DCFULL, 0x2A4ECEA9F98D0ACCULL, 0x1A1DB93D7140003CULL,
|
||||
0x092999A333CB8B7AULL, 0x6DC778F971AD0038ULL, 0xA907600A918130C4ULL,
|
||||
0xED6A1E012D9E6832ULL, 0x7135C886EFB4318AULL, 0x87F55BA57E31CC7AULL,
|
||||
0x7763CF1D55034004ULL, 0xAC7D5F42D69F6D18ULL, 0x7930E9E4E58857B6ULL,
|
||||
0x6E6F52C3164DF4FBULL, 0x25E41D2B669E1EF1ULL, 0x3C1B20EE3FD59D7CULL,
|
||||
0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL,
|
||||
0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL,
|
||||
0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL, 0xAEFE130985139270ULL,
|
||||
0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL,
|
||||
0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL,
|
||||
0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL,
|
||||
0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL,
|
||||
0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL,
|
||||
0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL,
|
||||
0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL,
|
||||
0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL,
|
||||
0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL,
|
||||
0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL,
|
||||
0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL,
|
||||
0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
|
||||
};
|
||||
|
||||
static const BN_ULONG ffdhe8192_p[] = {
|
||||
0xFFFFFFFFFFFFFFFFULL, 0xD68C8BB7C5C6424CULL, 0x011E2A94838FF88CULL,
|
||||
0x0822E506A9F4614EULL, 0x97D11D49F7A8443DULL, 0xA6BBFDE530677F0DULL,
|
||||
0x2F741EF8C1FE86FEULL, 0xFAFABE1C5D71A87EULL, 0xDED2FBABFBE58A30ULL,
|
||||
0xB6855DFE72B0A66EULL, 0x1EFC8CE0BA8A4FE8ULL, 0x83F81D4A3F2FA457ULL,
|
||||
0xA1FE3075A577E231ULL, 0xD5B8019488D9C0A0ULL, 0x624816CDAD9A95F9ULL,
|
||||
0x99E9E31650C1217BULL, 0x51AA691E0E423CFCULL, 0x1C217E6C3826E52CULL,
|
||||
0x51A8A93109703FEEULL, 0xBB7099876A460E74ULL, 0x541FC68C9C86B022ULL,
|
||||
0x59160CC046FD8251ULL, 0x2846C0BA35C35F5CULL, 0x54504AC78B758282ULL,
|
||||
0x29388839D2AF05E4ULL, 0xCB2C0F1CC01BD702ULL, 0x555B2F747C932665ULL,
|
||||
0x86B63142A3AB8829ULL, 0x0B8CC3BDF64B10EFULL, 0x687FEB69EDD1CC5EULL,
|
||||
0xFDB23FCEC9509D43ULL, 0x1E425A31D951AE64ULL, 0x36AD004CF600C838ULL,
|
||||
0xA40E329CCFF46AAAULL, 0xA41D570D7938DAD4ULL, 0x62A69526D43161C1ULL,
|
||||
0x3FDD4A8E9ADB1E69ULL, 0x5B3B71F9DC6B80D6ULL, 0xEC9D1810C6272B04ULL,
|
||||
0x8CCF2DD5CACEF403ULL, 0xE49F5235C95B9117ULL, 0x505DC82DB854338AULL,
|
||||
0x62292C311562A846ULL, 0xD72B03746AE77F5EULL, 0xF9C9091B462D538CULL,
|
||||
0x0AE8DB5847A67CBEULL, 0xB3A739C122611682ULL, 0xEEAAC0232A281BF6ULL,
|
||||
0x94C6651E77CAF992ULL, 0x763E4E4B94B2BBC1ULL, 0x587E38DA0077D9B4ULL,
|
||||
0x7FB29F8C183023C3ULL, 0x0ABEC1FFF9E3A26EULL, 0xA00EF092350511E3ULL,
|
||||
0xB855322EDB6340D8ULL, 0xA52471F7A9A96910ULL, 0x388147FB4CFDB477ULL,
|
||||
0x9B1F5C3E4E46041FULL, 0xCDAD0657FCCFEC71ULL, 0xB38E8C334C701C3AULL,
|
||||
0x917BDD64B1C0FD4CULL, 0x3BB454329B7624C8ULL, 0x23BA4442CAF53EA6ULL,
|
||||
0x4E677D2C38532A3AULL, 0x0BFD64B645036C7AULL, 0xC68A007E5E0DD902ULL,
|
||||
0x4DB5A851F44182E1ULL, 0x8EC9B55A7F88A46BULL, 0x0A8291CDCEC97DCFULL,
|
||||
0x2A4ECEA9F98D0ACCULL, 0x1A1DB93D7140003CULL, 0x092999A333CB8B7AULL,
|
||||
0x6DC778F971AD0038ULL, 0xA907600A918130C4ULL, 0xED6A1E012D9E6832ULL,
|
||||
0x7135C886EFB4318AULL, 0x87F55BA57E31CC7AULL, 0x7763CF1D55034004ULL,
|
||||
0xAC7D5F42D69F6D18ULL, 0x7930E9E4E58857B6ULL, 0x6E6F52C3164DF4FBULL,
|
||||
0x25E41D2B669E1EF1ULL, 0x3C1B20EE3FD59D7CULL, 0x0ABCD06BFA53DDEFULL,
|
||||
0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL, 0xE86D2BC522363A0DULL,
|
||||
0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL, 0xF4FD4452E2D74DD3ULL,
|
||||
0xB4130C93BC437944ULL, 0xAEFE130985139270ULL, 0x598CB0FAC186D91CULL,
|
||||
0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL, 0xBC34F4DEF99C0238ULL,
|
||||
0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL, 0xC6F34A26C1B2EFFAULL,
|
||||
0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL,
|
||||
0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL,
|
||||
0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL,
|
||||
0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL,
|
||||
0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL,
|
||||
0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL,
|
||||
0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL,
|
||||
0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL,
|
||||
0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL,
|
||||
0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
|
||||
};
|
||||
|
||||
# elif BN_BITS2 == 32
|
||||
|
||||
static const BN_ULONG dh1024_160_p[] = {
|
||||
0x2E4A4371, 0xDF1FB2BC, 0x6D4DA708, 0xE68CFDA7, 0x365C1A65, 0x45BF37DF,
|
||||
0x0DC8B4BD, 0xA151AF5F, 0xF55BCCC0, 0xFAA31A4F, 0xE5644738, 0x4EFFD6FA,
|
||||
0x219A7372, 0x98488E9C, 0x90C4BD70, 0xACCBDD7D, 0xD49B83BF, 0x24975C3C,
|
||||
0xA9061123, 0x13ECB4AE, 0x2EE652C0, 0x9838EF1E, 0x75A23D18, 0x6073E286,
|
||||
0x52D23B61, 0x9A6A9DCA, 0xFB06A3C6, 0x52C99FBC, 0xAE5D54EC, 0xDE92DE5E,
|
||||
0xA080E01D, 0xB10B8F96
|
||||
};
|
||||
|
||||
static const BN_ULONG dh1024_160_g[] = {
|
||||
0x22B3B2E5, 0x855E6EEB, 0xF97C2A24, 0x858F4DCE, 0x18D08BC8, 0x2D779D59,
|
||||
0x8E73AFA3, 0xD662A4D1, 0x69B6A28A, 0x1DBF0A01, 0x7A091F53, 0xA6A24C08,
|
||||
0x63F80A76, 0x909D0D22, 0xB9A92EE1, 0xD7FBD7D3, 0x9E2749F4, 0x5E91547F,
|
||||
0xB01B886A, 0x160217B4, 0x5504F213, 0x777E690F, 0x5C41564B, 0x266FEA1E,
|
||||
0x14266D31, 0xD6406CFF, 0x58AC507F, 0xF8104DD2, 0xEFB99905, 0x6765A442,
|
||||
0xC3FD3412, 0xA4D1CBD5
|
||||
};
|
||||
|
||||
static const BN_ULONG dh1024_160_q[] = {
|
||||
0x49462353, 0x64B7CB9D, 0x8ABA4E7D, 0x81A8DF27, 0xF518AA87
|
||||
};
|
||||
|
||||
static const BN_ULONG dh2048_224_p[] = {
|
||||
0x0C10E64F, 0x0AC4DFFE, 0x4E71B81C, 0xCF9DE538, 0xFFA31F71, 0x7EF363E2,
|
||||
0x6B8E75B9, 0xE3FB73C1, 0x4BA80A29, 0xC9B53DCF, 0x16E79763, 0x23F10B0E,
|
||||
0x13042E9B, 0xC52172E4, 0xC928B2B9, 0xBE60E69C, 0xB9E587E8, 0x80CD86A1,
|
||||
0x98C641A4, 0x315D75E1, 0x44328387, 0xCDF93ACC, 0xDC0A486D, 0x15987D9A,
|
||||
0x1FD5A074, 0x7310F712, 0xDE31EFDC, 0x278273C7, 0x415D9330, 0x1602E714,
|
||||
0xBC8985DB, 0x81286130, 0x70918836, 0xB3BF8A31, 0xB9C49708, 0x6A00E0A0,
|
||||
0x8BBC27BE, 0xC6BA0B2C, 0xED34DBF6, 0xC9F98D11, 0xB6C12207, 0x7AD5B7D0,
|
||||
0x55B7394B, 0xD91E8FEF, 0xEFDA4DF8, 0x9037C9ED, 0xAD6AC212, 0x6D3F8152,
|
||||
0x1274A0A6, 0x1DE6B85A, 0x309C180E, 0xEB3D688A, 0x7BA1DF15, 0xAF9A3C40,
|
||||
0xF95A56DB, 0xE6FA141D, 0xB61D0A75, 0xB54B1597, 0x683B9FD1, 0xA20D64E5,
|
||||
0x9559C51F, 0xD660FAA7, 0x9123A9D0, 0xAD107E1E
|
||||
};
|
||||
|
||||
static const BN_ULONG dh2048_224_g[] = {
|
||||
0x191F2BFA, 0x84B890D3, 0x2A7065B3, 0x81BC087F, 0xF6EC0179, 0x19C418E1,
|
||||
0x71CFFF4C, 0x7B5A0F1C, 0x9B6AA4BD, 0xEDFE72FE, 0x94B30269, 0x81E1BCFE,
|
||||
0x8D6C0191, 0x566AFBB4, 0x409D13CD, 0xB539CCE3, 0x5F2FF381, 0x6AA21E7F,
|
||||
0x770589EF, 0xD9E263E4, 0xD19963DD, 0x10E183ED, 0x150B8EEB, 0xB70A8137,
|
||||
0x28C8F8AC, 0x051AE3D4, 0x0C1AB15B, 0xBB77A86F, 0x16A330EF, 0x6E3025E3,
|
||||
0xD6F83456, 0x19529A45, 0x118E98D1, 0xF180EB34, 0x50717CBE, 0xB5F6C6B2,
|
||||
0xDA7460CD, 0x09939D54, 0x22EA1ED4, 0xE2471504, 0x521BC98A, 0xB8A762D0,
|
||||
0x5AC1348B, 0xF4D02727, 0x1999024A, 0xC1766910, 0xA8D66AD7, 0xBE5E9001,
|
||||
0x620A8652, 0xC57DB17C, 0x00C29F52, 0xAB739D77, 0xA70C4AFA, 0xDD921F01,
|
||||
0x10B9A6F0, 0xA6824A4E, 0xCFE4FFE3, 0x74866A08, 0x89998CAF, 0x6CDEBE7B,
|
||||
0x8FFDAC50, 0x9DF30B5C, 0x4F2D9AE3, 0xAC4032EF
|
||||
};
|
||||
|
||||
static const BN_ULONG dh2048_224_q[] = {
|
||||
0xB36371EB, 0xBF389A99, 0x4738CEBC, 0x1F80535A, 0x99717710, 0xC58D93FE,
|
||||
0x801C0D34
|
||||
};
|
||||
|
||||
static const BN_ULONG dh2048_256_p[] = {
|
||||
0x1E1A1597, 0xDB094AE9, 0xD7EF09CA, 0x693877FA, 0x6E11715F, 0x6116D227,
|
||||
0xC198AF12, 0xA4B54330, 0xD7014103, 0x75F26375, 0x54E710C3, 0xC3A3960A,
|
||||
0xBD0BE621, 0xDED4010A, 0x89962856, 0xC0B857F6, 0x71506026, 0xB3CA3F79,
|
||||
0xE6B486F6, 0x1CCACB83, 0x14056425, 0x67E144E5, 0xA41825D9, 0xF6A167B5,
|
||||
0x96524D8E, 0x3AD83477, 0x51BFA4AB, 0xF13C6D9A, 0x35488A0E, 0x2D525267,
|
||||
0xCAA6B790, 0xB63ACAE1, 0x81B23F76, 0x4FDB70C5, 0x12307F5C, 0xBC39A0BF,
|
||||
0xB1E59BB8, 0xB941F54E, 0xD45F9088, 0x6C5BFC11, 0x4275BF7B, 0x22E0B1EF,
|
||||
0x5B4758C0, 0x91F9E672, 0x6BCF67ED, 0x5A8A9D30, 0x97517ABD, 0x209E0C64,
|
||||
0x830E9A7C, 0x3BF4296D, 0x34096FAA, 0x16C3D911, 0x61B2AA30, 0xFAF7DF45,
|
||||
0xD61957D4, 0xE00DF8F1, 0x435E3B00, 0x5D2CEED4, 0x660DD0F2, 0x8CEEF608,
|
||||
0x65195999, 0xFFBBD19C, 0xB4B6663C, 0x87A8E61D
|
||||
};
|
||||
|
||||
static const BN_ULONG dh2048_256_g[] = {
|
||||
0x6CC41659, 0x664B4C0F, 0xEF98C582, 0x5E2327CF, 0xD4795451, 0xD647D148,
|
||||
0x90F00EF8, 0x2F630784, 0x1DB246C3, 0x184B523D, 0xCDC67EB6, 0xC7891428,
|
||||
0x0DF92B52, 0x7FD02837, 0x64E0EC37, 0xB3353BBB, 0x57CD0915, 0xECD06E15,
|
||||
0xDF016199, 0xB7D2BBD2, 0x052588B9, 0xC8484B1E, 0x13D3FE14, 0xDB2A3B73,
|
||||
0xD182EA0A, 0xD052B985, 0xE83B9C80, 0xA4BD1BFF, 0xFB3F2E55, 0xDFC967C1,
|
||||
0x767164E1, 0xB5045AF2, 0x6F2F9193, 0x1D14348F, 0x428EBC83, 0x64E67982,
|
||||
0x82D6ED38, 0x8AC376D2, 0xAAB8A862, 0x777DE62A, 0xE9EC144B, 0xDDF463E5,
|
||||
0xC77A57F2, 0x0196F931, 0x41000A65, 0xA55AE313, 0xC28CBB18, 0x901228F8,
|
||||
0x7E8C6F62, 0xBC3773BF, 0x0C6B47B1, 0xBE3A6C1B, 0xAC0BB555, 0xFF4FED4A,
|
||||
0x77BE463F, 0x10DBC150, 0x1A0BA125, 0x07F4793A, 0x21EF2054, 0x4CA7B18F,
|
||||
0x60EDBD48, 0x2E775066, 0x73134D0B, 0x3FB32C9B
|
||||
};
|
||||
|
||||
static const BN_ULONG dh2048_256_q[] = {
|
||||
0x64F5FBD3, 0xA308B0FE, 0x1EB3750B, 0x99B1A47D, 0x40129DA2, 0xB4479976,
|
||||
0xA709A097, 0x8CF83642
|
||||
};
|
||||
|
||||
/* Primes from RFC 7919 */
|
||||
|
||||
static const BN_ULONG ffdhe2048_p[] = {
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0x61285C97, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26,
|
||||
0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B,
|
||||
0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD,
|
||||
0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7,
|
||||
0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B,
|
||||
0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1,
|
||||
0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E,
|
||||
0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5,
|
||||
0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE,
|
||||
0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620,
|
||||
0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
|
||||
};
|
||||
|
||||
static const BN_ULONG ffdhe3072_p[] = {
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0x66C62E37, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE,
|
||||
0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197,
|
||||
0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E,
|
||||
0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309,
|
||||
0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9,
|
||||
0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238,
|
||||
0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC,
|
||||
0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C,
|
||||
0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8,
|
||||
0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7,
|
||||
0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F,
|
||||
0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70,
|
||||
0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F,
|
||||
0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363,
|
||||
0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583,
|
||||
0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
|
||||
};
|
||||
|
||||
static const BN_ULONG ffdhe4096_p[] = {
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0x5E655F6A, 0xC68A007E, 0xF44182E1, 0x4DB5A851,
|
||||
0x7F88A46B, 0x8EC9B55A, 0xCEC97DCF, 0x0A8291CD, 0xF98D0ACC, 0x2A4ECEA9,
|
||||
0x7140003C, 0x1A1DB93D, 0x33CB8B7A, 0x092999A3, 0x71AD0038, 0x6DC778F9,
|
||||
0x918130C4, 0xA907600A, 0x2D9E6832, 0xED6A1E01, 0xEFB4318A, 0x7135C886,
|
||||
0x7E31CC7A, 0x87F55BA5, 0x55034004, 0x7763CF1D, 0xD69F6D18, 0xAC7D5F42,
|
||||
0xE58857B6, 0x7930E9E4, 0x164DF4FB, 0x6E6F52C3, 0x669E1EF1, 0x25E41D2B,
|
||||
0x3FD59D7C, 0x3C1B20EE, 0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42,
|
||||
0x9B0DEADA, 0xABC52197, 0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB,
|
||||
0x71F54BFF, 0x64F2E21E, 0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93,
|
||||
0x85139270, 0xAEFE1309, 0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26,
|
||||
0xD6E6C907, 0x61B46FC9, 0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B,
|
||||
0x611FCFDC, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183,
|
||||
0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232,
|
||||
0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1,
|
||||
0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3,
|
||||
0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182,
|
||||
0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA,
|
||||
0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555,
|
||||
0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202,
|
||||
0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641,
|
||||
0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458,
|
||||
0xFFFFFFFF, 0xFFFFFFFF
|
||||
};
|
||||
|
||||
static const BN_ULONG ffdhe6144_p[] = {
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xD0E40E65, 0xA40E329C, 0x7938DAD4, 0xA41D570D,
|
||||
0xD43161C1, 0x62A69526, 0x9ADB1E69, 0x3FDD4A8E, 0xDC6B80D6, 0x5B3B71F9,
|
||||
0xC6272B04, 0xEC9D1810, 0xCACEF403, 0x8CCF2DD5, 0xC95B9117, 0xE49F5235,
|
||||
0xB854338A, 0x505DC82D, 0x1562A846, 0x62292C31, 0x6AE77F5E, 0xD72B0374,
|
||||
0x462D538C, 0xF9C9091B, 0x47A67CBE, 0x0AE8DB58, 0x22611682, 0xB3A739C1,
|
||||
0x2A281BF6, 0xEEAAC023, 0x77CAF992, 0x94C6651E, 0x94B2BBC1, 0x763E4E4B,
|
||||
0x0077D9B4, 0x587E38DA, 0x183023C3, 0x7FB29F8C, 0xF9E3A26E, 0x0ABEC1FF,
|
||||
0x350511E3, 0xA00EF092, 0xDB6340D8, 0xB855322E, 0xA9A96910, 0xA52471F7,
|
||||
0x4CFDB477, 0x388147FB, 0x4E46041F, 0x9B1F5C3E, 0xFCCFEC71, 0xCDAD0657,
|
||||
0x4C701C3A, 0xB38E8C33, 0xB1C0FD4C, 0x917BDD64, 0x9B7624C8, 0x3BB45432,
|
||||
0xCAF53EA6, 0x23BA4442, 0x38532A3A, 0x4E677D2C, 0x45036C7A, 0x0BFD64B6,
|
||||
0x5E0DD902, 0xC68A007E, 0xF44182E1, 0x4DB5A851, 0x7F88A46B, 0x8EC9B55A,
|
||||
0xCEC97DCF, 0x0A8291CD, 0xF98D0ACC, 0x2A4ECEA9, 0x7140003C, 0x1A1DB93D,
|
||||
0x33CB8B7A, 0x092999A3, 0x71AD0038, 0x6DC778F9, 0x918130C4, 0xA907600A,
|
||||
0x2D9E6832, 0xED6A1E01, 0xEFB4318A, 0x7135C886, 0x7E31CC7A, 0x87F55BA5,
|
||||
0x55034004, 0x7763CF1D, 0xD69F6D18, 0xAC7D5F42, 0xE58857B6, 0x7930E9E4,
|
||||
0x164DF4FB, 0x6E6F52C3, 0x669E1EF1, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE,
|
||||
0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197,
|
||||
0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E,
|
||||
0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309,
|
||||
0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9,
|
||||
0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238,
|
||||
0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC,
|
||||
0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C,
|
||||
0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8,
|
||||
0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7,
|
||||
0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F,
|
||||
0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70,
|
||||
0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F,
|
||||
0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363,
|
||||
0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583,
|
||||
0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
|
||||
};
|
||||
|
||||
static const BN_ULONG ffdhe8192_p[] = {
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xC5C6424C, 0xD68C8BB7, 0x838FF88C, 0x011E2A94,
|
||||
0xA9F4614E, 0x0822E506, 0xF7A8443D, 0x97D11D49, 0x30677F0D, 0xA6BBFDE5,
|
||||
0xC1FE86FE, 0x2F741EF8, 0x5D71A87E, 0xFAFABE1C, 0xFBE58A30, 0xDED2FBAB,
|
||||
0x72B0A66E, 0xB6855DFE, 0xBA8A4FE8, 0x1EFC8CE0, 0x3F2FA457, 0x83F81D4A,
|
||||
0xA577E231, 0xA1FE3075, 0x88D9C0A0, 0xD5B80194, 0xAD9A95F9, 0x624816CD,
|
||||
0x50C1217B, 0x99E9E316, 0x0E423CFC, 0x51AA691E, 0x3826E52C, 0x1C217E6C,
|
||||
0x09703FEE, 0x51A8A931, 0x6A460E74, 0xBB709987, 0x9C86B022, 0x541FC68C,
|
||||
0x46FD8251, 0x59160CC0, 0x35C35F5C, 0x2846C0BA, 0x8B758282, 0x54504AC7,
|
||||
0xD2AF05E4, 0x29388839, 0xC01BD702, 0xCB2C0F1C, 0x7C932665, 0x555B2F74,
|
||||
0xA3AB8829, 0x86B63142, 0xF64B10EF, 0x0B8CC3BD, 0xEDD1CC5E, 0x687FEB69,
|
||||
0xC9509D43, 0xFDB23FCE, 0xD951AE64, 0x1E425A31, 0xF600C838, 0x36AD004C,
|
||||
0xCFF46AAA, 0xA40E329C, 0x7938DAD4, 0xA41D570D, 0xD43161C1, 0x62A69526,
|
||||
0x9ADB1E69, 0x3FDD4A8E, 0xDC6B80D6, 0x5B3B71F9, 0xC6272B04, 0xEC9D1810,
|
||||
0xCACEF403, 0x8CCF2DD5, 0xC95B9117, 0xE49F5235, 0xB854338A, 0x505DC82D,
|
||||
0x1562A846, 0x62292C31, 0x6AE77F5E, 0xD72B0374, 0x462D538C, 0xF9C9091B,
|
||||
0x47A67CBE, 0x0AE8DB58, 0x22611682, 0xB3A739C1, 0x2A281BF6, 0xEEAAC023,
|
||||
0x77CAF992, 0x94C6651E, 0x94B2BBC1, 0x763E4E4B, 0x0077D9B4, 0x587E38DA,
|
||||
0x183023C3, 0x7FB29F8C, 0xF9E3A26E, 0x0ABEC1FF, 0x350511E3, 0xA00EF092,
|
||||
0xDB6340D8, 0xB855322E, 0xA9A96910, 0xA52471F7, 0x4CFDB477, 0x388147FB,
|
||||
0x4E46041F, 0x9B1F5C3E, 0xFCCFEC71, 0xCDAD0657, 0x4C701C3A, 0xB38E8C33,
|
||||
0xB1C0FD4C, 0x917BDD64, 0x9B7624C8, 0x3BB45432, 0xCAF53EA6, 0x23BA4442,
|
||||
0x38532A3A, 0x4E677D2C, 0x45036C7A, 0x0BFD64B6, 0x5E0DD902, 0xC68A007E,
|
||||
0xF44182E1, 0x4DB5A851, 0x7F88A46B, 0x8EC9B55A, 0xCEC97DCF, 0x0A8291CD,
|
||||
0xF98D0ACC, 0x2A4ECEA9, 0x7140003C, 0x1A1DB93D, 0x33CB8B7A, 0x092999A3,
|
||||
0x71AD0038, 0x6DC778F9, 0x918130C4, 0xA907600A, 0x2D9E6832, 0xED6A1E01,
|
||||
0xEFB4318A, 0x7135C886, 0x7E31CC7A, 0x87F55BA5, 0x55034004, 0x7763CF1D,
|
||||
0xD69F6D18, 0xAC7D5F42, 0xE58857B6, 0x7930E9E4, 0x164DF4FB, 0x6E6F52C3,
|
||||
0x669E1EF1, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE, 0xFA53DDEF, 0x0ABCD06B,
|
||||
0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197, 0x22363A0D, 0xE86D2BC5,
|
||||
0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E, 0xE2D74DD3, 0xF4FD4452,
|
||||
0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309, 0xC186D91C, 0x598CB0FA,
|
||||
0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9, 0xF99C0238, 0xBC34F4DE,
|
||||
0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26,
|
||||
0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B,
|
||||
0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD,
|
||||
0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7,
|
||||
0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B,
|
||||
0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1,
|
||||
0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E,
|
||||
0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5,
|
||||
0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE,
|
||||
0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620,
|
||||
0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
|
||||
};
|
||||
|
||||
# else
|
||||
# error "unsupported BN_BITS2"
|
||||
# endif
|
||||
|
||||
/* Macro to make a BIGNUM from static data */
|
||||
|
||||
/*
 * make_dh_bn(x) expands to a declaration followed by a definition of a
 * constant BIGNUM named _bignum_<x> (the name is built by token pasting).
 * Its word pointer refers to the static array |x|; the cast drops the
 * array's const qualifier.  Both count fields are OSSL_NELEM(x), i.e. the
 * whole array is in use, the next field is 0 and the last one is
 * BN_FLG_STATIC_DATA.
 * NOTE(review): the initializer is positional; presumably the field order
 * is d, top, dmax, neg, flags -- confirm against struct bignum_st.
 */
# define make_dh_bn(x) extern const BIGNUM _bignum_##x; \
|
||||
const BIGNUM _bignum_##x = { (BN_ULONG *) x, \
|
||||
OSSL_NELEM(x),\
|
||||
OSSL_NELEM(x),\
|
||||
0, BN_FLG_STATIC_DATA };
|
||||
|
||||
/* Single-word backing store for the constant BIGNUM defined below. */
static const BN_ULONG value_2 = 2;
|
||||
|
||||
/*
 * Statically initialised BIGNUM with the value 2: one word, both count
 * fields set to 1, and flagged BN_FLG_STATIC_DATA.  Written out by hand
 * rather than through make_dh_bn() because the backing store is a scalar,
 * not an array.
 */
const BIGNUM _bignum_const_2 =
|
||||
{ (BN_ULONG *)&value_2, 1, 1, 0, BN_FLG_STATIC_DATA };
|
||||
|
||||
/*
 * Instantiate one _bignum_<name> constant per table defined above.
 * The dhNNNN_MMM groups come as p/g/q triples.
 * NOTE(review): presumably p is the prime modulus, g the generator and q
 * the subgroup order (NNNN and MMM match the bit sizes of the p and q
 * tables) -- confirm against the users of these symbols.
 */
make_dh_bn(dh1024_160_p)
|
||||
make_dh_bn(dh1024_160_g)
|
||||
make_dh_bn(dh1024_160_q)
|
||||
make_dh_bn(dh2048_224_p)
|
||||
make_dh_bn(dh2048_224_g)
|
||||
make_dh_bn(dh2048_224_q)
|
||||
make_dh_bn(dh2048_256_p)
|
||||
make_dh_bn(dh2048_256_g)
|
||||
make_dh_bn(dh2048_256_q)
|
||||
|
||||
/* RFC 7919 primes: only the modulus tables are instantiated here. */
make_dh_bn(ffdhe2048_p)
|
||||
make_dh_bn(ffdhe3072_p)
|
||||
make_dh_bn(ffdhe4096_p)
|
||||
make_dh_bn(ffdhe6144_p)
|
||||
make_dh_bn(ffdhe8192_p)
|
||||
|
||||
|
||||
#endif
|
457
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_div.c
vendored
Normal file
457
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_div.c
vendored
Normal file
|
@ -0,0 +1,457 @@
|
|||
/*
|
||||
* Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <openssl/bn.h>
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
/* The old slow way */
|
||||
#if 0
|
||||
int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
|
||||
BN_CTX *ctx)
|
||||
{
|
||||
int i, nm, nd;
|
||||
int ret = 0;
|
||||
BIGNUM *D;
|
||||
|
||||
bn_check_top(m);
|
||||
bn_check_top(d);
|
||||
if (BN_is_zero(d)) {
|
||||
BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (BN_ucmp(m, d) < 0) {
|
||||
if (rem != NULL) {
|
||||
if (BN_copy(rem, m) == NULL)
|
||||
return 0;
|
||||
}
|
||||
if (dv != NULL)
|
||||
BN_zero(dv);
|
||||
return 1;
|
||||
}
|
||||
|
||||
BN_CTX_start(ctx);
|
||||
D = BN_CTX_get(ctx);
|
||||
if (dv == NULL)
|
||||
dv = BN_CTX_get(ctx);
|
||||
if (rem == NULL)
|
||||
rem = BN_CTX_get(ctx);
|
||||
if (D == NULL || dv == NULL || rem == NULL)
|
||||
goto end;
|
||||
|
||||
nd = BN_num_bits(d);
|
||||
nm = BN_num_bits(m);
|
||||
if (BN_copy(D, d) == NULL)
|
||||
goto end;
|
||||
if (BN_copy(rem, m) == NULL)
|
||||
goto end;
|
||||
|
||||
/*
|
||||
* The next 2 are needed so we can do a dv->d[0]|=1 later since
|
||||
* BN_lshift1 will only work once there is a value :-)
|
||||
*/
|
||||
BN_zero(dv);
|
||||
if (bn_wexpand(dv, 1) == NULL)
|
||||
goto end;
|
||||
dv->top = 1;
|
||||
|
||||
if (!BN_lshift(D, D, nm - nd))
|
||||
goto end;
|
||||
for (i = nm - nd; i >= 0; i--) {
|
||||
if (!BN_lshift1(dv, dv))
|
||||
goto end;
|
||||
if (BN_ucmp(rem, D) >= 0) {
|
||||
dv->d[0] |= 1;
|
||||
if (!BN_usub(rem, rem, D))
|
||||
goto end;
|
||||
}
|
||||
/* CAN IMPROVE (and have now :=) */
|
||||
if (!BN_rshift1(D, D))
|
||||
goto end;
|
||||
}
|
||||
rem->neg = BN_is_zero(rem) ? 0 : m->neg;
|
||||
dv->neg = m->neg ^ d->neg;
|
||||
ret = 1;
|
||||
end:
|
||||
BN_CTX_end(ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
# if defined(BN_DIV3W)
|
||||
BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0);
|
||||
# elif 0
|
||||
/*
|
||||
* This is #if-ed away, because it's a reference for assembly implementations,
|
||||
* where it can and should be made constant-time. But if you want to test it,
|
||||
* just replace 0 with 1.
|
||||
*/
|
||||
# if BN_BITS2 == 64 && defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
|
||||
# undef BN_ULLONG
|
||||
# define BN_ULLONG __uint128_t
|
||||
# define BN_LLONG
|
||||
# endif
|
||||
|
||||
# ifdef BN_LLONG
|
||||
# define BN_DIV3W
|
||||
/*
|
||||
* Interface is somewhat quirky, |m| is pointer to most significant limb,
|
||||
* and less significant limb is referred at |m[-1]|. This means that caller
|
||||
* is responsible for ensuring that |m[-1]| is valid. Second condition that
|
||||
* has to be met is that |d0|'s most significant bit has to be set. Or in
|
||||
* other words divisor has to be "bit-aligned to the left." bn_div_fixed_top
|
||||
* does all this. The subroutine considers four limbs, two of which are
|
||||
* "overlapping," hence the name...
|
||||
*/
|
||||
static BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0)
|
||||
{
|
||||
BN_ULLONG R = ((BN_ULLONG)m[0] << BN_BITS2) | m[-1];
|
||||
BN_ULLONG D = ((BN_ULLONG)d0 << BN_BITS2) | d1;
|
||||
BN_ULONG Q = 0, mask;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BN_BITS2; i++) {
|
||||
Q <<= 1;
|
||||
if (R >= D) {
|
||||
Q |= 1;
|
||||
R -= D;
|
||||
}
|
||||
D >>= 1;
|
||||
}
|
||||
|
||||
mask = 0 - (Q >> (BN_BITS2 - 1)); /* does it overflow? */
|
||||
|
||||
Q <<= 1;
|
||||
Q |= (R >= D);
|
||||
|
||||
return (Q | mask) & BN_MASK2;
|
||||
}
|
||||
# endif
|
||||
# endif
|
||||
|
||||
/*
 * Shift |num| left, in place, until the most significant bit of its top
 * limb is set, and return the number of bit positions shifted.  The limb
 * count is left unchanged.
 *
 * The top limb (num->d[num->top - 1]) is read unconditionally, so the
 * caller must pass a number with top > 0; bn_div_fixed_top additionally
 * asserts that this limb is non-zero.
 */
static int bn_left_align(BIGNUM *num)
{
    BN_ULONG *limbs = num->d;
    const int nlimbs = num->top;
    int used_bits = BN_num_bits_word(limbs[nlimbs - 1]);
    const int lshift = BN_BITS2 - used_bits;
    BN_ULONG carry = 0;         /* bits handed over from the limb below */
    BN_ULONG keep, cur;
    int idx;

    /*
     * An already aligned top limb gives used_bits == BN_BITS2; reduce that
     * to 0 so the right shift below never shifts by the full limb width.
     */
    used_bits %= BN_BITS2;
    /*
     * keep = 0 - (used_bits != 0), computed without a branch: the negated
     * value has every high bit set and ">> 8" smears those into the low
     * bits, giving all ones; zero stays zero.
     */
    keep = (BN_ULONG)0 - used_bits;
    keep |= keep >> 8;

    for (idx = 0; idx < nlimbs; idx++) {
        cur = limbs[idx];
        limbs[idx] = ((cur << lshift) | carry) & BN_MASK2;
        carry = (cur >> used_bits) & keep;
    }

    return lshift;
}
|
||||
|
||||
# if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \
|
||||
&& !defined(PEDANTIC) && !defined(BN_DIV3W)
|
||||
# if defined(__GNUC__) && __GNUC__>=2
|
||||
# if defined(__i386) || defined (__i386__)
|
||||
/*-
|
||||
* There were two reasons for implementing this template:
|
||||
* - GNU C generates a call to a function (__udivdi3 to be exact)
|
||||
* in reply to ((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0 (I fail to
|
||||
* understand why...);
|
||||
* - divl doesn't only calculate quotient, but also leaves
|
||||
* remainder in %edx which we can definitely use here:-)
|
||||
*/
|
||||
/*
 * NOTE: |q| and |rem| are NOT macro parameters.  The expansion stores into
 * whatever variables of those names are in scope where the macro is used
 * and evaluates to |q|.  n0 (the high word of the dividend) is passed in
 * the "d" register and n1 (the low word) in the "a" register; the quotient
 * comes back through "a" and the remainder through "d".
 * REMAINDER_IS_ALREADY_CALCULATED tells bn_div_fixed_top to skip its own
 * rem = n1 - q * d0 step.
 */
# undef bn_div_words
|
||||
# define bn_div_words(n0,n1,d0) \
|
||||
({ asm volatile ( \
|
||||
"divl %4" \
|
||||
: "=a"(q), "=d"(rem) \
|
||||
: "a"(n1), "d"(n0), "r"(d0) \
|
||||
: "cc"); \
|
||||
q; \
|
||||
})
|
||||
# define REMAINDER_IS_ALREADY_CALCULATED
|
||||
# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
|
||||
/*
|
||||
* Same story here, but it's 128-bit by 64-bit division. Wow!
|
||||
*/
|
||||
/*
 * NOTE: as in the 32-bit variant, |q| and |rem| are not macro parameters;
 * they are the variables of those names in scope at the point of use, and
 * the expansion evaluates to |q|.  n0 is the high and n1 the low word of
 * the dividend ("d" and "a" registers respectively).
 * REMAINDER_IS_ALREADY_CALCULATED makes bn_div_fixed_top skip recomputing
 * the remainder.
 */
# undef bn_div_words
|
||||
# define bn_div_words(n0,n1,d0) \
|
||||
({ asm volatile ( \
|
||||
"divq %4" \
|
||||
: "=a"(q), "=d"(rem) \
|
||||
: "a"(n1), "d"(n0), "r"(d0) \
|
||||
: "cc"); \
|
||||
q; \
|
||||
})
|
||||
# define REMAINDER_IS_ALREADY_CALCULATED
|
||||
# endif /* __<cpu> */
|
||||
# endif /* __GNUC__ */
|
||||
# endif /* OPENSSL_NO_ASM */
|
||||
|
||||
/*-
|
||||
* BN_div computes dv := num / divisor, rounding towards
|
||||
* zero, and sets up rm such that dv*divisor + rm = num holds.
|
||||
* Thus:
|
||||
* dv->neg == num->neg ^ divisor->neg (unless the result is zero)
|
||||
* rm->neg == num->neg (unless the remainder is zero)
|
||||
* If 'dv' or 'rm' is NULL, the respective value is not returned.
|
||||
*/
|
||||
int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
|
||||
BN_CTX *ctx)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (BN_is_zero(divisor)) {
|
||||
BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Invalid zero-padding would have particularly bad consequences so don't
|
||||
* just rely on bn_check_top() here (bn_check_top() works only for
|
||||
* BN_DEBUG builds)
|
||||
*/
|
||||
if (divisor->d[divisor->top - 1] == 0) {
|
||||
BNerr(BN_F_BN_DIV, BN_R_NOT_INITIALIZED);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = bn_div_fixed_top(dv, rm, num, divisor, ctx);
|
||||
|
||||
if (ret) {
|
||||
if (dv != NULL)
|
||||
bn_correct_top(dv);
|
||||
if (rm != NULL)
|
||||
bn_correct_top(rm);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* It's argued that *length* of *significant* part of divisor is public.
|
||||
* Even if it's private modulus that is. Again, *length* is assumed
|
||||
* public, but not *value*. Former is likely to be pre-defined by
|
||||
* algorithm with bit granularity, though below subroutine is invariant
|
||||
* of limb length. Thanks to this assumption we can require that |divisor|
|
||||
* may not be zero-padded, yet claim this subroutine "constant-time"(*).
|
||||
* This is because zero-padded dividend, |num|, is tolerated, so that
|
||||
* caller can pass dividend of public length(*), but with smaller amount
|
||||
* of significant limbs. This naturally means that quotient, |dv|, would
|
||||
* contain correspondingly less significant limbs as well, and will be zero-
|
||||
* padded accordingly. Returned remainder, |rm|, will have same bit length
|
||||
* as divisor, also zero-padded if needed. These actually leave sign bits
|
||||
* in ambiguous state. In sense that we try to avoid negative zeros, while
|
||||
* zero-padded zeros would retain sign.
|
||||
*
|
||||
* (*) "Constant-time-ness" has two pre-conditions:
|
||||
*
|
||||
* - availability of constant-time bn_div_3_words;
|
||||
* - dividend is at least as "wide" as divisor, limb-wise, zero-padded
|
||||
* if so required, which shouldn't be a privacy problem, because
|
||||
* divisor's length is considered public;
|
||||
*/
|
||||
/*
 * Schoolbook long division, one quotient limb per iteration.
 *
 * dv      - receives the quotient; may be NULL, in which case a BN_CTX
 *           temporary is used and the quotient is discarded.
 * rm      - receives the remainder; may be NULL.
 * num     - dividend; may be zero-padded.
 * divisor - divisor; must have top > 0 and a non-zero top limb (asserted).
 * ctx     - supplies the temporaries.
 *
 * Returns 1 on success, 0 on failure.  The quotient is left with
 * BN_FLG_FIXED_TOP set, i.e. possibly zero-padded; callers that need a
 * minimal representation apply bn_correct_top() themselves, as BN_div does.
 */
int bn_div_fixed_top(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
                     const BIGNUM *divisor, BN_CTX *ctx)
{
    int norm_shift, i, j, loop;
    BIGNUM *tmp, *snum, *sdiv, *res;
    BN_ULONG *resp, *wnum, *wnumtop;
    BN_ULONG d0, d1;
    int num_n, div_n;

    assert(divisor->top > 0 && divisor->d[divisor->top - 1] != 0);

    bn_check_top(num);
    bn_check_top(divisor);
    bn_check_top(dv);
    bn_check_top(rm);

    BN_CTX_start(ctx);
    res = (dv == NULL) ? BN_CTX_get(ctx) : dv;
    tmp = BN_CTX_get(ctx);
    snum = BN_CTX_get(ctx);
    sdiv = BN_CTX_get(ctx);
    /* only the last BN_CTX_get() result is tested */
    if (sdiv == NULL)
        goto err;

    /* First we normalise the numbers */
    if (!BN_copy(sdiv, divisor))
        goto err;
    norm_shift = bn_left_align(sdiv);
    sdiv->neg = 0;
    /*
     * Note that bn_lshift_fixed_top's output is always one limb longer
     * than input, even when norm_shift is zero. This means that amount of
     * inner loop iterations is invariant of dividend value, and that one
     * doesn't need to compare dividend and divisor if they were originally
     * of the same bit length.
     */
    if (!(bn_lshift_fixed_top(snum, num, norm_shift)))
        goto err;

    div_n = sdiv->top;
    num_n = snum->top;

    if (num_n <= div_n) {
        /* caller didn't pad dividend -> no constant-time guarantee... */
        if (bn_wexpand(snum, div_n + 1) == NULL)
            goto err;
        memset(&(snum->d[num_n]), 0, (div_n - num_n + 1) * sizeof(BN_ULONG));
        snum->top = num_n = div_n + 1;
    }

    loop = num_n - div_n;
    /*
     * Let's set up a 'window' into snum. This is the part that corresponds
     * to the current 'area' being divided.
     */
    wnum = &(snum->d[loop]);
    wnumtop = &(snum->d[num_n - 1]);

    /* Get the top 2 words of sdiv */
    d0 = sdiv->d[div_n - 1];
    d1 = (div_n == 1) ? 0 : sdiv->d[div_n - 2];

    /* Setup quotient */
    if (!bn_wexpand(res, loop))
        goto err;
    res->neg = (num->neg ^ divisor->neg);
    res->top = loop;
    res->flags |= BN_FLG_FIXED_TOP;
    /* quotient limbs are stored from the most significant one downwards */
    resp = &(res->d[loop]);

    /* space for temp */
    if (!bn_wexpand(tmp, (div_n + 1)))
        goto err;

    for (i = 0; i < loop; i++, wnumtop--) {
        BN_ULONG q, l0;
        /*
         * the first part of the loop uses the top two words of snum and sdiv
         * to calculate a BN_ULONG q such that | wnum - sdiv * q | < sdiv
         */
# if defined(BN_DIV3W)
        q = bn_div_3_words(wnumtop, d1, d0);
# else
        BN_ULONG n0, n1, rem = 0;

        n0 = wnumtop[0];
        n1 = wnumtop[-1];
        if (n0 == d0)
            q = BN_MASK2;
        else {                  /* n0 < d0 */
            BN_ULONG n2 = (wnumtop == wnum) ? 0 : wnumtop[-2];
#  ifdef BN_LLONG
            BN_ULLONG t2;

#   if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
            q = (BN_ULONG)(((((BN_ULLONG) n0) << BN_BITS2) | n1) / d0);
#   else
            q = bn_div_words(n0, n1, d0);
#   endif

#   ifndef REMAINDER_IS_ALREADY_CALCULATED
            /*
             * rem doesn't have to be BN_ULLONG. The least we
             * know it's less than d0, isn't it?
             */
            rem = (n1 - q * d0) & BN_MASK2;
#   endif
            t2 = (BN_ULLONG) d1 *q;

            /* lower the estimate while d1 * q exceeds (rem, n2) */
            for (;;) {
                if (t2 <= ((((BN_ULLONG) rem) << BN_BITS2) | n2))
                    break;
                q--;
                rem += d0;
                if (rem < d0)
                    break;      /* don't let rem overflow */
                t2 -= d1;
            }
#  else                         /* !BN_LLONG */
            BN_ULONG t2l, t2h;

            q = bn_div_words(n0, n1, d0);
#   ifndef REMAINDER_IS_ALREADY_CALCULATED
            rem = (n1 - q * d0) & BN_MASK2;
#   endif

#   if defined(BN_UMULT_LOHI)
            BN_UMULT_LOHI(t2l, t2h, d1, q);
#   elif defined(BN_UMULT_HIGH)
            t2l = d1 * q;
            t2h = BN_UMULT_HIGH(d1, q);
#   else
            {
                BN_ULONG ql, qh;
                t2l = LBITS(d1);
                t2h = HBITS(d1);
                ql = LBITS(q);
                qh = HBITS(q);
                mul64(t2l, t2h, ql, qh); /* t2=(BN_ULLONG)d1*q; */
            }
#   endif

            /* same correction loop as above, on a (t2h, t2l) word pair */
            for (;;) {
                if ((t2h < rem) || ((t2h == rem) && (t2l <= n2)))
                    break;
                q--;
                rem += d0;
                if (rem < d0)
                    break;      /* don't let rem overflow */
                if (t2l < d1)
                    t2h--;
                t2l -= d1;
            }
#  endif                        /* !BN_LLONG */
        }
# endif                         /* !BN_DIV3W */

        l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q);
        tmp->d[div_n] = l0;
        wnum--;
        /*
         * ignore top values of the bignums just sub the two BN_ULONG arrays
         * with bn_sub_words
         */
        l0 = bn_sub_words(wnum, wnum, tmp->d, div_n + 1);
        q -= l0;
        /*
         * Note: As we have considered only the leading two BN_ULONGs in
         * the calculation of q, sdiv * q might be greater than wnum (but
         * then (q-1) * sdiv is less than or equal to wnum)
         */
        /* l0 becomes an all-ones/zero mask: add sdiv back only on borrow */
        for (l0 = 0 - l0, j = 0; j < div_n; j++)
            tmp->d[j] = sdiv->d[j] & l0;
        l0 = bn_add_words(wnum, wnum, tmp->d, div_n);
        (*wnumtop) += l0;
        assert((*wnumtop) == 0);

        /* store part of the result */
        *--resp = q;
    }
    /* snum holds remainder, it's as wide as divisor */
    snum->neg = num->neg;
    snum->top = div_n;
    snum->flags |= BN_FLG_FIXED_TOP;
    /*
     * Undo the normalisation shift.  A zero return is treated as failure,
     * exactly like bn_lshift_fixed_top above, so that we never report
     * success without having produced the remainder in |rm|.
     */
    if (rm != NULL && !bn_rshift_fixed_top(rm, snum, norm_shift))
        goto err;
    BN_CTX_end(ctx);
    return 1;
 err:
    bn_check_top(rm);
    BN_CTX_end(ctx);
    return 0;
}
|
||||
#endif
|
118
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_err.c
vendored
Normal file
118
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_err.c
vendored
Normal file
|
@ -0,0 +1,118 @@
|
|||
/*
|
||||
* Generated by util/mkerr.pl DO NOT EDIT
|
||||
* Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <openssl/err.h>
|
||||
#include <openssl/bnerr.h>
|
||||
|
||||
#ifndef OPENSSL_NO_ERR
|
||||
|
||||
static const ERR_STRING_DATA BN_str_functs[] = {
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BNRAND, 0), "bnrand"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BNRAND_RANGE, 0), "bnrand_range"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_CONVERT_EX, 0),
|
||||
"BN_BLINDING_convert_ex"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_CREATE_PARAM, 0),
|
||||
"BN_BLINDING_create_param"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_INVERT_EX, 0),
|
||||
"BN_BLINDING_invert_ex"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_NEW, 0), "BN_BLINDING_new"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_UPDATE, 0), "BN_BLINDING_update"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_BN2DEC, 0), "BN_bn2dec"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_BN2HEX, 0), "BN_bn2hex"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_COMPUTE_WNAF, 0), "bn_compute_wNAF"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_GET, 0), "BN_CTX_get"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_NEW, 0), "BN_CTX_new"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_START, 0), "BN_CTX_start"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_DIV, 0), "BN_div"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_DIV_RECP, 0), "BN_div_recp"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_EXP, 0), "BN_exp"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_EXPAND_INTERNAL, 0), "bn_expand_internal"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_GENCB_NEW, 0), "BN_GENCB_new"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_GENERATE_DSA_NONCE, 0),
|
||||
"BN_generate_dsa_nonce"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_GENERATE_PRIME_EX, 0),
|
||||
"BN_generate_prime_ex"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD, 0), "BN_GF2m_mod"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_EXP, 0), "BN_GF2m_mod_exp"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_MUL, 0), "BN_GF2m_mod_mul"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SOLVE_QUAD, 0),
|
||||
"BN_GF2m_mod_solve_quad"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR, 0),
|
||||
"BN_GF2m_mod_solve_quad_arr"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SQR, 0), "BN_GF2m_mod_sqr"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SQRT, 0), "BN_GF2m_mod_sqrt"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_LSHIFT, 0), "BN_lshift"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP2_MONT, 0), "BN_mod_exp2_mont"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT, 0), "BN_mod_exp_mont"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT_CONSTTIME, 0),
|
||||
"BN_mod_exp_mont_consttime"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT_WORD, 0),
|
||||
"BN_mod_exp_mont_word"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_RECP, 0), "BN_mod_exp_recp"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_SIMPLE, 0), "BN_mod_exp_simple"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_INVERSE, 0), "BN_mod_inverse"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_INVERSE_NO_BRANCH, 0),
|
||||
"BN_mod_inverse_no_branch"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_LSHIFT_QUICK, 0), "BN_mod_lshift_quick"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_SQRT, 0), "BN_mod_sqrt"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_MONT_CTX_NEW, 0), "BN_MONT_CTX_new"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_MPI2BN, 0), "BN_mpi2bn"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_NEW, 0), "BN_new"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_POOL_GET, 0), "BN_POOL_get"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_RAND, 0), "BN_rand"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_RAND_RANGE, 0), "BN_rand_range"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_RECP_CTX_NEW, 0), "BN_RECP_CTX_new"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_RSHIFT, 0), "BN_rshift"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_SET_WORDS, 0), "bn_set_words"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_STACK_PUSH, 0), "BN_STACK_push"},
|
||||
{ERR_PACK(ERR_LIB_BN, BN_F_BN_USUB, 0), "BN_usub"},
|
||||
{0, NULL}
|
||||
};
|
||||
|
||||
static const ERR_STRING_DATA BN_str_reasons[] = {
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_ARG2_LT_ARG3), "arg2 lt arg3"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_BAD_RECIPROCAL), "bad reciprocal"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_BIGNUM_TOO_LONG), "bignum too long"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_BITS_TOO_SMALL), "bits too small"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_CALLED_WITH_EVEN_MODULUS),
|
||||
"called with even modulus"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_DIV_BY_ZERO), "div by zero"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_ENCODING_ERROR), "encoding error"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_EXPAND_ON_STATIC_BIGNUM_DATA),
|
||||
"expand on static bignum data"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_INPUT_NOT_REDUCED), "input not reduced"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_LENGTH), "invalid length"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_RANGE), "invalid range"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_SHIFT), "invalid shift"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_NOT_A_SQUARE), "not a square"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_NOT_INITIALIZED), "not initialized"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_NO_INVERSE), "no inverse"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_NO_SOLUTION), "no solution"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_PRIVATE_KEY_TOO_LARGE),
|
||||
"private key too large"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_P_IS_NOT_PRIME), "p is not prime"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_TOO_MANY_ITERATIONS), "too many iterations"},
|
||||
{ERR_PACK(ERR_LIB_BN, 0, BN_R_TOO_MANY_TEMPORARY_VARIABLES),
|
||||
"too many temporary variables"},
|
||||
{0, NULL}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
int ERR_load_BN_strings(void)
|
||||
{
|
||||
#ifndef OPENSSL_NO_ERR
|
||||
if (ERR_func_error_string(BN_str_functs[0].error) == NULL) {
|
||||
ERR_load_strings_const(BN_str_functs);
|
||||
ERR_load_strings_const(BN_str_reasons);
|
||||
}
|
||||
#endif
|
||||
return 1;
|
||||
}
|
1395
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_exp.c
vendored
Normal file
1395
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_exp.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
201
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_exp2.c
vendored
Normal file
201
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_exp2.c
vendored
Normal file
|
@ -0,0 +1,201 @@
|
|||
/*
|
||||
* Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
#define TABLE_SIZE 32
|
||||
|
||||
int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
|
||||
const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
|
||||
BN_CTX *ctx, BN_MONT_CTX *in_mont)
|
||||
{
|
||||
int i, j, bits, b, bits1, bits2, ret =
|
||||
0, wpos1, wpos2, window1, window2, wvalue1, wvalue2;
|
||||
int r_is_one = 1;
|
||||
BIGNUM *d, *r;
|
||||
const BIGNUM *a_mod_m;
|
||||
/* Tables of variables obtained from 'ctx' */
|
||||
BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE];
|
||||
BN_MONT_CTX *mont = NULL;
|
||||
|
||||
bn_check_top(a1);
|
||||
bn_check_top(p1);
|
||||
bn_check_top(a2);
|
||||
bn_check_top(p2);
|
||||
bn_check_top(m);
|
||||
|
||||
if (!(m->d[0] & 1)) {
|
||||
BNerr(BN_F_BN_MOD_EXP2_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
|
||||
return 0;
|
||||
}
|
||||
bits1 = BN_num_bits(p1);
|
||||
bits2 = BN_num_bits(p2);
|
||||
if ((bits1 == 0) && (bits2 == 0)) {
|
||||
ret = BN_one(rr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bits = (bits1 > bits2) ? bits1 : bits2;
|
||||
|
||||
BN_CTX_start(ctx);
|
||||
d = BN_CTX_get(ctx);
|
||||
r = BN_CTX_get(ctx);
|
||||
val1[0] = BN_CTX_get(ctx);
|
||||
val2[0] = BN_CTX_get(ctx);
|
||||
if (val2[0] == NULL)
|
||||
goto err;
|
||||
|
||||
if (in_mont != NULL)
|
||||
mont = in_mont;
|
||||
else {
|
||||
if ((mont = BN_MONT_CTX_new()) == NULL)
|
||||
goto err;
|
||||
if (!BN_MONT_CTX_set(mont, m, ctx))
|
||||
goto err;
|
||||
}
|
||||
|
||||
window1 = BN_window_bits_for_exponent_size(bits1);
|
||||
window2 = BN_window_bits_for_exponent_size(bits2);
|
||||
|
||||
/*
|
||||
* Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1)
|
||||
*/
|
||||
if (a1->neg || BN_ucmp(a1, m) >= 0) {
|
||||
if (!BN_mod(val1[0], a1, m, ctx))
|
||||
goto err;
|
||||
a_mod_m = val1[0];
|
||||
} else
|
||||
a_mod_m = a1;
|
||||
if (BN_is_zero(a_mod_m)) {
|
||||
BN_zero(rr);
|
||||
ret = 1;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx))
|
||||
goto err;
|
||||
if (window1 > 1) {
|
||||
if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx))
|
||||
goto err;
|
||||
|
||||
j = 1 << (window1 - 1);
|
||||
for (i = 1; i < j; i++) {
|
||||
if (((val1[i] = BN_CTX_get(ctx)) == NULL) ||
|
||||
!BN_mod_mul_montgomery(val1[i], val1[i - 1], d, mont, ctx))
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1)
|
||||
*/
|
||||
if (a2->neg || BN_ucmp(a2, m) >= 0) {
|
||||
if (!BN_mod(val2[0], a2, m, ctx))
|
||||
goto err;
|
||||
a_mod_m = val2[0];
|
||||
} else
|
||||
a_mod_m = a2;
|
||||
if (BN_is_zero(a_mod_m)) {
|
||||
BN_zero(rr);
|
||||
ret = 1;
|
||||
goto err;
|
||||
}
|
||||
if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx))
|
||||
goto err;
|
||||
if (window2 > 1) {
|
||||
if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx))
|
||||
goto err;
|
||||
|
||||
j = 1 << (window2 - 1);
|
||||
for (i = 1; i < j; i++) {
|
||||
if (((val2[i] = BN_CTX_get(ctx)) == NULL) ||
|
||||
!BN_mod_mul_montgomery(val2[i], val2[i - 1], d, mont, ctx))
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now compute the power product, using independent windows. */
|
||||
r_is_one = 1;
|
||||
wvalue1 = 0; /* The 'value' of the first window */
|
||||
wvalue2 = 0; /* The 'value' of the second window */
|
||||
wpos1 = 0; /* If wvalue1 > 0, the bottom bit of the
|
||||
* first window */
|
||||
wpos2 = 0; /* If wvalue2 > 0, the bottom bit of the
|
||||
* second window */
|
||||
|
||||
if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
|
||||
goto err;
|
||||
for (b = bits - 1; b >= 0; b--) {
|
||||
if (!r_is_one) {
|
||||
if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (!wvalue1)
|
||||
if (BN_is_bit_set(p1, b)) {
|
||||
/*
|
||||
* consider bits b-window1+1 .. b for this window
|
||||
*/
|
||||
i = b - window1 + 1;
|
||||
while (!BN_is_bit_set(p1, i)) /* works for i<0 */
|
||||
i++;
|
||||
wpos1 = i;
|
||||
wvalue1 = 1;
|
||||
for (i = b - 1; i >= wpos1; i--) {
|
||||
wvalue1 <<= 1;
|
||||
if (BN_is_bit_set(p1, i))
|
||||
wvalue1++;
|
||||
}
|
||||
}
|
||||
|
||||
if (!wvalue2)
|
||||
if (BN_is_bit_set(p2, b)) {
|
||||
/*
|
||||
* consider bits b-window2+1 .. b for this window
|
||||
*/
|
||||
i = b - window2 + 1;
|
||||
while (!BN_is_bit_set(p2, i))
|
||||
i++;
|
||||
wpos2 = i;
|
||||
wvalue2 = 1;
|
||||
for (i = b - 1; i >= wpos2; i--) {
|
||||
wvalue2 <<= 1;
|
||||
if (BN_is_bit_set(p2, i))
|
||||
wvalue2++;
|
||||
}
|
||||
}
|
||||
|
||||
if (wvalue1 && b == wpos1) {
|
||||
/* wvalue1 is odd and < 2^window1 */
|
||||
if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1], mont, ctx))
|
||||
goto err;
|
||||
wvalue1 = 0;
|
||||
r_is_one = 0;
|
||||
}
|
||||
|
||||
if (wvalue2 && b == wpos2) {
|
||||
/* wvalue2 is odd and < 2^window2 */
|
||||
if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1], mont, ctx))
|
||||
goto err;
|
||||
wvalue2 = 0;
|
||||
r_is_one = 0;
|
||||
}
|
||||
}
|
||||
if (!BN_from_montgomery(rr, r, mont, ctx))
|
||||
goto err;
|
||||
ret = 1;
|
||||
err:
|
||||
if (in_mont == NULL)
|
||||
BN_MONT_CTX_free(mont);
|
||||
BN_CTX_end(ctx);
|
||||
bn_check_top(rr);
|
||||
return ret;
|
||||
}
|
623
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_gcd.c
vendored
Normal file
623
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_gcd.c
vendored
Normal file
|
@ -0,0 +1,623 @@
|
|||
/*
|
||||
* Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
static BIGNUM *euclid(BIGNUM *a, BIGNUM *b);
|
||||
|
||||
/*
 * Store gcd(|in_a|, |in_b|) in r.  The inputs are copied into temporaries
 * from ctx with their signs cleared, ordered so the larger one comes
 * first, and passed to euclid().  Returns 1 on success, 0 on failure.
 */
int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx)
{
    BIGNUM *larger, *smaller, *g;
    int ok = 0;

    bn_check_top(in_a);
    bn_check_top(in_b);

    BN_CTX_start(ctx);
    larger = BN_CTX_get(ctx);
    smaller = BN_CTX_get(ctx);
    if (smaller == NULL)
        goto done;

    if (BN_copy(larger, in_a) == NULL || BN_copy(smaller, in_b) == NULL)
        goto done;
    larger->neg = 0;
    smaller->neg = 0;

    /* euclid() expects its first argument to be the larger value. */
    if (BN_cmp(larger, smaller) < 0) {
        BIGNUM *swap = larger;

        larger = smaller;
        smaller = swap;
    }

    g = euclid(larger, smaller);
    if (g == NULL || BN_copy(r, g) == NULL)
        goto done;
    ok = 1;
 done:
    BN_CTX_end(ctx);
    bn_check_top(r);
    return ok;
}
|
||||
|
||||
/*
 * Binary GCD on two non-negative values with 0 <= b <= a.  Both operands
 * are modified in place.  Returns whichever of the two BIGNUMs ends up
 * holding the gcd, or NULL if any arithmetic step fails.
 */
static BIGNUM *euclid(BIGNUM *a, BIGNUM *b)
{
    int common_twos = 0;        /* factors of two shared by a and b */

    bn_check_top(a);
    bn_check_top(b);

    /* Loop invariant: 0 <= b <= a */
    while (!BN_is_zero(b)) {
        const int a_odd = BN_is_odd(a);
        const int b_odd = BN_is_odd(b);

        if (!a_odd && !b_odd) {
            /* both even: strip a shared factor of two; order is kept */
            if (!BN_rshift1(a, a) || !BN_rshift1(b, b))
                return NULL;
            common_twos++;
            continue;
        }

        if (a_odd && b_odd) {
            /* both odd: a := (a - b) / 2 */
            if (!BN_sub(a, a, b) || !BN_rshift1(a, a))
                return NULL;
        } else if (a_odd) {
            /* a odd, b even: halve b */
            if (!BN_rshift1(b, b))
                return NULL;
        } else {
            /* a even, b odd: halve a */
            if (!BN_rshift1(a, a))
                return NULL;
        }

        /* restore b <= a */
        if (BN_cmp(a, b) < 0) {
            BIGNUM *swap = a;

            a = b;
            b = swap;
        }
    }

    /* put back the powers of two removed from both operands */
    if (common_twos != 0 && !BN_lshift(a, a, common_twos))
        return NULL;

    bn_check_top(a);
    return a;
}
|
||||
|
||||
/* solves ax == 1 (mod n) */
|
||||
static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
|
||||
const BIGNUM *a, const BIGNUM *n,
|
||||
BN_CTX *ctx);
|
||||
|
||||
/*
 * Public wrapper around int_bn_mod_inverse().  Raises BN_R_NO_INVERSE when
 * the inner call reports through its flag that no inverse exists, and
 * returns whatever the inner call returned.
 */
BIGNUM *BN_mod_inverse(BIGNUM *in,
                       const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
{
    int no_inverse;
    BIGNUM *result = int_bn_mod_inverse(in, a, n, ctx, &no_inverse);

    if (no_inverse != 0)
        BNerr(BN_F_BN_MOD_INVERSE, BN_R_NO_INVERSE);
    return result;
}
|
||||
|
||||
/*
 * Compute the inverse of a modulo n, i.e. R with R*a == 1 (mod |n|).
 *
 *   in      optional destination; when NULL a new BIGNUM is allocated and
 *           returned (and freed again here on failure)
 *   a, n    value and modulus
 *   ctx     scratch context for temporaries
 *   pnoinv  optional flag; set to 1 when no inverse exists (n is 0 or
 *           +-1, or gcd(a, n) != 1) and to 0 otherwise
 *
 * Returns the result BIGNUM, or NULL on error or when no inverse exists.
 * If either a or n carries BN_FLG_CONSTTIME the work is delegated to
 * BN_mod_inverse_no_branch().
 */
BIGNUM *int_bn_mod_inverse(BIGNUM *in,
                           const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx,
                           int *pnoinv)
{
    BIGNUM *A, *B, *X, *Y, *M, *D, *T, *R = NULL;
    BIGNUM *ret = NULL;
    int sign;

    /* This is invalid input so we don't worry about constant time here */
    if (BN_abs_is_word(n, 1) || BN_is_zero(n)) {
        if (pnoinv != NULL)
            *pnoinv = 1;
        return NULL;
    }

    if (pnoinv != NULL)
        *pnoinv = 0;

    if ((BN_get_flags(a, BN_FLG_CONSTTIME) != 0)
        || (BN_get_flags(n, BN_FLG_CONSTTIME) != 0)) {
        return BN_mod_inverse_no_branch(in, a, n, ctx);
    }

    bn_check_top(a);
    bn_check_top(n);

    BN_CTX_start(ctx);
    A = BN_CTX_get(ctx);
    B = BN_CTX_get(ctx);
    X = BN_CTX_get(ctx);
    D = BN_CTX_get(ctx);
    M = BN_CTX_get(ctx);
    Y = BN_CTX_get(ctx);
    T = BN_CTX_get(ctx);
    if (T == NULL)
        goto err;

    if (in == NULL)
        R = BN_new();
    else
        R = in;
    if (R == NULL)
        goto err;

    /* BN_one() is BN_set_word() and can fail; do not ignore its result */
    if (!BN_one(X))
        goto err;
    BN_zero(Y);
    if (BN_copy(B, a) == NULL)
        goto err;
    if (BN_copy(A, n) == NULL)
        goto err;
    A->neg = 0;
    if (B->neg || (BN_ucmp(B, A) >= 0)) {
        if (!BN_nnmod(B, B, A, ctx))
            goto err;
    }
    sign = -1;
    /*-
     * From  B = a mod |n|,  A = |n|  it follows that
     *
     *      0 <= B < A,
     *     -sign*X*a  ==  B   (mod |n|),
     *      sign*Y*a  ==  A   (mod |n|).
     */

    if (BN_is_odd(n) && (BN_num_bits(n) <= 2048)) {
        /*
         * Binary inversion algorithm; requires odd modulus. This is faster
         * than the general algorithm if the modulus is sufficiently small
         * (about 400 .. 500 bits on 32-bit systems, but much more on 64-bit
         * systems)
         */
        int shift;

        while (!BN_is_zero(B)) {
            /*-
             *      0 < B < |n|,
             *      0 < A <= |n|,
             * (1) -sign*X*a  ==  B   (mod |n|),
             * (2)  sign*Y*a  ==  A   (mod |n|)
             */

            /*
             * Now divide B by the maximum possible power of two in the
             * integers, and divide X by the same value mod |n|. When we're
             * done, (1) still holds.
             */
            shift = 0;
            while (!BN_is_bit_set(B, shift)) { /* note that 0 < B */
                shift++;

                if (BN_is_odd(X)) {
                    if (!BN_uadd(X, X, n))
                        goto err;
                }
                /*
                 * now X is even, so we can easily divide it by two
                 */
                if (!BN_rshift1(X, X))
                    goto err;
            }
            if (shift > 0) {
                if (!BN_rshift(B, B, shift))
                    goto err;
            }

            /*
             * Same for A and Y.  Afterwards, (2) still holds.
             */
            shift = 0;
            while (!BN_is_bit_set(A, shift)) { /* note that 0 < A */
                shift++;

                if (BN_is_odd(Y)) {
                    if (!BN_uadd(Y, Y, n))
                        goto err;
                }
                /* now Y is even */
                if (!BN_rshift1(Y, Y))
                    goto err;
            }
            if (shift > 0) {
                if (!BN_rshift(A, A, shift))
                    goto err;
            }

            /*-
             * We still have (1) and (2).
             * Both  A  and  B  are odd.
             * The following computations ensure that
             *
             *     0 <= B < |n|,
             *      0 < A < |n|,
             * (1) -sign*X*a  ==  B   (mod |n|),
             * (2)  sign*Y*a  ==  A   (mod |n|),
             *
             * and that either  A  or  B  is even in the next iteration.
             */
            if (BN_ucmp(B, A) >= 0) {
                /* -sign*(X + Y)*a == B - A  (mod |n|) */
                if (!BN_uadd(X, X, Y))
                    goto err;
                /*
                 * NB: we could use BN_mod_add_quick(X, X, Y, n), but that
                 * actually makes the algorithm slower
                 */
                if (!BN_usub(B, B, A))
                    goto err;
            } else {
                /*  sign*(X + Y)*a == A - B  (mod |n|) */
                if (!BN_uadd(Y, Y, X))
                    goto err;
                /*
                 * as above, BN_mod_add_quick(Y, Y, X, n) would slow things down
                 */
                if (!BN_usub(A, A, B))
                    goto err;
            }
        }
    } else {
        /* general inversion algorithm */

        while (!BN_is_zero(B)) {
            BIGNUM *tmp;

            /*-
             *      0 < B < A,
             * (*) -sign*X*a  ==  B   (mod |n|),
             *      sign*Y*a  ==  A   (mod |n|)
             */

            /* (D, M) := (A/B, A%B) ... */
            if (BN_num_bits(A) == BN_num_bits(B)) {
                if (!BN_one(D))
                    goto err;
                if (!BN_sub(M, A, B))
                    goto err;
            } else if (BN_num_bits(A) == BN_num_bits(B) + 1) {
                /* A/B is 1, 2, or 3 */
                if (!BN_lshift1(T, B))
                    goto err;
                if (BN_ucmp(A, T) < 0) {
                    /* A < 2*B, so D=1 */
                    if (!BN_one(D))
                        goto err;
                    if (!BN_sub(M, A, B))
                        goto err;
                } else {
                    /* A >= 2*B, so D=2 or D=3 */
                    if (!BN_sub(M, A, T))
                        goto err;
                    if (!BN_add(D, T, B))
                        goto err; /* use D (:= 3*B) as temp */
                    if (BN_ucmp(A, D) < 0) {
                        /* A < 3*B, so D=2 */
                        if (!BN_set_word(D, 2))
                            goto err;
                        /*
                         * M (= A - 2*B) already has the correct value
                         */
                    } else {
                        /* only D=3 remains */
                        if (!BN_set_word(D, 3))
                            goto err;
                        /*
                         * currently M = A - 2*B, but we need M = A - 3*B
                         */
                        if (!BN_sub(M, M, B))
                            goto err;
                    }
                }
            } else {
                if (!BN_div(D, M, A, B, ctx))
                    goto err;
            }

            /*-
             * Now
             *      A = D*B + M;
             * thus we have
             * (**)  sign*Y*a  ==  D*B + M   (mod |n|).
             */

            tmp = A;    /* keep the BIGNUM object, the value does not matter */

            /* (A, B) := (B, A mod B) ... */
            A = B;
            B = M;
            /* ... so we have  0 <= B < A  again */

            /*-
             * Since the former  M  is now  B  and the former  B  is now  A,
             * (**) translates into
             *       sign*Y*a  ==  D*A + B    (mod |n|),
             * i.e.
             *       sign*Y*a - D*A  ==  B    (mod |n|).
             * Similarly, (*) translates into
             *      -sign*X*a  ==  A          (mod |n|).
             *
             * Thus,
             *   sign*Y*a + D*sign*X*a  ==  B  (mod |n|),
             * i.e.
             *        sign*(Y + D*X)*a  ==  B  (mod |n|).
             *
             * So if we set  (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
             *      -sign*X*a  ==  B   (mod |n|),
             *       sign*Y*a  ==  A   (mod |n|).
             * Note that  X  and  Y  stay non-negative all the time.
             */

            /*
             * most of the time D is very small, so we can optimize tmp := D*X+Y
             */
            if (BN_is_one(D)) {
                if (!BN_add(tmp, X, Y))
                    goto err;
            } else {
                if (BN_is_word(D, 2)) {
                    if (!BN_lshift1(tmp, X))
                        goto err;
                } else if (BN_is_word(D, 4)) {
                    if (!BN_lshift(tmp, X, 2))
                        goto err;
                } else if (D->top == 1) {
                    if (!BN_copy(tmp, X))
                        goto err;
                    if (!BN_mul_word(tmp, D->d[0]))
                        goto err;
                } else {
                    if (!BN_mul(tmp, D, X, ctx))
                        goto err;
                }
                if (!BN_add(tmp, tmp, Y))
                    goto err;
            }

            M = Y;      /* keep the BIGNUM object, the value does not matter */
            Y = X;
            X = tmp;
            sign = -sign;
        }
    }

    /*-
     * The while loop (Euclid's algorithm) ends when
     *      A == gcd(a,n);
     * we have
     *       sign*Y*a  ==  A  (mod |n|),
     * where  Y  is non-negative.
     */

    if (sign < 0) {
        if (!BN_sub(Y, n, Y))
            goto err;
    }
    /* Now  Y*a  ==  A  (mod |n|).  */

    if (BN_is_one(A)) {
        /* Y*a == 1  (mod |n|) */
        if (!Y->neg && BN_ucmp(Y, n) < 0) {
            if (!BN_copy(R, Y))
                goto err;
        } else {
            if (!BN_nnmod(R, Y, n, ctx))
                goto err;
        }
    } else {
        /* gcd(a, n) != 1: report "no inverse" through the flag only */
        if (pnoinv)
            *pnoinv = 1;
        goto err;
    }
    ret = R;
 err:
    /* R is ours to free only if it was allocated in this call */
    if ((ret == NULL) && (in == NULL))
        BN_free(R);
    BN_CTX_end(ctx);
    bn_check_top(ret);
    return ret;
}
|
||||
|
||||
/*
|
||||
* BN_mod_inverse_no_branch is a special version of BN_mod_inverse. It does
|
||||
* not contain branches that may leak sensitive information.
|
||||
*/
|
||||
static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
|
||||
const BIGNUM *a, const BIGNUM *n,
|
||||
BN_CTX *ctx)
|
||||
{
|
||||
BIGNUM *A, *B, *X, *Y, *M, *D, *T, *R = NULL;
|
||||
BIGNUM *ret = NULL;
|
||||
int sign;
|
||||
|
||||
bn_check_top(a);
|
||||
bn_check_top(n);
|
||||
|
||||
BN_CTX_start(ctx);
|
||||
A = BN_CTX_get(ctx);
|
||||
B = BN_CTX_get(ctx);
|
||||
X = BN_CTX_get(ctx);
|
||||
D = BN_CTX_get(ctx);
|
||||
M = BN_CTX_get(ctx);
|
||||
Y = BN_CTX_get(ctx);
|
||||
T = BN_CTX_get(ctx);
|
||||
if (T == NULL)
|
||||
goto err;
|
||||
|
||||
if (in == NULL)
|
||||
R = BN_new();
|
||||
else
|
||||
R = in;
|
||||
if (R == NULL)
|
||||
goto err;
|
||||
|
||||
BN_one(X);
|
||||
BN_zero(Y);
|
||||
if (BN_copy(B, a) == NULL)
|
||||
goto err;
|
||||
if (BN_copy(A, n) == NULL)
|
||||
goto err;
|
||||
A->neg = 0;
|
||||
|
||||
if (B->neg || (BN_ucmp(B, A) >= 0)) {
|
||||
/*
|
||||
* Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
|
||||
* BN_div_no_branch will be called eventually.
|
||||
*/
|
||||
{
|
||||
BIGNUM local_B;
|
||||
bn_init(&local_B);
|
||||
BN_with_flags(&local_B, B, BN_FLG_CONSTTIME);
|
||||
if (!BN_nnmod(B, &local_B, A, ctx))
|
||||
goto err;
|
||||
/* Ensure local_B goes out of scope before any further use of B */
|
||||
}
|
||||
}
|
||||
sign = -1;
|
||||
/*-
|
||||
* From B = a mod |n|, A = |n| it follows that
|
||||
*
|
||||
* 0 <= B < A,
|
||||
* -sign*X*a == B (mod |n|),
|
||||
* sign*Y*a == A (mod |n|).
|
||||
*/
|
||||
|
||||
while (!BN_is_zero(B)) {
|
||||
BIGNUM *tmp;
|
||||
|
||||
/*-
|
||||
* 0 < B < A,
|
||||
* (*) -sign*X*a == B (mod |n|),
|
||||
* sign*Y*a == A (mod |n|)
|
||||
*/
|
||||
|
||||
/*
|
||||
* Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
|
||||
* BN_div_no_branch will be called eventually.
|
||||
*/
|
||||
{
|
||||
BIGNUM local_A;
|
||||
bn_init(&local_A);
|
||||
BN_with_flags(&local_A, A, BN_FLG_CONSTTIME);
|
||||
|
||||
/* (D, M) := (A/B, A%B) ... */
|
||||
if (!BN_div(D, M, &local_A, B, ctx))
|
||||
goto err;
|
||||
/* Ensure local_A goes out of scope before any further use of A */
|
||||
}
|
||||
|
||||
/*-
|
||||
* Now
|
||||
* A = D*B + M;
|
||||
* thus we have
|
||||
* (**) sign*Y*a == D*B + M (mod |n|).
|
||||
*/
|
||||
|
||||
tmp = A; /* keep the BIGNUM object, the value does not
|
||||
* matter */
|
||||
|
||||
/* (A, B) := (B, A mod B) ... */
|
||||
A = B;
|
||||
B = M;
|
||||
/* ... so we have 0 <= B < A again */
|
||||
|
||||
/*-
|
||||
* Since the former M is now B and the former B is now A,
|
||||
* (**) translates into
|
||||
* sign*Y*a == D*A + B (mod |n|),
|
||||
* i.e.
|
||||
* sign*Y*a - D*A == B (mod |n|).
|
||||
* Similarly, (*) translates into
|
||||
* -sign*X*a == A (mod |n|).
|
||||
*
|
||||
* Thus,
|
||||
* sign*Y*a + D*sign*X*a == B (mod |n|),
|
||||
* i.e.
|
||||
* sign*(Y + D*X)*a == B (mod |n|).
|
||||
*
|
||||
* So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
|
||||
* -sign*X*a == B (mod |n|),
|
||||
* sign*Y*a == A (mod |n|).
|
||||
* Note that X and Y stay non-negative all the time.
|
||||
*/
|
||||
|
||||
if (!BN_mul(tmp, D, X, ctx))
|
||||
goto err;
|
||||
if (!BN_add(tmp, tmp, Y))
|
||||
goto err;
|
||||
|
||||
M = Y; /* keep the BIGNUM object, the value does not
|
||||
* matter */
|
||||
Y = X;
|
||||
X = tmp;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
/*-
|
||||
* The while loop (Euclid's algorithm) ends when
|
||||
* A == gcd(a,n);
|
||||
* we have
|
||||
* sign*Y*a == A (mod |n|),
|
||||
* where Y is non-negative.
|
||||
*/
|
||||
|
||||
if (sign < 0) {
|
||||
if (!BN_sub(Y, n, Y))
|
||||
goto err;
|
||||
}
|
||||
/* Now Y*a == A (mod |n|). */
|
||||
|
||||
if (BN_is_one(A)) {
|
||||
/* Y*a == 1 (mod |n|) */
|
||||
if (!Y->neg && BN_ucmp(Y, n) < 0) {
|
||||
if (!BN_copy(R, Y))
|
||||
goto err;
|
||||
} else {
|
||||
if (!BN_nnmod(R, Y, n, ctx))
|
||||
goto err;
|
||||
}
|
||||
} else {
|
||||
BNerr(BN_F_BN_MOD_INVERSE_NO_BRANCH, BN_R_NO_INVERSE);
|
||||
goto err;
|
||||
}
|
||||
ret = R;
|
||||
err:
|
||||
if ((ret == NULL) && (in == NULL))
|
||||
BN_free(R);
|
||||
BN_CTX_end(ctx);
|
||||
bn_check_top(ret);
|
||||
return ret;
|
||||
}
|
1166
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_gf2m.c
vendored
Normal file
1166
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_gf2m.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
199
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_intern.c
vendored
Normal file
199
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_intern.c
vendored
Normal file
|
@ -0,0 +1,199 @@
|
|||
/*
|
||||
* Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
/*
|
||||
* Determine the modified width-(w+1) Non-Adjacent Form (wNAF) of 'scalar'.
|
||||
* This is an array r[] of values that are either zero or odd with an
|
||||
* absolute value less than 2^w satisfying
|
||||
* scalar = \sum_j r[j]*2^j
|
||||
* where at most one of any w+1 consecutive digits is non-zero
|
||||
* with the exception that the most significant digit may be only
|
||||
* w-1 zeros away from that next non-zero digit.
|
||||
*/
|
||||
signed char *bn_compute_wNAF(const BIGNUM *scalar, int w, size_t *ret_len)
|
||||
{
|
||||
int window_val;
|
||||
signed char *r = NULL;
|
||||
int sign = 1;
|
||||
int bit, next_bit, mask;
|
||||
size_t len = 0, j;
|
||||
|
||||
if (BN_is_zero(scalar)) {
|
||||
r = OPENSSL_malloc(1);
|
||||
if (r == NULL) {
|
||||
BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_MALLOC_FAILURE);
|
||||
goto err;
|
||||
}
|
||||
r[0] = 0;
|
||||
*ret_len = 1;
|
||||
return r;
|
||||
}
|
||||
|
||||
if (w <= 0 || w > 7) { /* 'signed char' can represent integers with
|
||||
* absolute values less than 2^7 */
|
||||
BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR);
|
||||
goto err;
|
||||
}
|
||||
bit = 1 << w; /* at most 128 */
|
||||
next_bit = bit << 1; /* at most 256 */
|
||||
mask = next_bit - 1; /* at most 255 */
|
||||
|
||||
if (BN_is_negative(scalar)) {
|
||||
sign = -1;
|
||||
}
|
||||
|
||||
if (scalar->d == NULL || scalar->top == 0) {
|
||||
BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR);
|
||||
goto err;
|
||||
}
|
||||
|
||||
len = BN_num_bits(scalar);
|
||||
r = OPENSSL_malloc(len + 1); /*
|
||||
* Modified wNAF may be one digit longer than binary representation
|
||||
* (*ret_len will be set to the actual length, i.e. at most
|
||||
* BN_num_bits(scalar) + 1)
|
||||
*/
|
||||
if (r == NULL) {
|
||||
BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_MALLOC_FAILURE);
|
||||
goto err;
|
||||
}
|
||||
window_val = scalar->d[0] & mask;
|
||||
j = 0;
|
||||
while ((window_val != 0) || (j + w + 1 < len)) { /* if j+w+1 >= len,
|
||||
* window_val will not
|
||||
* increase */
|
||||
int digit = 0;
|
||||
|
||||
/* 0 <= window_val <= 2^(w+1) */
|
||||
|
||||
if (window_val & 1) {
|
||||
/* 0 < window_val < 2^(w+1) */
|
||||
|
||||
if (window_val & bit) {
|
||||
digit = window_val - next_bit; /* -2^w < digit < 0 */
|
||||
|
||||
#if 1 /* modified wNAF */
|
||||
if (j + w + 1 >= len) {
|
||||
/*
|
||||
* Special case for generating modified wNAFs:
|
||||
* no new bits will be added into window_val,
|
||||
* so using a positive digit here will decrease
|
||||
* the total length of the representation
|
||||
*/
|
||||
|
||||
digit = window_val & (mask >> 1); /* 0 < digit < 2^w */
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
digit = window_val; /* 0 < digit < 2^w */
|
||||
}
|
||||
|
||||
if (digit <= -bit || digit >= bit || !(digit & 1)) {
|
||||
BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR);
|
||||
goto err;
|
||||
}
|
||||
|
||||
window_val -= digit;
|
||||
|
||||
/*
|
||||
* now window_val is 0 or 2^(w+1) in standard wNAF generation;
|
||||
* for modified window NAFs, it may also be 2^w
|
||||
*/
|
||||
if (window_val != 0 && window_val != next_bit
|
||||
&& window_val != bit) {
|
||||
BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
r[j++] = sign * digit;
|
||||
|
||||
window_val >>= 1;
|
||||
window_val += bit * BN_is_bit_set(scalar, j + w);
|
||||
|
||||
if (window_val > next_bit) {
|
||||
BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
if (j > len + 1) {
|
||||
BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR);
|
||||
goto err;
|
||||
}
|
||||
*ret_len = j;
|
||||
return r;
|
||||
|
||||
err:
|
||||
OPENSSL_free(r);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int bn_get_top(const BIGNUM *a)
|
||||
{
|
||||
return a->top;
|
||||
}
|
||||
|
||||
int bn_get_dmax(const BIGNUM *a)
|
||||
{
|
||||
return a->dmax;
|
||||
}
|
||||
|
||||
/*
 * Zero the words of a->d from index a->top up to (but not including)
 * a->dmax.  Words below a->top, and the |top| field itself, are left
 * untouched.
 */
void bn_set_all_zero(BIGNUM *a)
{
    int idx = a->top;

    while (idx < a->dmax) {
        a->d[idx] = 0;
        idx++;
    }
}
|
||||
|
||||
/*
 * Copy the words of |in| into |out|, a buffer of |size| words, and zero
 * the remainder of |out|.
 *
 * Returns 0 (leaving |out| untouched) when |in| uses more than |size|
 * words, 1 otherwise.
 */
int bn_copy_words(BN_ULONG *out, const BIGNUM *in, int size)
{
    const int used = in->top;

    if (used > size)
        return 0;

    /* Clear the whole destination first so the tail is zero-padded. */
    memset(out, 0, sizeof(*out) * size);

    if (in->d == NULL)
        return 1;

    memcpy(out, in->d, sizeof(*out) * used);
    return 1;
}
|
||||
|
||||
BN_ULONG *bn_get_words(const BIGNUM *a)
|
||||
{
|
||||
return a->d;
|
||||
}
|
||||
|
||||
/*
 * Make |a| refer directly to the caller-supplied array |words| of |size|
 * words.  The array is not copied.  |a| becomes non-negative, is marked
 * with BN_FLG_STATIC_DATA, and is then passed through bn_correct_top().
 */
void bn_set_static_words(BIGNUM *a, const BN_ULONG *words, int size)
{
    /*
     * Dropping the |const| qualifier here is compensated for by the
     * BN_FLG_STATIC_DATA flag, which effectively means "read-only data".
     */
    a->d = (BN_ULONG *)words;
    a->top = size;
    a->dmax = size;
    a->neg = 0;
    a->flags = a->flags | BN_FLG_STATIC_DATA;
    bn_correct_top(a);
}
|
||||
|
||||
/*
 * Set |a| to the value held in the |num_words| words at |words| by copying
 * them into a->d (grown with bn_wexpand() as needed).
 *
 * Returns 1 on success.  Returns 0, after raising ERR_R_MALLOC_FAILURE,
 * when bn_wexpand() fails.
 */
int bn_set_words(BIGNUM *a, const BN_ULONG *words, int num_words)
{
    if (bn_wexpand(a, num_words) != NULL) {
        memcpy(a->d, words, sizeof(BN_ULONG) * num_words);
        a->top = num_words;
        bn_correct_top(a);
        return 1;
    }

    BNerr(BN_F_BN_SET_WORDS, ERR_R_MALLOC_FAILURE);
    return 0;
}
|
140
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_kron.c
vendored
Normal file
140
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_kron.c
vendored
Normal file
|
@ -0,0 +1,140 @@
|
|||
/*
|
||||
* Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
/* Least significant word of n; yields 0 when n has no words in use. */
#define BN_lsw(n) (((n)->top != 0) ? (n)->d[0] : (BN_ULONG) 0)
|
||||
|
||||
/* Returns -2 for errors because both -1 and 0 are valid results. */
|
||||
int BN_kronecker(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
|
||||
{
|
||||
int i;
|
||||
int ret = -2; /* avoid 'uninitialized' warning */
|
||||
int err = 0;
|
||||
BIGNUM *A, *B, *tmp;
|
||||
/*-
|
||||
* In 'tab', only odd-indexed entries are relevant:
|
||||
* For any odd BIGNUM n,
|
||||
* tab[BN_lsw(n) & 7]
|
||||
* is $(-1)^{(n^2-1)/8}$ (using TeX notation).
|
||||
* Note that the sign of n does not matter.
|
||||
*/
|
||||
static const int tab[8] = { 0, 1, 0, -1, 0, -1, 0, 1 };
|
||||
|
||||
bn_check_top(a);
|
||||
bn_check_top(b);
|
||||
|
||||
BN_CTX_start(ctx);
|
||||
A = BN_CTX_get(ctx);
|
||||
B = BN_CTX_get(ctx);
|
||||
if (B == NULL)
|
||||
goto end;
|
||||
|
||||
err = !BN_copy(A, a);
|
||||
if (err)
|
||||
goto end;
|
||||
err = !BN_copy(B, b);
|
||||
if (err)
|
||||
goto end;
|
||||
|
||||
/*
|
||||
* Kronecker symbol, implemented according to Henri Cohen,
|
||||
* "A Course in Computational Algebraic Number Theory"
|
||||
* (algorithm 1.4.10).
|
||||
*/
|
||||
|
||||
/* Cohen's step 1: */
|
||||
|
||||
if (BN_is_zero(B)) {
|
||||
ret = BN_abs_is_word(A, 1);
|
||||
goto end;
|
||||
}
|
||||
|
||||
/* Cohen's step 2: */
|
||||
|
||||
if (!BN_is_odd(A) && !BN_is_odd(B)) {
|
||||
ret = 0;
|
||||
goto end;
|
||||
}
|
||||
|
||||
/* now B is non-zero */
|
||||
i = 0;
|
||||
while (!BN_is_bit_set(B, i))
|
||||
i++;
|
||||
err = !BN_rshift(B, B, i);
|
||||
if (err)
|
||||
goto end;
|
||||
if (i & 1) {
|
||||
/* i is odd */
|
||||
/* (thus B was even, thus A must be odd!) */
|
||||
|
||||
/* set 'ret' to $(-1)^{(A^2-1)/8}$ */
|
||||
ret = tab[BN_lsw(A) & 7];
|
||||
} else {
|
||||
/* i is even */
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
if (B->neg) {
|
||||
B->neg = 0;
|
||||
if (A->neg)
|
||||
ret = -ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* now B is positive and odd, so what remains to be done is to compute
|
||||
* the Jacobi symbol (A/B) and multiply it by 'ret'
|
||||
*/
|
||||
|
||||
while (1) {
|
||||
/* Cohen's step 3: */
|
||||
|
||||
/* B is positive and odd */
|
||||
|
||||
if (BN_is_zero(A)) {
|
||||
ret = BN_is_one(B) ? ret : 0;
|
||||
goto end;
|
||||
}
|
||||
|
||||
/* now A is non-zero */
|
||||
i = 0;
|
||||
while (!BN_is_bit_set(A, i))
|
||||
i++;
|
||||
err = !BN_rshift(A, A, i);
|
||||
if (err)
|
||||
goto end;
|
||||
if (i & 1) {
|
||||
/* i is odd */
|
||||
/* multiply 'ret' by $(-1)^{(B^2-1)/8}$ */
|
||||
ret = ret * tab[BN_lsw(B) & 7];
|
||||
}
|
||||
|
||||
/* Cohen's step 4: */
|
||||
/* multiply 'ret' by $(-1)^{(A-1)(B-1)/4}$ */
|
||||
if ((A->neg ? ~BN_lsw(A) : BN_lsw(A)) & BN_lsw(B) & 2)
|
||||
ret = -ret;
|
||||
|
||||
/* (A, B) := (B mod |A|, |A|) */
|
||||
err = !BN_nnmod(B, B, A, ctx);
|
||||
if (err)
|
||||
goto end;
|
||||
tmp = A;
|
||||
A = B;
|
||||
B = tmp;
|
||||
tmp->neg = 0;
|
||||
}
|
||||
end:
|
||||
BN_CTX_end(ctx);
|
||||
if (err)
|
||||
return -2;
|
||||
else
|
||||
return ret;
|
||||
}
|
671
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_lcl.h
vendored
Normal file
671
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_lcl.h
vendored
Normal file
|
@ -0,0 +1,671 @@
|
|||
/*
|
||||
* Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#ifndef HEADER_BN_LCL_H
|
||||
# define HEADER_BN_LCL_H
|
||||
|
||||
/*
|
||||
* The EDK2 build doesn't use bn_conf.h; it sets THIRTY_TWO_BIT or
|
||||
* SIXTY_FOUR_BIT in its own environment since it doesn't re-run our
|
||||
* Configure script and needs to support both 32-bit and 64-bit.
|
||||
*/
|
||||
# include <openssl/opensslconf.h>
|
||||
|
||||
# if !defined(OPENSSL_SYS_UEFI)
|
||||
# include "internal/bn_conf.h"
|
||||
# endif
|
||||
|
||||
# include "internal/bn_int.h"
|
||||
|
||||
/*
|
||||
* These preprocessor symbols control various aspects of the bignum headers
|
||||
* and library code. They're not defined by any "normal" configuration, as
|
||||
* they are intended for development and testing purposes. NB: defining all
|
||||
* three can be useful for debugging application code as well as openssl
|
||||
* itself. BN_DEBUG - turn on various debugging alterations to the bignum
|
||||
* code BN_DEBUG_RAND - uses random poisoning of unused words to trip up
|
||||
* mismanagement of bignum internals. You must also define BN_DEBUG.
|
||||
*/
|
||||
/* #define BN_DEBUG */
|
||||
/* #define BN_DEBUG_RAND */
|
||||
|
||||
# ifndef OPENSSL_SMALL_FOOTPRINT
|
||||
# define BN_MUL_COMBA
|
||||
# define BN_SQR_COMBA
|
||||
# define BN_RECURSION
|
||||
# endif
|
||||
|
||||
/*
|
||||
* This next option uses the C libraries (2 word)/(1 word) function. If it is
|
||||
* not defined, I use my C version (which is slower). The reason for this
|
||||
* flag is that when the particular C compiler library routine is used, and
|
||||
* the library is linked with a different compiler, the library is missing.
|
||||
* This mostly happens when the library is built with gcc and then linked
|
||||
* using normal cc. This would be a common occurrence because gcc normally
|
||||
* produces code that is 2 times faster than system compilers for the big
|
||||
* number stuff. For machines with only one compiler (or shared libraries),
|
||||
* this should be on. Again this is only really a problem on machines using
|
||||
* "long long's", are 32bit, and are not using my assembler code.
|
||||
*/
|
||||
# if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) || \
|
||||
defined(OPENSSL_SYS_WIN32) || defined(linux)
|
||||
# define BN_DIV2W
|
||||
# endif
|
||||
|
||||
/*
|
||||
* 64-bit processor with LP64 ABI
|
||||
*/
|
||||
# ifdef SIXTY_FOUR_BIT_LONG
|
||||
# define BN_ULLONG unsigned long long
|
||||
# define BN_BITS4 32
|
||||
# define BN_MASK2 (0xffffffffffffffffL)
|
||||
# define BN_MASK2l (0xffffffffL)
|
||||
# define BN_MASK2h (0xffffffff00000000L)
|
||||
# define BN_MASK2h1 (0xffffffff80000000L)
|
||||
# define BN_DEC_CONV (10000000000000000000UL)
|
||||
# define BN_DEC_NUM 19
|
||||
# define BN_DEC_FMT1 "%lu"
|
||||
# define BN_DEC_FMT2 "%019lu"
|
||||
# endif
|
||||
|
||||
/*
|
||||
* 64-bit processor other than LP64 ABI
|
||||
*/
|
||||
# ifdef SIXTY_FOUR_BIT
|
||||
# undef BN_LLONG
|
||||
# undef BN_ULLONG
|
||||
# define BN_BITS4 32
|
||||
# define BN_MASK2 (0xffffffffffffffffLL)
|
||||
# define BN_MASK2l (0xffffffffL)
|
||||
# define BN_MASK2h (0xffffffff00000000LL)
|
||||
# define BN_MASK2h1 (0xffffffff80000000LL)
|
||||
# define BN_DEC_CONV (10000000000000000000ULL)
|
||||
# define BN_DEC_NUM 19
|
||||
# define BN_DEC_FMT1 "%llu"
|
||||
# define BN_DEC_FMT2 "%019llu"
|
||||
# endif
|
||||
|
||||
# ifdef THIRTY_TWO_BIT
|
||||
# ifdef BN_LLONG
|
||||
# if defined(_WIN32) && !defined(__GNUC__)
|
||||
# define BN_ULLONG unsigned __int64
|
||||
# else
|
||||
# define BN_ULLONG unsigned long long
|
||||
# endif
|
||||
# endif
|
||||
# define BN_BITS4 16
|
||||
# define BN_MASK2 (0xffffffffL)
|
||||
# define BN_MASK2l (0xffff)
|
||||
# define BN_MASK2h1 (0xffff8000L)
|
||||
# define BN_MASK2h (0xffff0000L)
|
||||
# define BN_DEC_CONV (1000000000L)
|
||||
# define BN_DEC_NUM 9
|
||||
# define BN_DEC_FMT1 "%u"
|
||||
# define BN_DEC_FMT2 "%09u"
|
||||
# endif
|
||||
|
||||
|
||||
/*-
|
||||
* Bignum consistency macros
|
||||
* There is one "API" macro, bn_fix_top(), for stripping leading zeroes from
|
||||
* bignum data after direct manipulations on the data. There is also an
|
||||
* "internal" macro, bn_check_top(), for verifying that there are no leading
|
||||
* zeroes. Unfortunately, some auditing is required due to the fact that
|
||||
* bn_fix_top() has become an overabused duct-tape because bignum data is
|
||||
* occasionally passed around in an inconsistent state. So the following
|
||||
* changes have been made to sort this out;
|
||||
* - bn_fix_top()s implementation has been moved to bn_correct_top()
|
||||
* - if BN_DEBUG isn't defined, bn_fix_top() maps to bn_correct_top(), and
|
||||
* bn_check_top() is as before.
|
||||
* - if BN_DEBUG *is* defined;
|
||||
* - bn_check_top() tries to pollute unused words even if the bignum 'top' is
|
||||
* consistent. (ed: only if BN_DEBUG_RAND is defined)
|
||||
* - bn_fix_top() maps to bn_check_top() rather than "fixing" anything.
|
||||
* The idea is to have debug builds flag up inconsistent bignums when they
|
||||
* occur. If that occurs in a bn_fix_top(), we examine the code in question; if
|
||||
* the use of bn_fix_top() was appropriate (ie. it follows directly after code
|
||||
* that manipulates the bignum) it is converted to bn_correct_top(), and if it
|
||||
* was not appropriate, we convert it permanently to bn_check_top() and track
|
||||
* down the cause of the bug. Eventually, no internal code should be using the
|
||||
* bn_fix_top() macro. External applications and libraries should try this with
|
||||
* their own code too, both in terms of building against the openssl headers
|
||||
* with BN_DEBUG defined *and* linking with a version of OpenSSL built with it
|
||||
* defined. This not only improves external code, it provides more test
|
||||
* coverage for openssl's own code.
|
||||
*/
|
||||
|
||||
# ifdef BN_DEBUG
|
||||
/*
|
||||
* The new BN_FLG_FIXED_TOP flag marks vectors that were not treated with
|
||||
* bn_correct_top, in other words such vectors are permitted to have zeros
|
||||
* in most significant limbs. Such vectors are used internally to achieve
|
||||
* execution time invariance for critical operations with private keys.
|
||||
* It's a BN_DEBUG-only flag, because user applications are not supposed to
|
||||
* observe it anyway. Moreover, optimizing compiler would actually remove
|
||||
* all operations manipulating the bit in question in non-BN_DEBUG build.
|
||||
*/
|
||||
# define BN_FLG_FIXED_TOP 0x10000
|
||||
# ifdef BN_DEBUG_RAND
|
||||
# define bn_pollute(a) \
|
||||
do { \
|
||||
const BIGNUM *_bnum1 = (a); \
|
||||
if (_bnum1->top < _bnum1->dmax) { \
|
||||
unsigned char _tmp_char; \
|
||||
/* We cast away const without the compiler knowing, any \
|
||||
* *genuinely* constant variables that aren't mutable \
|
||||
* wouldn't be constructed with top!=dmax. */ \
|
||||
BN_ULONG *_not_const; \
|
||||
memcpy(&_not_const, &_bnum1->d, sizeof(_not_const)); \
|
||||
RAND_bytes(&_tmp_char, 1); /* Debug only - safe to ignore error return */\
|
||||
memset(_not_const + _bnum1->top, _tmp_char, \
|
||||
sizeof(*_not_const) * (_bnum1->dmax - _bnum1->top)); \
|
||||
} \
|
||||
} while(0)
|
||||
# else
|
||||
# define bn_pollute(a)
|
||||
# endif
|
||||
# define bn_check_top(a) \
|
||||
do { \
|
||||
const BIGNUM *_bnum2 = (a); \
|
||||
if (_bnum2 != NULL) { \
|
||||
int _top = _bnum2->top; \
|
||||
(void)ossl_assert((_top == 0 && !_bnum2->neg) || \
|
||||
(_top && ((_bnum2->flags & BN_FLG_FIXED_TOP) \
|
||||
|| _bnum2->d[_top - 1] != 0))); \
|
||||
bn_pollute(_bnum2); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
# define bn_fix_top(a) bn_check_top(a)
|
||||
|
||||
/*
 * Round |bits| up to a whole number of BN_BITS2-bit words and hand the
 * result to bn_wcheck_size().  |bits| is parenthesized so that an
 * expression argument (e.g. "x | 1") is evaluated before the rounding.
 */
#  define bn_check_size(bn, bits) bn_wcheck_size(bn, (((bits)+BN_BITS2-1)/BN_BITS2))
|
||||
# define bn_wcheck_size(bn, words) \
|
||||
do { \
|
||||
const BIGNUM *_bnum2 = (bn); \
|
||||
assert((words) <= (_bnum2)->dmax && \
|
||||
(words) >= (_bnum2)->top); \
|
||||
/* avoid unused variable warning with NDEBUG */ \
|
||||
(void)(_bnum2); \
|
||||
} while(0)
|
||||
|
||||
# else /* !BN_DEBUG */
|
||||
|
||||
# define BN_FLG_FIXED_TOP 0
|
||||
# define bn_pollute(a)
|
||||
# define bn_check_top(a)
|
||||
# define bn_fix_top(a) bn_correct_top(a)
|
||||
# define bn_check_size(bn, bits)
|
||||
# define bn_wcheck_size(bn, words)
|
||||
|
||||
# endif
|
||||
|
||||
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
|
||||
BN_ULONG w);
|
||||
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
|
||||
void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
|
||||
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d);
|
||||
BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
||||
int num);
|
||||
BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
||||
int num);
|
||||
|
||||
struct bignum_st {
|
||||
BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit
|
||||
* chunks. */
|
||||
int top; /* Index of last used d +1. */
|
||||
/* The next are internal book keeping for bn_expand. */
|
||||
int dmax; /* Size of the d array. */
|
||||
int neg; /* one if the number is negative */
|
||||
int flags;
|
||||
};
|
||||
|
||||
/* Used for montgomery multiplication */
|
||||
struct bn_mont_ctx_st {
|
||||
int ri; /* number of bits in R */
|
||||
BIGNUM RR; /* used to convert to montgomery form,
|
||||
possibly zero-padded */
|
||||
BIGNUM N; /* The modulus */
|
||||
BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1 (Ni is only
|
||||
* stored for bignum algorithm) */
|
||||
BN_ULONG n0[2]; /* least significant word(s) of Ni; (type
|
||||
* changed with 0.9.9, was "BN_ULONG n0;"
|
||||
* before) */
|
||||
int flags;
|
||||
};
|
||||
|
||||
/*
|
||||
* Used for reciprocal division/mod functions It cannot be shared between
|
||||
* threads
|
||||
*/
|
||||
struct bn_recp_ctx_st {
|
||||
BIGNUM N; /* the divisor */
|
||||
BIGNUM Nr; /* the reciprocal */
|
||||
int num_bits;
|
||||
int shift;
|
||||
int flags;
|
||||
};
|
||||
|
||||
/* Used for slow "generation" functions. */
|
||||
struct bn_gencb_st {
|
||||
unsigned int ver; /* To handle binary (in)compatibility */
|
||||
void *arg; /* callback-specific data */
|
||||
union {
|
||||
/* if (ver==1) - handles old style callbacks */
|
||||
void (*cb_1) (int, int, void *);
|
||||
/* if (ver==2) - new callback style */
|
||||
int (*cb_2) (int, int, BN_GENCB *);
|
||||
} cb;
|
||||
};
|
||||
|
||||
/*-
|
||||
* BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
|
||||
*
|
||||
*
|
||||
* For window size 'w' (w >= 2) and a random 'b' bits exponent,
|
||||
* the number of multiplications is a constant plus on average
|
||||
*
|
||||
* 2^(w-1) + (b-w)/(w+1);
|
||||
*
|
||||
* here 2^(w-1) is for precomputing the table (we actually need
|
||||
* entries only for windows that have the lowest bit set), and
|
||||
* (b-w)/(w+1) is an approximation for the expected number of
|
||||
* w-bit windows, not counting the first one.
|
||||
*
|
||||
* Thus we should use
|
||||
*
|
||||
* w >= 6 if b > 671
|
||||
* w = 5 if 671 > b > 239
|
||||
* w = 4 if 239 > b > 79
|
||||
* w = 3 if 79 > b > 23
|
||||
* w <= 2 if 23 > b
|
||||
*
|
||||
* (with draws in between). Very small exponents are often selected
|
||||
* with low Hamming weight, so we use w = 1 for b <= 23.
|
||||
*/
|
||||
/* Window size for a |b|-bit exponent, thresholds listed in ascending order. */
# define BN_window_bits_for_exponent_size(b) \
                ((b) <=  23 ? 1 : \
                 (b) <=  79 ? 3 : \
                 (b) <= 239 ? 4 : \
                 (b) <= 671 ? 5 : 6)
|
||||
|
||||
/*
|
||||
* BN_mod_exp_mont_consttime is based on the assumption that the L1 data cache
|
||||
* line width of the target processor is at least the following value.
|
||||
*/
|
||||
# define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH ( 64 )
|
||||
# define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
|
||||
|
||||
/*
|
||||
* Window sizes optimized for fixed window size modular exponentiation
|
||||
* algorithm (BN_mod_exp_mont_consttime). To achieve the security goals of
|
||||
* BN_mod_exp_mont_consttime, the maximum size of the window must not exceed
|
||||
* log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH). Window size thresholds are
|
||||
* defined for cache line sizes of 32 and 64, cache line sizes where
|
||||
* log_2(32)=5 and log_2(64)=6 respectively. A window size of 7 should only be
|
||||
* used on processors that have a 128 byte or greater cache line size.
|
||||
*/
|
||||
/*
 * Fixed-window size for a |b|-bit exponent, selected by the configured
 * minimum cache line width (64 or 32).  Thresholds are listed in ascending
 * order; the largest window each variant can return is published as
 * BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE.
 */
# if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64

#  define BN_window_bits_for_ctime_exponent_size(b) \
                ((b) <=  22 ? 1 : \
                 (b) <=  89 ? 3 : \
                 (b) <= 306 ? 4 : \
                 (b) <= 937 ? 5 : 6)
#  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE    (6)

# elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32

#  define BN_window_bits_for_ctime_exponent_size(b) \
                ((b) <=  22 ? 1 : \
                 (b) <=  89 ? 3 : \
                 (b) <= 306 ? 4 : 5)
#  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE    (5)

# endif
|
||||
|
||||
/* Pentium pro 16,16,16,32,64 */
|
||||
/* Alpha 16,16,16,16,64 */
|
||||
# define BN_MULL_SIZE_NORMAL (16)/* 32 */
|
||||
# define BN_MUL_RECURSIVE_SIZE_NORMAL (16)/* 32 less than */
|
||||
# define BN_SQR_RECURSIVE_SIZE_NORMAL (16)/* 32 */
|
||||
# define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32)/* 32 */
|
||||
# define BN_MONT_CTX_SET_SIZE_WORD (64)/* 32 */
|
||||
|
||||
/*
|
||||
* 2011-02-22 SMS. In various places, a size_t variable or a type cast to
|
||||
* size_t was used to perform integer-only operations on pointers. This
|
||||
* failed on VMS with 64-bit pointers (CC /POINTER_SIZE = 64) because size_t
|
||||
* is still only 32 bits. What's needed in these cases is an integer type
|
||||
* with the same size as a pointer, which size_t is not certain to be. The
|
||||
* only fix here is VMS-specific.
|
||||
*/
|
||||
# if defined(OPENSSL_SYS_VMS)
|
||||
# if __INITIAL_POINTER_SIZE == 64
|
||||
# define PTR_SIZE_INT long long
|
||||
# else /* __INITIAL_POINTER_SIZE == 64 */
|
||||
# define PTR_SIZE_INT int
|
||||
# endif /* __INITIAL_POINTER_SIZE == 64 [else] */
|
||||
# elif !defined(PTR_SIZE_INT) /* defined(OPENSSL_SYS_VMS) */
|
||||
# define PTR_SIZE_INT size_t
|
||||
# endif /* defined(OPENSSL_SYS_VMS) [else] */
|
||||
|
||||
# if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
|
||||
/*
|
||||
* BN_UMULT_HIGH section.
|
||||
* If the compiler doesn't support 2*N integer type, then you have to
|
||||
* replace every N*N multiplication with 4 (N/2)*(N/2) accompanied by some
|
||||
* shifts and additions which unavoidably results in severe performance
|
||||
* penalties. Of course provided that the hardware is capable of producing
|
||||
* 2*N result... That's when you normally start considering assembler
|
||||
* implementation. However! It should be pointed out that some CPUs (e.g.,
|
||||
* PowerPC, Alpha, and IA-64) provide *separate* instruction calculating
|
||||
* the upper half of the product placing the result into a general
|
||||
* purpose register. Now *if* the compiler supports inline assembler,
|
||||
* then it's not impossible to implement the "bignum" routines (and have
|
||||
* the compiler optimize 'em) exhibiting "native" performance in C. That's
|
||||
* what BN_UMULT_HIGH macro is about:-) Note that more recent compilers do
|
||||
* support 2*64 integer type, which is also used here.
|
||||
*/
|
||||
# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16 && \
|
||||
(defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
|
||||
# define BN_UMULT_HIGH(a,b) (((__uint128_t)(a)*(b))>>64)
|
||||
# define BN_UMULT_LOHI(low,high,a,b) ({ \
|
||||
__uint128_t ret=(__uint128_t)(a)*(b); \
|
||||
(high)=ret>>64; (low)=ret; })
|
||||
# elif defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
|
||||
# if defined(__DECC)
|
||||
# include <c_asm.h>
|
||||
# define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
|
||||
# elif defined(__GNUC__) && __GNUC__>=2
|
||||
# define BN_UMULT_HIGH(a,b) ({ \
|
||||
register BN_ULONG ret; \
|
||||
asm ("umulh %1,%2,%0" \
|
||||
: "=r"(ret) \
|
||||
: "r"(a), "r"(b)); \
|
||||
ret; })
|
||||
# endif /* compiler */
|
||||
# elif defined(_ARCH_PPC64) && defined(SIXTY_FOUR_BIT_LONG)
|
||||
# if defined(__GNUC__) && __GNUC__>=2
|
||||
# define BN_UMULT_HIGH(a,b) ({ \
|
||||
register BN_ULONG ret; \
|
||||
asm ("mulhdu %0,%1,%2" \
|
||||
: "=r"(ret) \
|
||||
: "r"(a), "r"(b)); \
|
||||
ret; })
|
||||
# endif /* compiler */
|
||||
# elif (defined(__x86_64) || defined(__x86_64__)) && \
|
||||
(defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
|
||||
# if defined(__GNUC__) && __GNUC__>=2
|
||||
# define BN_UMULT_HIGH(a,b) ({ \
|
||||
register BN_ULONG ret,discard; \
|
||||
asm ("mulq %3" \
|
||||
: "=a"(discard),"=d"(ret) \
|
||||
: "a"(a), "g"(b) \
|
||||
: "cc"); \
|
||||
ret; })
|
||||
# define BN_UMULT_LOHI(low,high,a,b) \
|
||||
asm ("mulq %3" \
|
||||
: "=a"(low),"=d"(high) \
|
||||
: "a"(a),"g"(b) \
|
||||
: "cc");
|
||||
# endif
|
||||
# elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT)
|
||||
# if defined(_MSC_VER) && _MSC_VER>=1400
|
||||
unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b);
|
||||
unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
|
||||
unsigned __int64 *h);
|
||||
# pragma intrinsic(__umulh,_umul128)
|
||||
# define BN_UMULT_HIGH(a,b) __umulh((a),(b))
|
||||
# define BN_UMULT_LOHI(low,high,a,b) ((low)=_umul128((a),(b),&(high)))
|
||||
# endif
|
||||
# elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
|
||||
# if defined(__GNUC__) && __GNUC__>=2
|
||||
# define BN_UMULT_HIGH(a,b) ({ \
|
||||
register BN_ULONG ret; \
|
||||
asm ("dmultu %1,%2" \
|
||||
: "=h"(ret) \
|
||||
: "r"(a), "r"(b) : "l"); \
|
||||
ret; })
|
||||
# define BN_UMULT_LOHI(low,high,a,b) \
|
||||
asm ("dmultu %2,%3" \
|
||||
: "=l"(low),"=h"(high) \
|
||||
: "r"(a), "r"(b));
|
||||
# endif
|
||||
# elif defined(__aarch64__) && defined(SIXTY_FOUR_BIT_LONG)
|
||||
# if defined(__GNUC__) && __GNUC__>=2
|
||||
# define BN_UMULT_HIGH(a,b) ({ \
|
||||
register BN_ULONG ret; \
|
||||
asm ("umulh %0,%1,%2" \
|
||||
: "=r"(ret) \
|
||||
: "r"(a), "r"(b)); \
|
||||
ret; })
|
||||
# endif
|
||||
# endif /* cpu */
|
||||
# endif /* OPENSSL_NO_ASM */
|
||||
|
||||
/*
 * bn_clear_top2max(a): when BN_DEBUG_RAND is defined, zero the words of
 * (a)->d from index (a)->top up to (but not including) (a)->dmax; otherwise
 * it expands to nothing.
 *
 * The cursor starts at d + top and is post-incremented, so no pointer to
 * d[top - 1] is formed (that lies before the array when top is 0).  The
 * loop condition is "ind > 0" so an inconsistent bignum with dmax < top
 * cannot make it run away.
 */
# ifdef BN_DEBUG_RAND
#  define bn_clear_top2max(a) \
        { \
        int ind = (a)->dmax - (a)->top; \
        BN_ULONG *ftl = (a)->d + (a)->top; \
        for (; ind > 0; ind--) \
                *(ftl++) = 0x0; \
        }
# else
#  define bn_clear_top2max(a)
# endif
|
||||
|
||||
# ifdef BN_LLONG
|
||||
/*******************************************************************
|
||||
* Using the long long type, has to be twice as wide as BN_ULONG...
|
||||
*/
|
||||
# define Lw(t) (((BN_ULONG)(t))&BN_MASK2)
|
||||
# define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
|
||||
|
||||
/*
 * mul_add(r, a, w, c): compute t = w * a + r + c in the double-width
 * BN_ULLONG type, then store the low word of t in r and the high word in c.
 * |w| is parenthesized so the widening cast applies to the whole argument
 * expression rather than only to its first operand.
 */
#  define mul_add(r,a,w,c) { \
        BN_ULLONG t; \
        t=(BN_ULLONG)(w) * (a) + (r) + (c); \
        (r)= Lw(t); \
        (c)= Hw(t); \
        }
|
||||
|
||||
/*
 * mul(r, a, w, c): compute t = w * a + c in the double-width BN_ULLONG
 * type, then store the low word of t in r and the high word in c.
 * |w| is parenthesized so the widening cast applies to the whole argument
 * expression rather than only to its first operand.
 */
#  define mul(r,a,w,c) { \
        BN_ULLONG t; \
        t=(BN_ULLONG)(w) * (a) + (c); \
        (r)= Lw(t); \
        (c)= Hw(t); \
        }
|
||||
|
||||
# define sqr(r0,r1,a) { \
|
||||
BN_ULLONG t; \
|
||||
t=(BN_ULLONG)(a)*(a); \
|
||||
(r0)=Lw(t); \
|
||||
(r1)=Hw(t); \
|
||||
}
|
||||
|
||||
# elif defined(BN_UMULT_LOHI)
|
||||
# define mul_add(r,a,w,c) { \
|
||||
BN_ULONG high,low,ret,tmp=(a); \
|
||||
ret = (r); \
|
||||
BN_UMULT_LOHI(low,high,w,tmp); \
|
||||
ret += (c); \
|
||||
(c) = (ret<(c))?1:0; \
|
||||
(c) += high; \
|
||||
ret += low; \
|
||||
(c) += (ret<low)?1:0; \
|
||||
(r) = ret; \
|
||||
}
|
||||
|
||||
# define mul(r,a,w,c) { \
|
||||
BN_ULONG high,low,ret,ta=(a); \
|
||||
BN_UMULT_LOHI(low,high,w,ta); \
|
||||
ret = low + (c); \
|
||||
(c) = high; \
|
||||
(c) += (ret<low)?1:0; \
|
||||
(r) = ret; \
|
||||
}
|
||||
|
||||
# define sqr(r0,r1,a) { \
|
||||
BN_ULONG tmp=(a); \
|
||||
BN_UMULT_LOHI(r0,r1,tmp,tmp); \
|
||||
}
|
||||
|
||||
# elif defined(BN_UMULT_HIGH)
|
||||
# define mul_add(r,a,w,c) { \
|
||||
BN_ULONG high,low,ret,tmp=(a); \
|
||||
ret = (r); \
|
||||
high= BN_UMULT_HIGH(w,tmp); \
|
||||
ret += (c); \
|
||||
low = (w) * tmp; \
|
||||
(c) = (ret<(c))?1:0; \
|
||||
(c) += high; \
|
||||
ret += low; \
|
||||
(c) += (ret<low)?1:0; \
|
||||
(r) = ret; \
|
||||
}
|
||||
|
||||
# define mul(r,a,w,c) { \
|
||||
BN_ULONG high,low,ret,ta=(a); \
|
||||
low = (w) * ta; \
|
||||
high= BN_UMULT_HIGH(w,ta); \
|
||||
ret = low + (c); \
|
||||
(c) = high; \
|
||||
(c) += (ret<low)?1:0; \
|
||||
(r) = ret; \
|
||||
}
|
||||
|
||||
# define sqr(r0,r1,a) { \
|
||||
BN_ULONG tmp=(a); \
|
||||
(r0) = tmp * tmp; \
|
||||
(r1) = BN_UMULT_HIGH(tmp,tmp); \
|
||||
}
|
||||
|
||||
# else
|
||||
/*************************************************************
|
||||
* No long long type
|
||||
*/
|
||||
|
||||
# define LBITS(a) ((a)&BN_MASK2l)
|
||||
# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
|
||||
# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
|
||||
|
||||
# define LLBITS(a) ((a)&BN_MASKl)
|
||||
# define LHBITS(a) (((a)>>BN_BITS2)&BN_MASKl)
|
||||
# define LL2HBITS(a) ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2)
|
||||
|
||||
# define mul64(l,h,bl,bh) \
|
||||
{ \
|
||||
BN_ULONG m,m1,lt,ht; \
|
||||
\
|
||||
lt=l; \
|
||||
ht=h; \
|
||||
m =(bh)*(lt); \
|
||||
lt=(bl)*(lt); \
|
||||
m1=(bl)*(ht); \
|
||||
ht =(bh)*(ht); \
|
||||
m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
|
||||
ht+=HBITS(m); \
|
||||
m1=L2HBITS(m); \
|
||||
lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
|
||||
(l)=lt; \
|
||||
(h)=ht; \
|
||||
}
|
||||
|
||||
# define sqr64(lo,ho,in) \
|
||||
{ \
|
||||
BN_ULONG l,h,m; \
|
||||
\
|
||||
h=(in); \
|
||||
l=LBITS(h); \
|
||||
h=HBITS(h); \
|
||||
m =(l)*(h); \
|
||||
l*=l; \
|
||||
h*=h; \
|
||||
h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
|
||||
m =(m&BN_MASK2l)<<(BN_BITS4+1); \
|
||||
l=(l+m)&BN_MASK2; if (l < m) h++; \
|
||||
(lo)=l; \
|
||||
(ho)=h; \
|
||||
}
|
||||
|
||||
# define mul_add(r,a,bl,bh,c) { \
|
||||
BN_ULONG l,h; \
|
||||
\
|
||||
h= (a); \
|
||||
l=LBITS(h); \
|
||||
h=HBITS(h); \
|
||||
mul64(l,h,(bl),(bh)); \
|
||||
\
|
||||
/* non-multiply part */ \
|
||||
l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
|
||||
(c)=(r); \
|
||||
l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
|
||||
(c)=h&BN_MASK2; \
|
||||
(r)=l; \
|
||||
}
|
||||
|
||||
# define mul(r,a,bl,bh,c) { \
|
||||
BN_ULONG l,h; \
|
||||
\
|
||||
h= (a); \
|
||||
l=LBITS(h); \
|
||||
h=HBITS(h); \
|
||||
mul64(l,h,(bl),(bh)); \
|
||||
\
|
||||
/* non-multiply part */ \
|
||||
l+=(c); if ((l&BN_MASK2) < (c)) h++; \
|
||||
(c)=h&BN_MASK2; \
|
||||
(r)=l&BN_MASK2; \
|
||||
}
|
||||
# endif /* !BN_LLONG */
|
||||
|
||||
void BN_RECP_CTX_init(BN_RECP_CTX *recp);
|
||||
void BN_MONT_CTX_init(BN_MONT_CTX *ctx);
|
||||
|
||||
void bn_init(BIGNUM *a);
|
||||
void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb);
|
||||
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
|
||||
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
|
||||
void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
|
||||
void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
|
||||
void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a);
|
||||
int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n);
|
||||
int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl);
|
||||
void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
|
||||
int dna, int dnb, BN_ULONG *t);
|
||||
void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
|
||||
int n, int tna, int tnb, BN_ULONG *t);
|
||||
void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t);
|
||||
void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n);
|
||||
void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
|
||||
BN_ULONG *t);
|
||||
BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
|
||||
int cl, int dl);
|
||||
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
||||
const BN_ULONG *np, const BN_ULONG *n0, int num);
|
||||
|
||||
BIGNUM *int_bn_mod_inverse(BIGNUM *in,
|
||||
const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx,
|
||||
int *noinv);
|
||||
|
||||
int bn_probable_prime_dh(BIGNUM *rnd, int bits,
|
||||
const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
|
||||
|
||||
/*
 * Make sure |a| has room for |bits| bits.
 *
 * Returns NULL when |bits| is too large for the word count to be computed
 * without overflowing an int, |a| itself when a->dmax already covers the
 * required number of words, and otherwise the result of bn_expand2().
 */
static ossl_inline BIGNUM *bn_expand(BIGNUM *a, int bits)
{
    int words;

    if (bits > (INT_MAX - BN_BITS2 + 1))
        return NULL;

    /* number of BN_BITS2-bit words needed, rounded up */
    words = (bits + BN_BITS2 - 1) / BN_BITS2;

    return (words <= a->dmax) ? a : bn_expand2(a, words);
}
|
||||
|
||||
#endif
|
964
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_lib.c
vendored
Normal file
964
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_lib.c
vendored
Normal file
|
@ -0,0 +1,964 @@
|
|||
/*
|
||||
* Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
#include <openssl/opensslconf.h>
|
||||
#include "internal/constant_time_locl.h"
|
||||
|
||||
/* This stuff appears to be completely unused, so is deprecated */
#if OPENSSL_API_COMPAT < 0x00908000L
/*-
 * For a 32 bit machine
 * 2 -   4 ==  128
 * 3 -   8 ==  256
 * 4 -  16 ==  512
 * 5 -  32 == 1024
 * 6 -  64 == 2048
 * 7 - 128 == 4096
 * 8 - 256 == 8192
 */
static int bn_limit_bits = 0;
static int bn_limit_num = 8;    /* (1<<bn_limit_bits) */
static int bn_limit_bits_low = 0;
static int bn_limit_num_low = 8; /* (1<<bn_limit_bits_low) */
static int bn_limit_bits_high = 0;
static int bn_limit_num_high = 8; /* (1<<bn_limit_bits_high) */
static int bn_limit_bits_mont = 0;
static int bn_limit_num_mont = 8; /* (1<<bn_limit_bits_mont) */

/*
 * Store one tuning parameter: a negative |value| leaves the pair untouched,
 * anything above the number of value bits in an int is clamped, and
 * |*limit_num| is set to 2^value.
 */
static void bn_set_limit(int value, int *limit_bits, int *limit_num)
{
    const int max_bits = (int)(sizeof(int) * 8) - 1;

    if (value < 0)
        return;
    if (value > max_bits)
        value = max_bits;

    *limit_bits = value;
    /*
     * Shift an unsigned 1: with value == max_bits, "1 << value" on a signed
     * int shifts into the sign bit, which is undefined behaviour.
     */
    *limit_num = (int)(1U << value);
}

/*
 * Set the four legacy tuning parameters. Each argument is an exponent; a
 * negative argument means "leave this parameter unchanged".
 */
void BN_set_params(int mult, int high, int low, int mont)
{
    bn_set_limit(mult, &bn_limit_bits, &bn_limit_num);
    bn_set_limit(high, &bn_limit_bits_high, &bn_limit_num_high);
    bn_set_limit(low, &bn_limit_bits_low, &bn_limit_num_low);
    bn_set_limit(mont, &bn_limit_bits_mont, &bn_limit_num_mont);
}

/*
 * Read back one exponent stored by BN_set_params():
 * 0 = mult, 1 = high, 2 = low, 3 = mont. Any other selector yields 0.
 */
int BN_get_params(int which)
{
    switch (which) {
    case 0:
        return bn_limit_bits;
    case 1:
        return bn_limit_bits_high;
    case 2:
        return bn_limit_bits_low;
    case 3:
        return bn_limit_bits_mont;
    default:
        return 0;
    }
}
#endif
|
||||
|
||||
const BIGNUM *BN_value_one(void)
|
||||
{
|
||||
static const BN_ULONG data_one = 1L;
|
||||
static const BIGNUM const_one =
|
||||
{ (BN_ULONG *)&data_one, 1, 1, 0, BN_FLG_STATIC_DATA };
|
||||
|
||||
return &const_one;
|
||||
}
|
||||
|
||||
/*
 * Return the number of significant bits in |l| (0 when l == 0).
 *
 * The count is built without branching on |l|: every stage looks at the upper
 * half of the window still under consideration, derives an all-ones/all-zero
 * selector from it, conditionally adds the half-width to the result and
 * conditionally replaces |l| with that upper half.
 */
int BN_num_bits_word(BN_ULONG l)
{
    BN_ULONG upper, sel;
    int bits = (l != 0);

#if BN_BITS2 > 32
    upper = l >> 32;
    sel = (0 - upper) & BN_MASK2;
    sel = (0 - (sel >> (BN_BITS2 - 1)));    /* all-ones iff upper != 0 */
    bits += 32 & sel;
    l ^= (upper ^ l) & sel;                 /* l = upper when selected */
#endif

    upper = l >> 16;
    sel = (0 - upper) & BN_MASK2;
    sel = (0 - (sel >> (BN_BITS2 - 1)));
    bits += 16 & sel;
    l ^= (upper ^ l) & sel;

    upper = l >> 8;
    sel = (0 - upper) & BN_MASK2;
    sel = (0 - (sel >> (BN_BITS2 - 1)));
    bits += 8 & sel;
    l ^= (upper ^ l) & sel;

    upper = l >> 4;
    sel = (0 - upper) & BN_MASK2;
    sel = (0 - (sel >> (BN_BITS2 - 1)));
    bits += 4 & sel;
    l ^= (upper ^ l) & sel;

    upper = l >> 2;
    sel = (0 - upper) & BN_MASK2;
    sel = (0 - (sel >> (BN_BITS2 - 1)));
    bits += 2 & sel;
    l ^= (upper ^ l) & sel;

    /* Last stage: only the count is updated, |l| is no longer needed. */
    upper = l >> 1;
    sel = (0 - upper) & BN_MASK2;
    sel = (0 - (sel >> (BN_BITS2 - 1)));
    bits += 1 & sel;

    return bits;
}
|
||||
|
||||
int BN_num_bits(const BIGNUM *a)
|
||||
{
|
||||
int i = a->top - 1;
|
||||
bn_check_top(a);
|
||||
|
||||
if (BN_is_zero(a))
|
||||
return 0;
|
||||
return ((i * BN_BITS2) + BN_num_bits_word(a->d[i]));
|
||||
}
|
||||
|
||||
/*
 * Release the word array of |a| with the deallocator matching how it was
 * obtained: the secure-heap one when BN_FLG_SECURE is set, the ordinary one
 * otherwise. The pointer stored in |a| is not modified.
 */
static void bn_free_d(BIGNUM *a)
{
    if (!BN_get_flags(a, BN_FLG_SECURE)) {
        OPENSSL_free(a->d);
        return;
    }
    OPENSSL_secure_free(a->d);
}
|
||||
|
||||
|
||||
/*
 * Wipe and release |a|. NULL is accepted and ignored.
 *
 * The word array is cleansed and freed unless it is static data. If the
 * structure itself was heap-allocated (BN_FLG_MALLOCED) it is cleansed and
 * freed as well; otherwise the structure stays with the caller and is reset
 * to an empty value so it does not keep a pointer to freed memory.
 */
void BN_clear_free(BIGNUM *a)
{
    int released_words = 0;

    if (a == NULL)
        return;

    if (a->d != NULL && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
        OPENSSL_cleanse(a->d, a->dmax * sizeof(a->d[0]));
        bn_free_d(a);
        released_words = 1;
    }

    if (BN_get_flags(a, BN_FLG_MALLOCED)) {
        OPENSSL_cleanse(a, sizeof(*a));
        OPENSSL_free(a);
    } else if (released_words) {
        /* Caller-owned structure: drop the now-dangling buffer reference. */
        a->d = NULL;
        a->top = 0;
        a->dmax = 0;
        a->neg = 0;
    }
}
|
||||
|
||||
/*
 * Release |a| without wiping it. NULL is accepted and ignored.
 *
 * The word array is freed unless it is static data. A structure that was
 * heap-allocated (BN_FLG_MALLOCED) is freed too; a caller-owned structure is
 * instead reset to an empty value so that it no longer points at the freed
 * word array (a second BN_free() on it is then harmless).
 */
void BN_free(BIGNUM *a)
{
    int owns_words;

    if (a == NULL)
        return;

    owns_words = !BN_get_flags(a, BN_FLG_STATIC_DATA);
    if (owns_words)
        bn_free_d(a);

    if (a->flags & BN_FLG_MALLOCED) {
        OPENSSL_free(a);
    } else if (owns_words) {
        /* Caller-owned structure: drop the now-dangling buffer reference. */
        a->d = NULL;
        a->top = 0;
        a->dmax = 0;
        a->neg = 0;
    }
}
|
||||
|
||||
/*
 * Initialise a caller-provided BIGNUM by copying a zero-initialised template
 * over it. Nothing previously held by |a| is released.
 */
void bn_init(BIGNUM *a)
{
    static BIGNUM blank;        /* static storage: all members start as zero */

    *a = blank;
    bn_check_top(a);
}
|
||||
|
||||
BIGNUM *BN_new(void)
|
||||
{
|
||||
BIGNUM *ret;
|
||||
|
||||
if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) {
|
||||
BNerr(BN_F_BN_NEW, ERR_R_MALLOC_FAILURE);
|
||||
return NULL;
|
||||
}
|
||||
ret->flags = BN_FLG_MALLOCED;
|
||||
bn_check_top(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
BIGNUM *BN_secure_new(void)
|
||||
{
|
||||
BIGNUM *ret = BN_new();
|
||||
if (ret != NULL)
|
||||
ret->flags |= BN_FLG_SECURE;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* This is used by bn_expand2() */
|
||||
/* The caller MUST check that words > b->dmax before calling this */
|
||||
static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
|
||||
{
|
||||
BN_ULONG *a = NULL;
|
||||
|
||||
if (words > (INT_MAX / (4 * BN_BITS2))) {
|
||||
BNerr(BN_F_BN_EXPAND_INTERNAL, BN_R_BIGNUM_TOO_LONG);
|
||||
return NULL;
|
||||
}
|
||||
if (BN_get_flags(b, BN_FLG_STATIC_DATA)) {
|
||||
BNerr(BN_F_BN_EXPAND_INTERNAL, BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
|
||||
return NULL;
|
||||
}
|
||||
if (BN_get_flags(b, BN_FLG_SECURE))
|
||||
a = OPENSSL_secure_zalloc(words * sizeof(*a));
|
||||
else
|
||||
a = OPENSSL_zalloc(words * sizeof(*a));
|
||||
if (a == NULL) {
|
||||
BNerr(BN_F_BN_EXPAND_INTERNAL, ERR_R_MALLOC_FAILURE);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
assert(b->top <= words);
|
||||
if (b->top > 0)
|
||||
memcpy(a, b->d, sizeof(*a) * b->top);
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is an internal function that should not be used in applications. It
|
||||
* ensures that 'b' has enough room for a 'words' word number and initialises
|
||||
* any unused part of b->d with leading zeros. It is mostly used by the
|
||||
* various BIGNUM routines. If there is an error, NULL is returned. If not,
|
||||
* 'b' is returned.
|
||||
*/
|
||||
|
||||
BIGNUM *bn_expand2(BIGNUM *b, int words)
|
||||
{
|
||||
if (words > b->dmax) {
|
||||
BN_ULONG *a = bn_expand_internal(b, words);
|
||||
if (!a)
|
||||
return NULL;
|
||||
if (b->d) {
|
||||
OPENSSL_cleanse(b->d, b->dmax * sizeof(b->d[0]));
|
||||
bn_free_d(b);
|
||||
}
|
||||
b->d = a;
|
||||
b->dmax = words;
|
||||
}
|
||||
|
||||
return b;
|
||||
}
|
||||
|
||||
BIGNUM *BN_dup(const BIGNUM *a)
|
||||
{
|
||||
BIGNUM *t;
|
||||
|
||||
if (a == NULL)
|
||||
return NULL;
|
||||
bn_check_top(a);
|
||||
|
||||
t = BN_get_flags(a, BN_FLG_SECURE) ? BN_secure_new() : BN_new();
|
||||
if (t == NULL)
|
||||
return NULL;
|
||||
if (!BN_copy(t, a)) {
|
||||
BN_free(t);
|
||||
return NULL;
|
||||
}
|
||||
bn_check_top(t);
|
||||
return t;
|
||||
}
|
||||
|
||||
/*
 * Copy the value of |b| into |a|: the words in use, the sign and the top
 * index. BN_FLG_FIXED_TOP is propagated from |b| (it is only ever added to
 * |a| here, never removed). Copying an object onto itself is a no-op.
 * Returns |a|, or NULL if |a| could not be enlarged.
 */
BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
{
    int used;

    bn_check_top(b);

    if (a == b)
        return a;

    used = b->top;
    if (bn_wexpand(a, used) == NULL)
        return NULL;

    if (used > 0)
        memcpy(a->d, b->d, sizeof(b->d[0]) * used);

    a->top = used;
    a->neg = b->neg;
    a->flags |= b->flags & BN_FLG_FIXED_TOP;
    bn_check_top(a);
    return a;
}
|
||||
|
||||
#define FLAGS_DATA(flags) ((flags) & (BN_FLG_STATIC_DATA \
|
||||
| BN_FLG_CONSTTIME \
|
||||
| BN_FLG_SECURE \
|
||||
| BN_FLG_FIXED_TOP))
|
||||
#define FLAGS_STRUCT(flags) ((flags) & (BN_FLG_MALLOCED))
|
||||
|
||||
void BN_swap(BIGNUM *a, BIGNUM *b)
|
||||
{
|
||||
int flags_old_a, flags_old_b;
|
||||
BN_ULONG *tmp_d;
|
||||
int tmp_top, tmp_dmax, tmp_neg;
|
||||
|
||||
bn_check_top(a);
|
||||
bn_check_top(b);
|
||||
|
||||
flags_old_a = a->flags;
|
||||
flags_old_b = b->flags;
|
||||
|
||||
tmp_d = a->d;
|
||||
tmp_top = a->top;
|
||||
tmp_dmax = a->dmax;
|
||||
tmp_neg = a->neg;
|
||||
|
||||
a->d = b->d;
|
||||
a->top = b->top;
|
||||
a->dmax = b->dmax;
|
||||
a->neg = b->neg;
|
||||
|
||||
b->d = tmp_d;
|
||||
b->top = tmp_top;
|
||||
b->dmax = tmp_dmax;
|
||||
b->neg = tmp_neg;
|
||||
|
||||
a->flags = FLAGS_STRUCT(flags_old_a) | FLAGS_DATA(flags_old_b);
|
||||
b->flags = FLAGS_STRUCT(flags_old_b) | FLAGS_DATA(flags_old_a);
|
||||
bn_check_top(a);
|
||||
bn_check_top(b);
|
||||
}
|
||||
|
||||
/*
 * Set |a| to zero and wipe its whole word array (all dmax words, not just
 * those in use). The array stays allocated. BN_FLG_FIXED_TOP is cleared.
 */
void BN_clear(BIGNUM *a)
{
    bn_check_top(a);

    if (a->d != NULL)
        OPENSSL_cleanse(a->d, sizeof(*a->d) * a->dmax);

    a->top = 0;
    a->neg = 0;
    a->flags &= ~BN_FLG_FIXED_TOP;
}
|
||||
|
||||
BN_ULONG BN_get_word(const BIGNUM *a)
|
||||
{
|
||||
if (a->top > 1)
|
||||
return BN_MASK2;
|
||||
else if (a->top == 1)
|
||||
return a->d[0];
|
||||
/* a->top == 0 */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Set |a| to the non-negative value |w|. Clears BN_FLG_FIXED_TOP.
 * Returns 1 on success, 0 if room for one word could not be obtained.
 */
int BN_set_word(BIGNUM *a, BN_ULONG w)
{
    bn_check_top(a);

    if (bn_expand(a, (int)sizeof(BN_ULONG) * 8) == NULL)
        return 0;

    a->d[0] = w;
    a->top = (w != 0) ? 1 : 0;  /* zero is represented with no words in use */
    a->neg = 0;
    a->flags &= ~BN_FLG_FIXED_TOP;
    bn_check_top(a);
    return 1;
}
|
||||
|
||||
/*
 * Convert the |len| byte big-endian integer at |s| into a BIGNUM.
 *
 * If |ret| is NULL a new BIGNUM is allocated, otherwise |ret| is overwritten.
 * The result is always non-negative. Returns the BIGNUM, or NULL on
 * allocation failure (a BIGNUM allocated here is freed again in that case;
 * a caller-supplied |ret| is never freed).
 */
BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
{
    unsigned int i, m;
    unsigned int n;
    BN_ULONG l;
    BIGNUM *bn = NULL;

    if (ret == NULL)
        ret = bn = BN_new();
    if (ret == NULL)
        return NULL;
    bn_check_top(ret);

    /* Skip leading zeros. */
    for ( ; len > 0 && *s == 0; s++, len--)
        continue;
    n = len;
    if (n == 0) {
        /*
         * A reused |ret| may still carry a sign or BN_FLG_FIXED_TOP from its
         * previous value; reset those too so the result is a proper zero.
         */
        BN_zero_ex(ret);
        return ret;
    }

    i = ((n - 1) / BN_BYTES) + 1;       /* number of words needed */
    m = ((n - 1) % (BN_BYTES));         /* bytes, minus one, in the top word */
    if (bn_wexpand(ret, (int)i) == NULL) {
        BN_free(bn);
        return NULL;
    }
    ret->top = i;
    ret->neg = 0;

    /* Assemble words from the most significant byte downwards. */
    l = 0;
    while (n--) {
        l = (l << 8L) | *(s++);
        if (m-- == 0) {
            ret->d[--i] = l;
            l = 0;
            m = BN_BYTES - 1;
        }
    }

    /* Bring top/neg/flags into their normalised state. */
    bn_correct_top(ret);
    return ret;
}
|
||||
|
||||
/* ignore negative */
|
||||
static int bn2binpad(const BIGNUM *a, unsigned char *to, int tolen)
|
||||
{
|
||||
int n;
|
||||
size_t i, lasti, j, atop, mask;
|
||||
BN_ULONG l;
|
||||
|
||||
/*
|
||||
* In case |a| is fixed-top, BN_num_bytes can return bogus length,
|
||||
* but it's assumed that fixed-top inputs ought to be "nominated"
|
||||
* even for padded output, so it works out...
|
||||
*/
|
||||
n = BN_num_bytes(a);
|
||||
if (tolen == -1) {
|
||||
tolen = n;
|
||||
} else if (tolen < n) { /* uncommon/unlike case */
|
||||
BIGNUM temp = *a;
|
||||
|
||||
bn_correct_top(&temp);
|
||||
n = BN_num_bytes(&temp);
|
||||
if (tolen < n)
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Swipe through whole available data and don't give away padded zero. */
|
||||
atop = a->dmax * BN_BYTES;
|
||||
if (atop == 0) {
|
||||
OPENSSL_cleanse(to, tolen);
|
||||
return tolen;
|
||||
}
|
||||
|
||||
lasti = atop - 1;
|
||||
atop = a->top * BN_BYTES;
|
||||
for (i = 0, j = 0, to += tolen; j < (size_t)tolen; j++) {
|
||||
l = a->d[i / BN_BYTES];
|
||||
mask = 0 - ((j - atop) >> (8 * sizeof(i) - 1));
|
||||
*--to = (unsigned char)(l >> (8 * (i % BN_BYTES)) & mask);
|
||||
i += (i - lasti) >> (8 * sizeof(i) - 1); /* stay on last limb */
|
||||
}
|
||||
|
||||
return tolen;
|
||||
}
|
||||
|
||||
/*
 * Write |a| as exactly |tolen| big-endian bytes, zero-padded on the left.
 * Returns |tolen|, or -1 if |tolen| is negative or too small for the value.
 */
int BN_bn2binpad(const BIGNUM *a, unsigned char *to, int tolen)
{
    /* Negative lengths are rejected here; -1 is reserved by bn2binpad(). */
    return (tolen < 0) ? -1 : bn2binpad(a, to, tolen);
}
|
||||
|
||||
/*
 * Write |a| to |to| in big-endian order using BN_num_bytes(a) bytes and
 * return that byte count.
 */
int BN_bn2bin(const BIGNUM *a, unsigned char *to)
{
    const int natural_length = -1;  /* asks bn2binpad() to pick the length */

    return bn2binpad(a, to, natural_length);
}
|
||||
|
||||
/*
 * Convert the |len| byte little-endian integer at |s| into a BIGNUM.
 *
 * If |ret| is NULL a new BIGNUM is allocated, otherwise |ret| is overwritten.
 * The result is always non-negative. Returns the BIGNUM, or NULL on
 * allocation failure (a BIGNUM allocated here is freed again in that case;
 * a caller-supplied |ret| is never freed).
 */
BIGNUM *BN_lebin2bn(const unsigned char *s, int len, BIGNUM *ret)
{
    unsigned int i, m;
    unsigned int n;
    BN_ULONG l;
    BIGNUM *bn = NULL;

    if (ret == NULL)
        ret = bn = BN_new();
    if (ret == NULL)
        return NULL;
    bn_check_top(ret);

    /* Start just past the most significant (last) byte. */
    s += len;
    /* Skip trailing zeroes. */
    for ( ; len > 0 && s[-1] == 0; s--, len--)
        continue;
    n = len;
    if (n == 0) {
        /*
         * A reused |ret| may still carry a sign or BN_FLG_FIXED_TOP from its
         * previous value; reset those too so the result is a proper zero.
         */
        BN_zero_ex(ret);
        return ret;
    }

    i = ((n - 1) / BN_BYTES) + 1;       /* number of words needed */
    m = ((n - 1) % (BN_BYTES));         /* bytes, minus one, in the top word */
    if (bn_wexpand(ret, (int)i) == NULL) {
        BN_free(bn);
        return NULL;
    }
    ret->top = i;
    ret->neg = 0;

    /* Walk backwards through the input, most significant byte first. */
    l = 0;
    while (n--) {
        s--;
        l = (l << 8L) | *s;
        if (m-- == 0) {
            ret->d[--i] = l;
            l = 0;
            m = BN_BYTES - 1;
        }
    }

    /* Bring top/neg/flags into their normalised state. */
    bn_correct_top(ret);
    return ret;
}
|
||||
|
||||
int BN_bn2lebinpad(const BIGNUM *a, unsigned char *to, int tolen)
|
||||
{
|
||||
int i;
|
||||
BN_ULONG l;
|
||||
bn_check_top(a);
|
||||
i = BN_num_bytes(a);
|
||||
if (tolen < i)
|
||||
return -1;
|
||||
/* Add trailing zeroes if necessary */
|
||||
if (tolen > i)
|
||||
memset(to + i, 0, tolen - i);
|
||||
to += i;
|
||||
while (i--) {
|
||||
l = a->d[i / BN_BYTES];
|
||||
to--;
|
||||
*to = (unsigned char)(l >> (8 * (i % BN_BYTES))) & 0xff;
|
||||
}
|
||||
return tolen;
|
||||
}
|
||||
|
||||
int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
|
||||
{
|
||||
int i;
|
||||
BN_ULONG t1, t2, *ap, *bp;
|
||||
|
||||
bn_check_top(a);
|
||||
bn_check_top(b);
|
||||
|
||||
i = a->top - b->top;
|
||||
if (i != 0)
|
||||
return i;
|
||||
ap = a->d;
|
||||
bp = b->d;
|
||||
for (i = a->top - 1; i >= 0; i--) {
|
||||
t1 = ap[i];
|
||||
t2 = bp[i];
|
||||
if (t1 != t2)
|
||||
return ((t1 > t2) ? 1 : -1);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int BN_cmp(const BIGNUM *a, const BIGNUM *b)
|
||||
{
|
||||
int i;
|
||||
int gt, lt;
|
||||
BN_ULONG t1, t2;
|
||||
|
||||
if ((a == NULL) || (b == NULL)) {
|
||||
if (a != NULL)
|
||||
return -1;
|
||||
else if (b != NULL)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
bn_check_top(a);
|
||||
bn_check_top(b);
|
||||
|
||||
if (a->neg != b->neg) {
|
||||
if (a->neg)
|
||||
return -1;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
if (a->neg == 0) {
|
||||
gt = 1;
|
||||
lt = -1;
|
||||
} else {
|
||||
gt = -1;
|
||||
lt = 1;
|
||||
}
|
||||
|
||||
if (a->top > b->top)
|
||||
return gt;
|
||||
if (a->top < b->top)
|
||||
return lt;
|
||||
for (i = a->top - 1; i >= 0; i--) {
|
||||
t1 = a->d[i];
|
||||
t2 = b->d[i];
|
||||
if (t1 > t2)
|
||||
return gt;
|
||||
if (t1 < t2)
|
||||
return lt;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Set bit |n| of |a|, growing |a| (and zero-filling the new words) when the
 * bit lies beyond the words currently in use. Returns 1 on success, 0 when
 * |n| is negative or |a| cannot be enlarged.
 */
int BN_set_bit(BIGNUM *a, int n)
{
    int word, bit, k;

    if (n < 0)
        return 0;

    word = n / BN_BITS2;
    bit = n % BN_BITS2;

    if (a->top <= word) {
        if (bn_wexpand(a, word + 1) == NULL)
            return 0;
        /* Words between the old top and the target must read as zero. */
        for (k = a->top; k <= word; k++)
            a->d[k] = 0;
        a->top = word + 1;
        a->flags &= ~BN_FLG_FIXED_TOP;
    }

    a->d[word] |= (((BN_ULONG)1) << bit);
    bn_check_top(a);
    return 1;
}
|
||||
|
||||
/*
 * Clear bit |n| of |a| and re-normalise the top. Returns 1 on success, 0
 * when |n| is negative or lies beyond the words in use.
 */
int BN_clear_bit(BIGNUM *a, int n)
{
    int word, bit;

    bn_check_top(a);
    if (n < 0)
        return 0;

    word = n / BN_BITS2;
    bit = n % BN_BITS2;
    if (word >= a->top)
        return 0;

    a->d[word] &= (~(((BN_ULONG)1) << bit));
    bn_correct_top(a);
    return 1;
}
|
||||
|
||||
int BN_is_bit_set(const BIGNUM *a, int n)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
bn_check_top(a);
|
||||
if (n < 0)
|
||||
return 0;
|
||||
i = n / BN_BITS2;
|
||||
j = n % BN_BITS2;
|
||||
if (a->top <= i)
|
||||
return 0;
|
||||
return (int)(((a->d[i]) >> j) & ((BN_ULONG)1));
|
||||
}
|
||||
|
||||
/*
 * Truncate |a| to its |n| least significant bits. Returns 1 on success and
 * 0, leaving |a| untouched, when |n| is negative or the word holding bit |n|
 * lies beyond the words in use.
 */
int BN_mask_bits(BIGNUM *a, int n)
{
    int word, bit;

    bn_check_top(a);
    if (n < 0)
        return 0;

    word = n / BN_BITS2;
    bit = n % BN_BITS2;
    if (word >= a->top)
        return 0;

    if (bit != 0) {
        /* Keep the partial word, dropping its bits from |bit| upwards. */
        a->top = word + 1;
        a->d[word] &= ~(BN_MASK2 << bit);
    } else {
        a->top = word;
    }
    bn_correct_top(a);
    return 1;
}
|
||||
|
||||
/*
 * Set the sign of |a|: negative when |b| is non-zero, non-negative otherwise.
 * Zero is never made negative.
 */
void BN_set_negative(BIGNUM *a, int b)
{
    a->neg = (b && !BN_is_zero(a)) ? 1 : 0;
}
|
||||
|
||||
/*
 * Compare two |n| word arrays as unsigned integers, most significant word
 * (index n - 1) first. Returns 1 if a > b, -1 if a < b and 0 if they are
 * equal. A non-positive |n| is treated as an empty comparison and yields 0
 * without touching either array.
 */
int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n)
{
    int i;

    if (n <= 0)
        return 0;

    for (i = n - 1; i >= 0; i--) {
        if (a[i] != b[i])
            return (a[i] > b[i]) ? 1 : -1;
    }
    return 0;
}
|
||||
|
||||
/*
|
||||
* Here follows a specialised variants of bn_cmp_words(). It has the
|
||||
* capability of performing the operation on arrays of different sizes. The
|
||||
* sizes of those arrays is expressed through cl, which is the common length
|
||||
* ( basically, min(len(a),len(b)) ), and dl, which is the delta between the
|
||||
* two lengths, calculated as len(a)-len(b). All lengths are the number of
|
||||
* BN_ULONGs...
|
||||
*/
|
||||
|
||||
int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl)
|
||||
{
|
||||
int n, i;
|
||||
n = cl - 1;
|
||||
|
||||
if (dl < 0) {
|
||||
for (i = dl; i < 0; i++) {
|
||||
if (b[n - i] != 0)
|
||||
return -1; /* a < b */
|
||||
}
|
||||
}
|
||||
if (dl > 0) {
|
||||
for (i = dl; i > 0; i--) {
|
||||
if (a[n + i] != 0)
|
||||
return 1; /* a > b */
|
||||
}
|
||||
}
|
||||
return bn_cmp_words(a, b, cl);
|
||||
}
|
||||
|
||||
/*-
|
||||
* Constant-time conditional swap of a and b.
|
||||
* a and b are swapped if condition is not 0.
|
||||
* nwords is the number of words to swap.
|
||||
* Assumes that at least nwords are allocated in both a and b.
|
||||
* Assumes that no more than nwords are used by either a or b.
|
||||
*/
|
||||
void BN_consttime_swap(BN_ULONG condition, BIGNUM *a, BIGNUM *b, int nwords)
|
||||
{
|
||||
BN_ULONG t;
|
||||
int i;
|
||||
|
||||
if (a == b)
|
||||
return;
|
||||
|
||||
bn_wcheck_size(a, nwords);
|
||||
bn_wcheck_size(b, nwords);
|
||||
|
||||
condition = ((~condition & ((condition - 1))) >> (BN_BITS2 - 1)) - 1;
|
||||
|
||||
t = (a->top ^ b->top) & condition;
|
||||
a->top ^= t;
|
||||
b->top ^= t;
|
||||
|
||||
t = (a->neg ^ b->neg) & condition;
|
||||
a->neg ^= t;
|
||||
b->neg ^= t;
|
||||
|
||||
/*-
|
||||
* BN_FLG_STATIC_DATA: indicates that data may not be written to. Intention
|
||||
* is actually to treat it as it's read-only data, and some (if not most)
|
||||
* of it does reside in read-only segment. In other words observation of
|
||||
* BN_FLG_STATIC_DATA in BN_consttime_swap should be treated as fatal
|
||||
* condition. It would either cause SEGV or effectively cause data
|
||||
* corruption.
|
||||
*
|
||||
* BN_FLG_MALLOCED: refers to BN structure itself, and hence must be
|
||||
* preserved.
|
||||
*
|
||||
* BN_FLG_SECURE: must be preserved, because it determines how x->d was
|
||||
* allocated and hence how to free it.
|
||||
*
|
||||
* BN_FLG_CONSTTIME: sufficient to mask and swap
|
||||
*
|
||||
* BN_FLG_FIXED_TOP: indicates that we haven't called bn_correct_top() on
|
||||
* the data, so the d array may be padded with additional 0 values (i.e.
|
||||
* top could be greater than the minimal value that it could be). We should
|
||||
* be swapping it
|
||||
*/
|
||||
|
||||
#define BN_CONSTTIME_SWAP_FLAGS (BN_FLG_CONSTTIME | BN_FLG_FIXED_TOP)
|
||||
|
||||
t = ((a->flags ^ b->flags) & BN_CONSTTIME_SWAP_FLAGS) & condition;
|
||||
a->flags ^= t;
|
||||
b->flags ^= t;
|
||||
|
||||
/* conditionally swap the data */
|
||||
for (i = 0; i < nwords; i++) {
|
||||
t = (a->d[i] ^ b->d[i]) & condition;
|
||||
a->d[i] ^= t;
|
||||
b->d[i] ^= t;
|
||||
}
|
||||
}
|
||||
|
||||
#undef BN_CONSTTIME_SWAP_FLAGS
|
||||
|
||||
/*
 * Bits of security, see SP800-57.
 *
 * |L| is the size in bits of the modulus and |N| the size in bits of the
 * subgroup/private key, or -1 when only |L| applies. The result is the
 * smaller of the strength associated with |L| and N / 2, and 0 when |L| is
 * below 1024 or N / 2 is below 80.
 */
int BN_security_bits(int L, int N)
{
    static const struct {
        int min_bits;           /* smallest L granting this strength */
        int strength;
    } levels[] = {
        { 15360, 256 },
        {  7680, 192 },
        {  3072, 128 },
        {  2048, 112 },
        {  1024,  80 }
    };
    int secbits = 0;
    int half_n;
    size_t k;

    /* The table is ordered from strongest to weakest: first match wins. */
    for (k = 0; k < sizeof(levels) / sizeof(levels[0]); k++) {
        if (L >= levels[k].min_bits) {
            secbits = levels[k].strength;
            break;
        }
    }
    if (secbits == 0)
        return 0;

    if (N == -1)
        return secbits;

    half_n = N / 2;
    if (half_n < 80)
        return 0;
    return (half_n < secbits) ? half_n : secbits;
}
|
||||
|
||||
/*
 * Set |a| to zero by marking no words as in use. The word array is neither
 * wiped nor released. The sign and BN_FLG_FIXED_TOP are cleared as well.
 */
void BN_zero_ex(BIGNUM *a)
{
    a->top = 0;
    a->neg = 0;
    a->flags &= ~BN_FLG_FIXED_TOP;
}
|
||||
|
||||
int BN_abs_is_word(const BIGNUM *a, const BN_ULONG w)
|
||||
{
|
||||
return ((a->top == 1) && (a->d[0] == w)) || ((w == 0) && (a->top == 0));
|
||||
}
|
||||
|
||||
int BN_is_zero(const BIGNUM *a)
|
||||
{
|
||||
return a->top == 0;
|
||||
}
|
||||
|
||||
int BN_is_one(const BIGNUM *a)
|
||||
{
|
||||
return BN_abs_is_word(a, 1) && !a->neg;
|
||||
}
|
||||
|
||||
int BN_is_word(const BIGNUM *a, const BN_ULONG w)
|
||||
{
|
||||
return BN_abs_is_word(a, w) && (!w || !a->neg);
|
||||
}
|
||||
|
||||
int BN_is_odd(const BIGNUM *a)
|
||||
{
|
||||
return (a->top > 0) && (a->d[0] & 1);
|
||||
}
|
||||
|
||||
int BN_is_negative(const BIGNUM *a)
|
||||
{
|
||||
return (a->neg != 0);
|
||||
}
|
||||
|
||||
int BN_to_montgomery(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont,
|
||||
BN_CTX *ctx)
|
||||
{
|
||||
return BN_mod_mul_montgomery(r, a, &(mont->RR), mont, ctx);
|
||||
}
|
||||
|
||||
/*
 * Make |dest| a shallow alias of |b| with additional |flags| set.
 *
 * |dest| shares |b|'s word array rather than copying it and is therefore
 * marked BN_FLG_STATIC_DATA. It keeps its own BN_FLG_MALLOCED state and takes
 * over every other flag of |b|.
 */
void BN_with_flags(BIGNUM *dest, const BIGNUM *b, int flags)
{
    const int own_struct_flag = dest->flags & BN_FLG_MALLOCED;
    const int inherited = b->flags & ~BN_FLG_MALLOCED;

    dest->d = b->d;
    dest->top = b->top;
    dest->dmax = b->dmax;
    dest->neg = b->neg;
    dest->flags = own_struct_flag | inherited | BN_FLG_STATIC_DATA | flags;
}
|
||||
|
||||
BN_GENCB *BN_GENCB_new(void)
|
||||
{
|
||||
BN_GENCB *ret;
|
||||
|
||||
if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) {
|
||||
BNerr(BN_F_BN_GENCB_NEW, ERR_R_MALLOC_FAILURE);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Release a BN_GENCB obtained from BN_GENCB_new(). NULL is ignored. */
void BN_GENCB_free(BN_GENCB *cb)
{
    if (cb != NULL)
        OPENSSL_free(cb);
}
|
||||
|
||||
/* Turn on the flag bits given in |n|; bits already set are left alone. */
void BN_set_flags(BIGNUM *b, int n)
{
    b->flags = b->flags | n;
}
|
||||
|
||||
int BN_get_flags(const BIGNUM *b, int n)
|
||||
{
|
||||
return b->flags & n;
|
||||
}
|
||||
|
||||
/* Populate a BN_GENCB structure with an "old"-style callback */
|
||||
void BN_GENCB_set_old(BN_GENCB *gencb, void (*callback) (int, int, void *),
|
||||
void *cb_arg)
|
||||
{
|
||||
BN_GENCB *tmp_gencb = gencb;
|
||||
tmp_gencb->ver = 1;
|
||||
tmp_gencb->arg = cb_arg;
|
||||
tmp_gencb->cb.cb_1 = callback;
|
||||
}
|
||||
|
||||
/* Populate a BN_GENCB structure with a "new"-style callback */
|
||||
void BN_GENCB_set(BN_GENCB *gencb, int (*callback) (int, int, BN_GENCB *),
|
||||
void *cb_arg)
|
||||
{
|
||||
BN_GENCB *tmp_gencb = gencb;
|
||||
tmp_gencb->ver = 2;
|
||||
tmp_gencb->arg = cb_arg;
|
||||
tmp_gencb->cb.cb_2 = callback;
|
||||
}
|
||||
|
||||
/* Return the user argument stored in |cb|. */
void *BN_GENCB_get_arg(BN_GENCB *cb)
{
    void *user_arg = cb->arg;

    return user_arg;
}
|
||||
|
||||
/*
 * Ensure |a| has room for at least |words| words. Returns |a| when it is
 * already large enough, otherwise the result of bn_expand2().
 */
BIGNUM *bn_wexpand(BIGNUM *a, int words)
{
    if (words <= a->dmax)
        return a;

    return bn_expand2(a, words);
}
|
||||
|
||||
/*
 * Normalise |a|: lower a->top past any zero words at the most significant
 * end, make sure zero is not negative, and clear BN_FLG_FIXED_TOP.
 */
void bn_correct_top(BIGNUM *a)
{
    int top = a->top;

    if (top > 0) {
        /* Step down while the word just below |top| is zero. */
        while (top > 0 && a->d[top - 1] == 0)
            top--;
        a->top = top;
    }

    if (a->top == 0)
        a->neg = 0;
    a->flags &= ~BN_FLG_FIXED_TOP;
    bn_pollute(a);
}
|
321
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mod.c
vendored
Normal file
321
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mod.c
vendored
Normal file
|
@ -0,0 +1,321 @@
|
|||
/*
|
||||
* Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
/*
 * Like BN_mod, but returns a non-negative remainder (i.e. 0 <= r < |d|
 * always holds). Returns 1 on success, 0 on error.
 */
int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
{
    if (!(BN_mod(r, m, d, ctx)))
        return 0;

    if (!r->neg)
        return 1;

    /* now -|d| < r < 0, so we have to set r := r + |d| */
    if (d->neg)
        return BN_sub(r, r, d);

    return BN_add(r, r, d);
}
|
||||
|
||||
int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
|
||||
BN_CTX *ctx)
|
||||
{
|
||||
if (!BN_add(r, a, b))
|
||||
return 0;
|
||||
return BN_nnmod(r, r, m, ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* BN_mod_add variant that may be used if both a and b are non-negative and
|
||||
* less than m. The original algorithm was
|
||||
*
|
||||
* if (!BN_uadd(r, a, b))
|
||||
* return 0;
|
||||
* if (BN_ucmp(r, m) >= 0)
|
||||
* return BN_usub(r, r, m);
|
||||
*
|
||||
* which is replaced with addition, subtracting modulus, and conditional
|
||||
* move depending on whether or not subtraction borrowed.
|
||||
*/
|
||||
int bn_mod_add_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
|
||||
const BIGNUM *m)
|
||||
{
|
||||
size_t i, ai, bi, mtop = m->top;
|
||||
BN_ULONG storage[1024 / BN_BITS2];
|
||||
BN_ULONG carry, temp, mask, *rp, *tp = storage;
|
||||
const BN_ULONG *ap, *bp;
|
||||
|
||||
if (bn_wexpand(r, mtop) == NULL)
|
||||
return 0;
|
||||
|
||||
if (mtop > sizeof(storage) / sizeof(storage[0])
|
||||
&& (tp = OPENSSL_malloc(mtop * sizeof(BN_ULONG))) == NULL)
|
||||
return 0;
|
||||
|
||||
ap = a->d != NULL ? a->d : tp;
|
||||
bp = b->d != NULL ? b->d : tp;
|
||||
|
||||
for (i = 0, ai = 0, bi = 0, carry = 0; i < mtop;) {
|
||||
mask = (BN_ULONG)0 - ((i - a->top) >> (8 * sizeof(i) - 1));
|
||||
temp = ((ap[ai] & mask) + carry) & BN_MASK2;
|
||||
carry = (temp < carry);
|
||||
|
||||
mask = (BN_ULONG)0 - ((i - b->top) >> (8 * sizeof(i) - 1));
|
||||
tp[i] = ((bp[bi] & mask) + temp) & BN_MASK2;
|
||||
carry += (tp[i] < temp);
|
||||
|
||||
i++;
|
||||
ai += (i - a->dmax) >> (8 * sizeof(i) - 1);
|
||||
bi += (i - b->dmax) >> (8 * sizeof(i) - 1);
|
||||
}
|
||||
rp = r->d;
|
||||
carry -= bn_sub_words(rp, tp, m->d, mtop);
|
||||
for (i = 0; i < mtop; i++) {
|
||||
rp[i] = (carry & tp[i]) | (~carry & rp[i]);
|
||||
((volatile BN_ULONG *)tp)[i] = 0;
|
||||
}
|
||||
r->top = mtop;
|
||||
r->flags |= BN_FLG_FIXED_TOP;
|
||||
r->neg = 0;
|
||||
|
||||
if (tp != storage)
|
||||
OPENSSL_free(tp);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * BN_mod_add variant for non-negative a and b that are both less than m:
 * runs bn_mod_add_fixed_top() and then normalises the result.
 * Returns 1 on success, 0 on error.
 */
int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
                     const BIGNUM *m)
{
    int ok = bn_mod_add_fixed_top(r, a, b, m);

    if (!ok)
        return ok;

    bn_correct_top(r);
    return ok;
}
|
||||
|
||||
int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
|
||||
BN_CTX *ctx)
|
||||
{
|
||||
if (!BN_sub(r, a, b))
|
||||
return 0;
|
||||
return BN_nnmod(r, r, m, ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* BN_mod_sub variant that may be used if both a and b are non-negative,
|
||||
* a is less than m, while b is of same bit width as m. It's implemented
|
||||
* as subtraction followed by two conditional additions.
|
||||
*
|
||||
* 0 <= a < m
|
||||
* 0 <= b < 2^w < 2*m
|
||||
*
|
||||
* after subtraction
|
||||
*
|
||||
* -2*m < r = a - b < m
|
||||
*
|
||||
* Thus it takes up to two conditional additions to make |r| positive.
|
||||
*/
|
||||
int bn_mod_sub_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
|
||||
const BIGNUM *m)
|
||||
{
|
||||
size_t i, ai, bi, mtop = m->top;
|
||||
BN_ULONG borrow, carry, ta, tb, mask, *rp;
|
||||
const BN_ULONG *ap, *bp;
|
||||
|
||||
if (bn_wexpand(r, mtop) == NULL)
|
||||
return 0;
|
||||
|
||||
rp = r->d;
|
||||
ap = a->d != NULL ? a->d : rp;
|
||||
bp = b->d != NULL ? b->d : rp;
|
||||
|
||||
for (i = 0, ai = 0, bi = 0, borrow = 0; i < mtop;) {
|
||||
mask = (BN_ULONG)0 - ((i - a->top) >> (8 * sizeof(i) - 1));
|
||||
ta = ap[ai] & mask;
|
||||
|
||||
mask = (BN_ULONG)0 - ((i - b->top) >> (8 * sizeof(i) - 1));
|
||||
tb = bp[bi] & mask;
|
||||
rp[i] = ta - tb - borrow;
|
||||
if (ta != tb)
|
||||
borrow = (ta < tb);
|
||||
|
||||
i++;
|
||||
ai += (i - a->dmax) >> (8 * sizeof(i) - 1);
|
||||
bi += (i - b->dmax) >> (8 * sizeof(i) - 1);
|
||||
}
|
||||
ap = m->d;
|
||||
for (i = 0, mask = 0 - borrow, carry = 0; i < mtop; i++) {
|
||||
ta = ((ap[i] & mask) + carry) & BN_MASK2;
|
||||
carry = (ta < carry);
|
||||
rp[i] = (rp[i] + ta) & BN_MASK2;
|
||||
carry += (rp[i] < ta);
|
||||
}
|
||||
borrow -= carry;
|
||||
for (i = 0, mask = 0 - borrow, carry = 0; i < mtop; i++) {
|
||||
ta = ((ap[i] & mask) + carry) & BN_MASK2;
|
||||
carry = (ta < carry);
|
||||
rp[i] = (rp[i] + ta) & BN_MASK2;
|
||||
carry += (rp[i] < ta);
|
||||
}
|
||||
|
||||
r->top = mtop;
|
||||
r->flags |= BN_FLG_FIXED_TOP;
|
||||
r->neg = 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* BN_mod_sub variant that may be used if both a and b are non-negative and
|
||||
* less than m
|
||||
*/
|
||||
int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
|
||||
const BIGNUM *m)
|
||||
{
|
||||
if (!BN_sub(r, a, b))
|
||||
return 0;
|
||||
if (r->neg)
|
||||
return BN_add(r, r, m);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* slow but works */
|
||||
int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
|
||||
BN_CTX *ctx)
|
||||
{
|
||||
BIGNUM *t;
|
||||
int ret = 0;
|
||||
|
||||
bn_check_top(a);
|
||||
bn_check_top(b);
|
||||
bn_check_top(m);
|
||||
|
||||
BN_CTX_start(ctx);
|
||||
if ((t = BN_CTX_get(ctx)) == NULL)
|
||||
goto err;
|
||||
if (a == b) {
|
||||
if (!BN_sqr(t, a, ctx))
|
||||
goto err;
|
||||
} else {
|
||||
if (!BN_mul(t, a, b, ctx))
|
||||
goto err;
|
||||
}
|
||||
if (!BN_nnmod(r, t, m, ctx))
|
||||
goto err;
|
||||
bn_check_top(r);
|
||||
ret = 1;
|
||||
err:
|
||||
BN_CTX_end(ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Compute r = (a * a) mod m. Returns 1 on success, 0 on error.
 */
int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
{
    int ok = BN_sqr(r, a, ctx);

    /* r->neg == 0, thus we don't need BN_nnmod */
    if (ok)
        ok = BN_mod(r, r, m, ctx);

    return ok;
}
|
||||
|
||||
/*
 * Compute r = (a << 1) mod m with a non-negative result.
 * Returns 1 on success, 0 on error.
 */
int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
{
    int ok = BN_lshift1(r, a);

    if (ok) {
        bn_check_top(r);
        ok = BN_nnmod(r, r, m, ctx);
    }
    return ok;
}
|
||||
|
||||
/*
|
||||
* BN_mod_lshift1 variant that may be used if a is non-negative and less than
|
||||
* m
|
||||
*/
|
||||
int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m)
|
||||
{
|
||||
if (!BN_lshift1(r, a))
|
||||
return 0;
|
||||
bn_check_top(r);
|
||||
if (BN_cmp(r, m) >= 0)
|
||||
return BN_sub(r, r, m);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m,
|
||||
BN_CTX *ctx)
|
||||
{
|
||||
BIGNUM *abs_m = NULL;
|
||||
int ret;
|
||||
|
||||
if (!BN_nnmod(r, a, m, ctx))
|
||||
return 0;
|
||||
|
||||
if (m->neg) {
|
||||
abs_m = BN_dup(m);
|
||||
if (abs_m == NULL)
|
||||
return 0;
|
||||
abs_m->neg = 0;
|
||||
}
|
||||
|
||||
ret = BN_mod_lshift_quick(r, r, n, (abs_m ? abs_m : m));
|
||||
bn_check_top(r);
|
||||
|
||||
BN_free(abs_m);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* BN_mod_lshift variant that may be used if a is non-negative and less than
|
||||
* m
|
||||
*/
|
||||
int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m)
|
||||
{
|
||||
if (r != a) {
|
||||
if (BN_copy(r, a) == NULL)
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (n > 0) {
|
||||
int max_shift;
|
||||
|
||||
/* 0 < r < m */
|
||||
max_shift = BN_num_bits(m) - BN_num_bits(r);
|
||||
/* max_shift >= 0 */
|
||||
|
||||
if (max_shift < 0) {
|
||||
BNerr(BN_F_BN_MOD_LSHIFT_QUICK, BN_R_INPUT_NOT_REDUCED);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (max_shift > n)
|
||||
max_shift = n;
|
||||
|
||||
if (max_shift) {
|
||||
if (!BN_lshift(r, r, max_shift))
|
||||
return 0;
|
||||
n -= max_shift;
|
||||
} else {
|
||||
if (!BN_lshift1(r, r))
|
||||
return 0;
|
||||
--n;
|
||||
}
|
||||
|
||||
/* BN_num_bits(r) <= BN_num_bits(m) */
|
||||
|
||||
if (BN_cmp(r, m) >= 0) {
|
||||
if (!BN_sub(r, r, m))
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
bn_check_top(r);
|
||||
|
||||
return 1;
|
||||
}
|
464
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mont.c
vendored
Normal file
464
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mont.c
vendored
Normal file
|
@ -0,0 +1,464 @@
|
|||
/*
|
||||
* Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
/*
|
||||
* Details about Montgomery multiplication algorithms can be found at
|
||||
* http://security.ece.orst.edu/publications.html, e.g.
|
||||
* http://security.ece.orst.edu/koc/papers/j37acmon.pdf and
|
||||
* sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf
|
||||
*/
|
||||
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
#define MONT_WORD /* use the faster word-based algorithm */
|
||||
|
||||
#ifdef MONT_WORD
|
||||
static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont);
|
||||
#endif
|
||||
|
||||
int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
|
||||
BN_MONT_CTX *mont, BN_CTX *ctx)
|
||||
{
|
||||
int ret = bn_mul_mont_fixed_top(r, a, b, mont, ctx);
|
||||
|
||||
bn_correct_top(r);
|
||||
bn_check_top(r);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
|
||||
BN_MONT_CTX *mont, BN_CTX *ctx)
|
||||
{
|
||||
BIGNUM *tmp;
|
||||
int ret = 0;
|
||||
int num = mont->N.top;
|
||||
|
||||
#if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD)
|
||||
if (num > 1 && a->top == num && b->top == num) {
|
||||
if (bn_wexpand(r, num) == NULL)
|
||||
return 0;
|
||||
if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
|
||||
r->neg = a->neg ^ b->neg;
|
||||
r->top = num;
|
||||
r->flags |= BN_FLG_FIXED_TOP;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if ((a->top + b->top) > 2 * num)
|
||||
return 0;
|
||||
|
||||
BN_CTX_start(ctx);
|
||||
tmp = BN_CTX_get(ctx);
|
||||
if (tmp == NULL)
|
||||
goto err;
|
||||
|
||||
bn_check_top(tmp);
|
||||
if (a == b) {
|
||||
if (!bn_sqr_fixed_top(tmp, a, ctx))
|
||||
goto err;
|
||||
} else {
|
||||
if (!bn_mul_fixed_top(tmp, a, b, ctx))
|
||||
goto err;
|
||||
}
|
||||
/* reduce from aRR to aR */
|
||||
#ifdef MONT_WORD
|
||||
if (!bn_from_montgomery_word(r, tmp, mont))
|
||||
goto err;
|
||||
#else
|
||||
if (!BN_from_montgomery(r, tmp, mont, ctx))
|
||||
goto err;
|
||||
#endif
|
||||
ret = 1;
|
||||
err:
|
||||
BN_CTX_end(ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef MONT_WORD
|
||||
/*
 * Word-based Montgomery reduction.  r is used as a 2*nl-word work area
 * (it is expanded and modified); the reduced value is written to ret with
 * BN_FLG_FIXED_TOP set and exactly nl words, where nl is the word count of
 * the modulus.  The word loops use masks instead of data-dependent
 * branches.  Returns 1 on success and 0 if an expansion fails.
 */
static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
{
    BIGNUM *modulus = &(mont->N);
    BN_ULONG *hi, *mp, *wp, n0, v, carry;
    int nl = modulus->top;
    int span, i;
    unsigned int rtop;

    if (nl == 0) {
        ret->top = 0;
        return 1;
    }

    span = 2 * nl;              /* carry is stored separately */
    if (bn_wexpand(r, span) == NULL)
        return 0;

    r->neg ^= modulus->neg;
    mp = modulus->d;
    wp = r->d;

    /*
     * Clear the words of r at index >= rtop: the mask is all-ones when
     * i < rtop and zero otherwise.
     */
    rtop = r->top;
    for (i = 0; i < span; i++) {
        v = (BN_ULONG)0 - ((i - rtop) >> (8 * sizeof(rtop) - 1));
        wp[i] &= v;
    }

    r->top = span;
    r->flags |= BN_FLG_FIXED_TOP;
    n0 = mont->n0[0];

    /*
     * Add multiples of |n| to |r| until R = 2^(nl * BN_BITS2) divides it. On
     * input, we had |r| < |n| * R, so now |r| < 2 * |n| * R. Note that |r|
     * includes |carry| which is stored separately.
     */
    carry = 0;
    for (i = 0; i < nl; i++, wp++) {
        v = bn_mul_add_words(wp, mp, nl, (wp[0] * n0) & BN_MASK2);
        v = (v + carry + wp[nl]) & BN_MASK2;
        carry |= (v != wp[nl]);
        carry &= (v <= wp[nl]);
        wp[nl] = v;
    }

    if (bn_wexpand(ret, nl) == NULL)
        return 0;
    ret->top = nl;
    ret->flags |= BN_FLG_FIXED_TOP;
    ret->neg = r->neg;

    wp = ret->d;

    /*
     * Shift |nl| words to divide by R. We have |hi| < 2 * |n|. Note that
     * |hi| includes |carry| which is stored separately.
     */
    hi = &(r->d[nl]);

    carry -= bn_sub_words(wp, hi, mp, nl);
    /*
     * |carry| is -1 if |hi| - |mp| underflowed or zero if it did not. Note
     * |carry| cannot be 1. That would imply the subtraction did not fit in
     * |nl| words, and we know at most one subtraction is needed.
     */
    for (i = 0; i < nl; i++) {
        /* Keep the unsubtracted word on underflow, the difference otherwise */
        wp[i] = (carry & hi[i]) | (~carry & wp[i]);
        hi[i] = 0;
    }

    return 1;
}
|
||||
#endif /* MONT_WORD */
|
||||
|
||||
int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
|
||||
BN_CTX *ctx)
|
||||
{
|
||||
int retn;
|
||||
|
||||
retn = bn_from_mont_fixed_top(ret, a, mont, ctx);
|
||||
bn_correct_top(ret);
|
||||
bn_check_top(ret);
|
||||
|
||||
return retn;
|
||||
}
|
||||
|
||||
int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
|
||||
BN_CTX *ctx)
|
||||
{
|
||||
int retn = 0;
|
||||
#ifdef MONT_WORD
|
||||
BIGNUM *t;
|
||||
|
||||
BN_CTX_start(ctx);
|
||||
if ((t = BN_CTX_get(ctx)) && BN_copy(t, a)) {
|
||||
retn = bn_from_montgomery_word(ret, t, mont);
|
||||
}
|
||||
BN_CTX_end(ctx);
|
||||
#else /* !MONT_WORD */
|
||||
BIGNUM *t1, *t2;
|
||||
|
||||
BN_CTX_start(ctx);
|
||||
t1 = BN_CTX_get(ctx);
|
||||
t2 = BN_CTX_get(ctx);
|
||||
if (t2 == NULL)
|
||||
goto err;
|
||||
|
||||
if (!BN_copy(t1, a))
|
||||
goto err;
|
||||
BN_mask_bits(t1, mont->ri);
|
||||
|
||||
if (!BN_mul(t2, t1, &mont->Ni, ctx))
|
||||
goto err;
|
||||
BN_mask_bits(t2, mont->ri);
|
||||
|
||||
if (!BN_mul(t1, t2, &mont->N, ctx))
|
||||
goto err;
|
||||
if (!BN_add(t2, a, t1))
|
||||
goto err;
|
||||
if (!BN_rshift(ret, t2, mont->ri))
|
||||
goto err;
|
||||
|
||||
if (BN_ucmp(ret, &(mont->N)) >= 0) {
|
||||
if (!BN_usub(ret, ret, &(mont->N)))
|
||||
goto err;
|
||||
}
|
||||
retn = 1;
|
||||
bn_check_top(ret);
|
||||
err:
|
||||
BN_CTX_end(ctx);
|
||||
#endif /* MONT_WORD */
|
||||
return retn;
|
||||
}
|
||||
|
||||
int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont,
|
||||
BN_CTX *ctx)
|
||||
{
|
||||
return bn_mul_mont_fixed_top(r, a, &(mont->RR), mont, ctx);
|
||||
}
|
||||
|
||||
BN_MONT_CTX *BN_MONT_CTX_new(void)
|
||||
{
|
||||
BN_MONT_CTX *ret;
|
||||
|
||||
if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) {
|
||||
BNerr(BN_F_BN_MONT_CTX_NEW, ERR_R_MALLOC_FAILURE);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
BN_MONT_CTX_init(ret);
|
||||
ret->flags = BN_FLG_MALLOCED;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Resets every field of ctx: the three embedded BIGNUMs are initialised
 * with bn_init() and ri, n0[] and flags are zeroed.
 */
void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
{
    bn_init(&ctx->RR);
    bn_init(&ctx->N);
    bn_init(&ctx->Ni);

    ctx->ri = 0;
    ctx->n0[0] = 0;
    ctx->n0[1] = 0;
    ctx->flags = 0;
}
|
||||
|
||||
/*
 * Clears and frees the BIGNUMs held by mont, and frees mont itself when it
 * carries BN_FLG_MALLOCED.  A NULL argument is a no-op.
 */
void BN_MONT_CTX_free(BN_MONT_CTX *mont)
{
    if (mont != NULL) {
        BN_clear_free(&mont->RR);
        BN_clear_free(&mont->N);
        BN_clear_free(&mont->Ni);
        if (mont->flags & BN_FLG_MALLOCED)
            OPENSSL_free(mont);
    }
}
|
||||
|
||||
/*
 * Initialises mont for the modulus mod.
 *
 * Stores |mod| in mont->N (propagating BN_FLG_CONSTTIME), sets mont->ri,
 * computes the word-sized constant(s) mont->n0[] (MONT_WORD) or the full
 * mont->Ni (bignum version), and finally sets mont->RR = 2^(2*ri) mod N,
 * zero-padded to N.top words and flagged BN_FLG_FIXED_TOP.
 *
 * Returns 1 on success, 0 on failure or if mod is zero.
 */
int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
{
    int i, ntop, ret = 0;
    BIGNUM *Ri, *R;

    if (BN_is_zero(mod))
        return 0;

    BN_CTX_start(ctx);
    if ((Ri = BN_CTX_get(ctx)) == NULL)
        goto err;
    R = &(mont->RR);            /* grab RR as a temp */
    if (!BN_copy(&(mont->N), mod))
        goto err;               /* Set N */
    if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
        BN_set_flags(&(mont->N), BN_FLG_CONSTTIME);
    mont->N.neg = 0;

#ifdef MONT_WORD
    {
        /* tmod is a stack BIGNUM backed by buf: the low word(s) of mod */
        BIGNUM tmod;
        BN_ULONG buf[2];

        bn_init(&tmod);
        tmod.d = buf;
        tmod.dmax = 2;
        tmod.neg = 0;

        if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
            BN_set_flags(&tmod, BN_FLG_CONSTTIME);

        /* ri = bit length of mod rounded up to a whole number of words */
        mont->ri = (BN_num_bits(mod) + (BN_BITS2 - 1)) / BN_BITS2 * BN_BITS2;

# if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
        /*
         * Only certain BN_BITS2<=32 platforms actually make use of n0[1],
         * and we could use the #else case (with a shorter R value) for the
         * others.  However, currently only the assembler files do know which
         * is which.
         */

        BN_zero(R);
        if (!(BN_set_bit(R, 2 * BN_BITS2)))
            goto err;

        tmod.top = 0;
        if ((buf[0] = mod->d[0]))
            tmod.top = 1;
        if ((buf[1] = mod->top > 1 ? mod->d[1] : 0))
            tmod.top = 2;

        if (BN_is_one(&tmod))
            BN_zero(Ri);
        else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
            goto err;
        if (!BN_lshift(Ri, Ri, 2 * BN_BITS2))
            goto err;           /* R*Ri */
        if (!BN_is_zero(Ri)) {
            if (!BN_sub_word(Ri, 1))
                goto err;
        } else {                /* if N mod word size == 1 */

            /*
             * Two words are written below, so request two words.  The
             * previous bn_expand() call took a bit count and was handed a
             * byte count, which only guaranteed a single word.
             */
            if (bn_wexpand(Ri, 2) == NULL)
                goto err;
            /* Ri-- (mod double word size) */
            Ri->neg = 0;
            Ri->d[0] = BN_MASK2;
            Ri->d[1] = BN_MASK2;
            Ri->top = 2;
        }
        if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
            goto err;
        /*
         * Ni = (R*Ri-1)/N, keep only couple of least significant words:
         */
        mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
        mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
# else
        BN_zero(R);
        if (!(BN_set_bit(R, BN_BITS2)))
            goto err;           /* R */

        buf[0] = mod->d[0];     /* tmod = N mod word size */
        buf[1] = 0;
        tmod.top = buf[0] != 0 ? 1 : 0;
        /* Ri = R^-1 mod N */
        if (BN_is_one(&tmod))
            BN_zero(Ri);
        else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
            goto err;
        if (!BN_lshift(Ri, Ri, BN_BITS2))
            goto err;           /* R*Ri */
        if (!BN_is_zero(Ri)) {
            if (!BN_sub_word(Ri, 1))
                goto err;
        } else {                /* if N mod word size == 1 */

            if (!BN_set_word(Ri, BN_MASK2))
                goto err;       /* Ri-- (mod word size) */
        }
        if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
            goto err;
        /*
         * Ni = (R*Ri-1)/N, keep only least significant word:
         */
        mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
        mont->n0[1] = 0;
# endif
    }
#else                           /* !MONT_WORD */
    {                           /* bignum version */
        mont->ri = BN_num_bits(&mont->N);
        BN_zero(R);
        if (!BN_set_bit(R, mont->ri))
            goto err;           /* R = 2^ri */
        /* Ri = R^-1 mod N */
        if ((BN_mod_inverse(Ri, R, &mont->N, ctx)) == NULL)
            goto err;
        if (!BN_lshift(Ri, Ri, mont->ri))
            goto err;           /* R*Ri */
        if (!BN_sub_word(Ri, 1))
            goto err;
        /*
         * Ni = (R*Ri-1) / N
         */
        if (!BN_div(&(mont->Ni), NULL, Ri, &mont->N, ctx))
            goto err;
    }
#endif

    /* setup RR for conversions */
    BN_zero(&(mont->RR));
    if (!BN_set_bit(&(mont->RR), mont->ri * 2))
        goto err;
    if (!BN_mod(&(mont->RR), &(mont->RR), &(mont->N), ctx))
        goto err;

    /* Zero-pad RR up to the width of N and pin its top there */
    ntop = mont->N.top;
    for (i = mont->RR.top; i < ntop; i++)
        mont->RR.d[i] = 0;
    mont->RR.top = ntop;
    mont->RR.flags |= BN_FLG_FIXED_TOP;

    ret = 1;
 err:
    BN_CTX_end(ctx);
    return ret;
}
|
||||
|
||||
/*
 * Copies RR, N, Ni, ri and n0[] from one context into another.  Returns
 * to on success (also when to == from) and NULL if a BIGNUM copy fails;
 * in that case earlier fields may already have been overwritten.
 */
BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
{
    if (to == from)
        return to;

    /* Copied in the order RR, N, Ni; stops at the first failure */
    if (!BN_copy(&(to->RR), &(from->RR))
        || !BN_copy(&(to->N), &(from->N))
        || !BN_copy(&(to->Ni), &(from->Ni)))
        return NULL;

    to->ri = from->ri;
    to->n0[0] = from->n0[0];
    to->n0[1] = from->n0[1];
    return to;
}
|
||||
|
||||
BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_RWLOCK *lock,
|
||||
const BIGNUM *mod, BN_CTX *ctx)
|
||||
{
|
||||
BN_MONT_CTX *ret;
|
||||
|
||||
CRYPTO_THREAD_read_lock(lock);
|
||||
ret = *pmont;
|
||||
CRYPTO_THREAD_unlock(lock);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* We don't want to serialise globally while doing our lazy-init math in
|
||||
* BN_MONT_CTX_set. That punishes threads that are doing independent
|
||||
* things. Instead, punish the case where more than one thread tries to
|
||||
* lazy-init the same 'pmont', by having each do the lazy-init math work
|
||||
* independently and only use the one from the thread that wins the race
|
||||
* (the losers throw away the work they've done).
|
||||
*/
|
||||
ret = BN_MONT_CTX_new();
|
||||
if (ret == NULL)
|
||||
return NULL;
|
||||
if (!BN_MONT_CTX_set(ret, mod, ctx)) {
|
||||
BN_MONT_CTX_free(ret);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* The locked compare-and-set, after the local work is done. */
|
||||
CRYPTO_THREAD_write_lock(lock);
|
||||
if (*pmont) {
|
||||
BN_MONT_CTX_free(ret);
|
||||
ret = *pmont;
|
||||
} else
|
||||
*pmont = ret;
|
||||
CRYPTO_THREAD_unlock(lock);
|
||||
return ret;
|
||||
}
|
86
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mpi.c
vendored
Normal file
86
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mpi.c
vendored
Normal file
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
int BN_bn2mpi(const BIGNUM *a, unsigned char *d)
|
||||
{
|
||||
int bits;
|
||||
int num = 0;
|
||||
int ext = 0;
|
||||
long l;
|
||||
|
||||
bits = BN_num_bits(a);
|
||||
num = (bits + 7) / 8;
|
||||
if (bits > 0) {
|
||||
ext = ((bits & 0x07) == 0);
|
||||
}
|
||||
if (d == NULL)
|
||||
return (num + 4 + ext);
|
||||
|
||||
l = num + ext;
|
||||
d[0] = (unsigned char)(l >> 24) & 0xff;
|
||||
d[1] = (unsigned char)(l >> 16) & 0xff;
|
||||
d[2] = (unsigned char)(l >> 8) & 0xff;
|
||||
d[3] = (unsigned char)(l) & 0xff;
|
||||
if (ext)
|
||||
d[4] = 0;
|
||||
num = BN_bn2bin(a, &(d[4 + ext]));
|
||||
if (a->neg)
|
||||
d[4] |= 0x80;
|
||||
return (num + 4 + ext);
|
||||
}
|
||||
|
||||
/*
 * Parses the format written by BN_bn2mpi: a 4-byte big-endian length
 * followed by that many magnitude bytes, with the top bit of the first
 * magnitude byte acting as the sign.
 *
 * d   - input buffer
 * n   - total number of bytes in d; must equal 4 + the encoded length
 * ain - BIGNUM to fill in, or NULL to allocate a new one
 *
 * Returns the resulting BIGNUM, or NULL on error (BN_R_INVALID_LENGTH when
 * n < 4, BN_R_ENCODING_ERROR when the length prefix does not match n, or an
 * allocation/conversion failure).  A BIGNUM allocated here is freed if the
 * conversion fails; a caller-supplied ain is never freed.
 */
BIGNUM *BN_mpi2bn(const unsigned char *d, int n, BIGNUM *ain)
{
    unsigned long len;
    int neg = 0;
    BIGNUM *a = NULL;

    if (n < 4) {
        BNerr(BN_F_BN_MPI2BN, BN_R_INVALID_LENGTH);
        return NULL;
    }

    /*
     * Assemble the prefix with unsigned arithmetic: shifting a byte >= 0x80
     * left by 24 in a signed 32-bit long would be undefined behaviour.
     */
    len = ((unsigned long)d[0] << 24) | ((unsigned long)d[1] << 16)
        | ((unsigned long)d[2] << 8) | (unsigned long)d[3];

    /* n >= 4 here, so n - 4 is non-negative and the cast is value-preserving */
    if (len != (unsigned long)(n - 4)) {
        BNerr(BN_F_BN_MPI2BN, BN_R_ENCODING_ERROR);
        return NULL;
    }

    if (ain == NULL)
        a = BN_new();
    else
        a = ain;

    if (a == NULL)
        return NULL;

    if (len == 0) {
        a->neg = 0;
        a->top = 0;
        return a;
    }
    d += 4;
    if ((*d) & 0x80)
        neg = 1;
    /* len == n - 4 <= INT_MAX, so the narrowing cast cannot truncate */
    if (BN_bin2bn(d, (int)len, a) == NULL) {
        if (ain == NULL)
            BN_free(a);
        return NULL;
    }
    a->neg = neg;
    if (neg) {
        /* The sign bit was parsed as the top magnitude bit; remove it */
        BN_clear_bit(a, BN_num_bits(a) - 1);
    }
    bn_check_top(a);
    return a;
}
|
684
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mul.c
vendored
Normal file
684
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mul.c
vendored
Normal file
|
@ -0,0 +1,684 @@
|
|||
/*
|
||||
* Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
|
||||
/*
|
||||
* Here follows specialised variants of bn_add_words() and bn_sub_words().
|
||||
* They have the property of performing operations on arrays of different sizes.
|
||||
* The sizes of those arrays are expressed through cl, which is the common
|
||||
* length ( basically, min(len(a),len(b)) ), and dl, which is the delta
|
||||
* between the two lengths, calculated as len(a)-len(b). All lengths are the
|
||||
* number of BN_ULONGs... For the operations that require a result array as
|
||||
* parameter, it must have the length cl+abs(dl). These functions should
|
||||
* probably end up in bn_asm.c as soon as there are assembler counterparts
|
||||
* for the systems that use assembler files.
|
||||
*/
|
||||
|
||||
/*
 * Subtracts b from a where the two arrays may differ in length, writing
 * cl + abs(dl) words to r and returning the final borrow.
 *
 * cl - number of words common to a and b
 * dl - len(a) - len(b); when negative the extra words come from b (and are
 *      subtracted from zero), when positive they come from a
 */
BN_ULONG bn_sub_part_words(BN_ULONG *r,
                           const BN_ULONG *a, const BN_ULONG *b,
                           int cl, int dl)
{
    BN_ULONG borrow, w;

    assert(cl >= 0);
    borrow = bn_sub_words(r, a, b, cl);

    if (dl == 0)
        return borrow;

    r += cl;
    a += cl;
    b += cl;

    if (dl < 0) {
        /* b is longer: r = 0 - b - borrow; any non-zero word of b borrows */
        while (dl < 0) {
            w = *b++;
            *r++ = (0 - w - borrow) & BN_MASK2;
            if (w != 0)
                borrow = 1;
            dl++;
        }
    } else {
        /*
         * a is longer: r = a - borrow.  The borrow is absorbed by the first
         * non-zero word of a; after that the words are plain copies.
         */
        while (dl > 0) {
            w = *a++;
            *r++ = (w - borrow) & BN_MASK2;
            if (w != 0)
                borrow = 0;
            dl--;
        }
    }
    return borrow;
}
|
||||
#endif
|
||||
|
||||
#ifdef BN_RECURSION
|
||||
/*
|
||||
* Karatsuba recursive multiplication algorithm (cf. Knuth, The Art of
|
||||
* Computer Programming, Vol. 2)
|
||||
*/
|
||||
|
||||
/*-
|
||||
* r is 2*n2 words in size,
|
||||
* a and b are both n2 words in size.
|
||||
* n2 must be a power of 2.
|
||||
* We multiply and return the result.
|
||||
* t must be 2*n2 words in size
|
||||
* We calculate
|
||||
* a[0]*b[0]
|
||||
* a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
|
||||
* a[1]*b[1]
|
||||
*/
|
||||
/* dnX may not be positive, but n2/2+dnX has to be */
|
||||
/*
 * Karatsuba step: r = a * b for operands nominally n2 words long, where
 * dna/dnb (<= 0) give how many words the top halves of a and b fall short.
 * t is scratch space.  Small sizes are handed to the comba or schoolbook
 * routines.
 */
void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
                      int dna, int dnb, BN_ULONG *t)
{
    int n = n2 / 2;
    int tna = n + dna, tnb = n + dnb;
    int cmp_a, cmp_b, carry;
    unsigned int neg = 0, zero = 0;
    BN_ULONG sum, old, *p;

# ifdef BN_MUL_COMBA
    /*
     * Only call bn_mul_comba 8 if n2 == 8 and the two arrays are complete
     * [steve]
     */
    if (n2 == 8 && dna == 0 && dnb == 0) {
        bn_mul_comba8(r, a, b);
        return;
    }
# endif                         /* BN_MUL_COMBA */

    /* Else do normal multiply */
    if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
        bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
        /* Zero the words of r that the shortened product did not reach */
        if ((dna + dnb) < 0)
            memset(&r[2 * n2 + dna + dnb], 0,
                   sizeof(BN_ULONG) * -(dna + dnb));
        return;
    }

    /*
     * Form |a[0]-a[1]| in t[0..n) and |b[1]-b[0]| in t[n..n2); neg records
     * the sign of their product and zero that one of the factors is zero.
     */
    cmp_a = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
    cmp_b = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
    switch (cmp_a * 3 + cmp_b) {
    case -4:
        bn_sub_part_words(t, &(a[n]), a, tna, tna - n);
        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb);
        break;
    case -2:
        bn_sub_part_words(t, &(a[n]), a, tna, tna - n);
        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
        neg = 1;
        break;
    case 2:
        bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb);
        neg = 1;
        break;
    case 4:
        bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
        break;
    case -3:
    case -1:
    case 0:
    case 1:
    case 3:
        zero = 1;
        break;
    }

# ifdef BN_MUL_COMBA
    if (n == 4 && dna == 0 && dnb == 0) {
        /* XXX: bn_mul_comba4 could take extra args to do this well */
        if (!zero)
            bn_mul_comba4(&(t[n2]), t, &(t[n]));
        else
            memset(&t[n2], 0, sizeof(*t) * 8);

        bn_mul_comba4(r, a, b);
        bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
    } else if (n == 8 && dna == 0 && dnb == 0) {
        /* XXX: bn_mul_comba8 could take extra args to do this well */
        if (!zero)
            bn_mul_comba8(&(t[n2]), t, &(t[n]));
        else
            memset(&t[n2], 0, sizeof(*t) * 16);

        bn_mul_comba8(r, a, b);
        bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
    } else
# endif                         /* BN_MUL_COMBA */
    {
        p = &(t[n2 * 2]);
        if (!zero)
            bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
        else
            memset(&t[n2], 0, sizeof(*t) * n2);
        bn_mul_recursive(r, a, b, n, 0, 0, p);
        bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
    }

    /*-
     * t[32] holds (a[0]-a[1])*(b[1]-b[0]), neg is the sign
     * r[10] holds (a[0]*b[0])
     * r[32] holds (a[1]*b[1])
     */

    carry = (int)(bn_add_words(t, r, &(r[n2]), n2));

    if (neg) {                  /* if t[32] is negative */
        carry -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
    } else {
        /* Might have a carry */
        carry += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
    }

    /*-
     * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
     * r[10] holds (a[0]*b[0])
     * r[32] holds (a[1]*b[1])
     * carry holds the carry bits
     */
    carry += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
    if (carry) {
        p = &(r[n + n2]);
        old = *p;
        sum = (old + carry) & BN_MASK2;
        *p = sum;

        /*
         * The overflow will stop before we over write words we should not
         * overwrite
         */
        if (sum < (BN_ULONG)carry) {
            do {
                p++;
                old = *p;
                sum = (old + 1) & BN_MASK2;
                *p = sum;
            } while (sum == 0);
        }
    }
}
|
||||
|
||||
/*
|
||||
* n+tn is the word length; t needs to be n*4 in size, as does r
|
||||
*/
|
||||
/* tnX may not be negative but less than n */
|
||||
/*
 * Karatsuba step for operands whose low halves are n words long and whose
 * high halves are tna and tnb words long.  r receives the product and t is
 * scratch space.  For n < 8 the schoolbook routine is used directly.
 */
void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
                           int tna, int tnb, BN_ULONG *t)
{
    int n2 = n * 2;
    int half, excess;
    int cmp_a, cmp_b, carry;
    int neg = 0;
    BN_ULONG sum, old, *p;

    if (n < 8) {
        bn_mul_normal(r, a, n + tna, b, n + tnb);
        return;
    }

    /* t[0..n) = +-(a[0]-a[1]), t[n..n2) = +-(b[1]-b[0]); neg is the sign */
    cmp_a = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
    cmp_b = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
    switch (cmp_a * 3 + cmp_b) {
    case -4:
        bn_sub_part_words(t, &(a[n]), a, tna, tna - n);
        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb);
        break;
    case -3:
    case -2:
        bn_sub_part_words(t, &(a[n]), a, tna, tna - n);
        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
        neg = 1;
        break;
    case -1:
    case 0:
    case 1:
    case 2:
        bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb);
        neg = 1;
        break;
    case 3:
    case 4:
        bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
        break;
    }

    /*
     * The zero case isn't yet implemented here. The speedup would probably
     * be negligible.
     */
    if (n == 8) {
        bn_mul_comba8(&(t[n2]), t, &(t[n]));
        bn_mul_comba8(r, a, b);
        bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
        memset(&r[n2 + tna + tnb], 0, sizeof(*r) * (n2 - tna - tnb));
    } else {
        p = &(t[n2 * 2]);
        bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
        bn_mul_recursive(r, a, b, n, 0, 0, p);
        half = n / 2;

        /* How far the longer high half exceeds half of n */
        excess = (tna > tnb ? tna : tnb) - half;

        if (excess == 0) {
            /* If there is only a bottom half to the number, just do it */
            bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
                             half, tna - half, tnb - half, p);
            memset(&r[n2 + half * 2], 0, sizeof(*r) * (n2 - half * 2));
        } else if (excess > 0) {
            /* eg, n == 16, half == 8 and tn == 11 */
            bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
                                  half, tna - half, tnb - half, p);
            memset(&(r[n2 + tna + tnb]), 0,
                   sizeof(BN_ULONG) * (n2 - tna - tnb));
        } else {
            /* (excess < 0) eg, n == 16, half == 8 and tn == 5 */
            memset(&r[n2], 0, sizeof(*r) * n2);
            if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
                && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
                bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
            } else {
                for (;;) {
                    half /= 2;
                    /*
                     * these simplified conditions work exclusively because
                     * difference between tna and tnb is 1 or 0
                     */
                    if (half < tna || half < tnb) {
                        bn_mul_part_recursive(&(r[n2]),
                                              &(a[n]), &(b[n]),
                                              half, tna - half, tnb - half,
                                              p);
                        break;
                    } else if (half == tna || half == tnb) {
                        bn_mul_recursive(&(r[n2]),
                                         &(a[n]), &(b[n]),
                                         half, tna - half, tnb - half, p);
                        break;
                    }
                }
            }
        }
    }

    /*-
     * t[32] holds (a[0]-a[1])*(b[1]-b[0]), neg is the sign
     * r[10] holds (a[0]*b[0])
     * r[32] holds (a[1]*b[1])
     */

    carry = (int)(bn_add_words(t, r, &(r[n2]), n2));

    if (neg) {                  /* if t[32] is negative */
        carry -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
    } else {
        /* Might have a carry */
        carry += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
    }

    /*-
     * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
     * r[10] holds (a[0]*b[0])
     * r[32] holds (a[1]*b[1])
     * carry holds the carry bits
     */
    carry += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
    if (carry) {
        p = &(r[n + n2]);
        old = *p;
        sum = (old + carry) & BN_MASK2;
        *p = sum;

        /*
         * The overflow will stop before we over write words we should not
         * overwrite
         */
        if (sum < (BN_ULONG)carry) {
            do {
                p++;
                old = *p;
                sum = (old + 1) & BN_MASK2;
                *p = sum;
            } while (sum == 0);
        }
    }
}
|
||||
|
||||
/*-
|
||||
* a and b must be the same size, which is n2.
|
||||
* r needs to be n2 words and t needs to be n2*2
|
||||
*/
|
||||
/*
 * Computes the low n2 words of a * b into r.  The full product of the low
 * halves is written first; the low halves of the two cross products are
 * then added into the upper half of r.  t is scratch space.
 */
void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
                          BN_ULONG *t)
{
    int n = n2 / 2;
    BN_ULONG *r_hi = &(r[n]);

    bn_mul_recursive(r, a, b, n, 0, 0, &(t[0]));

    if (n < BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) {
        /* Small case: both cross products first, then both additions */
        bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n);
        bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n);
        bn_add_words(r_hi, r_hi, &(t[0]), n);
        bn_add_words(r_hi, r_hi, &(t[n]), n);
        return;
    }

    /* Large case: recurse, reusing t[0..n) for each cross product in turn */
    bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2]));
    bn_add_words(r_hi, r_hi, &(t[0]), n);
    bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2]));
    bn_add_words(r_hi, r_hi, &(t[0]), n);
}
|
||||
#endif /* BN_RECURSION */
|
||||
|
||||
/*
 * Public multiplication r = a * b: runs bn_mul_fixed_top() and then
 * normalises r with bn_correct_top().  The status of the multiplication is
 * returned unchanged.
 */
int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
{
    int status;

    status = bn_mul_fixed_top(r, a, b, ctx);

    bn_correct_top(r);
    bn_check_top(r);

    return status;
}
|
||||
|
||||
/*
 * Computes r = a * b, leaving r with top = a->top + b->top and
 * BN_FLG_FIXED_TOP set instead of a normalised top.  When r aliases an
 * operand the product is built in a temporary from ctx and copied back.
 * Depending on the build options and operand sizes the comba, Karatsuba or
 * schoolbook routine is selected.  Returns 1 on success and 0 on failure.
 */
int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
{
    int ok = 0;
    int top, al, bl;
    BIGNUM *out;
#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
    int diff;
#endif
#ifdef BN_RECURSION
    BIGNUM *scratch = NULL;
    int pow2 = 0, dbl;
#endif

    bn_check_top(a);
    bn_check_top(b);
    bn_check_top(r);

    al = a->top;
    bl = b->top;

    if (al == 0 || bl == 0) {
        BN_zero(r);
        return 1;
    }
    top = al + bl;

    BN_CTX_start(ctx);
    out = r;
    if (r == a || r == b) {
        out = BN_CTX_get(ctx);
        if (out == NULL)
            goto err;
    }

#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
    diff = al - bl;
#endif
#ifdef BN_MUL_COMBA
    if (diff == 0 && al == 8) {
        if (bn_wexpand(out, 16) == NULL)
            goto err;
        out->top = 16;
        bn_mul_comba8(out->d, a->d, b->d);
        goto end;
    }
#endif                          /* BN_MUL_COMBA */
#ifdef BN_RECURSION
    if (al >= BN_MULL_SIZE_NORMAL && bl >= BN_MULL_SIZE_NORMAL
        && diff >= -1 && diff <= 1) {
        /*
         * Find out the power of two lower or equal to the longest of the
         * two numbers
         */
        pow2 = BN_num_bits_word((BN_ULONG)(diff >= 0 ? al : bl));
        pow2 = 1 << (pow2 - 1);
        assert(pow2 <= al || pow2 <= bl);
        dbl = pow2 + pow2;
        scratch = BN_CTX_get(ctx);
        if (scratch == NULL)
            goto err;
        if (al > pow2 || bl > pow2) {
            if (bn_wexpand(scratch, dbl * 4) == NULL)
                goto err;
            if (bn_wexpand(out, dbl * 4) == NULL)
                goto err;
            bn_mul_part_recursive(out->d, a->d, b->d,
                                  pow2, al - pow2, bl - pow2, scratch->d);
        } else {                /* al <= pow2 && bl <= pow2 */
            if (bn_wexpand(scratch, dbl * 2) == NULL)
                goto err;
            if (bn_wexpand(out, dbl * 2) == NULL)
                goto err;
            bn_mul_recursive(out->d, a->d, b->d, pow2, al - pow2, bl - pow2,
                             scratch->d);
        }
        out->top = top;
        goto end;
    }
#endif                          /* BN_RECURSION */

    /* Schoolbook fallback */
    if (bn_wexpand(out, top) == NULL)
        goto err;
    out->top = top;
    bn_mul_normal(out->d, a->d, al, b->d, bl);

#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
 end:
#endif
    out->neg = a->neg ^ b->neg;
    out->flags |= BN_FLG_FIXED_TOP;
    if (r != out && BN_copy(r, out) == NULL)
        goto err;

    ok = 1;
 err:
    bn_check_top(r);
    BN_CTX_end(ctx);
    return ok;
}
|
||||
|
||||
/*
 * Word-array multiplication: r receives the product of the na-word array a
 * and the nb-word array b.
 *
 * The first row is produced with bn_mul_words(); every further word of b
 * adds one shifted row with bn_mul_add_words(). The word returned by each
 * row call is stored at r[na + row].
 */
void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
{
    BN_ULONG *high;
    int row;

    /* Make a the operand with at least as many words as b. */
    if (na < nb) {
        const int n_keep = na;
        BN_ULONG *const p_keep = a;

        na = nb;
        nb = n_keep;
        a = b;
        b = p_keep;
    }

    high = &r[na];
    if (nb <= 0) {
        (void)bn_mul_words(r, a, na, 0);
        return;
    }

    high[0] = bn_mul_words(r, a, na, b[0]);
    for (row = 1; row < nb; row++)
        high[row] = bn_mul_add_words(&r[row], a, na, b[row]);
}
|
||||
|
||||
/*
 * Low-half multiplication of the n-word arrays a and b into r.
 *
 * Row `row` starts at r[row] and uses only the first n - row words of a,
 * so nothing is written at or beyond r[n]. The words returned by the row
 * helpers are discarded.
 */
void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
{
    int row;

    bn_mul_words(r, a, n, b[0]);
    for (row = 1; row < n; row++)
        bn_mul_add_words(&r[row], a, n - row, b[row]);
}
|
1239
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_nist.c
vendored
Normal file
1239
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_nist.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
469
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_prime.c
vendored
Normal file
469
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_prime.c
vendored
Normal file
|
@ -0,0 +1,469 @@
|
|||
/*
|
||||
* Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
/*
|
||||
* The quick sieve algorithm approach to weeding out primes is Philip
|
||||
* Zimmermann's, as implemented in PGP. I have had a read of his comments
|
||||
* and implemented my own version.
|
||||
*/
|
||||
#include "bn_prime.h"
|
||||
|
||||
static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
|
||||
const BIGNUM *a1_odd, int k, BN_CTX *ctx,
|
||||
BN_MONT_CTX *mont);
|
||||
static int probable_prime(BIGNUM *rnd, int bits, prime_t *mods);
|
||||
static int probable_prime_dh_safe(BIGNUM *rnd, int bits,
|
||||
const BIGNUM *add, const BIGNUM *rem,
|
||||
BN_CTX *ctx);
|
||||
|
||||
/*
 * Invoke the progress callback stored in cb with the values a and b.
 *
 * Returns 1 when the caller should carry on and 0 when it should stop:
 *   - no BN_GENCB object, or no function pointer set in it: 1;
 *   - version 1 (deprecated style): the function is called and 1 is
 *     returned regardless;
 *   - version 2 (new style): the function's own return value;
 *   - any other version: 0.
 */
int BN_GENCB_call(BN_GENCB *cb, int a, int b)
{
    /* No callback means continue */
    if (cb == NULL)
        return 1;

    switch (cb->ver) {
    case 1:
        /* Deprecated-style callbacks */
        if (cb->cb.cb_1 == NULL)
            return 1;
        cb->cb.cb_1(a, b, cb->arg);
        return 1;
    case 2:
        /*
         * New-style callbacks. A missing function pointer is treated the
         * same way as for version 1 instead of being called through NULL.
         */
        if (cb->cb.cb_2 == NULL)
            return 1;
        return cb->cb.cb_2(a, b, cb);
    default:
        break;
    }
    /* Unrecognised callback type */
    return 0;
}
|
||||
|
||||
/*
 * Generate a probable prime of the requested bit length into ret.
 *
 * safe - when non-zero, (ret - 1) / 2 must pass the primality test too.
 * add  - when NULL the candidate comes from probable_prime(); otherwise
 *        from probable_prime_dh_safe() (safe) or bn_probable_prime_dh(),
 *        both of which also receive rem.
 * cb   - progress callback; a zero return from it aborts generation.
 *
 * Returns 1 when a prime was found and 0 on error or abort.
 */
int BN_generate_prime_ex(BIGNUM *ret, int bits, int safe,
                         const BIGNUM *add, const BIGNUM *rem, BN_GENCB *cb)
{
    BIGNUM *half = NULL;
    int found = 0;
    int round, verdict, attempts = 0;
    BN_CTX *ctx = NULL;
    prime_t *mods = NULL;
    const int checks = BN_prime_checks_for_size(bits);

    /*
     * There are no primes below two bits, and the smallest safe prime (7)
     * is three bits.
     */
    if (bits < 2 || (bits == 2 && safe)) {
        BNerr(BN_F_BN_GENERATE_PRIME_EX, BN_R_BITS_TOO_SMALL);
        return 0;
    }

    mods = OPENSSL_zalloc(sizeof(*mods) * NUMPRIMES);
    if (mods == NULL)
        goto err;

    ctx = BN_CTX_new();
    if (ctx == NULL)
        goto err;
    BN_CTX_start(ctx);
    half = BN_CTX_get(ctx);
    if (half == NULL)
        goto err;

 loop:
    /* Pick a fresh candidate. */
    if (add == NULL) {
        if (!probable_prime(ret, bits, mods))
            goto err;
    } else if (safe) {
        if (!probable_prime_dh_safe(ret, bits, add, rem, ctx))
            goto err;
    } else if (!bn_probable_prime_dh(ret, bits, add, rem, ctx)) {
        goto err;
    }

    /* A zero return from the callback means the caller aborted. */
    if (!BN_GENCB_call(cb, 0, attempts++))
        goto err;

    if (safe) {
        /*
         * For a safe prime (ret - 1) / 2 has to be prime as well. ret is
         * odd, so a one-bit right shift yields that value.
         */
        if (!BN_rshift1(half, ret))
            goto err;

        /* Alternate single-round tests of ret and of its half. */
        for (round = 0; round < checks; round++) {
            verdict = BN_is_prime_fasttest_ex(ret, 1, ctx, 0, cb);
            if (verdict == -1)
                goto err;
            if (verdict == 0)
                goto loop;

            verdict = BN_is_prime_fasttest_ex(half, 1, ctx, 0, cb);
            if (verdict == -1)
                goto err;
            if (verdict == 0)
                goto loop;

            /* One more safe-prime round passed. */
            if (!BN_GENCB_call(cb, 2, attempts - 1))
                goto err;
        }
    } else {
        verdict = BN_is_prime_fasttest_ex(ret, checks, ctx, 0, cb);
        if (verdict == -1)
            goto err;
        if (verdict == 0)
            goto loop;
    }

    /* The candidate survived every test. */
    found = 1;
 err:
    OPENSSL_free(mods);
    if (ctx != NULL)
        BN_CTX_end(ctx);
    BN_CTX_free(ctx);
    bn_check_top(ret);
    return found;
}
|
||||
|
||||
int BN_is_prime_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
|
||||
BN_GENCB *cb)
|
||||
{
|
||||
return BN_is_prime_fasttest_ex(a, checks, ctx_passed, 0, cb);
|
||||
}
|
||||
|
||||
/*
 * Probabilistic primality test of a.
 *
 * checks            - number of random bases to try; BN_prime_checks
 *                     selects a count based on the bit length of a.
 * ctx_passed        - BN_CTX to use, or NULL to allocate one locally.
 * do_trial_division - when non-zero, first divide by the table of small
 *                     primes.
 * cb                - progress callback; a zero return aborts the test.
 *
 * Returns 1 if a is probably prime, 0 if it is composite and -1 on error
 * or abort.
 */
int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
                            int do_trial_division, BN_GENCB *cb)
{
    int round, idx, verdict, result = -1;
    int twos;
    BN_CTX *ctx = NULL;
    BIGNUM *a_minus_1, *odd_part, *a_minus_3, *base; /* taken from ctx */
    BN_MONT_CTX *mont = NULL;

    /* The really small primes 2 and 3 are answered directly. */
    if (BN_is_word(a, 2) || BN_is_word(a, 3))
        return 1;

    /* Anything even, or not bigger than 1, is not prime. */
    if (!BN_is_odd(a) || BN_cmp(a, BN_value_one()) <= 0)
        return 0;

    if (checks == BN_prime_checks)
        checks = BN_prime_checks_for_size(BN_num_bits(a));

    /* Optional search for small factors (index 0, the prime 2, is skipped). */
    if (do_trial_division) {
        for (idx = 1; idx < NUMPRIMES; idx++) {
            const BN_ULONG residue = BN_mod_word(a, primes[idx]);

            if (residue == (BN_ULONG)-1)
                goto err;
            /* Divisible: prime only if a is that table entry itself. */
            if (residue == 0)
                return BN_is_word(a, primes[idx]);
        }
        if (!BN_GENCB_call(cb, 1, -1))
            goto err;
    }

    ctx = ctx_passed;
    if (ctx == NULL) {
        ctx = BN_CTX_new();
        if (ctx == NULL)
            goto err;
    }
    BN_CTX_start(ctx);

    a_minus_1 = BN_CTX_get(ctx);
    a_minus_3 = BN_CTX_get(ctx);
    odd_part = BN_CTX_get(ctx);
    base = BN_CTX_get(ctx);
    /* Only the last request is checked, as in the original code. */
    if (base == NULL)
        goto err;

    /* a_minus_1 := a - 1 */
    if (!BN_copy(a_minus_1, a) || !BN_sub_word(a_minus_1, 1))
        goto err;
    /* a_minus_3 := a - 3 */
    if (!BN_copy(a_minus_3, a) || !BN_sub_word(a_minus_3, 3))
        goto err;

    /*
     * Write a - 1 as odd_part * 2^twos. a is odd, so bit 0 of a - 1 is
     * clear and the scan can start at bit 1.
     */
    for (twos = 1; !BN_is_bit_set(a_minus_1, twos); twos++)
        continue;
    if (!BN_rshift(odd_part, a_minus_1, twos))
        goto err;

    /* Montgomery setup for computations mod a */
    mont = BN_MONT_CTX_new();
    if (mont == NULL)
        goto err;
    if (!BN_MONT_CTX_set(mont, a, ctx))
        goto err;

    for (round = 0; round < checks; round++) {
        /* Random base with 1 < base < a - 1 */
        if (!BN_priv_rand_range(base, a_minus_3) || !BN_add_word(base, 2))
            goto err;

        verdict = witness(base, a, a_minus_1, odd_part, twos, ctx, mont);
        if (verdict == -1)
            goto err;
        if (verdict != 0) {
            /* base proves that a is composite. */
            result = 0;
            goto err;
        }
        if (!BN_GENCB_call(cb, 1, round))
            goto err;
    }
    result = 1;
 err:
    if (ctx != NULL) {
        BN_CTX_end(ctx);
        /* A context supplied by the caller is not freed here. */
        if (ctx_passed == NULL)
            BN_CTX_free(ctx);
    }
    BN_MONT_CTX_free(mont);

    return result;
}
|
||||
|
||||
static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
|
||||
const BIGNUM *a1_odd, int k, BN_CTX *ctx,
|
||||
BN_MONT_CTX *mont)
|
||||
{
|
||||
if (!BN_mod_exp_mont(w, w, a1_odd, a, ctx, mont)) /* w := w^a1_odd mod a */
|
||||
return -1;
|
||||
if (BN_is_one(w))
|
||||
return 0; /* probably prime */
|
||||
if (BN_cmp(w, a1) == 0)
|
||||
return 0; /* w == -1 (mod a), 'a' is probably prime */
|
||||
while (--k) {
|
||||
if (!BN_mod_mul(w, w, w, a, ctx)) /* w := w^2 mod a */
|
||||
return -1;
|
||||
if (BN_is_one(w))
|
||||
return 1; /* 'a' is composite, otherwise a previous 'w'
|
||||
* would have been == -1 (mod 'a') */
|
||||
if (BN_cmp(w, a1) == 0)
|
||||
return 0; /* w == -1 (mod a), 'a' is probably prime */
|
||||
}
|
||||
/*
|
||||
* If we get here, 'w' is the (a-1)/2-th power of the original 'w', and
|
||||
* it is neither -1 nor +1 -- so 'a' cannot be prime
|
||||
*/
|
||||
bn_check_top(w);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Put into rnd a random odd number of exactly `bits` bits that passes a
 * sieve against the table of small primes.
 *
 * mods is caller-provided scratch with room for NUMPRIMES entries; entry i
 * receives rnd mod primes[i] for the current random draw.
 *
 * Returns 1 on success and 0 if a BIGNUM operation fails.
 */
static int probable_prime(BIGNUM *rnd, int bits, prime_t *mods)
{
    int idx;
    BN_ULONG step;
    BN_ULONG rnd_word = 0;
    /* Deliberately initialised once only: it persists across redraws. */
    BN_ULONG max_step = BN_MASK2 - primes[NUMPRIMES - 1];
    const char single_word = bits <= BN_BITS2;

 again:
    /* TODO: Not all primes are private */
    if (!BN_priv_rand(rnd, bits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ODD))
        return 0;

    /* Cache the residues of the fresh draw modulo every table prime. */
    for (idx = 1; idx < NUMPRIMES; idx++) {
        const BN_ULONG residue = BN_mod_word(rnd, (BN_ULONG)primes[idx]);

        if (residue == (BN_ULONG)-1)
            return 0;
        mods[idx] = (prime_t)residue;
    }

    /*
     * When the number fits into a single word the offset added later must
     * not push it past `bits` bits either.
     */
    if (single_word) {
        /*
         * Shifting by BN_BITS2 would be undefined behaviour, so the
         * full-word case is computed a different way.
         */
        const BN_ULONG headroom = (bits == BN_BITS2)
            ? ~((BN_ULONG)0) - BN_get_word(rnd)
            : (((BN_ULONG)1) << bits) - BN_get_word(rnd) - 1;

        if (headroom < max_step)
            max_step = headroom;
    }

    step = 0;
 loop:
    if (single_word)
        rnd_word = BN_get_word(rnd);

    /*-
     * Sieve rnd + step using the cached residues.
     *
     * Single-word candidates:
     *   1) only table primes smaller than rnd are considered, so that a
     *      small prime such as 3 is not rejected for being a multiple
     *      of itself;
     *   2) only divisibility of the candidate is tested. Coprimality of
     *      candidate - 1 is not required because there aren't many small
     *      primes where that's true.
     *
     * Larger candidates: reject when the candidate itself (residue 0) or
     * candidate - 1 (residue 1) is divisible by a table prime other
     * than 2.
     */
    for (idx = 1; idx < NUMPRIMES; idx++) {
        BN_ULONG residue;

        if (single_word && primes[idx] >= rnd_word)
            break;

        residue = (mods[idx] + step) % primes[idx];
        if (single_word ? residue == 0 : residue <= 1) {
            /* Move on to the next odd number, or redraw when out of room. */
            step += 2;
            if (step > max_step)
                goto again;
            goto loop;
        }
    }

    if (!BN_add_word(rnd, step))
        return 0;
    /* Adding the offset must not have changed the bit length. */
    if (BN_num_bits(rnd) != bits)
        goto again;
    bn_check_top(rnd);
    return 1;
}
|
||||
|
||||
/*
 * Produce in rnd a candidate congruent to rem modulo add (congruent to 1
 * when rem is NULL) that survives a sieve against the small-prime table.
 *
 * The starting point is a random odd number of `bits` bits; after that
 * the candidate is only ever advanced in steps of add.
 *
 * Returns 1 on success and 0 on failure.
 */
int bn_probable_prime_dh(BIGNUM *rnd, int bits,
                         const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx)
{
    int idx, ok = 0;
    BIGNUM *residue;

    BN_CTX_start(ctx);
    residue = BN_CTX_get(ctx);
    if (residue == NULL)
        goto err;

    if (!BN_rand(rnd, bits, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD))
        goto err;

    /* we need ((rnd-rem) % add) == 0: first make rnd a multiple of add */
    if (!BN_mod(residue, rnd, add, ctx) || !BN_sub(rnd, rnd, residue))
        goto err;
    /* ... then shift it onto the requested residue class. */
    if (rem == NULL ? !BN_add_word(rnd, 1) : !BN_add(rnd, rnd, rem))
        goto err;

    /*
     * Sieve: neither rnd (residue 0) nor rnd - 1 (residue 1) may be
     * divisible by a table prime other than 2. On a hit, step by add and
     * start the scan over.
     */
    for (idx = 1; idx < NUMPRIMES; idx++) {
        const BN_ULONG mod = BN_mod_word(rnd, (BN_ULONG)primes[idx]);

        if (mod == (BN_ULONG)-1)
            goto err;
        if (mod <= 1) {
            if (!BN_add(rnd, rnd, add))
                goto err;
            idx = 0;            /* the loop increment restarts at 1 */
            continue;
        }
    }
    ok = 1;

 err:
    BN_CTX_end(ctx);
    bn_check_top(rnd);
    return ok;
}
|
||||
|
||||
/*
 * Produce in p a safe-prime candidate of the form p = 2q + 1.
 *
 * q starts as a random odd number of bits - 1 bits, is moved onto the
 * residue class given by rem / 2 modulo padd / 2 (or 1 when rem is NULL),
 * and then p and q are advanced together, by padd and padd / 2, until
 * neither is divisible by a table prime other than 2.
 *
 * Returns 1 on success and 0 on failure.
 */
static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd,
                                  const BIGNUM *rem, BN_CTX *ctx)
{
    int idx, ok = 0;
    BIGNUM *tmp, *q, *q_step;

    BN_CTX_start(ctx);
    tmp = BN_CTX_get(ctx);
    q = BN_CTX_get(ctx);
    q_step = BN_CTX_get(ctx);
    /* Only the last request is checked, as in the original code. */
    if (q_step == NULL)
        goto err;

    /* q_step := padd / 2 */
    if (!BN_rshift1(q_step, padd))
        goto err;

    /* q is one bit shorter than the requested p. */
    if (!BN_rand(q, bits - 1, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD))
        goto err;

    /* Make q a multiple of q_step ... */
    if (!BN_mod(tmp, q, q_step, ctx) || !BN_sub(q, q, tmp))
        goto err;
    /* ... then add the wanted remainder: 1, or rem / 2. */
    if (rem == NULL) {
        if (!BN_add_word(q, 1))
            goto err;
    } else if (!BN_rshift1(tmp, rem) || !BN_add(q, q, tmp)) {
        goto err;
    }

    /* p := 2 * q + 1 */
    if (!BN_lshift1(p, q) || !BN_add_word(p, 1))
        goto err;

    /*
     * Sieve p and q together against the table primes (2 is skipped). On
     * a hit both are stepped forward and the scan starts over.
     */
    for (idx = 1; idx < NUMPRIMES; idx++) {
        const BN_ULONG p_mod = BN_mod_word(p, (BN_ULONG)primes[idx]);
        const BN_ULONG q_mod = BN_mod_word(q, (BN_ULONG)primes[idx]);

        if (p_mod == (BN_ULONG)-1 || q_mod == (BN_ULONG)-1)
            goto err;
        if (p_mod == 0 || q_mod == 0) {
            if (!BN_add(p, p, padd))
                goto err;
            if (!BN_add(q, q, q_step))
                goto err;
            idx = 0;            /* the loop increment restarts at 1 */
            continue;
        }
    }
    ok = 1;

 err:
    BN_CTX_end(ctx);
    bn_check_top(p);
    return ok;
}
|
273
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_prime.h
vendored
Normal file
273
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_prime.h
vendored
Normal file
|
@ -0,0 +1,273 @@
|
|||
/*
|
||||
* WARNING: do not edit!
|
||||
* Generated by crypto/bn/bn_prime.pl
|
||||
*
|
||||
* Copyright 1998-2019 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
typedef unsigned short prime_t;
|
||||
# define NUMPRIMES 2048
|
||||
|
||||
static const prime_t primes[2048] = {
|
||||
2, 3, 5, 7, 11, 13, 17, 19,
|
||||
23, 29, 31, 37, 41, 43, 47, 53,
|
||||
59, 61, 67, 71, 73, 79, 83, 89,
|
||||
97, 101, 103, 107, 109, 113, 127, 131,
|
||||
137, 139, 149, 151, 157, 163, 167, 173,
|
||||
179, 181, 191, 193, 197, 199, 211, 223,
|
||||
227, 229, 233, 239, 241, 251, 257, 263,
|
||||
269, 271, 277, 281, 283, 293, 307, 311,
|
||||
313, 317, 331, 337, 347, 349, 353, 359,
|
||||
367, 373, 379, 383, 389, 397, 401, 409,
|
||||
419, 421, 431, 433, 439, 443, 449, 457,
|
||||
461, 463, 467, 479, 487, 491, 499, 503,
|
||||
509, 521, 523, 541, 547, 557, 563, 569,
|
||||
571, 577, 587, 593, 599, 601, 607, 613,
|
||||
617, 619, 631, 641, 643, 647, 653, 659,
|
||||
661, 673, 677, 683, 691, 701, 709, 719,
|
||||
727, 733, 739, 743, 751, 757, 761, 769,
|
||||
773, 787, 797, 809, 811, 821, 823, 827,
|
||||
829, 839, 853, 857, 859, 863, 877, 881,
|
||||
883, 887, 907, 911, 919, 929, 937, 941,
|
||||
947, 953, 967, 971, 977, 983, 991, 997,
|
||||
1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049,
|
||||
1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097,
|
||||
1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163,
|
||||
1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223,
|
||||
1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283,
|
||||
1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321,
|
||||
1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423,
|
||||
1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459,
|
||||
1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511,
|
||||
1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571,
|
||||
1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619,
|
||||
1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693,
|
||||
1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747,
|
||||
1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811,
|
||||
1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877,
|
||||
1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949,
|
||||
1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003,
|
||||
2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069,
|
||||
2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129,
|
||||
2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203,
|
||||
2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267,
|
||||
2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311,
|
||||
2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377,
|
||||
2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423,
|
||||
2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503,
|
||||
2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579,
|
||||
2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657,
|
||||
2659, 2663, 2671, 2677, 2683, 2687, 2689, 2693,
|
||||
2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741,
|
||||
2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801,
|
||||
2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861,
|
||||
2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939,
|
||||
2953, 2957, 2963, 2969, 2971, 2999, 3001, 3011,
|
||||
3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079,
|
||||
3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167,
|
||||
3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221,
|
||||
3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301,
|
||||
3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347,
|
||||
3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413,
|
||||
3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491,
|
||||
3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541,
|
||||
3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607,
|
||||
3613, 3617, 3623, 3631, 3637, 3643, 3659, 3671,
|
||||
3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727,
|
||||
3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797,
|
||||
3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863,
|
||||
3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923,
|
||||
3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003,
|
||||
4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057,
|
||||
4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129,
|
||||
4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211,
|
||||
4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259,
|
||||
4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337,
|
||||
4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409,
|
||||
4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481,
|
||||
4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547,
|
||||
4549, 4561, 4567, 4583, 4591, 4597, 4603, 4621,
|
||||
4637, 4639, 4643, 4649, 4651, 4657, 4663, 4673,
|
||||
4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751,
|
||||
4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813,
|
||||
4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909,
|
||||
4919, 4931, 4933, 4937, 4943, 4951, 4957, 4967,
|
||||
4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011,
|
||||
5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087,
|
||||
5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167,
|
||||
5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233,
|
||||
5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309,
|
||||
5323, 5333, 5347, 5351, 5381, 5387, 5393, 5399,
|
||||
5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443,
|
||||
5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507,
|
||||
5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573,
|
||||
5581, 5591, 5623, 5639, 5641, 5647, 5651, 5653,
|
||||
5657, 5659, 5669, 5683, 5689, 5693, 5701, 5711,
|
||||
5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791,
|
||||
5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849,
|
||||
5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897,
|
||||
5903, 5923, 5927, 5939, 5953, 5981, 5987, 6007,
|
||||
6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073,
|
||||
6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133,
|
||||
6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211,
|
||||
6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271,
|
||||
6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329,
|
||||
6337, 6343, 6353, 6359, 6361, 6367, 6373, 6379,
|
||||
6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473,
|
||||
6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563,
|
||||
6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637,
|
||||
6653, 6659, 6661, 6673, 6679, 6689, 6691, 6701,
|
||||
6703, 6709, 6719, 6733, 6737, 6761, 6763, 6779,
|
||||
6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833,
|
||||
6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907,
|
||||
6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971,
|
||||
6977, 6983, 6991, 6997, 7001, 7013, 7019, 7027,
|
||||
7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121,
|
||||
7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207,
|
||||
7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253,
|
||||
7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349,
|
||||
7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457,
|
||||
7459, 7477, 7481, 7487, 7489, 7499, 7507, 7517,
|
||||
7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561,
|
||||
7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621,
|
||||
7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691,
|
||||
7699, 7703, 7717, 7723, 7727, 7741, 7753, 7757,
|
||||
7759, 7789, 7793, 7817, 7823, 7829, 7841, 7853,
|
||||
7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919,
|
||||
7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009,
|
||||
8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087,
|
||||
8089, 8093, 8101, 8111, 8117, 8123, 8147, 8161,
|
||||
8167, 8171, 8179, 8191, 8209, 8219, 8221, 8231,
|
||||
8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291,
|
||||
8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369,
|
||||
8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443,
|
||||
8447, 8461, 8467, 8501, 8513, 8521, 8527, 8537,
|
||||
8539, 8543, 8563, 8573, 8581, 8597, 8599, 8609,
|
||||
8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677,
|
||||
8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731,
|
||||
8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803,
|
||||
8807, 8819, 8821, 8831, 8837, 8839, 8849, 8861,
|
||||
8863, 8867, 8887, 8893, 8923, 8929, 8933, 8941,
|
||||
8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011,
|
||||
9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091,
|
||||
9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161,
|
||||
9173, 9181, 9187, 9199, 9203, 9209, 9221, 9227,
|
||||
9239, 9241, 9257, 9277, 9281, 9283, 9293, 9311,
|
||||
9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377,
|
||||
9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433,
|
||||
9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491,
|
||||
9497, 9511, 9521, 9533, 9539, 9547, 9551, 9587,
|
||||
9601, 9613, 9619, 9623, 9629, 9631, 9643, 9649,
|
||||
9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733,
|
||||
9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791,
|
||||
9803, 9811, 9817, 9829, 9833, 9839, 9851, 9857,
|
||||
9859, 9871, 9883, 9887, 9901, 9907, 9923, 9929,
|
||||
9931, 9941, 9949, 9967, 9973, 10007, 10009, 10037,
|
||||
10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099,
|
||||
10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163,
|
||||
10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247,
|
||||
10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303,
|
||||
10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369,
|
||||
10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459,
|
||||
10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531,
|
||||
10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627,
|
||||
10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691,
|
||||
10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771,
|
||||
10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859,
|
||||
10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937,
|
||||
10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003,
|
||||
11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087,
|
||||
11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161,
|
||||
11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251,
|
||||
11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317,
|
||||
11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399,
|
||||
11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483,
|
||||
11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551,
|
||||
11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657,
|
||||
11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731,
|
||||
11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813,
|
||||
11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887,
|
||||
11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941,
|
||||
11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011,
|
||||
12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101,
|
||||
12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161,
|
||||
12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251,
|
||||
12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323,
|
||||
12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401,
|
||||
12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473,
|
||||
12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527,
|
||||
12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589,
|
||||
12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653,
|
||||
12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739,
|
||||
12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821,
|
||||
12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907,
|
||||
12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967,
|
||||
12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033,
|
||||
13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109,
|
||||
13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177,
|
||||
13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259,
|
||||
13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337,
|
||||
13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421,
|
||||
13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499,
|
||||
13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597,
|
||||
13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681,
|
||||
13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723,
|
||||
13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799,
|
||||
13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879,
|
||||
13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933,
|
||||
13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033,
|
||||
14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143,
|
||||
14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221,
|
||||
14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323,
|
||||
14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407,
|
||||
14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461,
|
||||
14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549,
|
||||
14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627,
|
||||
14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699,
|
||||
14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753,
|
||||
14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821,
|
||||
14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887,
|
||||
14891, 14897, 14923, 14929, 14939, 14947, 14951, 14957,
|
||||
14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073,
|
||||
15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137,
|
||||
15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217,
|
||||
15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277,
|
||||
15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331,
|
||||
15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401,
|
||||
15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473,
|
||||
15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569,
|
||||
15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643,
|
||||
15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727,
|
||||
15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773,
|
||||
15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859,
|
||||
15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919,
|
||||
15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007,
|
||||
16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087,
|
||||
16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183,
|
||||
16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249,
|
||||
16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349,
|
||||
16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427,
|
||||
16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493,
|
||||
16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603,
|
||||
16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661,
|
||||
16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747,
|
||||
16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843,
|
||||
16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927,
|
||||
16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993,
|
||||
17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053,
|
||||
17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159,
|
||||
17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231,
|
||||
17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327,
|
||||
17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389,
|
||||
17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467,
|
||||
17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519,
|
||||
17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599,
|
||||
17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683,
|
||||
17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783,
|
||||
17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863,
|
||||
};
|
48
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_prime.pl
vendored
Normal file
48
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_prime.pl
vendored
Normal file
|
@ -0,0 +1,48 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright 1998-2019 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
# The copyright year in the generated header follows the modification
# time of this script, so regenerating does not change the output by itself.
my $mtime = (stat($0))[9];
my $YEAR = (localtime($mtime))[5] + 1900;

# Emit the fixed file header.
print <<"EOF";
/*
* WARNING: do not edit!
* Generated by crypto/bn/bn_prime.pl
*
* Copyright 1998-$YEAR The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*/

EOF


# Number of primes to generate: first command-line argument, default 2048.
my $num = shift || 2048;
my @primes = (2);
my $candidate = 1;

# Trial division of successive odd numbers by the primes found so far,
# stopping at the integer square root of the candidate.
CANDIDATE: while (scalar(@primes) < $num) {
    $candidate += 2;
    my $limit = int(sqrt($candidate));

    foreach my $q (@primes) {
        last if $q > $limit;
        next CANDIDATE if ($candidate % $q) == 0;
    }
    push(@primes, $candidate);
}

# Emit the C table, eight entries per row.
print "typedef unsigned short prime_t;\n";
printf "# define NUMPRIMES %d\n\n", $num;

printf "static const prime_t primes[%d] = {", $num;
my $index = 0;
foreach my $prime (@primes) {
    printf "\n " if ($index % 8) == 0;
    printf " %5d,", $prime;
    $index++;
}
print "\n};\n";
|
345
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_print.c
vendored
Normal file
345
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_print.c
vendored
Normal file
|
@ -0,0 +1,345 @@
|
|||
/*
|
||||
* Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "internal/ctype.h"
|
||||
#include <limits.h>
|
||||
#include "internal/cryptlib.h"
|
||||
#include <openssl/buffer.h>
|
||||
#include "bn_lcl.h"
|
||||
|
||||
/* Upper-case hexadecimal digits, indexed by nibble value (0..15). */
static const char Hex[] = "0123456789ABCDEF";
|
||||
|
||||
/* Must 'OPENSSL_free' the returned data */
|
||||
char *BN_bn2hex(const BIGNUM *a)
|
||||
{
|
||||
int i, j, v, z = 0;
|
||||
char *buf;
|
||||
char *p;
|
||||
|
||||
if (BN_is_zero(a))
|
||||
return OPENSSL_strdup("0");
|
||||
buf = OPENSSL_malloc(a->top * BN_BYTES * 2 + 2);
|
||||
if (buf == NULL) {
|
||||
BNerr(BN_F_BN_BN2HEX, ERR_R_MALLOC_FAILURE);
|
||||
goto err;
|
||||
}
|
||||
p = buf;
|
||||
if (a->neg)
|
||||
*p++ = '-';
|
||||
for (i = a->top - 1; i >= 0; i--) {
|
||||
for (j = BN_BITS2 - 8; j >= 0; j -= 8) {
|
||||
/* strip leading zeros */
|
||||
v = (int)((a->d[i] >> j) & 0xff);
|
||||
if (z || v != 0) {
|
||||
*p++ = Hex[v >> 4];
|
||||
*p++ = Hex[v & 0x0f];
|
||||
z = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
*p = '\0';
|
||||
err:
|
||||
return buf;
|
||||
}
|
||||
|
||||
/* Must 'OPENSSL_free' the returned data */
|
||||
char *BN_bn2dec(const BIGNUM *a)
|
||||
{
|
||||
int i = 0, num, ok = 0, n, tbytes;
|
||||
char *buf = NULL;
|
||||
char *p;
|
||||
BIGNUM *t = NULL;
|
||||
BN_ULONG *bn_data = NULL, *lp;
|
||||
int bn_data_num;
|
||||
|
||||
/*-
|
||||
* get an upper bound for the length of the decimal integer
|
||||
* num <= (BN_num_bits(a) + 1) * log(2)
|
||||
* <= 3 * BN_num_bits(a) * 0.101 + log(2) + 1 (rounding error)
|
||||
* <= 3 * BN_num_bits(a) / 10 + 3 * BN_num_bits / 1000 + 1 + 1
|
||||
*/
|
||||
i = BN_num_bits(a) * 3;
|
||||
num = (i / 10 + i / 1000 + 1) + 1;
|
||||
tbytes = num + 3; /* negative and terminator and one spare? */
|
||||
bn_data_num = num / BN_DEC_NUM + 1;
|
||||
bn_data = OPENSSL_malloc(bn_data_num * sizeof(BN_ULONG));
|
||||
buf = OPENSSL_malloc(tbytes);
|
||||
if (buf == NULL || bn_data == NULL) {
|
||||
BNerr(BN_F_BN_BN2DEC, ERR_R_MALLOC_FAILURE);
|
||||
goto err;
|
||||
}
|
||||
if ((t = BN_dup(a)) == NULL)
|
||||
goto err;
|
||||
|
||||
p = buf;
|
||||
lp = bn_data;
|
||||
if (BN_is_zero(t)) {
|
||||
*p++ = '0';
|
||||
*p++ = '\0';
|
||||
} else {
|
||||
if (BN_is_negative(t))
|
||||
*p++ = '-';
|
||||
|
||||
while (!BN_is_zero(t)) {
|
||||
if (lp - bn_data >= bn_data_num)
|
||||
goto err;
|
||||
*lp = BN_div_word(t, BN_DEC_CONV);
|
||||
if (*lp == (BN_ULONG)-1)
|
||||
goto err;
|
||||
lp++;
|
||||
}
|
||||
lp--;
|
||||
/*
|
||||
* We now have a series of blocks, BN_DEC_NUM chars in length, where
|
||||
* the last one needs truncation. The blocks need to be reversed in
|
||||
* order.
|
||||
*/
|
||||
n = BIO_snprintf(p, tbytes - (size_t)(p - buf), BN_DEC_FMT1, *lp);
|
||||
if (n < 0)
|
||||
goto err;
|
||||
p += n;
|
||||
while (lp != bn_data) {
|
||||
lp--;
|
||||
n = BIO_snprintf(p, tbytes - (size_t)(p - buf), BN_DEC_FMT2, *lp);
|
||||
if (n < 0)
|
||||
goto err;
|
||||
p += n;
|
||||
}
|
||||
}
|
||||
ok = 1;
|
||||
err:
|
||||
OPENSSL_free(bn_data);
|
||||
BN_free(t);
|
||||
if (ok)
|
||||
return buf;
|
||||
OPENSSL_free(buf);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Parse an optionally '-'-prefixed run of hexadecimal digits at the start of
 * |a|.
 *
 * If |bn| is NULL, only the length of the valid prefix (digits plus sign) is
 * returned.  Otherwise the value is stored in |*bn|; a new BIGNUM is
 * allocated when |*bn| is NULL, and an existing one is zeroed and reused.
 *
 * Returns the number of characters consumed, or 0 on error (NULL or empty
 * input, no hex digit found, too many digits, or allocation failure).
 */
int BN_hex2bn(BIGNUM **bn, const char *a)
{
    BIGNUM *ret = NULL;
    BN_ULONG l = 0;
    int neg = 0, h, m, i, j, k, c;
    int num;

    if (a == NULL || *a == '\0')
        return 0;

    if (*a == '-') {
        neg = 1;
        a++;
    }

    for (i = 0; i <= INT_MAX / 4 && ossl_isxdigit(a[i]); i++)
        continue;

    /*
     * Nothing has been allocated yet and |bn| may be NULL here (length-only
     * query), so fail directly.  The cleanup label below dereferences |bn|
     * and must only be reached once |bn| is known to be non-NULL.
     */
    if (i == 0 || i > INT_MAX / 4)
        return 0;

    num = i + neg;
    if (bn == NULL)
        return num;

    /* a is the start of the hex digits, and it is 'i' long */
    if (*bn == NULL) {
        if ((ret = BN_new()) == NULL)
            return 0;
    } else {
        ret = *bn;
        BN_zero(ret);
    }

    /* i is the number of hex digits */
    if (bn_expand(ret, i * 4) == NULL)
        goto err;

    j = i;                      /* least significant 'hex' */
    m = 0;
    h = 0;
    while (j > 0) {
        /* consume up to one word's worth of digits from the tail */
        m = (BN_BYTES * 2 <= j) ? BN_BYTES * 2 : j;
        l = 0;
        for (;;) {
            c = a[j - m];
            k = OPENSSL_hexchar2int(c);
            if (k < 0)
                k = 0;          /* paranoia */
            l = (l << 4) | k;

            if (--m <= 0) {
                ret->d[h++] = l;
                break;
            }
        }
        j -= BN_BYTES * 2;
    }
    ret->top = h;
    bn_correct_top(ret);

    *bn = ret;
    bn_check_top(ret);
    /* Don't set the negative flag if it's zero. */
    if (ret->top != 0)
        ret->neg = neg;
    return num;
 err:
    /* only free what this call allocated; |bn| is non-NULL on this path */
    if (*bn == NULL)
        BN_free(ret);
    return 0;
}
|
||||
|
||||
/*
 * Parse an optionally '-'-prefixed run of decimal digits at the start of |a|.
 *
 * If |bn| is NULL, only the length of the valid prefix (digits plus sign) is
 * returned.  Otherwise the value is stored in |*bn|; a new BIGNUM is
 * allocated when |*bn| is NULL, and an existing one is zeroed and reused.
 *
 * Returns the number of characters consumed, or 0 on error (NULL or empty
 * input, no digit found, too many digits, or an arithmetic/allocation
 * failure).
 */
int BN_dec2bn(BIGNUM **bn, const char *a)
{
    BIGNUM *ret = NULL;
    BN_ULONG l = 0;
    int neg = 0, i, j;
    int num;

    if (a == NULL || *a == '\0')
        return 0;
    if (*a == '-') {
        neg = 1;
        a++;
    }

    for (i = 0; i <= INT_MAX / 4 && ossl_isdigit(a[i]); i++)
        continue;

    /*
     * Nothing has been allocated yet and |bn| may be NULL here (length-only
     * query), so fail directly.  The cleanup label below dereferences |bn|
     * and must only be reached once |bn| is known to be non-NULL.
     */
    if (i == 0 || i > INT_MAX / 4)
        return 0;

    num = i + neg;
    if (bn == NULL)
        return num;

    /*
     * a is the start of the digits, and it is 'i' long. We chop it into
     * BN_DEC_NUM digits at a time
     */
    if (*bn == NULL) {
        if ((ret = BN_new()) == NULL)
            return 0;
    } else {
        ret = *bn;
        BN_zero(ret);
    }

    /* i is the number of digits, a bit of an over expand */
    if (bn_expand(ret, i * 4) == NULL)
        goto err;

    /*
     * Start the digit counter so that the first (possibly short) group ends
     * exactly on a BN_DEC_NUM boundary.
     */
    j = BN_DEC_NUM - i % BN_DEC_NUM;
    if (j == BN_DEC_NUM)
        j = 0;
    l = 0;
    while (--i >= 0) {
        l *= 10;
        l += *a - '0';
        a++;
        if (++j == BN_DEC_NUM) {
            if (!BN_mul_word(ret, BN_DEC_CONV)
                || !BN_add_word(ret, l))
                goto err;
            l = 0;
            j = 0;
        }
    }

    bn_correct_top(ret);
    *bn = ret;
    bn_check_top(ret);
    /* Don't set the negative flag if it's zero. */
    if (ret->top != 0)
        ret->neg = neg;
    return num;
 err:
    /* only free what this call allocated; |bn| is non-NULL on this path */
    if (*bn == NULL)
        BN_free(ret);
    return 0;
}
|
||||
|
||||
/*
 * Parse |a| as a number in either base: text starting (after an optional
 * '-') with "0x" or "0X" is handed to BN_hex2bn(), anything else to
 * BN_dec2bn().
 *
 * Returns 1 on success and 0 if |a| is NULL or the underlying parser
 * rejects the input.  |bn| is passed through to the parsers unchanged; when
 * it is NULL no value is stored and the sign fix-up below is skipped.
 */
int BN_asc2bn(BIGNUM **bn, const char *a)
{
    const char *p = a;

    /* match BN_hex2bn()/BN_dec2bn(), which both reject a NULL string */
    if (a == NULL)
        return 0;

    if (*p == '-')
        p++;

    if (p[0] == '0' && (p[1] == 'X' || p[1] == 'x')) {
        if (!BN_hex2bn(bn, p + 2))
            return 0;
    } else {
        if (!BN_dec2bn(bn, p))
            return 0;
    }
    /*
     * The parsers were given the text after the sign, so apply the sign
     * here.  Don't set the negative flag if it's zero.  With a NULL |bn|
     * the parsers store nothing, so there is nothing to fix up.
     */
    if (bn != NULL && *a == '-' && (*bn)->top != 0)
        (*bn)->neg = 1;
    return 1;
}
|
||||
|
||||
# ifndef OPENSSL_NO_STDIO
|
||||
int BN_print_fp(FILE *fp, const BIGNUM *a)
|
||||
{
|
||||
BIO *b;
|
||||
int ret;
|
||||
|
||||
if ((b = BIO_new(BIO_s_file())) == NULL)
|
||||
return 0;
|
||||
BIO_set_fp(b, fp, BIO_NOCLOSE);
|
||||
ret = BN_print(b, a);
|
||||
BIO_free(b);
|
||||
return ret;
|
||||
}
|
||||
# endif
|
||||
|
||||
/*
 * Write |a| to |bp| as upper-case hexadecimal, one nibble at a time, most
 * significant first.  A '-' is written first when a->neg is set, zero is
 * written as "0", and leading zero nibbles are skipped.
 *
 * Returns 1 on success, 0 as soon as any BIO_write() does not write exactly
 * one byte.
 */
int BN_print(BIO *bp, const BIGNUM *a)
{
    int word, shift, nibble;
    int started = 0;            /* set once the first non-zero nibble is out */

    if (a->neg && BIO_write(bp, "-", 1) != 1)
        return 0;
    if (BN_is_zero(a) && BIO_write(bp, "0", 1) != 1)
        return 0;

    for (word = a->top - 1; word >= 0; word--) {
        for (shift = BN_BITS2 - 4; shift >= 0; shift -= 4) {
            nibble = (int)((a->d[word] >> shift) & 0x0f);

            /* strip leading zeros */
            if (!started && nibble == 0)
                continue;
            if (BIO_write(bp, &Hex[nibble], 1) != 1)
                return 0;
            started = 1;
        }
    }
    return 1;
}
|
||||
|
||||
char *BN_options(void)
|
||||
{
|
||||
static int init = 0;
|
||||
static char data[16];
|
||||
|
||||
if (!init) {
|
||||
init++;
|
||||
#ifdef BN_LLONG
|
||||
BIO_snprintf(data, sizeof(data), "bn(%zu,%zu)",
|
||||
sizeof(BN_ULLONG) * 8, sizeof(BN_ULONG) * 8);
|
||||
#else
|
||||
BIO_snprintf(data, sizeof(data), "bn(%zu,%zu)",
|
||||
sizeof(BN_ULONG) * 8, sizeof(BN_ULONG) * 8);
|
||||
#endif
|
||||
}
|
||||
return data;
|
||||
}
|
268
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_rand.c
vendored
Normal file
268
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_rand.c
vendored
Normal file
|
@ -0,0 +1,268 @@
|
|||
/*
|
||||
* Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
#include <openssl/rand.h>
|
||||
#include <openssl/sha.h>
|
||||
|
||||
/* Selects the random source and post-processing used by bnrand(). */
typedef enum bnrand_flag_e {
    NORMAL,                     /* bytes come from RAND_bytes() */
    TESTING,                    /* RAND_priv_bytes(), then patterned bytes */
    PRIVATE                     /* bytes come from RAND_priv_bytes() */
} BNRAND_FLAG;
|
||||
|
||||
/*
 * Fill |rnd| with a random number of at most |bits| bits.
 *
 * |flag| selects the source: RAND_bytes() for NORMAL, RAND_priv_bytes()
 * otherwise; TESTING additionally rewrites bytes into runs, zeros and 0xff.
 * |top|:    negative - leave the top bit alone; 0 - force the top bit;
 *           positive - force the two top bits.
 * |bottom|: non-zero - force the lowest bit.
 *
 * Zero bits is only accepted with BN_RAND_TOP_ANY and BN_RAND_BOTTOM_ANY and
 * yields zero.  Returns 1 on success, 0 on error.
 */
static int bnrand(BNRAND_FLAG flag, BIGNUM *rnd, int bits, int top, int bottom)
{
    unsigned char *raw = NULL;
    int rc, ok = 0, hi_bit, nbytes, excess;

    if (bits == 0) {
        if (top != BN_RAND_TOP_ANY || bottom != BN_RAND_BOTTOM_ANY) {
            BNerr(BN_F_BNRAND, BN_R_BITS_TOO_SMALL);
            return 0;
        }
        BN_zero(rnd);
        return 1;
    }
    /* one bit cannot hold two forced top bits */
    if (bits < 0 || (bits == 1 && top > 0)) {
        BNerr(BN_F_BNRAND, BN_R_BITS_TOO_SMALL);
        return 0;
    }

    nbytes = (bits + 7) / 8;
    hi_bit = (bits - 1) % 8;            /* index of the top bit in raw[0] */
    excess = 0xff << (hi_bit + 1);      /* bits of raw[0] above the top bit */

    raw = OPENSSL_malloc(nbytes);
    if (raw == NULL) {
        BNerr(BN_F_BNRAND, ERR_R_MALLOC_FAILURE);
        goto done;
    }

    /* make a random number and set the top and bottom bits */
    if (flag == NORMAL)
        rc = RAND_bytes(raw, nbytes);
    else
        rc = RAND_priv_bytes(raw, nbytes);
    if (rc <= 0)
        goto done;

    if (flag == TESTING) {
        /*
         * generate patterns that are more likely to trigger BN library bugs
         */
        int i;
        unsigned char pick;

        for (i = 0; i < nbytes; i++) {
            if (RAND_bytes(&pick, 1) <= 0)
                goto done;
            if (pick >= 128 && i > 0)
                raw[i] = raw[i - 1];
            else if (pick < 42)
                raw[i] = 0;
            else if (pick < 84)
                raw[i] = 255;
        }
    }

    if (top > 0) {
        if (hi_bit == 0) {
            /* the two top bits straddle the first byte boundary */
            raw[0] = 1;
            raw[1] |= 0x80;
        } else {
            raw[0] |= (3 << (hi_bit - 1));
        }
    } else if (top == 0) {
        raw[0] |= (1 << hi_bit);
    }
    raw[0] &= ~excess;
    if (bottom)                 /* set bottom bit if requested */
        raw[nbytes - 1] |= 1;
    if (BN_bin2bn(raw, nbytes, rnd) != NULL)
        ok = 1;

 done:
    OPENSSL_clear_free(raw, nbytes);
    bn_check_top(rnd);
    return ok;
}
|
||||
|
||||
/*
 * Generate a random number of |bits| bits in |rnd| using the NORMAL mode of
 * bnrand(); |top| and |bottom| are passed through unchanged.
 * Returns the bnrand() result.
 */
int BN_rand(BIGNUM *rnd, int bits, int top, int bottom)
{
    return bnrand(NORMAL, rnd, bits, top, bottom);
}
|
||||
|
||||
/*
 * Generate a random number of |bits| bits in |rnd| using the TESTING mode of
 * bnrand(); |top| and |bottom| are passed through unchanged.
 * Returns the bnrand() result.
 */
int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
{
    return bnrand(TESTING, rnd, bits, top, bottom);
}
|
||||
|
||||
/*
 * Generate a random number of |bits| bits in |rnd| using the PRIVATE mode of
 * bnrand(); |top| and |bottom| are passed through unchanged.
 * Returns the bnrand() result.
 */
int BN_priv_rand(BIGNUM *rnd, int bits, int top, int bottom)
{
    return bnrand(PRIVATE, rnd, bits, top, bottom);
}
|
||||
|
||||
/* random number r: 0 <= r < range */
|
||||
static int bnrand_range(BNRAND_FLAG flag, BIGNUM *r, const BIGNUM *range)
|
||||
{
|
||||
int n;
|
||||
int count = 100;
|
||||
|
||||
if (range->neg || BN_is_zero(range)) {
|
||||
BNerr(BN_F_BNRAND_RANGE, BN_R_INVALID_RANGE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
n = BN_num_bits(range); /* n > 0 */
|
||||
|
||||
/* BN_is_bit_set(range, n - 1) always holds */
|
||||
|
||||
if (n == 1)
|
||||
BN_zero(r);
|
||||
else if (!BN_is_bit_set(range, n - 2) && !BN_is_bit_set(range, n - 3)) {
|
||||
/*
|
||||
* range = 100..._2, so 3*range (= 11..._2) is exactly one bit longer
|
||||
* than range
|
||||
*/
|
||||
do {
|
||||
if (!bnrand(flag, r, n + 1, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If r < 3*range, use r := r MOD range (which is either r, r -
|
||||
* range, or r - 2*range). Otherwise, iterate once more. Since
|
||||
* 3*range = 11..._2, each iteration succeeds with probability >=
|
||||
* .75.
|
||||
*/
|
||||
if (BN_cmp(r, range) >= 0) {
|
||||
if (!BN_sub(r, r, range))
|
||||
return 0;
|
||||
if (BN_cmp(r, range) >= 0)
|
||||
if (!BN_sub(r, r, range))
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!--count) {
|
||||
BNerr(BN_F_BNRAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
while (BN_cmp(r, range) >= 0);
|
||||
} else {
|
||||
do {
|
||||
/* range = 11..._2 or range = 101..._2 */
|
||||
if (!bnrand(flag, r, n, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY))
|
||||
return 0;
|
||||
|
||||
if (!--count) {
|
||||
BNerr(BN_F_BNRAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
while (BN_cmp(r, range) >= 0);
|
||||
}
|
||||
|
||||
bn_check_top(r);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Generate a random number r with 0 <= r < range using the NORMAL mode of
 * bnrand_range().  Returns the bnrand_range() result.
 */
int BN_rand_range(BIGNUM *r, const BIGNUM *range)
{
    return bnrand_range(NORMAL, r, range);
}
|
||||
|
||||
/*
 * Generate a random number r with 0 <= r < range using the PRIVATE mode of
 * bnrand_range().  Returns the bnrand_range() result.
 */
int BN_priv_rand_range(BIGNUM *r, const BIGNUM *range)
{
    return bnrand_range(PRIVATE, r, range);
}
|
||||
|
||||
/* Forwards to BN_rand() with identical arguments and result. */
int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom)
{
    return BN_rand(rnd, bits, top, bottom);
}
|
||||
|
||||
/* Forwards to BN_rand_range() with identical arguments and result. */
int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range)
{
    return BN_rand_range(r, range);
}
|
||||
|
||||
/*
|
||||
* BN_generate_dsa_nonce generates a random number 0 <= out < range. Unlike
|
||||
* BN_rand_range, it also includes the contents of |priv| and |message| in
|
||||
* the generation so that an RNG failure isn't fatal as long as |priv|
|
||||
* remains secret. This is intended for use in DSA and ECDSA where an RNG
|
||||
* weakness leads directly to private key exposure unless this function is
|
||||
* used.
|
||||
*/
|
||||
int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range,
|
||||
const BIGNUM *priv, const unsigned char *message,
|
||||
size_t message_len, BN_CTX *ctx)
|
||||
{
|
||||
SHA512_CTX sha;
|
||||
/*
|
||||
* We use 512 bits of random data per iteration to ensure that we have at
|
||||
* least |range| bits of randomness.
|
||||
*/
|
||||
unsigned char random_bytes[64];
|
||||
unsigned char digest[SHA512_DIGEST_LENGTH];
|
||||
unsigned done, todo;
|
||||
/* We generate |range|+8 bytes of random output. */
|
||||
const unsigned num_k_bytes = BN_num_bytes(range) + 8;
|
||||
unsigned char private_bytes[96];
|
||||
unsigned char *k_bytes;
|
||||
int ret = 0;
|
||||
|
||||
k_bytes = OPENSSL_malloc(num_k_bytes);
|
||||
if (k_bytes == NULL)
|
||||
goto err;
|
||||
|
||||
/* We copy |priv| into a local buffer to avoid exposing its length. */
|
||||
todo = sizeof(priv->d[0]) * priv->top;
|
||||
if (todo > sizeof(private_bytes)) {
|
||||
/*
|
||||
* No reasonable DSA or ECDSA key should have a private key this
|
||||
* large and we don't handle this case in order to avoid leaking the
|
||||
* length of the private key.
|
||||
*/
|
||||
BNerr(BN_F_BN_GENERATE_DSA_NONCE, BN_R_PRIVATE_KEY_TOO_LARGE);
|
||||
goto err;
|
||||
}
|
||||
memcpy(private_bytes, priv->d, todo);
|
||||
memset(private_bytes + todo, 0, sizeof(private_bytes) - todo);
|
||||
|
||||
for (done = 0; done < num_k_bytes;) {
|
||||
if (RAND_priv_bytes(random_bytes, sizeof(random_bytes)) != 1)
|
||||
goto err;
|
||||
SHA512_Init(&sha);
|
||||
SHA512_Update(&sha, &done, sizeof(done));
|
||||
SHA512_Update(&sha, private_bytes, sizeof(private_bytes));
|
||||
SHA512_Update(&sha, message, message_len);
|
||||
SHA512_Update(&sha, random_bytes, sizeof(random_bytes));
|
||||
SHA512_Final(digest, &sha);
|
||||
|
||||
todo = num_k_bytes - done;
|
||||
if (todo > SHA512_DIGEST_LENGTH)
|
||||
todo = SHA512_DIGEST_LENGTH;
|
||||
memcpy(k_bytes + done, digest, todo);
|
||||
done += todo;
|
||||
}
|
||||
|
||||
if (!BN_bin2bn(k_bytes, num_k_bytes, out))
|
||||
goto err;
|
||||
if (BN_mod(out, out, range, ctx) != 1)
|
||||
goto err;
|
||||
ret = 1;
|
||||
|
||||
err:
|
||||
OPENSSL_free(k_bytes);
|
||||
OPENSSL_cleanse(private_bytes, sizeof(private_bytes));
|
||||
return ret;
|
||||
}
|
194
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_recp.c
vendored
Normal file
194
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_recp.c
vendored
Normal file
|
@ -0,0 +1,194 @@
|
|||
/*
|
||||
* Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
/*
 * Reset a caller-provided BN_RECP_CTX: zero the whole structure, then
 * initialise the two embedded BIGNUMs N and Nr.
 */
void BN_RECP_CTX_init(BN_RECP_CTX *recp)
{
    memset(recp, 0, sizeof(*recp));
    bn_init(&recp->N);
    bn_init(&recp->Nr);
}
|
||||
|
||||
BN_RECP_CTX *BN_RECP_CTX_new(void)
|
||||
{
|
||||
BN_RECP_CTX *ret;
|
||||
|
||||
if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) {
|
||||
BNerr(BN_F_BN_RECP_CTX_NEW, ERR_R_MALLOC_FAILURE);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bn_init(&(ret->N));
|
||||
bn_init(&(ret->Nr));
|
||||
ret->flags = BN_FLG_MALLOCED;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Release the BIGNUMs held by |recp| and, if the context carries
 * BN_FLG_MALLOCED, the context structure itself.  NULL is accepted and
 * ignored.
 */
void BN_RECP_CTX_free(BN_RECP_CTX *recp)
{
    if (recp != NULL) {
        BN_free(&recp->N);
        BN_free(&recp->Nr);
        if ((recp->flags & BN_FLG_MALLOCED) != 0)
            OPENSSL_free(recp);
    }
}
|
||||
|
||||
/*
 * Install |d| as the divisor of |recp|: copy it into N, reset Nr to zero,
 * record the bit length of |d| and set shift to 0.  |ctx| is not used.
 *
 * Returns 1 on success, 0 if the copy fails.
 */
int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx)
{
    if (BN_copy(&recp->N, d) == NULL)
        return 0;

    BN_zero(&recp->Nr);
    recp->num_bits = BN_num_bits(d);
    recp->shift = 0;
    return 1;
}
|
||||
|
||||
int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
|
||||
BN_RECP_CTX *recp, BN_CTX *ctx)
|
||||
{
|
||||
int ret = 0;
|
||||
BIGNUM *a;
|
||||
const BIGNUM *ca;
|
||||
|
||||
BN_CTX_start(ctx);
|
||||
if ((a = BN_CTX_get(ctx)) == NULL)
|
||||
goto err;
|
||||
if (y != NULL) {
|
||||
if (x == y) {
|
||||
if (!BN_sqr(a, x, ctx))
|
||||
goto err;
|
||||
} else {
|
||||
if (!BN_mul(a, x, y, ctx))
|
||||
goto err;
|
||||
}
|
||||
ca = a;
|
||||
} else
|
||||
ca = x; /* Just do the mod */
|
||||
|
||||
ret = BN_div_recp(NULL, r, ca, recp, ctx);
|
||||
err:
|
||||
BN_CTX_end(ctx);
|
||||
bn_check_top(r);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
|
||||
BN_RECP_CTX *recp, BN_CTX *ctx)
|
||||
{
|
||||
int i, j, ret = 0;
|
||||
BIGNUM *a, *b, *d, *r;
|
||||
|
||||
BN_CTX_start(ctx);
|
||||
d = (dv != NULL) ? dv : BN_CTX_get(ctx);
|
||||
r = (rem != NULL) ? rem : BN_CTX_get(ctx);
|
||||
a = BN_CTX_get(ctx);
|
||||
b = BN_CTX_get(ctx);
|
||||
if (b == NULL)
|
||||
goto err;
|
||||
|
||||
if (BN_ucmp(m, &(recp->N)) < 0) {
|
||||
BN_zero(d);
|
||||
if (!BN_copy(r, m)) {
|
||||
BN_CTX_end(ctx);
|
||||
return 0;
|
||||
}
|
||||
BN_CTX_end(ctx);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* We want the remainder Given input of ABCDEF / ab we need multiply
|
||||
* ABCDEF by 3 digests of the reciprocal of ab
|
||||
*/
|
||||
|
||||
/* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
|
||||
i = BN_num_bits(m);
|
||||
j = recp->num_bits << 1;
|
||||
if (j > i)
|
||||
i = j;
|
||||
|
||||
/* Nr := round(2^i / N) */
|
||||
if (i != recp->shift)
|
||||
recp->shift = BN_reciprocal(&(recp->Nr), &(recp->N), i, ctx);
|
||||
/* BN_reciprocal could have returned -1 for an error */
|
||||
if (recp->shift == -1)
|
||||
goto err;
|
||||
|
||||
/*-
|
||||
* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i - BN_num_bits(N)))|
|
||||
* = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i - BN_num_bits(N)))|
|
||||
* <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
|
||||
* = |m/N|
|
||||
*/
|
||||
if (!BN_rshift(a, m, recp->num_bits))
|
||||
goto err;
|
||||
if (!BN_mul(b, a, &(recp->Nr), ctx))
|
||||
goto err;
|
||||
if (!BN_rshift(d, b, i - recp->num_bits))
|
||||
goto err;
|
||||
d->neg = 0;
|
||||
|
||||
if (!BN_mul(b, &(recp->N), d, ctx))
|
||||
goto err;
|
||||
if (!BN_usub(r, m, b))
|
||||
goto err;
|
||||
r->neg = 0;
|
||||
|
||||
j = 0;
|
||||
while (BN_ucmp(r, &(recp->N)) >= 0) {
|
||||
if (j++ > 2) {
|
||||
BNerr(BN_F_BN_DIV_RECP, BN_R_BAD_RECIPROCAL);
|
||||
goto err;
|
||||
}
|
||||
if (!BN_usub(r, r, &(recp->N)))
|
||||
goto err;
|
||||
if (!BN_add_word(d, 1))
|
||||
goto err;
|
||||
}
|
||||
|
||||
r->neg = BN_is_zero(r) ? 0 : m->neg;
|
||||
d->neg = m->neg ^ recp->N.neg;
|
||||
ret = 1;
|
||||
err:
|
||||
BN_CTX_end(ctx);
|
||||
bn_check_top(dv);
|
||||
bn_check_top(rem);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* len is the expected size of the result We actually calculate with an extra
|
||||
* word of precision, so we can do faster division if the remainder is not
|
||||
* required.
|
||||
*/
|
||||
/* r := 2^len / m */
|
||||
int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx)
|
||||
{
|
||||
int ret = -1;
|
||||
BIGNUM *t;
|
||||
|
||||
BN_CTX_start(ctx);
|
||||
if ((t = BN_CTX_get(ctx)) == NULL)
|
||||
goto err;
|
||||
|
||||
if (!BN_set_bit(t, len))
|
||||
goto err;
|
||||
|
||||
if (!BN_div(r, NULL, t, m, ctx))
|
||||
goto err;
|
||||
|
||||
ret = len;
|
||||
err:
|
||||
bn_check_top(r);
|
||||
BN_CTX_end(ctx);
|
||||
return ret;
|
||||
}
|
257
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_shift.c
vendored
Normal file
257
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_shift.c
vendored
Normal file
|
@ -0,0 +1,257 @@
|
|||
/*
|
||||
* Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
/*
 * r = a << 1.  |r| and |a| may be the same object.
 *
 * |r| is expanded to hold one word more than |a|; the extra word is only
 * used when a bit is carried out of the top word.
 * Returns 1 on success, 0 if |r| cannot be expanded.
 */
int BN_lshift1(BIGNUM *r, const BIGNUM *a)
{
    BN_ULONG *src, *dst, w, carry;
    int i;

    bn_check_top(r);
    bn_check_top(a);

    /* both assignments are no-ops when r and a are the same object */
    r->neg = a->neg;
    if (bn_wexpand(r, a->top + 1) == NULL)
        return 0;
    r->top = a->top;

    /* fetch the data pointers only after the expansion */
    src = a->d;
    dst = r->d;
    carry = 0;
    for (i = 0; i < a->top; i++) {
        w = src[i];
        dst[i] = ((w << 1) | carry) & BN_MASK2;
        carry = (w & BN_TBIT) ? 1 : 0;
    }
    if (carry) {
        dst[i] = 1;
        r->top++;
    }
    bn_check_top(r);
    return 1;
}
|
||||
|
||||
/*
 * r = a >> 1.  |r| and |a| may be the same object.
 *
 * The result has one word fewer than |a| when the top word of |a| is exactly
 * 1.  A zero result never keeps a negative sign.
 * Returns 1 on success, 0 if |r| cannot be expanded.
 */
int BN_rshift1(BIGNUM *r, const BIGNUM *a)
{
    BN_ULONG *src, *dst, w, carry;
    int top, new_top, i;

    bn_check_top(r);
    bn_check_top(a);

    if (BN_is_zero(a)) {
        BN_zero(r);
        return 1;
    }

    top = a->top;
    src = a->d;
    /* the top word vanishes if it holds just a single 1 bit */
    new_top = top - (src[top - 1] == 1);
    if (a != r) {
        if (bn_wexpand(r, new_top) == NULL)
            return 0;
        r->neg = a->neg;
    }
    dst = r->d;

    /* top word: stored only if something is left after the shift */
    w = src[top - 1];
    carry = (w & 1) ? BN_TBIT : 0;
    w >>= 1;
    if (w != 0)
        dst[top - 1] = w;

    /* remaining words, high to low, pulling in the bit shifted out above */
    for (i = top - 2; i >= 0; i--) {
        w = src[i];
        dst[i] = ((w >> 1) & BN_MASK2) | carry;
        carry = (w & 1) ? BN_TBIT : 0;
    }

    r->top = new_top;
    if (r->top == 0)
        r->neg = 0;             /* don't allow negative zero */
    bn_check_top(r);
    return 1;
}
|
||||
|
||||
/*
 * r = a << n.  A negative |n| is rejected with BN_R_INVALID_SHIFT.
 *
 * The shift is done by bn_lshift_fixed_top(), after which the top of |r| is
 * corrected.  Returns the bn_lshift_fixed_top() result, or 0 for a negative
 * shift.
 */
int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
{
    int ok;

    if (n < 0) {
        BNerr(BN_F_BN_LSHIFT, BN_R_INVALID_SHIFT);
        return 0;
    }

    ok = bn_lshift_fixed_top(r, a, n);

    bn_correct_top(r);
    bn_check_top(r);

    return ok;
}
|
||||
|
||||
/*
|
||||
* In respect to shift factor the execution time is invariant of
|
||||
* |n % BN_BITS2|, but not |n / BN_BITS2|. Or in other words pre-condition
|
||||
* for constant-time-ness is |n < BN_BITS2| or |n / BN_BITS2| being
|
||||
* non-secret.
|
||||
*/
|
||||
int bn_lshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n)
|
||||
{
|
||||
int i, nw;
|
||||
unsigned int lb, rb;
|
||||
BN_ULONG *t, *f;
|
||||
BN_ULONG l, m, rmask = 0;
|
||||
|
||||
assert(n >= 0);
|
||||
|
||||
bn_check_top(r);
|
||||
bn_check_top(a);
|
||||
|
||||
nw = n / BN_BITS2;
|
||||
if (bn_wexpand(r, a->top + nw + 1) == NULL)
|
||||
return 0;
|
||||
|
||||
if (a->top != 0) {
|
||||
lb = (unsigned int)n % BN_BITS2;
|
||||
rb = BN_BITS2 - lb;
|
||||
rb %= BN_BITS2; /* say no to undefined behaviour */
|
||||
rmask = (BN_ULONG)0 - rb; /* rmask = 0 - (rb != 0) */
|
||||
rmask |= rmask >> 8;
|
||||
f = &(a->d[0]);
|
||||
t = &(r->d[nw]);
|
||||
l = f[a->top - 1];
|
||||
t[a->top] = (l >> rb) & rmask;
|
||||
for (i = a->top - 1; i > 0; i--) {
|
||||
m = l << lb;
|
||||
l = f[i - 1];
|
||||
t[i] = (m | ((l >> rb) & rmask)) & BN_MASK2;
|
||||
}
|
||||
t[0] = (l << lb) & BN_MASK2;
|
||||
} else {
|
||||
/* shouldn't happen, but formally required */
|
||||
r->d[nw] = 0;
|
||||
}
|
||||
if (nw != 0)
|
||||
memset(r->d, 0, sizeof(*t) * nw);
|
||||
|
||||
r->neg = a->neg;
|
||||
r->top = a->top + nw + 1;
|
||||
r->flags |= BN_FLG_FIXED_TOP;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * r = a >> n.  |r| and |a| may be the same object.
 *
 * A negative |n| is rejected with BN_R_INVALID_SHIFT.  Shifting out every
 * word of |a| yields zero, and a zero result never keeps a negative sign.
 * Returns 1 on success, 0 on error.
 */
int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
{
    int k, new_top, remaining, word_shift, left_bits, right_bits;
    BN_ULONG *dst, *src;
    BN_ULONG lo, shifted;

    bn_check_top(r);
    bn_check_top(a);

    if (n < 0) {
        BNerr(BN_F_BN_RSHIFT, BN_R_INVALID_SHIFT);
        return 0;
    }

    word_shift = n / BN_BITS2;
    right_bits = n % BN_BITS2;
    left_bits = BN_BITS2 - right_bits;
    if (word_shift >= a->top || a->top == 0) {
        BN_zero(r);
        return 1;
    }

    /* number of words needed for the bits that survive the shift */
    new_top = (BN_num_bits(a) - n + (BN_BITS2 - 1)) / BN_BITS2;
    if (r != a) {
        if (bn_wexpand(r, new_top) == NULL)
            return 0;
        r->neg = a->neg;
    } else if (n == 0) {
        return 1;               /* or the copying loop will go berserk */
    }

    src = &(a->d[word_shift]);
    dst = r->d;
    remaining = a->top - word_shift;
    r->top = new_top;

    if (right_bits == 0) {
        /* whole-word shift: a plain copy */
        for (k = 0; k < remaining; k++)
            dst[k] = src[k];
    } else {
        lo = src[0];
        for (k = 0; k < remaining - 1; k++) {
            shifted = (lo >> right_bits) & BN_MASK2;
            lo = src[k + 1];
            dst[k] = (shifted | (lo << left_bits)) & BN_MASK2;
        }
        /* the last word is stored only if something is left of it */
        lo = (lo >> right_bits) & BN_MASK2;
        if (lo != 0)
            dst[k] = lo;
    }
    if (r->top == 0)
        r->neg = 0;             /* don't allow negative zero */
    bn_check_top(r);
    return 1;
}
|
||||
|
||||
/*
|
||||
* In respect to shift factor the execution time is invariant of
|
||||
* |n % BN_BITS2|, but not |n / BN_BITS2|. Or in other words pre-condition
|
||||
* for constant-time-ness for sufficiently[!] zero-padded inputs is
|
||||
* |n < BN_BITS2| or |n / BN_BITS2| being non-secret.
|
||||
*/
|
||||
int bn_rshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n)
|
||||
{
|
||||
int i, top, nw;
|
||||
unsigned int lb, rb;
|
||||
BN_ULONG *t, *f;
|
||||
BN_ULONG l, m, mask;
|
||||
|
||||
bn_check_top(r);
|
||||
bn_check_top(a);
|
||||
|
||||
assert(n >= 0);
|
||||
|
||||
nw = n / BN_BITS2;
|
||||
if (nw >= a->top) {
|
||||
/* shouldn't happen, but formally required */
|
||||
BN_zero(r);
|
||||
return 1;
|
||||
}
|
||||
|
||||
rb = (unsigned int)n % BN_BITS2;
|
||||
lb = BN_BITS2 - rb;
|
||||
lb %= BN_BITS2; /* say no to undefined behaviour */
|
||||
mask = (BN_ULONG)0 - lb; /* mask = 0 - (lb != 0) */
|
||||
mask |= mask >> 8;
|
||||
top = a->top - nw;
|
||||
if (r != a && bn_wexpand(r, top) == NULL)
|
||||
return 0;
|
||||
|
||||
t = &(r->d[0]);
|
||||
f = &(a->d[nw]);
|
||||
l = f[0];
|
||||
for (i = 0; i < top - 1; i++) {
|
||||
m = f[i + 1];
|
||||
t[i] = (l >> rb) | ((m << lb) & mask);
|
||||
l = m;
|
||||
}
|
||||
t[i] = l >> rb;
|
||||
|
||||
r->neg = a->neg;
|
||||
r->top = top;
|
||||
r->flags |= BN_FLG_FIXED_TOP;
|
||||
|
||||
return 1;
|
||||
}
|
239
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_sqr.c
vendored
Normal file
239
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_sqr.c
vendored
Normal file
|
@ -0,0 +1,239 @@
|
|||
/*
|
||||
* Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
/* r must not be a */
|
||||
/*
|
||||
* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96
|
||||
*/
|
||||
/*
 * r = a * a.  The squaring is done by bn_sqr_fixed_top(), after which the
 * top of |r| is corrected.  Returns the bn_sqr_fixed_top() result.
 */
int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
{
    int ok = bn_sqr_fixed_top(r, a, ctx);

    bn_correct_top(r);
    bn_check_top(r);

    return ok;
}
|
||||
|
||||
/*
 * r = a * a, leaving r->top at 2 * a->top without normalising it, and
 * flagging the result with BN_FLG_FIXED_TOP.
 *
 * When |r| and |a| are the same object the square is computed into a
 * temporary from |ctx| and copied into |r| afterwards.  The squaring routine
 * depends on the word count of |a| and on the BN_SQR_COMBA / BN_RECURSION
 * build options.
 *
 * Returns 1 on success, 0 on error.
 */
int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
{
    int words, out_words;
    int ok = 0;
    BIGNUM *scratch, *dst;

    bn_check_top(a);

    words = a->top;
    if (words <= 0) {
        /* the square of zero is zero */
        r->top = 0;
        r->neg = 0;
        return 1;
    }

    BN_CTX_start(ctx);
    dst = (a != r) ? r : BN_CTX_get(ctx);
    scratch = BN_CTX_get(ctx);
    if (dst == NULL || scratch == NULL)
        goto done;

    out_words = 2 * words;      /* Non-zero (from above) */
    if (bn_wexpand(dst, out_words) == NULL)
        goto done;

    switch (words) {
    case 4:
        {
#ifndef BN_SQR_COMBA
            BN_ULONG t[8];

            bn_sqr_normal(dst->d, a->d, 4, t);
#else
            bn_sqr_comba4(dst->d, a->d);
#endif
        }
        break;
    case 8:
        {
#ifndef BN_SQR_COMBA
            BN_ULONG t[16];

            bn_sqr_normal(dst->d, a->d, 8, t);
#else
            bn_sqr_comba8(dst->d, a->d);
#endif
        }
        break;
    default:
        {
#if defined(BN_RECURSION)
            if (words < BN_SQR_RECURSIVE_SIZE_NORMAL) {
                BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL * 2];

                bn_sqr_normal(dst->d, a->d, words, t);
            } else {
                /* largest power of two not exceeding |words| */
                int pow2 = 1 << (BN_num_bits_word((BN_ULONG)words) - 1);

                if (words == pow2) {
                    /* recursion is only used for power-of-two sizes */
                    if (bn_wexpand(scratch, pow2 * 4) == NULL)
                        goto done;
                    bn_sqr_recursive(dst->d, a->d, words, scratch->d);
                } else {
                    if (bn_wexpand(scratch, out_words) == NULL)
                        goto done;
                    bn_sqr_normal(dst->d, a->d, words, scratch->d);
                }
            }
#else
            if (bn_wexpand(scratch, out_words) == NULL)
                goto done;
            bn_sqr_normal(dst->d, a->d, words, scratch->d);
#endif
        }
        break;
    }

    dst->neg = 0;
    dst->top = out_words;
    dst->flags |= BN_FLG_FIXED_TOP;
    if (r != dst && BN_copy(r, dst) == NULL)
        goto done;

    ok = 1;
 done:
    bn_check_top(dst);
    bn_check_top(scratch);
    BN_CTX_end(ctx);
    return ok;
}
|
||||
|
||||
/* tmp must have 2*n words */
|
||||
void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp)
|
||||
{
|
||||
int i, j, max;
|
||||
const BN_ULONG *ap;
|
||||
BN_ULONG *rp;
|
||||
|
||||
max = n * 2;
|
||||
ap = a;
|
||||
rp = r;
|
||||
rp[0] = rp[max - 1] = 0;
|
||||
rp++;
|
||||
j = n;
|
||||
|
||||
if (--j > 0) {
|
||||
ap++;
|
||||
rp[j] = bn_mul_words(rp, ap, j, ap[-1]);
|
||||
rp += 2;
|
||||
}
|
||||
|
||||
for (i = n - 2; i > 0; i--) {
|
||||
j--;
|
||||
ap++;
|
||||
rp[j] = bn_mul_add_words(rp, ap, j, ap[-1]);
|
||||
rp += 2;
|
||||
}
|
||||
|
||||
bn_add_words(r, r, r, max);
|
||||
|
||||
/* There will not be a carry */
|
||||
|
||||
bn_sqr_words(tmp, a, n);
|
||||
|
||||
bn_add_words(r, r, tmp, max);
|
||||
}
|
||||
|
||||
#ifdef BN_RECURSION
|
||||
/*-
|
||||
* r is 2*n words in size,
|
||||
* a and b are both n words in size. (There's not actually a 'b' here ...)
|
||||
* n must be a power of 2.
|
||||
* We multiply and return the result.
|
||||
* t must be 2*n words in size
|
||||
* We calculate
|
||||
* a[0]*b[0]
|
||||
* a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
|
||||
* a[1]*b[1]
|
||||
*/
|
||||
/*
 * Recursive squaring of the n2-word number |a| into the 2*n2-word |r|.
 *
 * With a = (a1, a0) split into two halves of n2/2 words, the routine
 * computes a0^2, a1^2 and (a0 - a1)^2 recursively and combines them as
 *     a0^2 + a1^2 - (a0 - a1)^2  ==  2*a0*a1
 * for the middle term.  Sizes 4 and 8, and anything below
 * BN_SQR_RECURSIVE_SIZE_NORMAL, are handed to the non-recursive routines.
 *
 * |t| is scratch space; words from t[2*n2] onwards are passed down as the
 * scratch area of the recursive calls.
 */
void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t)
{
    const int half = n2 / 2;
    int order, carry;
    BN_ULONG word;
    BN_ULONG *wp, *sub_scratch;

    switch (n2) {
    case 4:
# ifndef BN_SQR_COMBA
        bn_sqr_normal(r, a, 4, t);
# else
        bn_sqr_comba4(r, a);
# endif
        return;
    case 8:
# ifndef BN_SQR_COMBA
        bn_sqr_normal(r, a, 8, t);
# else
        bn_sqr_comba8(r, a);
# endif
        return;
    default:
        break;
    }

    if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL) {
        bn_sqr_normal(r, a, n2, t);
        return;
    }

    /* t[0 .. half-1] = |a0 - a1|, skipped when the halves are equal */
    order = bn_cmp_words(a, &(a[half]), half);
    if (order > 0)
        bn_sub_words(t, a, &(a[half]), half);
    else if (order < 0)
        bn_sub_words(t, &(a[half]), a, half);

    sub_scratch = &(t[n2 * 2]);

    /* t[n2 ..] = (a0 - a1)^2, which is simply zero for equal halves */
    if (order != 0)
        bn_sqr_recursive(&(t[n2]), t, half, sub_scratch);
    else
        memset(&t[n2], 0, sizeof(*t) * n2);

    /* low half of r = a0^2, high half of r = a1^2 */
    bn_sqr_recursive(r, a, half, sub_scratch);
    bn_sqr_recursive(&(r[n2]), &(a[half]), half, sub_scratch);

    /* t[0 ..] = a0^2 + a1^2 */
    carry = (int)(bn_add_words(t, r, &(r[n2]), n2));

    /* t[n2 ..] = a0^2 + a1^2 - (a0 - a1)^2 */
    carry -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));

    /* Add the middle term into r at an offset of half words. */
    carry += (int)(bn_add_words(&(r[half]), &(r[half]), &(t[n2]), n2));

    if (carry) {
        wp = &(r[half + n2]);
        word = (*wp + carry) & BN_MASK2;
        *wp = word;

        /*
         * The overflow will stop before we over write words we should not
         * overwrite
         */
        if (word < (BN_ULONG)carry) {
            do {
                wp++;
                word = (*wp + 1) & BN_MASK2;
                *wp = word;
            } while (word == 0);
        }
    }
}
|
||||
#endif
|
358
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_sqrt.c
vendored
Normal file
358
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_sqrt.c
vendored
Normal file
|
@ -0,0 +1,358 @@
|
|||
/*
|
||||
* Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
/*
 * Returns 'ret' such that ret^2 == a (mod p), using the Tonelli/Shanks
 * algorithm (cf. Henri Cohen, "A Course in Algebraic Computational Number
 * Theory", algorithm 1.5.1). 'p' must be prime, otherwise an error or an
 * incorrect "result" will be returned.
 *
 * If 'in' is NULL a new BIGNUM is allocated for the result, otherwise the
 * result is written to 'in'.  Returns NULL on error; a BIGNUM allocated
 * here is freed before NULL is returned, a caller-supplied 'in' is not.
 */
BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
{
    BIGNUM *ret = in;
    int err = 1;
    int r;
    BIGNUM *A, *b, *q, *t, *x, *y;
    int e, i, j;
    /* Set once BN_CTX_start() has run, so 'end' only undoes what was done. */
    int used_ctx = 0;

    if (!BN_is_odd(p) || BN_abs_is_word(p, 1)) {
        if (BN_abs_is_word(p, 2)) {
            /* Modulo 2 the square root of a is just a's lowest bit. */
            if (ret == NULL)
                ret = BN_new();
            if (ret == NULL)
                goto end;
            if (!BN_set_word(ret, BN_is_bit_set(a, 0))) {
                if (ret != in)
                    BN_free(ret);
                return NULL;
            }
            bn_check_top(ret);
            return ret;
        }

        BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
        return NULL;
    }

    if (BN_is_zero(a) || BN_is_one(a)) {
        /* 0 and 1 are their own square roots. */
        if (ret == NULL)
            ret = BN_new();
        if (ret == NULL)
            goto end;
        if (!BN_set_word(ret, BN_is_one(a))) {
            if (ret != in)
                BN_free(ret);
            return NULL;
        }
        bn_check_top(ret);
        return ret;
    }

    BN_CTX_start(ctx);
    used_ctx = 1;
    A = BN_CTX_get(ctx);
    b = BN_CTX_get(ctx);
    q = BN_CTX_get(ctx);
    t = BN_CTX_get(ctx);
    x = BN_CTX_get(ctx);
    y = BN_CTX_get(ctx);
    if (y == NULL)
        goto end;

    if (ret == NULL)
        ret = BN_new();
    if (ret == NULL)
        goto end;

    /* A = a mod p */
    if (!BN_nnmod(A, a, p, ctx))
        goto end;

    /* now write  |p| - 1  as  2^e*q  where  q  is odd */
    e = 1;
    while (!BN_is_bit_set(p, e))
        e++;
    /* we'll set  q  later (if needed) */

    if (e == 1) {
        /*-
         * The easy case:  (|p|-1)/2  is odd, so 2 has an inverse
         * modulo  (|p|-1)/2,  and square roots can be computed
         * directly by modular exponentiation.
         * We have
         *     2 * (|p|+1)/4 == 1   (mod (|p|-1)/2),
         * so we can use exponent  (|p|+1)/4,  i.e.  (|p|-3)/4 + 1.
         */
        if (!BN_rshift(q, p, 2))
            goto end;
        q->neg = 0;
        if (!BN_add_word(q, 1))
            goto end;
        if (!BN_mod_exp(ret, A, q, p, ctx))
            goto end;
        err = 0;
        goto vrfy;
    }

    if (e == 2) {
        /*-
         * |p| == 5  (mod 8)
         *
         * In this case  2  is always a non-square since
         * Legendre(2,p) = (-1)^((p^2-1)/8)  for any odd prime.
         * So if  a  really is a square, then  2*a  is a non-square.
         * Thus for
         *      b := (2*a)^((|p|-5)/8),
         *      i := (2*a)*b^2
         * we have
         *     i^2 = (2*a)^((1 + (|p|-5)/4)*2)
         *         = (2*a)^((p-1)/2)
         *         = -1;
         * so if we set
         *      x := a*b*(i-1),
         * then
         *     x^2 = a^2 * b^2 * (i^2 - 2*i + 1)
         *         = a^2 * b^2 * (-2*i)
         *         = a*(-i)*(2*a*b^2)
         *         = a*(-i)*i
         *         = a.
         *
         * (This is due to A.O.L. Atkin,
         * <URL: http://listserv.nodak.edu/scripts/wa.exe?A2=ind9211&L=nmbrthry&O=T&P=562>,
         * November 1992.)
         */

        /* t := 2*a */
        if (!BN_mod_lshift1_quick(t, A, p))
            goto end;

        /* b := (2*a)^((|p|-5)/8) */
        if (!BN_rshift(q, p, 3))
            goto end;
        q->neg = 0;
        if (!BN_mod_exp(b, t, q, p, ctx))
            goto end;

        /* y := b^2 */
        if (!BN_mod_sqr(y, b, p, ctx))
            goto end;

        /* t := (2*a)*b^2 - 1 */
        if (!BN_mod_mul(t, t, y, p, ctx))
            goto end;
        if (!BN_sub_word(t, 1))
            goto end;

        /* x = a*b*t */
        if (!BN_mod_mul(x, A, b, p, ctx))
            goto end;
        if (!BN_mod_mul(x, x, t, p, ctx))
            goto end;

        if (!BN_copy(ret, x))
            goto end;
        err = 0;
        goto vrfy;
    }

    /*
     * e > 2, so we really have to use the Tonelli/Shanks algorithm. First,
     * find some y that is not a square.
     */
    if (!BN_copy(q, p))
        goto end;               /* use 'q' as temp */
    q->neg = 0;
    i = 2;
    do {
        /*
         * For efficiency, try small numbers first; if this fails, try random
         * numbers.
         */
        if (i < 22) {
            if (!BN_set_word(y, i))
                goto end;
        } else {
            if (!BN_priv_rand(y, BN_num_bits(p), 0, 0))
                goto end;
            if (BN_ucmp(y, p) >= 0) {
                if (!(p->neg ? BN_add : BN_sub) (y, y, p))
                    goto end;
            }
            /* now 0 <= y < |p| */
            if (BN_is_zero(y))
                if (!BN_set_word(y, i))
                    goto end;
        }

        r = BN_kronecker(y, q, ctx); /* here 'q' is |p| */
        if (r < -1)
            goto end;
        if (r == 0) {
            /* y shares a factor with p, so p cannot be prime */
            BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
            goto end;
        }
    }
    while (r == 1 && ++i < 82);

    if (r != -1) {
        /*
         * Many rounds and still no non-square -- this is more likely a bug
         * than just bad luck. Even if p is not prime, we should have found
         * some y such that r == -1.
         */
        BNerr(BN_F_BN_MOD_SQRT, BN_R_TOO_MANY_ITERATIONS);
        goto end;
    }

    /* Here's our actual 'q': */
    if (!BN_rshift(q, q, e))
        goto end;

    /*
     * Now that we have some non-square, we can find an element of order 2^e
     * by computing its q'th power.
     */
    if (!BN_mod_exp(y, y, q, p, ctx))
        goto end;
    if (BN_is_one(y)) {
        BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
        goto end;
    }

    /*-
     * Now we know that (if  p  is indeed prime) there is an integer
     * k,  0 <= k < 2^e,  such that
     *
     *      a^q * y^k == 1   (mod p).
     *
     * As  a^q  is a square and  y  is not,  k  must be even.
     * q+1  is even, too, so there is an element
     *
     *     X := a^((q+1)/2) * y^(k/2),
     *
     * and it satisfies
     *
     *     X^2 = a^q * a     * y^k
     *         = a,
     *
     * so it is the square root that we are looking for.
     */

    /* t := (q-1)/2  (note that  q  is odd) */
    if (!BN_rshift1(t, q))
        goto end;

    /* x := a^((q-1)/2) */
    if (BN_is_zero(t)) {        /* special case: p = 2^e + 1 */
        if (!BN_nnmod(t, A, p, ctx))
            goto end;
        if (BN_is_zero(t)) {
            /* special case: a == 0  (mod p) */
            BN_zero(ret);
            err = 0;
            goto end;
        } else if (!BN_one(x))
            goto end;
    } else {
        if (!BN_mod_exp(x, A, t, p, ctx))
            goto end;
        if (BN_is_zero(x)) {
            /* special case: a == 0  (mod p) */
            BN_zero(ret);
            err = 0;
            goto end;
        }
    }

    /* b := a*x^2  (= a^q) */
    if (!BN_mod_sqr(b, x, p, ctx))
        goto end;
    if (!BN_mod_mul(b, b, A, p, ctx))
        goto end;

    /* x := a*x    (= a^((q+1)/2)) */
    if (!BN_mod_mul(x, x, A, p, ctx))
        goto end;

    while (1) {
        /*-
         * Now  b  is  a^q * y^k  for some even  k  (0 <= k < 2^E
         * where  E  refers to the original value of  e,  which we
         * don't keep in a variable),  and  x  is  a^((q+1)/2) * y^(k/2).
         *
         * We have  a*b = x^2,
         *    y^2^(e-1) = -1,
         *    b^2^(e-1) = 1.
         */

        if (BN_is_one(b)) {
            if (!BN_copy(ret, x))
                goto end;
            err = 0;
            goto vrfy;
        }

        /*
         * Find the smallest i, 0 < i < e, such that b^(2^i) = 1.
         *
         * The search is bounded by e.  The previous open-ended loop only
         * tested for i == e after incrementing i, so with a non-prime p
         * (where e can already have dropped to 1) i stepped past e and the
         * loop never terminated.
         */
        for (i = 1; i < e; i++) {
            if (i == 1) {
                if (!BN_mod_sqr(t, b, p, ctx))
                    goto end;
            } else {
                if (!BN_mod_mul(t, t, t, p, ctx))
                    goto end;
            }
            if (BN_is_one(t))
                break;
        }
        /* If not found, a is not a square or p is not prime. */
        if (i >= e) {
            BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
            goto end;
        }

        /* t := y^2^(e - i - 1) */
        if (!BN_copy(t, y))
            goto end;
        for (j = e - i - 1; j > 0; j--) {
            if (!BN_mod_sqr(t, t, p, ctx))
                goto end;
        }
        if (!BN_mod_mul(y, t, t, p, ctx))
            goto end;
        if (!BN_mod_mul(x, x, t, p, ctx))
            goto end;
        if (!BN_mod_mul(b, b, y, p, ctx))
            goto end;
        e = i;
    }

 vrfy:
    if (!err) {
        /*
         * verify the result -- the input might have been not a square (test
         * added in 0.9.8)
         */

        if (!BN_mod_sqr(x, ret, p, ctx))
            err = 1;

        if (!err && 0 != BN_cmp(x, A)) {
            BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
            err = 1;
        }
    }

 end:
    if (err) {
        if (ret != in)
            BN_clear_free(ret);
        ret = NULL;
    }
    /* Only unwind the context frame if one was actually opened. */
    if (used_ctx)
        BN_CTX_end(ctx);
    bn_check_top(ret);
    return ret;
}
|
545
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_srp.c
vendored
Normal file
545
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_srp.c
vendored
Normal file
|
@ -0,0 +1,545 @@
|
|||
/*
|
||||
* Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include "bn_lcl.h"
|
||||
#include "internal/nelem.h"
|
||||
|
||||
#ifndef OPENSSL_NO_SRP
|
||||
|
||||
#include <openssl/srp.h>
|
||||
#include "internal/bn_srp.h"
|
||||
|
||||
/*
 * bn_pack4(a1, a2, a3, a4) builds BN_ULONG initialiser text from four
 * 16-bit hex chunks, a1 being the most significant.
 *
 *  - BN_BYTES == 8: one 64-bit constant; only the integer-literal suffix
 *    (UI64, UL or ULL) differs between toolchains.
 *  - BN_BYTES == 4: two comma-separated 32-bit constants, low half (a3,a4)
 *    first, so a single invocation contributes two array elements.
 */
# if (BN_BYTES == 8)
#  if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
#   define bn_pack4(a1,a2,a3,a4) ((a1##UI64<<48)|(a2##UI64<<32)|(a3##UI64<<16)|a4##UI64)
#  elif defined(__arch64__)
#   define bn_pack4(a1,a2,a3,a4) ((a1##UL<<48)|(a2##UL<<32)|(a3##UL<<16)|a4##UL)
#  else
#   define bn_pack4(a1,a2,a3,a4) ((a1##ULL<<48)|(a2##ULL<<32)|(a3##ULL<<16)|a4##ULL)
#  endif
# elif (BN_BYTES == 4)
#  define bn_pack4(a1,a2,a3,a4) ((a3##UL<<16)|a4##UL), ((a1##UL<<16)|a2##UL)
# else
#  error "unsupported BN_BYTES"
# endif
|
||||
|
||||
static const BN_ULONG bn_group_1024_value[] = {
|
||||
bn_pack4(0x9FC6, 0x1D2F, 0xC0EB, 0x06E3),
|
||||
bn_pack4(0xFD51, 0x38FE, 0x8376, 0x435B),
|
||||
bn_pack4(0x2FD4, 0xCBF4, 0x976E, 0xAA9A),
|
||||
bn_pack4(0x68ED, 0xBC3C, 0x0572, 0x6CC0),
|
||||
bn_pack4(0xC529, 0xF566, 0x660E, 0x57EC),
|
||||
bn_pack4(0x8255, 0x9B29, 0x7BCF, 0x1885),
|
||||
bn_pack4(0xCE8E, 0xF4AD, 0x69B1, 0x5D49),
|
||||
bn_pack4(0x5DC7, 0xD7B4, 0x6154, 0xD6B6),
|
||||
bn_pack4(0x8E49, 0x5C1D, 0x6089, 0xDAD1),
|
||||
bn_pack4(0xE0D5, 0xD8E2, 0x50B9, 0x8BE4),
|
||||
bn_pack4(0x383B, 0x4813, 0xD692, 0xC6E0),
|
||||
bn_pack4(0xD674, 0xDF74, 0x96EA, 0x81D3),
|
||||
bn_pack4(0x9EA2, 0x314C, 0x9C25, 0x6576),
|
||||
bn_pack4(0x6072, 0x6187, 0x75FF, 0x3C0B),
|
||||
bn_pack4(0x9C33, 0xF80A, 0xFA8F, 0xC5E8),
|
||||
bn_pack4(0xEEAF, 0x0AB9, 0xADB3, 0x8DD6)
|
||||
};
|
||||
|
||||
const BIGNUM bn_group_1024 = {
|
||||
(BN_ULONG *)bn_group_1024_value,
|
||||
OSSL_NELEM(bn_group_1024_value),
|
||||
OSSL_NELEM(bn_group_1024_value),
|
||||
0,
|
||||
BN_FLG_STATIC_DATA
|
||||
};
|
||||
|
||||
static const BN_ULONG bn_group_1536_value[] = {
|
||||
bn_pack4(0xCF76, 0xE3FE, 0xD135, 0xF9BB),
|
||||
bn_pack4(0x1518, 0x0F93, 0x499A, 0x234D),
|
||||
bn_pack4(0x8CE7, 0xA28C, 0x2442, 0xC6F3),
|
||||
bn_pack4(0x5A02, 0x1FFF, 0x5E91, 0x479E),
|
||||
bn_pack4(0x7F8A, 0x2FE9, 0xB8B5, 0x292E),
|
||||
bn_pack4(0x837C, 0x264A, 0xE3A9, 0xBEB8),
|
||||
bn_pack4(0xE442, 0x734A, 0xF7CC, 0xB7AE),
|
||||
bn_pack4(0x6577, 0x2E43, 0x7D6C, 0x7F8C),
|
||||
bn_pack4(0xDB2F, 0xD53D, 0x24B7, 0xC486),
|
||||
bn_pack4(0x6EDF, 0x0195, 0x3934, 0x9627),
|
||||
bn_pack4(0x158B, 0xFD3E, 0x2B9C, 0x8CF5),
|
||||
bn_pack4(0x764E, 0x3F4B, 0x53DD, 0x9DA1),
|
||||
bn_pack4(0x4754, 0x8381, 0xDBC5, 0xB1FC),
|
||||
bn_pack4(0x9B60, 0x9E0B, 0xE3BA, 0xB63D),
|
||||
bn_pack4(0x8134, 0xB1C8, 0xB979, 0x8914),
|
||||
bn_pack4(0xDF02, 0x8A7C, 0xEC67, 0xF0D0),
|
||||
bn_pack4(0x80B6, 0x55BB, 0x9A22, 0xE8DC),
|
||||
bn_pack4(0x1558, 0x903B, 0xA0D0, 0xF843),
|
||||
bn_pack4(0x51C6, 0xA94B, 0xE460, 0x7A29),
|
||||
bn_pack4(0x5F4F, 0x5F55, 0x6E27, 0xCBDE),
|
||||
bn_pack4(0xBEEE, 0xA961, 0x4B19, 0xCC4D),
|
||||
bn_pack4(0xDBA5, 0x1DF4, 0x99AC, 0x4C80),
|
||||
bn_pack4(0xB1F1, 0x2A86, 0x17A4, 0x7BBB),
|
||||
bn_pack4(0x9DEF, 0x3CAF, 0xB939, 0x277A)
|
||||
};
|
||||
|
||||
const BIGNUM bn_group_1536 = {
|
||||
(BN_ULONG *)bn_group_1536_value,
|
||||
OSSL_NELEM(bn_group_1536_value),
|
||||
OSSL_NELEM(bn_group_1536_value),
|
||||
0,
|
||||
BN_FLG_STATIC_DATA
|
||||
};
|
||||
|
||||
static const BN_ULONG bn_group_2048_value[] = {
|
||||
bn_pack4(0x0FA7, 0x111F, 0x9E4A, 0xFF73),
|
||||
bn_pack4(0x9B65, 0xE372, 0xFCD6, 0x8EF2),
|
||||
bn_pack4(0x35DE, 0x236D, 0x525F, 0x5475),
|
||||
bn_pack4(0x94B5, 0xC803, 0xD89F, 0x7AE4),
|
||||
bn_pack4(0x71AE, 0x35F8, 0xE9DB, 0xFBB6),
|
||||
bn_pack4(0x2A56, 0x98F3, 0xA8D0, 0xC382),
|
||||
bn_pack4(0x9CCC, 0x041C, 0x7BC3, 0x08D8),
|
||||
bn_pack4(0xAF87, 0x4E73, 0x03CE, 0x5329),
|
||||
bn_pack4(0x6160, 0x2790, 0x04E5, 0x7AE6),
|
||||
bn_pack4(0x032C, 0xFBDB, 0xF52F, 0xB378),
|
||||
bn_pack4(0x5EA7, 0x7A27, 0x75D2, 0xECFA),
|
||||
bn_pack4(0x5445, 0x23B5, 0x24B0, 0xD57D),
|
||||
bn_pack4(0x5B9D, 0x32E6, 0x88F8, 0x7748),
|
||||
bn_pack4(0xF1D2, 0xB907, 0x8717, 0x461A),
|
||||
bn_pack4(0x76BD, 0x207A, 0x436C, 0x6481),
|
||||
bn_pack4(0xCA97, 0xB43A, 0x23FB, 0x8016),
|
||||
bn_pack4(0x1D28, 0x1E44, 0x6B14, 0x773B),
|
||||
bn_pack4(0x7359, 0xD041, 0xD5C3, 0x3EA7),
|
||||
bn_pack4(0xA80D, 0x740A, 0xDBF4, 0xFF74),
|
||||
bn_pack4(0x55F9, 0x7993, 0xEC97, 0x5EEA),
|
||||
bn_pack4(0x2918, 0xA996, 0x2F0B, 0x93B8),
|
||||
bn_pack4(0x661A, 0x05FB, 0xD5FA, 0xAAE8),
|
||||
bn_pack4(0xCF60, 0x9517, 0x9A16, 0x3AB3),
|
||||
bn_pack4(0xE808, 0x3969, 0xEDB7, 0x67B0),
|
||||
bn_pack4(0xCD7F, 0x48A9, 0xDA04, 0xFD50),
|
||||
bn_pack4(0xD523, 0x12AB, 0x4B03, 0x310D),
|
||||
bn_pack4(0x8193, 0xE075, 0x7767, 0xA13D),
|
||||
bn_pack4(0xA373, 0x29CB, 0xB4A0, 0x99ED),
|
||||
bn_pack4(0xFC31, 0x9294, 0x3DB5, 0x6050),
|
||||
bn_pack4(0xAF72, 0xB665, 0x1987, 0xEE07),
|
||||
bn_pack4(0xF166, 0xDE5E, 0x1389, 0x582F),
|
||||
bn_pack4(0xAC6B, 0xDB41, 0x324A, 0x9A9B)
|
||||
};
|
||||
|
||||
const BIGNUM bn_group_2048 = {
|
||||
(BN_ULONG *)bn_group_2048_value,
|
||||
OSSL_NELEM(bn_group_2048_value),
|
||||
OSSL_NELEM(bn_group_2048_value),
|
||||
0,
|
||||
BN_FLG_STATIC_DATA
|
||||
};
|
||||
|
||||
static const BN_ULONG bn_group_3072_value[] = {
|
||||
bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF),
|
||||
bn_pack4(0x4B82, 0xD120, 0xA93A, 0xD2CA),
|
||||
bn_pack4(0x43DB, 0x5BFC, 0xE0FD, 0x108E),
|
||||
bn_pack4(0x08E2, 0x4FA0, 0x74E5, 0xAB31),
|
||||
bn_pack4(0x7709, 0x88C0, 0xBAD9, 0x46E2),
|
||||
bn_pack4(0xBBE1, 0x1757, 0x7A61, 0x5D6C),
|
||||
bn_pack4(0x521F, 0x2B18, 0x177B, 0x200C),
|
||||
bn_pack4(0xD876, 0x0273, 0x3EC8, 0x6A64),
|
||||
bn_pack4(0xF12F, 0xFA06, 0xD98A, 0x0864),
|
||||
bn_pack4(0xCEE3, 0xD226, 0x1AD2, 0xEE6B),
|
||||
bn_pack4(0x1E8C, 0x94E0, 0x4A25, 0x619D),
|
||||
bn_pack4(0xABF5, 0xAE8C, 0xDB09, 0x33D7),
|
||||
bn_pack4(0xB397, 0x0F85, 0xA6E1, 0xE4C7),
|
||||
bn_pack4(0x8AEA, 0x7157, 0x5D06, 0x0C7D),
|
||||
bn_pack4(0xECFB, 0x8504, 0x58DB, 0xEF0A),
|
||||
bn_pack4(0xA855, 0x21AB, 0xDF1C, 0xBA64),
|
||||
bn_pack4(0xAD33, 0x170D, 0x0450, 0x7A33),
|
||||
bn_pack4(0x1572, 0x8E5A, 0x8AAA, 0xC42D),
|
||||
bn_pack4(0x15D2, 0x2618, 0x98FA, 0x0510),
|
||||
bn_pack4(0x3995, 0x497C, 0xEA95, 0x6AE5),
|
||||
bn_pack4(0xDE2B, 0xCBF6, 0x9558, 0x1718),
|
||||
bn_pack4(0xB5C5, 0x5DF0, 0x6F4C, 0x52C9),
|
||||
bn_pack4(0x9B27, 0x83A2, 0xEC07, 0xA28F),
|
||||
bn_pack4(0xE39E, 0x772C, 0x180E, 0x8603),
|
||||
bn_pack4(0x3290, 0x5E46, 0x2E36, 0xCE3B),
|
||||
bn_pack4(0xF174, 0x6C08, 0xCA18, 0x217C),
|
||||
bn_pack4(0x670C, 0x354E, 0x4ABC, 0x9804),
|
||||
bn_pack4(0x9ED5, 0x2907, 0x7096, 0x966D),
|
||||
bn_pack4(0x1C62, 0xF356, 0x2085, 0x52BB),
|
||||
bn_pack4(0x8365, 0x5D23, 0xDCA3, 0xAD96),
|
||||
bn_pack4(0x6916, 0x3FA8, 0xFD24, 0xCF5F),
|
||||
bn_pack4(0x98DA, 0x4836, 0x1C55, 0xD39A),
|
||||
bn_pack4(0xC200, 0x7CB8, 0xA163, 0xBF05),
|
||||
bn_pack4(0x4928, 0x6651, 0xECE4, 0x5B3D),
|
||||
bn_pack4(0xAE9F, 0x2411, 0x7C4B, 0x1FE6),
|
||||
bn_pack4(0xEE38, 0x6BFB, 0x5A89, 0x9FA5),
|
||||
bn_pack4(0x0BFF, 0x5CB6, 0xF406, 0xB7ED),
|
||||
bn_pack4(0xF44C, 0x42E9, 0xA637, 0xED6B),
|
||||
bn_pack4(0xE485, 0xB576, 0x625E, 0x7EC6),
|
||||
bn_pack4(0x4FE1, 0x356D, 0x6D51, 0xC245),
|
||||
bn_pack4(0x302B, 0x0A6D, 0xF25F, 0x1437),
|
||||
bn_pack4(0xEF95, 0x19B3, 0xCD3A, 0x431B),
|
||||
bn_pack4(0x514A, 0x0879, 0x8E34, 0x04DD),
|
||||
bn_pack4(0x020B, 0xBEA6, 0x3B13, 0x9B22),
|
||||
bn_pack4(0x2902, 0x4E08, 0x8A67, 0xCC74),
|
||||
bn_pack4(0xC4C6, 0x628B, 0x80DC, 0x1CD1),
|
||||
bn_pack4(0xC90F, 0xDAA2, 0x2168, 0xC234),
|
||||
bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF)
|
||||
};
|
||||
|
||||
const BIGNUM bn_group_3072 = {
|
||||
(BN_ULONG *)bn_group_3072_value,
|
||||
OSSL_NELEM(bn_group_3072_value),
|
||||
OSSL_NELEM(bn_group_3072_value),
|
||||
0,
|
||||
BN_FLG_STATIC_DATA
|
||||
};
|
||||
|
||||
static const BN_ULONG bn_group_4096_value[] = {
|
||||
bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF),
|
||||
bn_pack4(0x4DF4, 0x35C9, 0x3406, 0x3199),
|
||||
bn_pack4(0x86FF, 0xB7DC, 0x90A6, 0xC08F),
|
||||
bn_pack4(0x93B4, 0xEA98, 0x8D8F, 0xDDC1),
|
||||
bn_pack4(0xD006, 0x9127, 0xD5B0, 0x5AA9),
|
||||
bn_pack4(0xB81B, 0xDD76, 0x2170, 0x481C),
|
||||
bn_pack4(0x1F61, 0x2970, 0xCEE2, 0xD7AF),
|
||||
bn_pack4(0x233B, 0xA186, 0x515B, 0xE7ED),
|
||||
bn_pack4(0x99B2, 0x964F, 0xA090, 0xC3A2),
|
||||
bn_pack4(0x287C, 0x5947, 0x4E6B, 0xC05D),
|
||||
bn_pack4(0x2E8E, 0xFC14, 0x1FBE, 0xCAA6),
|
||||
bn_pack4(0xDBBB, 0xC2DB, 0x04DE, 0x8EF9),
|
||||
bn_pack4(0x2583, 0xE9CA, 0x2AD4, 0x4CE8),
|
||||
bn_pack4(0x1A94, 0x6834, 0xB615, 0x0BDA),
|
||||
bn_pack4(0x99C3, 0x2718, 0x6AF4, 0xE23C),
|
||||
bn_pack4(0x8871, 0x9A10, 0xBDBA, 0x5B26),
|
||||
bn_pack4(0x1A72, 0x3C12, 0xA787, 0xE6D7),
|
||||
bn_pack4(0x4B82, 0xD120, 0xA921, 0x0801),
|
||||
bn_pack4(0x43DB, 0x5BFC, 0xE0FD, 0x108E),
|
||||
bn_pack4(0x08E2, 0x4FA0, 0x74E5, 0xAB31),
|
||||
bn_pack4(0x7709, 0x88C0, 0xBAD9, 0x46E2),
|
||||
bn_pack4(0xBBE1, 0x1757, 0x7A61, 0x5D6C),
|
||||
bn_pack4(0x521F, 0x2B18, 0x177B, 0x200C),
|
||||
bn_pack4(0xD876, 0x0273, 0x3EC8, 0x6A64),
|
||||
bn_pack4(0xF12F, 0xFA06, 0xD98A, 0x0864),
|
||||
bn_pack4(0xCEE3, 0xD226, 0x1AD2, 0xEE6B),
|
||||
bn_pack4(0x1E8C, 0x94E0, 0x4A25, 0x619D),
|
||||
bn_pack4(0xABF5, 0xAE8C, 0xDB09, 0x33D7),
|
||||
bn_pack4(0xB397, 0x0F85, 0xA6E1, 0xE4C7),
|
||||
bn_pack4(0x8AEA, 0x7157, 0x5D06, 0x0C7D),
|
||||
bn_pack4(0xECFB, 0x8504, 0x58DB, 0xEF0A),
|
||||
bn_pack4(0xA855, 0x21AB, 0xDF1C, 0xBA64),
|
||||
bn_pack4(0xAD33, 0x170D, 0x0450, 0x7A33),
|
||||
bn_pack4(0x1572, 0x8E5A, 0x8AAA, 0xC42D),
|
||||
bn_pack4(0x15D2, 0x2618, 0x98FA, 0x0510),
|
||||
bn_pack4(0x3995, 0x497C, 0xEA95, 0x6AE5),
|
||||
bn_pack4(0xDE2B, 0xCBF6, 0x9558, 0x1718),
|
||||
bn_pack4(0xB5C5, 0x5DF0, 0x6F4C, 0x52C9),
|
||||
bn_pack4(0x9B27, 0x83A2, 0xEC07, 0xA28F),
|
||||
bn_pack4(0xE39E, 0x772C, 0x180E, 0x8603),
|
||||
bn_pack4(0x3290, 0x5E46, 0x2E36, 0xCE3B),
|
||||
bn_pack4(0xF174, 0x6C08, 0xCA18, 0x217C),
|
||||
bn_pack4(0x670C, 0x354E, 0x4ABC, 0x9804),
|
||||
bn_pack4(0x9ED5, 0x2907, 0x7096, 0x966D),
|
||||
bn_pack4(0x1C62, 0xF356, 0x2085, 0x52BB),
|
||||
bn_pack4(0x8365, 0x5D23, 0xDCA3, 0xAD96),
|
||||
bn_pack4(0x6916, 0x3FA8, 0xFD24, 0xCF5F),
|
||||
bn_pack4(0x98DA, 0x4836, 0x1C55, 0xD39A),
|
||||
bn_pack4(0xC200, 0x7CB8, 0xA163, 0xBF05),
|
||||
bn_pack4(0x4928, 0x6651, 0xECE4, 0x5B3D),
|
||||
bn_pack4(0xAE9F, 0x2411, 0x7C4B, 0x1FE6),
|
||||
bn_pack4(0xEE38, 0x6BFB, 0x5A89, 0x9FA5),
|
||||
bn_pack4(0x0BFF, 0x5CB6, 0xF406, 0xB7ED),
|
||||
bn_pack4(0xF44C, 0x42E9, 0xA637, 0xED6B),
|
||||
bn_pack4(0xE485, 0xB576, 0x625E, 0x7EC6),
|
||||
bn_pack4(0x4FE1, 0x356D, 0x6D51, 0xC245),
|
||||
bn_pack4(0x302B, 0x0A6D, 0xF25F, 0x1437),
|
||||
bn_pack4(0xEF95, 0x19B3, 0xCD3A, 0x431B),
|
||||
bn_pack4(0x514A, 0x0879, 0x8E34, 0x04DD),
|
||||
bn_pack4(0x020B, 0xBEA6, 0x3B13, 0x9B22),
|
||||
bn_pack4(0x2902, 0x4E08, 0x8A67, 0xCC74),
|
||||
bn_pack4(0xC4C6, 0x628B, 0x80DC, 0x1CD1),
|
||||
bn_pack4(0xC90F, 0xDAA2, 0x2168, 0xC234),
|
||||
bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF)
|
||||
};
|
||||
|
||||
const BIGNUM bn_group_4096 = {
|
||||
(BN_ULONG *)bn_group_4096_value,
|
||||
OSSL_NELEM(bn_group_4096_value),
|
||||
OSSL_NELEM(bn_group_4096_value),
|
||||
0,
|
||||
BN_FLG_STATIC_DATA
|
||||
};
|
||||
|
||||
static const BN_ULONG bn_group_6144_value[] = {
|
||||
bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF),
|
||||
bn_pack4(0xE694, 0xF91E, 0x6DCC, 0x4024),
|
||||
bn_pack4(0x12BF, 0x2D5B, 0x0B74, 0x74D6),
|
||||
bn_pack4(0x043E, 0x8F66, 0x3F48, 0x60EE),
|
||||
bn_pack4(0x387F, 0xE8D7, 0x6E3C, 0x0468),
|
||||
bn_pack4(0xDA56, 0xC9EC, 0x2EF2, 0x9632),
|
||||
bn_pack4(0xEB19, 0xCCB1, 0xA313, 0xD55C),
|
||||
bn_pack4(0xF550, 0xAA3D, 0x8A1F, 0xBFF0),
|
||||
bn_pack4(0x06A1, 0xD58B, 0xB7C5, 0xDA76),
|
||||
bn_pack4(0xA797, 0x15EE, 0xF29B, 0xE328),
|
||||
bn_pack4(0x14CC, 0x5ED2, 0x0F80, 0x37E0),
|
||||
bn_pack4(0xCC8F, 0x6D7E, 0xBF48, 0xE1D8),
|
||||
bn_pack4(0x4BD4, 0x07B2, 0x2B41, 0x54AA),
|
||||
bn_pack4(0x0F1D, 0x45B7, 0xFF58, 0x5AC5),
|
||||
bn_pack4(0x23A9, 0x7A7E, 0x36CC, 0x88BE),
|
||||
bn_pack4(0x59E7, 0xC97F, 0xBEC7, 0xE8F3),
|
||||
bn_pack4(0xB5A8, 0x4031, 0x900B, 0x1C9E),
|
||||
bn_pack4(0xD55E, 0x702F, 0x4698, 0x0C82),
|
||||
bn_pack4(0xF482, 0xD7CE, 0x6E74, 0xFEF6),
|
||||
bn_pack4(0xF032, 0xEA15, 0xD172, 0x1D03),
|
||||
bn_pack4(0x5983, 0xCA01, 0xC64B, 0x92EC),
|
||||
bn_pack4(0x6FB8, 0xF401, 0x378C, 0xD2BF),
|
||||
bn_pack4(0x3320, 0x5151, 0x2BD7, 0xAF42),
|
||||
bn_pack4(0xDB7F, 0x1447, 0xE6CC, 0x254B),
|
||||
bn_pack4(0x44CE, 0x6CBA, 0xCED4, 0xBB1B),
|
||||
bn_pack4(0xDA3E, 0xDBEB, 0xCF9B, 0x14ED),
|
||||
bn_pack4(0x1797, 0x27B0, 0x865A, 0x8918),
|
||||
bn_pack4(0xB06A, 0x53ED, 0x9027, 0xD831),
|
||||
bn_pack4(0xE5DB, 0x382F, 0x4130, 0x01AE),
|
||||
bn_pack4(0xF8FF, 0x9406, 0xAD9E, 0x530E),
|
||||
bn_pack4(0xC975, 0x1E76, 0x3DBA, 0x37BD),
|
||||
bn_pack4(0xC1D4, 0xDCB2, 0x6026, 0x46DE),
|
||||
bn_pack4(0x36C3, 0xFAB4, 0xD27C, 0x7026),
|
||||
bn_pack4(0x4DF4, 0x35C9, 0x3402, 0x8492),
|
||||
bn_pack4(0x86FF, 0xB7DC, 0x90A6, 0xC08F),
|
||||
bn_pack4(0x93B4, 0xEA98, 0x8D8F, 0xDDC1),
|
||||
bn_pack4(0xD006, 0x9127, 0xD5B0, 0x5AA9),
|
||||
bn_pack4(0xB81B, 0xDD76, 0x2170, 0x481C),
|
||||
bn_pack4(0x1F61, 0x2970, 0xCEE2, 0xD7AF),
|
||||
bn_pack4(0x233B, 0xA186, 0x515B, 0xE7ED),
|
||||
bn_pack4(0x99B2, 0x964F, 0xA090, 0xC3A2),
|
||||
bn_pack4(0x287C, 0x5947, 0x4E6B, 0xC05D),
|
||||
bn_pack4(0x2E8E, 0xFC14, 0x1FBE, 0xCAA6),
|
||||
bn_pack4(0xDBBB, 0xC2DB, 0x04DE, 0x8EF9),
|
||||
bn_pack4(0x2583, 0xE9CA, 0x2AD4, 0x4CE8),
|
||||
bn_pack4(0x1A94, 0x6834, 0xB615, 0x0BDA),
|
||||
bn_pack4(0x99C3, 0x2718, 0x6AF4, 0xE23C),
|
||||
bn_pack4(0x8871, 0x9A10, 0xBDBA, 0x5B26),
|
||||
bn_pack4(0x1A72, 0x3C12, 0xA787, 0xE6D7),
|
||||
bn_pack4(0x4B82, 0xD120, 0xA921, 0x0801),
|
||||
bn_pack4(0x43DB, 0x5BFC, 0xE0FD, 0x108E),
|
||||
bn_pack4(0x08E2, 0x4FA0, 0x74E5, 0xAB31),
|
||||
bn_pack4(0x7709, 0x88C0, 0xBAD9, 0x46E2),
|
||||
bn_pack4(0xBBE1, 0x1757, 0x7A61, 0x5D6C),
|
||||
bn_pack4(0x521F, 0x2B18, 0x177B, 0x200C),
|
||||
bn_pack4(0xD876, 0x0273, 0x3EC8, 0x6A64),
|
||||
bn_pack4(0xF12F, 0xFA06, 0xD98A, 0x0864),
|
||||
bn_pack4(0xCEE3, 0xD226, 0x1AD2, 0xEE6B),
|
||||
bn_pack4(0x1E8C, 0x94E0, 0x4A25, 0x619D),
|
||||
bn_pack4(0xABF5, 0xAE8C, 0xDB09, 0x33D7),
|
||||
bn_pack4(0xB397, 0x0F85, 0xA6E1, 0xE4C7),
|
||||
bn_pack4(0x8AEA, 0x7157, 0x5D06, 0x0C7D),
|
||||
bn_pack4(0xECFB, 0x8504, 0x58DB, 0xEF0A),
|
||||
bn_pack4(0xA855, 0x21AB, 0xDF1C, 0xBA64),
|
||||
bn_pack4(0xAD33, 0x170D, 0x0450, 0x7A33),
|
||||
bn_pack4(0x1572, 0x8E5A, 0x8AAA, 0xC42D),
|
||||
bn_pack4(0x15D2, 0x2618, 0x98FA, 0x0510),
|
||||
bn_pack4(0x3995, 0x497C, 0xEA95, 0x6AE5),
|
||||
bn_pack4(0xDE2B, 0xCBF6, 0x9558, 0x1718),
|
||||
bn_pack4(0xB5C5, 0x5DF0, 0x6F4C, 0x52C9),
|
||||
bn_pack4(0x9B27, 0x83A2, 0xEC07, 0xA28F),
|
||||
bn_pack4(0xE39E, 0x772C, 0x180E, 0x8603),
|
||||
bn_pack4(0x3290, 0x5E46, 0x2E36, 0xCE3B),
|
||||
bn_pack4(0xF174, 0x6C08, 0xCA18, 0x217C),
|
||||
bn_pack4(0x670C, 0x354E, 0x4ABC, 0x9804),
|
||||
bn_pack4(0x9ED5, 0x2907, 0x7096, 0x966D),
|
||||
bn_pack4(0x1C62, 0xF356, 0x2085, 0x52BB),
|
||||
bn_pack4(0x8365, 0x5D23, 0xDCA3, 0xAD96),
|
||||
bn_pack4(0x6916, 0x3FA8, 0xFD24, 0xCF5F),
|
||||
bn_pack4(0x98DA, 0x4836, 0x1C55, 0xD39A),
|
||||
bn_pack4(0xC200, 0x7CB8, 0xA163, 0xBF05),
|
||||
bn_pack4(0x4928, 0x6651, 0xECE4, 0x5B3D),
|
||||
bn_pack4(0xAE9F, 0x2411, 0x7C4B, 0x1FE6),
|
||||
bn_pack4(0xEE38, 0x6BFB, 0x5A89, 0x9FA5),
|
||||
bn_pack4(0x0BFF, 0x5CB6, 0xF406, 0xB7ED),
|
||||
bn_pack4(0xF44C, 0x42E9, 0xA637, 0xED6B),
|
||||
bn_pack4(0xE485, 0xB576, 0x625E, 0x7EC6),
|
||||
bn_pack4(0x4FE1, 0x356D, 0x6D51, 0xC245),
|
||||
bn_pack4(0x302B, 0x0A6D, 0xF25F, 0x1437),
|
||||
bn_pack4(0xEF95, 0x19B3, 0xCD3A, 0x431B),
|
||||
bn_pack4(0x514A, 0x0879, 0x8E34, 0x04DD),
|
||||
bn_pack4(0x020B, 0xBEA6, 0x3B13, 0x9B22),
|
||||
bn_pack4(0x2902, 0x4E08, 0x8A67, 0xCC74),
|
||||
bn_pack4(0xC4C6, 0x628B, 0x80DC, 0x1CD1),
|
||||
bn_pack4(0xC90F, 0xDAA2, 0x2168, 0xC234),
|
||||
bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF)
|
||||
};
|
||||
|
||||
const BIGNUM bn_group_6144 = {
|
||||
(BN_ULONG *)bn_group_6144_value,
|
||||
OSSL_NELEM(bn_group_6144_value),
|
||||
OSSL_NELEM(bn_group_6144_value),
|
||||
0,
|
||||
BN_FLG_STATIC_DATA
|
||||
};
|
||||
|
||||
static const BN_ULONG bn_group_8192_value[] = {
|
||||
bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF),
|
||||
bn_pack4(0x60C9, 0x80DD, 0x98ED, 0xD3DF),
|
||||
bn_pack4(0xC81F, 0x56E8, 0x80B9, 0x6E71),
|
||||
bn_pack4(0x9E30, 0x50E2, 0x7656, 0x94DF),
|
||||
bn_pack4(0x9558, 0xE447, 0x5677, 0xE9AA),
|
||||
bn_pack4(0xC919, 0x0DA6, 0xFC02, 0x6E47),
|
||||
bn_pack4(0x889A, 0x002E, 0xD5EE, 0x382B),
|
||||
bn_pack4(0x4009, 0x438B, 0x481C, 0x6CD7),
|
||||
bn_pack4(0x3590, 0x46F4, 0xEB87, 0x9F92),
|
||||
bn_pack4(0xFAF3, 0x6BC3, 0x1ECF, 0xA268),
|
||||
bn_pack4(0xB1D5, 0x10BD, 0x7EE7, 0x4D73),
|
||||
bn_pack4(0xF9AB, 0x4819, 0x5DED, 0x7EA1),
|
||||
bn_pack4(0x64F3, 0x1CC5, 0x0846, 0x851D),
|
||||
bn_pack4(0x4597, 0xE899, 0xA025, 0x5DC1),
|
||||
bn_pack4(0xDF31, 0x0EE0, 0x74AB, 0x6A36),
|
||||
bn_pack4(0x6D2A, 0x13F8, 0x3F44, 0xF82D),
|
||||
bn_pack4(0x062B, 0x3CF5, 0xB3A2, 0x78A6),
|
||||
bn_pack4(0x7968, 0x3303, 0xED5B, 0xDD3A),
|
||||
bn_pack4(0xFA9D, 0x4B7F, 0xA2C0, 0x87E8),
|
||||
bn_pack4(0x4BCB, 0xC886, 0x2F83, 0x85DD),
|
||||
bn_pack4(0x3473, 0xFC64, 0x6CEA, 0x306B),
|
||||
bn_pack4(0x13EB, 0x57A8, 0x1A23, 0xF0C7),
|
||||
bn_pack4(0x2222, 0x2E04, 0xA403, 0x7C07),
|
||||
bn_pack4(0xE3FD, 0xB8BE, 0xFC84, 0x8AD9),
|
||||
bn_pack4(0x238F, 0x16CB, 0xE39D, 0x652D),
|
||||
bn_pack4(0x3423, 0xB474, 0x2BF1, 0xC978),
|
||||
bn_pack4(0x3AAB, 0x639C, 0x5AE4, 0xF568),
|
||||
bn_pack4(0x2576, 0xF693, 0x6BA4, 0x2466),
|
||||
bn_pack4(0x741F, 0xA7BF, 0x8AFC, 0x47ED),
|
||||
bn_pack4(0x3BC8, 0x32B6, 0x8D9D, 0xD300),
|
||||
bn_pack4(0xD8BE, 0xC4D0, 0x73B9, 0x31BA),
|
||||
bn_pack4(0x3877, 0x7CB6, 0xA932, 0xDF8C),
|
||||
bn_pack4(0x74A3, 0x926F, 0x12FE, 0xE5E4),
|
||||
bn_pack4(0xE694, 0xF91E, 0x6DBE, 0x1159),
|
||||
bn_pack4(0x12BF, 0x2D5B, 0x0B74, 0x74D6),
|
||||
bn_pack4(0x043E, 0x8F66, 0x3F48, 0x60EE),
|
||||
bn_pack4(0x387F, 0xE8D7, 0x6E3C, 0x0468),
|
||||
bn_pack4(0xDA56, 0xC9EC, 0x2EF2, 0x9632),
|
||||
bn_pack4(0xEB19, 0xCCB1, 0xA313, 0xD55C),
|
||||
bn_pack4(0xF550, 0xAA3D, 0x8A1F, 0xBFF0),
|
||||
bn_pack4(0x06A1, 0xD58B, 0xB7C5, 0xDA76),
|
||||
bn_pack4(0xA797, 0x15EE, 0xF29B, 0xE328),
|
||||
bn_pack4(0x14CC, 0x5ED2, 0x0F80, 0x37E0),
|
||||
bn_pack4(0xCC8F, 0x6D7E, 0xBF48, 0xE1D8),
|
||||
bn_pack4(0x4BD4, 0x07B2, 0x2B41, 0x54AA),
|
||||
bn_pack4(0x0F1D, 0x45B7, 0xFF58, 0x5AC5),
|
||||
bn_pack4(0x23A9, 0x7A7E, 0x36CC, 0x88BE),
|
||||
bn_pack4(0x59E7, 0xC97F, 0xBEC7, 0xE8F3),
|
||||
bn_pack4(0xB5A8, 0x4031, 0x900B, 0x1C9E),
|
||||
bn_pack4(0xD55E, 0x702F, 0x4698, 0x0C82),
|
||||
bn_pack4(0xF482, 0xD7CE, 0x6E74, 0xFEF6),
|
||||
bn_pack4(0xF032, 0xEA15, 0xD172, 0x1D03),
|
||||
bn_pack4(0x5983, 0xCA01, 0xC64B, 0x92EC),
|
||||
bn_pack4(0x6FB8, 0xF401, 0x378C, 0xD2BF),
|
||||
bn_pack4(0x3320, 0x5151, 0x2BD7, 0xAF42),
|
||||
bn_pack4(0xDB7F, 0x1447, 0xE6CC, 0x254B),
|
||||
bn_pack4(0x44CE, 0x6CBA, 0xCED4, 0xBB1B),
|
||||
bn_pack4(0xDA3E, 0xDBEB, 0xCF9B, 0x14ED),
|
||||
bn_pack4(0x1797, 0x27B0, 0x865A, 0x8918),
|
||||
bn_pack4(0xB06A, 0x53ED, 0x9027, 0xD831),
|
||||
bn_pack4(0xE5DB, 0x382F, 0x4130, 0x01AE),
|
||||
bn_pack4(0xF8FF, 0x9406, 0xAD9E, 0x530E),
|
||||
bn_pack4(0xC975, 0x1E76, 0x3DBA, 0x37BD),
|
||||
bn_pack4(0xC1D4, 0xDCB2, 0x6026, 0x46DE),
|
||||
bn_pack4(0x36C3, 0xFAB4, 0xD27C, 0x7026),
|
||||
bn_pack4(0x4DF4, 0x35C9, 0x3402, 0x8492),
|
||||
bn_pack4(0x86FF, 0xB7DC, 0x90A6, 0xC08F),
|
||||
bn_pack4(0x93B4, 0xEA98, 0x8D8F, 0xDDC1),
|
||||
bn_pack4(0xD006, 0x9127, 0xD5B0, 0x5AA9),
|
||||
bn_pack4(0xB81B, 0xDD76, 0x2170, 0x481C),
|
||||
bn_pack4(0x1F61, 0x2970, 0xCEE2, 0xD7AF),
|
||||
bn_pack4(0x233B, 0xA186, 0x515B, 0xE7ED),
|
||||
bn_pack4(0x99B2, 0x964F, 0xA090, 0xC3A2),
|
||||
bn_pack4(0x287C, 0x5947, 0x4E6B, 0xC05D),
|
||||
bn_pack4(0x2E8E, 0xFC14, 0x1FBE, 0xCAA6),
|
||||
bn_pack4(0xDBBB, 0xC2DB, 0x04DE, 0x8EF9),
|
||||
bn_pack4(0x2583, 0xE9CA, 0x2AD4, 0x4CE8),
|
||||
bn_pack4(0x1A94, 0x6834, 0xB615, 0x0BDA),
|
||||
bn_pack4(0x99C3, 0x2718, 0x6AF4, 0xE23C),
|
||||
bn_pack4(0x8871, 0x9A10, 0xBDBA, 0x5B26),
|
||||
bn_pack4(0x1A72, 0x3C12, 0xA787, 0xE6D7),
|
||||
bn_pack4(0x4B82, 0xD120, 0xA921, 0x0801),
|
||||
bn_pack4(0x43DB, 0x5BFC, 0xE0FD, 0x108E),
|
||||
bn_pack4(0x08E2, 0x4FA0, 0x74E5, 0xAB31),
|
||||
bn_pack4(0x7709, 0x88C0, 0xBAD9, 0x46E2),
|
||||
bn_pack4(0xBBE1, 0x1757, 0x7A61, 0x5D6C),
|
||||
bn_pack4(0x521F, 0x2B18, 0x177B, 0x200C),
|
||||
bn_pack4(0xD876, 0x0273, 0x3EC8, 0x6A64),
|
||||
bn_pack4(0xF12F, 0xFA06, 0xD98A, 0x0864),
|
||||
bn_pack4(0xCEE3, 0xD226, 0x1AD2, 0xEE6B),
|
||||
bn_pack4(0x1E8C, 0x94E0, 0x4A25, 0x619D),
|
||||
bn_pack4(0xABF5, 0xAE8C, 0xDB09, 0x33D7),
|
||||
bn_pack4(0xB397, 0x0F85, 0xA6E1, 0xE4C7),
|
||||
bn_pack4(0x8AEA, 0x7157, 0x5D06, 0x0C7D),
|
||||
bn_pack4(0xECFB, 0x8504, 0x58DB, 0xEF0A),
|
||||
bn_pack4(0xA855, 0x21AB, 0xDF1C, 0xBA64),
|
||||
bn_pack4(0xAD33, 0x170D, 0x0450, 0x7A33),
|
||||
bn_pack4(0x1572, 0x8E5A, 0x8AAA, 0xC42D),
|
||||
bn_pack4(0x15D2, 0x2618, 0x98FA, 0x0510),
|
||||
bn_pack4(0x3995, 0x497C, 0xEA95, 0x6AE5),
|
||||
bn_pack4(0xDE2B, 0xCBF6, 0x9558, 0x1718),
|
||||
bn_pack4(0xB5C5, 0x5DF0, 0x6F4C, 0x52C9),
|
||||
bn_pack4(0x9B27, 0x83A2, 0xEC07, 0xA28F),
|
||||
bn_pack4(0xE39E, 0x772C, 0x180E, 0x8603),
|
||||
bn_pack4(0x3290, 0x5E46, 0x2E36, 0xCE3B),
|
||||
bn_pack4(0xF174, 0x6C08, 0xCA18, 0x217C),
|
||||
bn_pack4(0x670C, 0x354E, 0x4ABC, 0x9804),
|
||||
bn_pack4(0x9ED5, 0x2907, 0x7096, 0x966D),
|
||||
bn_pack4(0x1C62, 0xF356, 0x2085, 0x52BB),
|
||||
bn_pack4(0x8365, 0x5D23, 0xDCA3, 0xAD96),
|
||||
bn_pack4(0x6916, 0x3FA8, 0xFD24, 0xCF5F),
|
||||
bn_pack4(0x98DA, 0x4836, 0x1C55, 0xD39A),
|
||||
bn_pack4(0xC200, 0x7CB8, 0xA163, 0xBF05),
|
||||
bn_pack4(0x4928, 0x6651, 0xECE4, 0x5B3D),
|
||||
bn_pack4(0xAE9F, 0x2411, 0x7C4B, 0x1FE6),
|
||||
bn_pack4(0xEE38, 0x6BFB, 0x5A89, 0x9FA5),
|
||||
bn_pack4(0x0BFF, 0x5CB6, 0xF406, 0xB7ED),
|
||||
bn_pack4(0xF44C, 0x42E9, 0xA637, 0xED6B),
|
||||
bn_pack4(0xE485, 0xB576, 0x625E, 0x7EC6),
|
||||
bn_pack4(0x4FE1, 0x356D, 0x6D51, 0xC245),
|
||||
bn_pack4(0x302B, 0x0A6D, 0xF25F, 0x1437),
|
||||
bn_pack4(0xEF95, 0x19B3, 0xCD3A, 0x431B),
|
||||
bn_pack4(0x514A, 0x0879, 0x8E34, 0x04DD),
|
||||
bn_pack4(0x020B, 0xBEA6, 0x3B13, 0x9B22),
|
||||
bn_pack4(0x2902, 0x4E08, 0x8A67, 0xCC74),
|
||||
bn_pack4(0xC4C6, 0x628B, 0x80DC, 0x1CD1),
|
||||
bn_pack4(0xC90F, 0xDAA2, 0x2168, 0xC234),
|
||||
bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF)
|
||||
};
|
||||
|
||||
const BIGNUM bn_group_8192 = {
|
||||
(BN_ULONG *)bn_group_8192_value,
|
||||
OSSL_NELEM(bn_group_8192_value),
|
||||
OSSL_NELEM(bn_group_8192_value),
|
||||
0,
|
||||
BN_FLG_STATIC_DATA
|
||||
};
|
||||
|
||||
static const BN_ULONG bn_generator_19_value[] = { 19 };
|
||||
|
||||
const BIGNUM bn_generator_19 = {
|
||||
(BN_ULONG *)bn_generator_19_value,
|
||||
1,
|
||||
1,
|
||||
0,
|
||||
BN_FLG_STATIC_DATA
|
||||
};
|
||||
static const BN_ULONG bn_generator_5_value[] = { 5 };
|
||||
|
||||
const BIGNUM bn_generator_5 = {
|
||||
(BN_ULONG *)bn_generator_5_value,
|
||||
1,
|
||||
1,
|
||||
0,
|
||||
BN_FLG_STATIC_DATA
|
||||
};
|
||||
static const BN_ULONG bn_generator_2_value[] = { 2 };
|
||||
|
||||
const BIGNUM bn_generator_2 = {
|
||||
(BN_ULONG *)bn_generator_2_value,
|
||||
1,
|
||||
1,
|
||||
0,
|
||||
BN_FLG_STATIC_DATA
|
||||
};
|
||||
|
||||
#endif
|
201
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_word.c
vendored
Normal file
201
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_word.c
vendored
Normal file
|
@ -0,0 +1,201 @@
|
|||
/*
|
||||
* Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include "internal/cryptlib.h"
|
||||
#include "bn_lcl.h"
|
||||
|
||||
/*
 * Return the remainder of |a| divided by the single word |w|; |a| itself is
 * left unmodified.  Returns (BN_ULONG)-1 when |w| is zero or, on builds
 * without BN_LLONG, when the temporary copy of |a| cannot be allocated.
 */
BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w)
{
#ifdef BN_LLONG
    BN_ULLONG rem = 0;
#else
    BN_ULONG rem = 0;
#endif
    int idx;

    if (!w)
        return (BN_ULONG)-1;

#ifndef BN_LLONG
    /*
     * Without a double-width type the half-word reduction below only works
     * for divisors up to half a word wide; larger divisors are handled by
     * dividing a scratch copy with BN_div_word().
     */
    if (w > ((BN_ULONG)1 << BN_BITS4)) {
        BIGNUM *copy = BN_dup(a);

        if (copy == NULL)
            return (BN_ULONG)-1;

        rem = BN_div_word(copy, w);
        BN_free(copy);

        return rem;
    }
#endif

    bn_check_top(a);
    w &= BN_MASK2;

    /* Walk the words from most significant to least significant. */
    idx = a->top;
    while (--idx >= 0) {
#ifdef BN_LLONG
        rem = (BN_ULLONG)(((rem << (BN_ULLONG)BN_BITS2) | a->d[idx]) %
                          (BN_ULLONG)w);
#else
        /*
         * Here w <= ((BN_ULONG)1 << BN_BITS4), hence
         * rem < ((BN_ULONG)1 << BN_BITS4), so shifting rem left by half a
         * word cannot overflow.  Fold in the upper half, then the lower.
         */
        BN_ULONG upper = (a->d[idx] >> BN_BITS4) & BN_MASK2l;
        BN_ULONG lower = a->d[idx] & BN_MASK2l;

        rem = ((rem << BN_BITS4) | upper) % w;
        rem = ((rem << BN_BITS4) | lower) % w;
#endif
    }
    return (BN_ULONG)rem;
}
|
||||
|
||||
/*
 * Divide |a| in place by the single word |w| and return the remainder.
 * Returns (BN_ULONG)-1 if |w| is zero or the normalising shift fails.
 */
BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w)
{
    BN_ULONG rem = 0;
    int idx, shift;

    bn_check_top(a);
    w &= BN_MASK2;

    /* division by zero is an error */
    if (w == 0)
        return (BN_ULONG)-1;
    /* nothing to divide */
    if (a->top == 0)
        return 0;

    /*
     * Normalise the operands (so bn_div_words doesn't complain): shift the
     * divisor and the dividend left by the same amount.
     */
    shift = BN_BITS2 - BN_num_bits_word(w);
    w <<= shift;
    if (!BN_lshift(a, a, shift))
        return (BN_ULONG)-1;

    /* Word-by-word long division, most significant word first. */
    idx = a->top;
    while (idx-- > 0) {
        BN_ULONG word = a->d[idx];
        BN_ULONG quot = bn_div_words(rem, word, w);

        rem = (word - ((quot * w) & BN_MASK2)) & BN_MASK2;
        a->d[idx] = quot;
    }

    /* drop a leading zero word left by the division */
    if (a->top > 0 && a->d[a->top - 1] == 0)
        a->top--;
    /* undo the normalisation on the remainder */
    rem >>= shift;
    if (a->top == 0)
        a->neg = 0;             /* don't allow negative zero */
    bn_check_top(a);
    return rem;
}
|
||||
|
||||
/*
 * Add the single word |w| to |a| in place.
 * Returns 1 on success, 0 if |a| could not be grown (or the result of
 * BN_set_word()/BN_sub_word() on the paths that delegate to them).
 */
int BN_add_word(BIGNUM *a, BN_ULONG w)
{
    BN_ULONG sum;
    int idx;

    bn_check_top(a);
    w &= BN_MASK2;

    /* adding zero changes nothing */
    if (w == 0)
        return 1;
    /* 0 + w is simply w */
    if (BN_is_zero(a))
        return BN_set_word(a, w);
    /* negative a: compute |a| - w, then flip the sign unless it is zero */
    if (a->neg) {
        int ok;

        a->neg = 0;
        ok = BN_sub_word(a, w);
        if (!BN_is_zero(a))
            a->neg = !(a->neg);
        return ok;
    }

    /* Ripple the addend, then the carry, through the words. */
    idx = 0;
    while (w != 0 && idx < a->top) {
        sum = (a->d[idx] + w) & BN_MASK2;
        a->d[idx] = sum;
        /* the sum wrapped if it ended up smaller than the addend */
        w = (w > sum) ? 1 : 0;
        idx++;
    }
    /* carry out of the top word: append one more word */
    if (w && idx == a->top) {
        if (bn_wexpand(a, a->top + 1) == NULL)
            return 0;
        a->top++;
        a->d[idx] = w;
    }
    bn_check_top(a);
    return 1;
}
|
||||
|
||||
/*
 * Subtract the single word |w| from |a| in place.
 * Returns 1 on success, or the result of BN_set_word()/BN_add_word() on the
 * paths that delegate to them.
 */
int BN_sub_word(BIGNUM *a, BN_ULONG w)
{
    int idx;

    bn_check_top(a);
    w &= BN_MASK2;

    /* subtracting zero changes nothing */
    if (w == 0)
        return 1;
    /* 0 - w is -w */
    if (BN_is_zero(a)) {
        int ok = BN_set_word(a, w);

        if (ok != 0)
            BN_set_negative(a, 1);
        return ok;
    }
    /* negative a: -(|a| + w) */
    if (a->neg) {
        int ok;

        a->neg = 0;
        ok = BN_add_word(a, w);
        a->neg = 1;
        return ok;
    }

    /* single-word a smaller than w: the result is -(w - a) */
    if (a->top == 1 && a->d[0] < w) {
        a->d[0] = w - a->d[0];
        a->neg = 1;
        return 1;
    }

    /* Subtract with borrow until a word can absorb what is left. */
    for (idx = 0; a->d[idx] < w; idx++) {
        a->d[idx] = (a->d[idx] - w) & BN_MASK2;
        w = 1;                  /* borrow from the next word */
    }
    a->d[idx] -= w;

    /* the top word became zero: shrink */
    if (a->d[idx] == 0 && idx == a->top - 1)
        a->top--;
    bn_check_top(a);
    return 1;
}
|
||||
|
||||
/*
 * Multiply |a| in place by the single word |w|.
 * Returns 1 on success, 0 if |a| could not be grown to hold the carry word.
 */
int BN_mul_word(BIGNUM *a, BN_ULONG w)
{
    BN_ULONG carry;

    bn_check_top(a);
    w &= BN_MASK2;

    /* a == 0 needs no work at all */
    if (a->top != 0) {
        if (w != 0) {
            carry = bn_mul_words(a->d, a->d, a->top, w);
            if (carry != 0) {
                /* the product needs one more word */
                if (bn_wexpand(a, a->top + 1) == NULL)
                    return 0;
                a->d[a->top++] = carry;
            }
        } else {
            BN_zero(a);
        }
    }
    bn_check_top(a);
    return 1;
}
|
244
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_x931p.c
vendored
Normal file
244
trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_x931p.c
vendored
Normal file
|
@ -0,0 +1,244 @@
|
|||
/*
|
||||
* Copyright 2011-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <openssl/bn.h>
|
||||
#include "bn_lcl.h"
|
||||
|
||||
/* X9.31 routines for prime derivation */
|
||||
|
||||
/*
|
||||
* X9.31 prime derivation. This is used to generate the primes pi (p1, p2,
|
||||
* q1, q2) from a parameter Xpi by checking successive odd integers.
|
||||
*/
|
||||
|
||||
static int bn_x931_derive_pi(BIGNUM *pi, const BIGNUM *Xpi, BN_CTX *ctx,
|
||||
BN_GENCB *cb)
|
||||
{
|
||||
int i = 0, is_prime;
|
||||
if (!BN_copy(pi, Xpi))
|
||||
return 0;
|
||||
if (!BN_is_odd(pi) && !BN_add_word(pi, 1))
|
||||
return 0;
|
||||
for (;;) {
|
||||
i++;
|
||||
BN_GENCB_call(cb, 0, i);
|
||||
/* NB 27 MR is specified in X9.31 */
|
||||
is_prime = BN_is_prime_fasttest_ex(pi, 27, ctx, 1, cb);
|
||||
if (is_prime < 0)
|
||||
return 0;
|
||||
if (is_prime)
|
||||
break;
|
||||
if (!BN_add_word(pi, 2))
|
||||
return 0;
|
||||
}
|
||||
BN_GENCB_call(cb, 2, i);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is the main X9.31 prime derivation function. From parameters Xp1, Xp2
|
||||
* and Xp derive the prime p. If the parameters p1 or p2 are not NULL they
|
||||
* will be returned too: this is needed for testing.
|
||||
*/
|
||||
|
||||
int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
|
||||
const BIGNUM *Xp, const BIGNUM *Xp1,
|
||||
const BIGNUM *Xp2, const BIGNUM *e, BN_CTX *ctx,
|
||||
BN_GENCB *cb)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
BIGNUM *t, *p1p2, *pm1;
|
||||
|
||||
/* Only even e supported */
|
||||
if (!BN_is_odd(e))
|
||||
return 0;
|
||||
|
||||
BN_CTX_start(ctx);
|
||||
if (p1 == NULL)
|
||||
p1 = BN_CTX_get(ctx);
|
||||
|
||||
if (p2 == NULL)
|
||||
p2 = BN_CTX_get(ctx);
|
||||
|
||||
t = BN_CTX_get(ctx);
|
||||
|
||||
p1p2 = BN_CTX_get(ctx);
|
||||
|
||||
pm1 = BN_CTX_get(ctx);
|
||||
|
||||
if (pm1 == NULL)
|
||||
goto err;
|
||||
|
||||
if (!bn_x931_derive_pi(p1, Xp1, ctx, cb))
|
||||
goto err;
|
||||
|
||||
if (!bn_x931_derive_pi(p2, Xp2, ctx, cb))
|
||||
goto err;
|
||||
|
||||
if (!BN_mul(p1p2, p1, p2, ctx))
|
||||
goto err;
|
||||
|
||||
/* First set p to value of Rp */
|
||||
|
||||
if (!BN_mod_inverse(p, p2, p1, ctx))
|
||||
goto err;
|
||||
|
||||
if (!BN_mul(p, p, p2, ctx))
|
||||
goto err;
|
||||
|
||||
if (!BN_mod_inverse(t, p1, p2, ctx))
|
||||
goto err;
|
||||
|
||||
if (!BN_mul(t, t, p1, ctx))
|
||||
goto err;
|
||||
|
||||
if (!BN_sub(p, p, t))
|
||||
goto err;
|
||||
|
||||
if (p->neg && !BN_add(p, p, p1p2))
|
||||
goto err;
|
||||
|
||||
/* p now equals Rp */
|
||||
|
||||
if (!BN_mod_sub(p, p, Xp, p1p2, ctx))
|
||||
goto err;
|
||||
|
||||
if (!BN_add(p, p, Xp))
|
||||
goto err;
|
||||
|
||||
/* p now equals Yp0 */
|
||||
|
||||
for (;;) {
|
||||
int i = 1;
|
||||
BN_GENCB_call(cb, 0, i++);
|
||||
if (!BN_copy(pm1, p))
|
||||
goto err;
|
||||
if (!BN_sub_word(pm1, 1))
|
||||
goto err;
|
||||
if (!BN_gcd(t, pm1, e, ctx))
|
||||
goto err;
|
||||
if (BN_is_one(t)) {
|
||||
/*
|
||||
* X9.31 specifies 8 MR and 1 Lucas test or any prime test
|
||||
* offering similar or better guarantees 50 MR is considerably
|
||||
* better.
|
||||
*/
|
||||
int r = BN_is_prime_fasttest_ex(p, 50, ctx, 1, cb);
|
||||
if (r < 0)
|
||||
goto err;
|
||||
if (r)
|
||||
break;
|
||||
}
|
||||
if (!BN_add(p, p, p1p2))
|
||||
goto err;
|
||||
}
|
||||
|
||||
BN_GENCB_call(cb, 3, 0);
|
||||
|
||||
ret = 1;
|
||||
|
||||
err:
|
||||
|
||||
BN_CTX_end(ctx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate pair of parameters Xp, Xq for X9.31 prime generation. Note: nbits
|
||||
* parameter is sum of number of bits in both.
|
||||
*/
|
||||
|
||||
int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx)
|
||||
{
|
||||
BIGNUM *t;
|
||||
int i;
|
||||
/*
|
||||
* Number of bits for each prime is of the form 512+128s for s = 0, 1,
|
||||
* ...
|
||||
*/
|
||||
if ((nbits < 1024) || (nbits & 0xff))
|
||||
return 0;
|
||||
nbits >>= 1;
|
||||
/*
|
||||
* The random value Xp must be between sqrt(2) * 2^(nbits-1) and 2^nbits
|
||||
* - 1. By setting the top two bits we ensure that the lower bound is
|
||||
* exceeded.
|
||||
*/
|
||||
if (!BN_priv_rand(Xp, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY))
|
||||
goto err;
|
||||
|
||||
BN_CTX_start(ctx);
|
||||
t = BN_CTX_get(ctx);
|
||||
if (t == NULL)
|
||||
goto err;
|
||||
|
||||
for (i = 0; i < 1000; i++) {
|
||||
if (!BN_priv_rand(Xq, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY))
|
||||
goto err;
|
||||
|
||||
/* Check that |Xp - Xq| > 2^(nbits - 100) */
|
||||
if (!BN_sub(t, Xp, Xq))
|
||||
goto err;
|
||||
if (BN_num_bits(t) > (nbits - 100))
|
||||
break;
|
||||
}
|
||||
|
||||
BN_CTX_end(ctx);
|
||||
|
||||
if (i < 1000)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
BN_CTX_end(ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate primes using X9.31 algorithm. Of the values p, p1, p2, Xp1 and
|
||||
* Xp2 only 'p' needs to be non-NULL. If any of the others are not NULL the
|
||||
* relevant parameter will be stored in it. Due to the fact that |Xp - Xq| >
|
||||
* 2^(nbits - 100) must be satisfied Xp and Xq are generated using the
|
||||
* previous function and supplied as input.
|
||||
*/
|
||||
|
||||
int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
|
||||
BIGNUM *Xp1, BIGNUM *Xp2,
|
||||
const BIGNUM *Xp,
|
||||
const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
BN_CTX_start(ctx);
|
||||
if (Xp1 == NULL)
|
||||
Xp1 = BN_CTX_get(ctx);
|
||||
if (Xp2 == NULL)
|
||||
Xp2 = BN_CTX_get(ctx);
|
||||
if (Xp1 == NULL || Xp2 == NULL)
|
||||
goto error;
|
||||
|
||||
if (!BN_priv_rand(Xp1, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
|
||||
goto error;
|
||||
if (!BN_priv_rand(Xp2, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
|
||||
goto error;
|
||||
if (!BN_X931_derive_prime_ex(p, p1, p2, Xp, Xp1, Xp2, e, ctx, cb))
|
||||
goto error;
|
||||
|
||||
ret = 1;
|
||||
|
||||
error:
|
||||
BN_CTX_end(ctx);
|
||||
|
||||
return ret;
|
||||
|
||||
}
|
67
trunk/3rdparty/openssl-1.1-fit/crypto/bn/build.info
vendored
Normal file
67
trunk/3rdparty/openssl-1.1-fit/crypto/bn/build.info
vendored
Normal file
|
@ -0,0 +1,67 @@
|
|||
LIBS=../../libcrypto
|
||||
SOURCE[../../libcrypto]=\
|
||||
bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \
|
||||
bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
|
||||
bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c \
|
||||
{- $target{bn_asm_src} -} \
|
||||
bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \
|
||||
bn_depr.c bn_const.c bn_x931p.c bn_intern.c bn_dh.c bn_srp.c
|
||||
INCLUDE[../../libcrypto]=../../crypto/include
|
||||
|
||||
INCLUDE[bn_exp.o]=..
|
||||
|
||||
GENERATE[bn-586.s]=asm/bn-586.pl \
|
||||
$(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
|
||||
DEPEND[bn-586.s]=../perlasm/x86asm.pl
|
||||
GENERATE[co-586.s]=asm/co-586.pl \
|
||||
$(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
|
||||
DEPEND[co-586.s]=../perlasm/x86asm.pl
|
||||
GENERATE[x86-mont.s]=asm/x86-mont.pl \
|
||||
$(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
|
||||
DEPEND[x86-mont.s]=../perlasm/x86asm.pl
|
||||
GENERATE[x86-gf2m.s]=asm/x86-gf2m.pl \
|
||||
$(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
|
||||
DEPEND[x86-gf2m.s]=../perlasm/x86asm.pl
|
||||
|
||||
GENERATE[sparcv9a-mont.S]=asm/sparcv9a-mont.pl $(PERLASM_SCHEME)
|
||||
INCLUDE[sparcv9a-mont.o]=..
|
||||
GENERATE[sparcv9-mont.S]=asm/sparcv9-mont.pl $(PERLASM_SCHEME)
|
||||
INCLUDE[sparcv9-mont.o]=..
|
||||
GENERATE[vis3-mont.S]=asm/vis3-mont.pl $(PERLASM_SCHEME)
|
||||
INCLUDE[vis3-mont.o]=..
|
||||
GENERATE[sparct4-mont.S]=asm/sparct4-mont.pl $(PERLASM_SCHEME)
|
||||
INCLUDE[sparct4-mont.o]=..
|
||||
GENERATE[sparcv9-gf2m.S]=asm/sparcv9-gf2m.pl $(PERLASM_SCHEME)
|
||||
INCLUDE[sparcv9-gf2m.o]=..
|
||||
|
||||
GENERATE[bn-mips.S]=asm/mips.pl $(PERLASM_SCHEME)
|
||||
INCLUDE[bn-mips.o]=..
|
||||
GENERATE[mips-mont.S]=asm/mips-mont.pl $(PERLASM_SCHEME)
|
||||
INCLUDE[mips-mont.o]=..
|
||||
|
||||
GENERATE[s390x-mont.S]=asm/s390x-mont.pl $(PERLASM_SCHEME)
|
||||
GENERATE[s390x-gf2m.s]=asm/s390x-gf2m.pl $(PERLASM_SCHEME)
|
||||
|
||||
GENERATE[x86_64-mont.s]=asm/x86_64-mont.pl $(PERLASM_SCHEME)
|
||||
GENERATE[x86_64-mont5.s]=asm/x86_64-mont5.pl $(PERLASM_SCHEME)
|
||||
GENERATE[x86_64-gf2m.s]=asm/x86_64-gf2m.pl $(PERLASM_SCHEME)
|
||||
GENERATE[rsaz-x86_64.s]=asm/rsaz-x86_64.pl $(PERLASM_SCHEME)
|
||||
GENERATE[rsaz-avx2.s]=asm/rsaz-avx2.pl $(PERLASM_SCHEME)
|
||||
|
||||
GENERATE[bn-ia64.s]=asm/ia64.S
|
||||
GENERATE[ia64-mont.s]=asm/ia64-mont.pl $(LIB_CFLAGS) $(LIB_CPPFLAGS)
|
||||
|
||||
GENERATE[parisc-mont.s]=asm/parisc-mont.pl $(PERLASM_SCHEME)
|
||||
|
||||
# ppc - AIX, Linux, MacOS X...
|
||||
GENERATE[bn-ppc.s]=asm/ppc.pl $(PERLASM_SCHEME)
|
||||
GENERATE[ppc-mont.s]=asm/ppc-mont.pl $(PERLASM_SCHEME)
|
||||
GENERATE[ppc64-mont.s]=asm/ppc64-mont.pl $(PERLASM_SCHEME)
|
||||
|
||||
GENERATE[alpha-mont.S]=asm/alpha-mont.pl $(PERLASM_SCHEME)
|
||||
|
||||
GENERATE[armv4-mont.S]=asm/armv4-mont.pl $(PERLASM_SCHEME)
|
||||
INCLUDE[armv4-mont.o]=..
|
||||
GENERATE[armv4-gf2m.S]=asm/armv4-gf2m.pl $(PERLASM_SCHEME)
|
||||
INCLUDE[armv4-gf2m.o]=..
|
||||
GENERATE[armv8-mont.S]=asm/armv8-mont.pl $(PERLASM_SCHEME)
|
315
trunk/3rdparty/openssl-1.1-fit/crypto/bn/rsaz_exp.c
vendored
Normal file
315
trunk/3rdparty/openssl-1.1-fit/crypto/bn/rsaz_exp.c
vendored
Normal file
|
@ -0,0 +1,315 @@
|
|||
/*
|
||||
* Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
* Copyright (c) 2012, Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*
|
||||
* Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
|
||||
* (1) Intel Corporation, Israel Development Center, Haifa, Israel
|
||||
* (2) University of Haifa, Israel
|
||||
*/
|
||||
|
||||
#include <openssl/opensslconf.h>
|
||||
#include "rsaz_exp.h"
|
||||
|
||||
#ifndef RSAZ_ENABLED
|
||||
NON_EMPTY_TRANSLATION_UNIT
|
||||
#else
|
||||
|
||||
/*
|
||||
* See crypto/bn/asm/rsaz-avx2.pl for further details.
|
||||
*/
|
||||
void rsaz_1024_norm2red_avx2(void *red, const void *norm);
|
||||
void rsaz_1024_mul_avx2(void *ret, const void *a, const void *b,
|
||||
const void *n, BN_ULONG k);
|
||||
void rsaz_1024_sqr_avx2(void *ret, const void *a, const void *n, BN_ULONG k,
|
||||
int cnt);
|
||||
void rsaz_1024_scatter5_avx2(void *tbl, const void *val, int i);
|
||||
void rsaz_1024_gather5_avx2(void *val, const void *tbl, int i);
|
||||
void rsaz_1024_red2norm_avx2(void *norm, const void *red);
|
||||
|
||||
/*
 * ALIGN64: per-compiler spelling of "align this object to 64 bytes".
 * Sun's compiler has no declaration attribute here, so the two constants
 * that need alignment are named in a pragma instead.
 */
#if defined(__GNUC__)
# define ALIGN64 __attribute__((aligned(64)))
#elif defined(_MSC_VER)
# define ALIGN64 __declspec(align(64))
#elif defined(__SUNPRO_C)
# define ALIGN64
# pragma align 64(one,two80)
#else
/* unknown compiler: not fatal, might hurt performance a little */
# define ALIGN64
#endif
|
||||
|
||||
/*
 * The value 1 as a 40-word operand for rsaz_1024_mul_avx2(); the 39
 * trailing words are implicitly zero-initialised.
 */
ALIGN64 static const BN_ULONG one[40] = { 1 };
|
||||
|
||||
/*
 * 40-word operand with only bit 22 of word 2 set; all other words are
 * implicitly zero-initialised.
 * NOTE(review): presumably 2^80 in the 29-bit-limb redundant form
 * (2 * 29 + 22 = 80) -- confirm against asm/rsaz-avx2.pl.
 */
ALIGN64 static const BN_ULONG two80[40] = { 0, 0, 1 << 22 };
|
||||
|
||||
/* Square |acc| once in place (mod |m|) and scatter it into table slot |slot|. */
static void rsaz_1024_tbl_sqr(void *tbl, void *acc, const void *m,
                              BN_ULONG k0, int slot)
{
    rsaz_1024_sqr_avx2(acc, acc, m, k0, 1);
    rsaz_1024_scatter5_avx2(tbl, acc, slot);
}

/* Multiply |acc| by |base| in place (mod |m|) and scatter it into |slot|. */
static void rsaz_1024_tbl_mul(void *tbl, void *acc, const void *base,
                              const void *m, BN_ULONG k0, int slot)
{
    rsaz_1024_mul_avx2(acc, acc, base, m, k0);
    rsaz_1024_scatter5_avx2(tbl, acc, slot);
}

/*
 * Fixed-window (5 bits per window, 32-entry table) modular exponentiation
 * built on the rsaz_1024_*_avx2 primitives: result_norm = base_norm^exponent
 * modulo m_norm.  The exponent is read as 128 bytes.  All intermediate
 * values live in the on-stack |storage| buffer, which is cleansed before
 * returning.
 */
void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16],
                            const BN_ULONG base_norm[16],
                            const BN_ULONG exponent[16],
                            const BN_ULONG m_norm[16], const BN_ULONG RR[16],
                            BN_ULONG k0)
{
    unsigned char storage[320 * 3 + 32 * 9 * 16 + 64]; /* 5.5KB */
    /* first 64-byte boundary strictly after the start of storage */
    unsigned char *aligned = storage + (64 - ((size_t)storage % 64));
    unsigned char *table_s = aligned + 320 * 3;
    unsigned char *R2 = table_s; /* borrows the table area until first scatter */
    unsigned char *exp_bytes = (unsigned char *)exponent;
    unsigned char *a_inv, *m, *result;
    int pos;
    int window;

    /* Lay out the three 320-byte operands so that |m| stays within a page. */
    if ((((size_t)aligned & 4095) + 320) >> 12) {
        result = aligned;
        a_inv = aligned + 320;
        m = aligned + 320 * 2;  /* should not cross page */
    } else {
        m = aligned;            /* should not cross page */
        result = aligned + 320;
        a_inv = aligned + 320 * 2;
    }

    /* Convert the inputs to the redundant representation. */
    rsaz_1024_norm2red_avx2(m, m_norm);
    rsaz_1024_norm2red_avx2(a_inv, base_norm);
    rsaz_1024_norm2red_avx2(R2, RR);

    rsaz_1024_mul_avx2(R2, R2, R2, m, k0);
    rsaz_1024_mul_avx2(R2, R2, two80, m, k0);

    /*
     * table[0] = 1 and table[1] = a_inv^1.  Both are computed before the
     * first scatter because R2 shares memory with the table.
     */
    rsaz_1024_mul_avx2(result, R2, one, m, k0);
    rsaz_1024_mul_avx2(a_inv, a_inv, R2, m, k0);

    rsaz_1024_scatter5_avx2(table_s, result, 0);
    rsaz_1024_scatter5_avx2(table_s, a_inv, 1);

    /* table[2] = a_inv^2 */
    rsaz_1024_sqr_avx2(result, a_inv, m, k0, 1);
    rsaz_1024_scatter5_avx2(table_s, result, 2);
#if 0
    /* this is almost 2x smaller and less than 1% slower */
    for (pos = 3; pos < 32; pos++)
        rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, pos);
#else
    /* 2 -> 4 -> 8 -> 16 -> 17 */
    rsaz_1024_tbl_sqr(table_s, result, m, k0, 4);
    rsaz_1024_tbl_sqr(table_s, result, m, k0, 8);
    rsaz_1024_tbl_sqr(table_s, result, m, k0, 16);
    rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, 17);

    /* 2 -> 3 -> 6 -> 12 -> 24 -> 25 */
    rsaz_1024_gather5_avx2(result, table_s, 2);
    rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, 3);
    rsaz_1024_tbl_sqr(table_s, result, m, k0, 6);
    rsaz_1024_tbl_sqr(table_s, result, m, k0, 12);
    rsaz_1024_tbl_sqr(table_s, result, m, k0, 24);
    rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, 25);

    /* 4 -> 5 -> 10 -> 20 -> 21 */
    rsaz_1024_gather5_avx2(result, table_s, 4);
    rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, 5);
    rsaz_1024_tbl_sqr(table_s, result, m, k0, 10);
    rsaz_1024_tbl_sqr(table_s, result, m, k0, 20);
    rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, 21);

    /* 6 -> 7 -> 14 -> 28 -> 29 */
    rsaz_1024_gather5_avx2(result, table_s, 6);
    rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, 7);
    rsaz_1024_tbl_sqr(table_s, result, m, k0, 14);
    rsaz_1024_tbl_sqr(table_s, result, m, k0, 28);
    rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, 29);

    /* 8 -> 9 -> 18 -> 19 */
    rsaz_1024_gather5_avx2(result, table_s, 8);
    rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, 9);
    rsaz_1024_tbl_sqr(table_s, result, m, k0, 18);
    rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, 19);

    /* 10 -> 11 -> 22 -> 23 */
    rsaz_1024_gather5_avx2(result, table_s, 10);
    rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, 11);
    rsaz_1024_tbl_sqr(table_s, result, m, k0, 22);
    rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, 23);

    /* 12 -> 13 -> 26 -> 27 */
    rsaz_1024_gather5_avx2(result, table_s, 12);
    rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, 13);
    rsaz_1024_tbl_sqr(table_s, result, m, k0, 26);
    rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, 27);

    /* 14 -> 15 -> 30 -> 31 */
    rsaz_1024_gather5_avx2(result, table_s, 14);
    rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, 15);
    rsaz_1024_tbl_sqr(table_s, result, m, k0, 30);
    rsaz_1024_tbl_mul(table_s, result, a_inv, m, k0, 31);
#endif

    /* first window: the top five bits of the exponent */
    window = exp_bytes[127] >> 3;
    rsaz_1024_gather5_avx2(result, table_s, window);

    /* 203 further five-bit windows, at bit offsets 1014 down to 4 */
    for (pos = 1014; pos > -1; pos -= 5) {
        rsaz_1024_sqr_avx2(result, result, m, k0, 5);

        /* read the two bytes straddling the window, then extract 5 bits */
        window = (exp_bytes[(pos / 8) + 1] << 8) | exp_bytes[pos / 8];
        window = (window >> (pos % 8)) & 31;

        rsaz_1024_gather5_avx2(a_inv, table_s, window); /* borrow a_inv */
        rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
    }

    /* last window holds the remaining four bits: square four times */
    rsaz_1024_sqr_avx2(result, result, m, k0, 4);

    window = exp_bytes[0] & 15;

    rsaz_1024_gather5_avx2(a_inv, table_s, window); /* borrow a_inv */
    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);

    /* from Montgomery */
    rsaz_1024_mul_avx2(result, result, one, m, k0);

    rsaz_1024_red2norm_avx2(result_norm, result);

    /* wipe every intermediate value from the stack buffer */
    OPENSSL_cleanse(storage, sizeof(storage));
}
|
||||
|
||||
/*
|
||||
* See crypto/bn/asm/rsaz-x86_64.pl for further details.
|
||||
*/
|
||||
void rsaz_512_mul(void *ret, const void *a, const void *b, const void *n,
|
||||
BN_ULONG k);
|
||||
void rsaz_512_mul_scatter4(void *ret, const void *a, const void *n,
|
||||
BN_ULONG k, const void *tbl, unsigned int power);
|
||||
void rsaz_512_mul_gather4(void *ret, const void *a, const void *tbl,
|
||||
const void *n, BN_ULONG k, unsigned int power);
|
||||
void rsaz_512_mul_by_one(void *ret, const void *a, const void *n, BN_ULONG k);
|
||||
void rsaz_512_sqr(void *ret, const void *a, const void *n, BN_ULONG k,
|
||||
int cnt);
|
||||
void rsaz_512_scatter4(void *tbl, const BN_ULONG *val, int power);
|
||||
void rsaz_512_gather4(BN_ULONG *val, const void *tbl, int power);
|
||||
|
||||
/*
 * RSAZ_512_mod_exp - 512-bit modular exponentiation using a fixed 4-bit
 * window and a 16-entry table of powers of the base.
 *
 *   result   - receives the final value (8 words), written by
 *              rsaz_512_mul_by_one() in the "from Montgomery" step.
 *   base     - the base (8 words).
 *   exponent - the exponent (8 words); only read, one byte at a time.
 *   m        - the modulus (8 words).
 *   k0       - passed unchanged to every rsaz_512_* multiply/square call.
 *   RR       - multiplied with |base| to produce the table[1] entry;
 *              presumably R^2 mod m for Montgomery conversion (confirm
 *              against the caller).
 *
 * The exponent is consumed from byte 63 down to byte 0, high nibble first,
 * i.e. byte 63 is treated as the most significant one.  That matches a
 * little-endian word layout.
 *
 * All scratch data lives in |storage| on the stack and is wiped with
 * OPENSSL_cleanse() before returning.
 */
void RSAZ_512_mod_exp(BN_ULONG result[8],
                      const BN_ULONG base[8], const BN_ULONG exponent[8],
                      const BN_ULONG m[8], BN_ULONG k0, const BN_ULONG RR[8])
{
    /*
     * Layout: 16 * 8 * 8 bytes of table, then 64 bytes for a_inv and
     * 64 bytes for temp, plus 64 spare bytes so that |table| can be moved
     * up to the next 64-byte boundary.
     */
    unsigned char storage[16 * 8 * 8 + 64 * 2 + 64]; /* 1.2KB */
    /* Advances by 1..64 bytes; a full 64 when storage is already aligned. */
    unsigned char *table = storage + (64 - ((size_t)storage % 64));
    BN_ULONG *a_inv = (BN_ULONG *)(table + 16 * 8 * 8);
    BN_ULONG *temp = (BN_ULONG *)(table + 16 * 8 * 8 + 8 * 8);
    /* The exponent is only read, so keep the const qualifier on the view. */
    const unsigned char *p_str = (const unsigned char *)exponent;
    int index;
    unsigned int wvalue;

    /*
     * table[0] = 1_inv
     *
     * temp = 2^512 - m: word 0 is negated and the remaining words are
     * complemented.  This equals the two's-complement negation of m as
     * long as m[0] != 0, because then no carry leaves word 0.
     */
    temp[0] = 0 - m[0];
    temp[1] = ~m[1];
    temp[2] = ~m[2];
    temp[3] = ~m[3];
    temp[4] = ~m[4];
    temp[5] = ~m[5];
    temp[6] = ~m[6];
    temp[7] = ~m[7];
    rsaz_512_scatter4(table, temp, 0);

    /* table [1] = a_inv^1 */
    rsaz_512_mul(a_inv, base, RR, m, k0);
    rsaz_512_scatter4(table, a_inv, 1);

    /* table [2] = a_inv^2 */
    rsaz_512_sqr(temp, a_inv, m, k0, 1);
    rsaz_512_scatter4(table, temp, 2);

    /*
     * table[3..15]: presumably each call multiplies the running value in
     * temp by a_inv and stores the product as table[index].
     */
    for (index = 3; index < 16; index++) {
        rsaz_512_mul_scatter4(temp, a_inv, m, k0, table, index);
    }

    /* load first window */
    wvalue = p_str[63];

    /* High nibble of the top byte seeds the accumulator ... */
    rsaz_512_gather4(temp, table, wvalue >> 4);
    /* ... then the low nibble is folded in like every later window. */
    rsaz_512_sqr(temp, temp, m, k0, 4);
    rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0xf);

    /* Remaining 63 bytes: two 4-bit windows per byte, high nibble first. */
    for (index = 62; index >= 0; index--) {
        wvalue = p_str[index];

        rsaz_512_sqr(temp, temp, m, k0, 4);
        rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue >> 4);

        rsaz_512_sqr(temp, temp, m, k0, 4);
        rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0x0f);
    }

    /* from Montgomery */
    rsaz_512_mul_by_one(result, temp, m, k0);

    /* Wipe the table and temporaries, which were derived from the inputs. */
    OPENSSL_cleanse(storage, sizeof(storage));
}
|
||||
|
||||
#endif
|
40
trunk/3rdparty/openssl-1.1-fit/crypto/bn/rsaz_exp.h
vendored
Normal file
40
trunk/3rdparty/openssl-1.1-fit/crypto/bn/rsaz_exp.h
vendored
Normal file
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved.
|
||||
* Copyright (c) 2012, Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*
|
||||
* Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
|
||||
* (1) Intel Corporation, Israel Development Center, Haifa, Israel
|
||||
* (2) University of Haifa, Israel
|
||||
*/
|
||||
|
||||
#ifndef RSAZ_EXP_H
# define RSAZ_EXP_H

/*
 * RSAZ_ENABLED is (re)defined here only when OPENSSL_BN_ASM_MONT is defined
 * and one of the x86-64 compiler macros below is set.  Any earlier
 * definition is dropped first, so the macro always reflects this test.
 */
# undef RSAZ_ENABLED
# if defined(OPENSSL_BN_ASM_MONT) && \
        (defined(__x86_64) || defined(__x86_64__) || \
         defined(_M_AMD64) || defined(_M_X64))
#  define RSAZ_ENABLED

#  include <openssl/bn.h>

/* Modular exponentiation on 16-word operands. */
void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16],
                            const BN_ULONG base_norm[16],
                            const BN_ULONG exponent[16],
                            const BN_ULONG m_norm[16],
                            const BN_ULONG RR[16],
                            BN_ULONG k0);

/*
 * Presumably returns non-zero when the AVX2 routine above may be used;
 * confirm against the definition.
 */
int rsaz_avx2_eligible(void);

/*
 * Modular exponentiation on 8-word operands.  Note that k0 precedes RR
 * here, the reverse of the argument order of the 1024-bit routine.
 */
void RSAZ_512_mod_exp(BN_ULONG result[8],
                      const BN_ULONG base_norm[8],
                      const BN_ULONG exponent[8],
                      const BN_ULONG m_norm[8],
                      BN_ULONG k0,
                      const BN_ULONG RR[8]);

# endif /* OPENSSL_BN_ASM_MONT && x86-64 */

#endif /* RSAZ_EXP_H */
|
Loading…
Add table
Add a link
Reference in a new issue