Upgrade openssl from 1.1.0e to 1.1.1b, with source code. 4.0.78

2025-03-09 15:49:59 +00:00 · 2021-03-01 20:47:57 +08:00 · 2021-03-01 20:47:57 +08:00 · 96dbd7bced
commit 96dbd7bced
parent 8f1c992379
1476 changed files with 616554 additions and 4 deletions
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/README.pod
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/README.pod
@ -0,0 +1,241 @@
+=pod
+
+=head1 NAME
+
+bn_mul_words, bn_mul_add_words, bn_sqr_words, bn_div_words,
+bn_add_words, bn_sub_words, bn_mul_comba4, bn_mul_comba8,
+bn_sqr_comba4, bn_sqr_comba8, bn_cmp_words, bn_mul_normal,
+bn_mul_low_normal, bn_mul_recursive, bn_mul_part_recursive,
+bn_mul_low_recursive, bn_sqr_normal, bn_sqr_recursive,
+bn_expand, bn_wexpand, bn_expand2, bn_fix_top, bn_check_top,
+bn_print, bn_dump, bn_set_max, bn_set_high, bn_set_low - BIGNUM
+library internal functions
+
+=head1 SYNOPSIS
+
+ #include <openssl/bn.h>
+
+ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w);
+ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num,
+   BN_ULONG w);
+ void     bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num);
+ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d);
+ BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,
+   int num);
+ BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,
+   int num);
+
+ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
+ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
+ void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a);
+ void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a);
+
+ int bn_cmp_words(BN_ULONG *a, BN_ULONG *b, int n);
+
+ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b,
+   int nb);
+ void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n);
+ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
+   int dna, int dnb, BN_ULONG *tmp);
+ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
+   int n, int tna, int tnb, BN_ULONG *tmp);
+ void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
+   int n2, BN_ULONG *tmp);
+
+ void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp);
+ void bn_sqr_recursive(BN_ULONG *r, BN_ULONG *a, int n2, BN_ULONG *tmp);
+
+ void mul(BN_ULONG r, BN_ULONG a, BN_ULONG w, BN_ULONG c);
+ void mul_add(BN_ULONG r, BN_ULONG a, BN_ULONG w, BN_ULONG c);
+ void sqr(BN_ULONG r0, BN_ULONG r1, BN_ULONG a);
+
+ BIGNUM *bn_expand(BIGNUM *a, int bits);
+ BIGNUM *bn_wexpand(BIGNUM *a, int n);
+ BIGNUM *bn_expand2(BIGNUM *a, int n);
+ void bn_fix_top(BIGNUM *a);
+
+ void bn_check_top(BIGNUM *a);
+ void bn_print(BIGNUM *a);
+ void bn_dump(BN_ULONG *d, int n);
+ void bn_set_max(BIGNUM *a);
+ void bn_set_high(BIGNUM *r, BIGNUM *a, int n);
+ void bn_set_low(BIGNUM *r, BIGNUM *a, int n);
+
+=head1 DESCRIPTION
+
+This page documents the internal functions used by the OpenSSL
+B<BIGNUM> implementation. They are described here to facilitate
+debugging and extending the library. They are I<not> to be used by
+applications.
+
+=head2 The BIGNUM structure
+
+ typedef struct bignum_st BIGNUM;
+
+ struct bignum_st
+        {
+        BN_ULONG *d;    /* Pointer to an array of 'BN_BITS2' bit chunks. */
+        int top;        /* Index of last used d +1. */
+        /* The next are internal book keeping for bn_expand. */
+        int dmax;       /* Size of the d array. */
+        int neg;        /* one if the number is negative */
+        int flags;
+        };
+
+
+The integer value is stored in B<d>, a malloc()ed array of words (B<BN_ULONG>),
+least significant word first. A B<BN_ULONG> can be either 16, 32 or 64 bits
+in size, depending on the 'number of bits' (B<BITS2>) specified in
+C<openssl/bn.h>.
+
+B<dmax> is the size of the B<d> array that has been allocated.  B<top>
+is the number of words being used, so for a value of 4, bn.d[0]=4 and
+bn.top=1.  B<neg> is 1 if the number is negative.  When a B<BIGNUM> is
+B<0>, the B<d> field can be B<NULL> and B<top> == B<0>.
+
+B<flags> is a bit field of flags which are defined in C<openssl/bn.h>. The
+flags begin with B<BN_FLG_>. The macros BN_set_flags(b, n) and
+BN_get_flags(b, n) exist to enable or fetch flag(s) B<n> from B<BIGNUM>
+structure B<b>.
+
+Various routines in this library require the use of temporary
+B<BIGNUM> variables during their execution.  Since dynamic memory
+allocation to create B<BIGNUM>s is rather expensive when used in
+conjunction with repeated subroutine calls, the B<BN_CTX> structure is
+used.  This structure contains B<BN_CTX_NUM> B<BIGNUM>s, see
+L<BN_CTX_start(3)>.
+
+=head2 Low-level arithmetic operations
+
+These functions are implemented in C and for several platforms in
+assembly language:
+
+bn_mul_words(B<rp>, B<ap>, B<num>, B<w>) operates on the B<num> word
+arrays B<rp> and B<ap>.  It computes B<ap> * B<w>, places the result
+in B<rp>, and returns the high word (carry).
+
+bn_mul_add_words(B<rp>, B<ap>, B<num>, B<w>) operates on the B<num>
+word arrays B<rp> and B<ap>.  It computes B<ap> * B<w> + B<rp>, places
+the result in B<rp>, and returns the high word (carry).
+
+bn_sqr_words(B<rp>, B<ap>, B<n>) operates on the B<num> word array
+B<ap> and the 2*B<num> word array B<ap>.  It computes B<ap> * B<ap>
+word-wise, and places the low and high bytes of the result in B<rp>.
+
+bn_div_words(B<h>, B<l>, B<d>) divides the two word number (B<h>, B<l>)
+by B<d> and returns the result.
+
+bn_add_words(B<rp>, B<ap>, B<bp>, B<num>) operates on the B<num> word
+arrays B<ap>, B<bp> and B<rp>.  It computes B<ap> + B<bp>, places the
+result in B<rp>, and returns the high word (carry).
+
+bn_sub_words(B<rp>, B<ap>, B<bp>, B<num>) operates on the B<num> word
+arrays B<ap>, B<bp> and B<rp>.  It computes B<ap> - B<bp>, places the
+result in B<rp>, and returns the carry (1 if B<bp> E<gt> B<ap>, 0
+otherwise).
+
+bn_mul_comba4(B<r>, B<a>, B<b>) operates on the 4 word arrays B<a> and
+B<b> and the 8 word array B<r>.  It computes B<a>*B<b> and places the
+result in B<r>.
+
+bn_mul_comba8(B<r>, B<a>, B<b>) operates on the 8 word arrays B<a> and
+B<b> and the 16 word array B<r>.  It computes B<a>*B<b> and places the
+result in B<r>.
+
+bn_sqr_comba4(B<r>, B<a>, B<b>) operates on the 4 word arrays B<a> and
+B<b> and the 8 word array B<r>.
+
+bn_sqr_comba8(B<r>, B<a>, B<b>) operates on the 8 word arrays B<a> and
+B<b> and the 16 word array B<r>.
+
+The following functions are implemented in C:
+
+bn_cmp_words(B<a>, B<b>, B<n>) operates on the B<n> word arrays B<a>
+and B<b>.  It returns 1, 0 and -1 if B<a> is greater than, equal and
+less than B<b>.
+
+bn_mul_normal(B<r>, B<a>, B<na>, B<b>, B<nb>) operates on the B<na>
+word array B<a>, the B<nb> word array B<b> and the B<na>+B<nb> word
+array B<r>.  It computes B<a>*B<b> and places the result in B<r>.
+
+bn_mul_low_normal(B<r>, B<a>, B<b>, B<n>) operates on the B<n> word
+arrays B<r>, B<a> and B<b>.  It computes the B<n> low words of
+B<a>*B<b> and places the result in B<r>.
+
+bn_mul_recursive(B<r>, B<a>, B<b>, B<n2>, B<dna>, B<dnb>, B<t>) operates
+on the word arrays B<a> and B<b> of length B<n2>+B<dna> and B<n2>+B<dnb>
+(B<dna> and B<dnb> are currently allowed to be 0 or negative) and the 2*B<n2>
+word arrays B<r> and B<t>.  B<n2> must be a power of 2.  It computes
+B<a>*B<b> and places the result in B<r>.
+
+bn_mul_part_recursive(B<r>, B<a>, B<b>, B<n>, B<tna>, B<tnb>, B<tmp>)
+operates on the word arrays B<a> and B<b> of length B<n>+B<tna> and
+B<n>+B<tnb> and the 4*B<n> word arrays B<r> and B<tmp>.
+
+bn_mul_low_recursive(B<r>, B<a>, B<b>, B<n2>, B<tmp>) operates on the
+B<n2> word arrays B<r> and B<tmp> and the B<n2>/2 word arrays B<a>
+and B<b>.
+
+BN_mul() calls bn_mul_normal(), or an optimized implementation if the
+factors have the same size: bn_mul_comba8() is used if they are 8
+words long, bn_mul_recursive() if they are larger than
+B<BN_MULL_SIZE_NORMAL> and the size is an exact multiple of the word
+size, and bn_mul_part_recursive() for others that are larger than
+B<BN_MULL_SIZE_NORMAL>.
+
+bn_sqr_normal(B<r>, B<a>, B<n>, B<tmp>) operates on the B<n> word array
+B<a> and the 2*B<n> word arrays B<tmp> and B<r>.
+
+The implementations use the following macros which, depending on the
+architecture, may use "long long" C operations or inline assembler.
+They are defined in C<bn_lcl.h>.
+
+mul(B<r>, B<a>, B<w>, B<c>) computes B<w>*B<a>+B<c> and places the
+low word of the result in B<r> and the high word in B<c>.
+
+mul_add(B<r>, B<a>, B<w>, B<c>) computes B<w>*B<a>+B<r>+B<c> and
+places the low word of the result in B<r> and the high word in B<c>.
+
+sqr(B<r0>, B<r1>, B<a>) computes B<a>*B<a> and places the low word
+of the result in B<r0> and the high word in B<r1>.
+
+=head2 Size changes
+
+bn_expand() ensures that B<b> has enough space for a B<bits> bit
+number.  bn_wexpand() ensures that B<b> has enough space for an
+B<n> word number.  If the number has to be expanded, both macros
+call bn_expand2(), which allocates a new B<d> array and copies the
+data.  They return B<NULL> on error, B<b> otherwise.
+
+The bn_fix_top() macro reduces B<a-E<gt>top> to point to the most
+significant non-zero word plus one when B<a> has shrunk.
+
+=head2 Debugging
+
+bn_check_top() verifies that C<((a)-E<gt>top E<gt>= 0 && (a)-E<gt>top
+E<lt>= (a)-E<gt>dmax)>.  A violation will cause the program to abort.
+
+bn_print() prints B<a> to stderr. bn_dump() prints B<n> words at B<d>
+(in reverse order, i.e. most significant word first) to stderr.
+
+bn_set_max() makes B<a> a static number with a B<dmax> of its current size.
+This is used by bn_set_low() and bn_set_high() to make B<r> a read-only
+B<BIGNUM> that contains the B<n> low or high words of B<a>.
+
+If B<BN_DEBUG> is not defined, bn_check_top(), bn_print(), bn_dump()
+and bn_set_max() are defined as empty macros.
+
+=head1 SEE ALSO
+
+L<bn(3)>
+
+=head1 COPYRIGHT
+
+Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
+
+Licensed under the OpenSSL license (the "License").  You may not use
+this file except in compliance with the License.  You can obtain a copy
+in the file LICENSE in the source distribution or at
+L<https://www.openssl.org/source/license.html>.
+
+=cut
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/alpha-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/alpha-mont.pl
@ -0,0 +1,328 @@
+#! /usr/bin/env perl
+# Copyright 2006-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# On 21264 RSA sign performance improves by 70/35/20/15 percent for
+# 512/1024/2048/4096 bit key lengths. This is against vendor compiler
+# instructed to '-tune host' code with in-line assembler. Other
+# benchmarks improve by 15-20%. To anchor it to something else, the
+# code provides approximately the same performance per GHz as AMD64.
+# I.e. if you compare 1GHz 21264 and 2GHz Opteron, you'll observe ~2x
+# difference.
+
+$output=pop;
+open STDOUT,">$output";
+
+# int bn_mul_mont(
+$rp="a0";	# BN_ULONG *rp,
+$ap="a1";	# const BN_ULONG *ap,
+$bp="a2";	# const BN_ULONG *bp,
+$np="a3";	# const BN_ULONG *np,
+$n0="a4";	# const BN_ULONG *n0,
+$num="a5";	# int num);
+
+$lo0="t0";
+$hi0="t1";
+$lo1="t2";
+$hi1="t3";
+$aj="t4";
+$bi="t5";
+$nj="t6";
+$tp="t7";
+$alo="t8";
+$ahi="t9";
+$nlo="t10";
+$nhi="t11";
+$tj="t12";
+$i="s3";
+$j="s4";
+$m1="s5";
+
+$code=<<___;
+#ifdef __linux__
+#include <asm/regdef.h>
+#else
+#include <asm.h>
+#include <regdef.h>
+#endif
+
+.text
+
+.set	noat
+.set	noreorder
+
+.globl	bn_mul_mont
+.align	5
+.ent	bn_mul_mont
+bn_mul_mont:
+	lda	sp,-48(sp)
+	stq	ra,0(sp)
+	stq	s3,8(sp)
+	stq	s4,16(sp)
+	stq	s5,24(sp)
+	stq	fp,32(sp)
+	mov	sp,fp
+	.mask	0x0400f000,-48
+	.frame	fp,48,ra
+	.prologue 0
+
+	.align	4
+	.set	reorder
+	sextl	$num,$num
+	mov	0,v0
+	cmplt	$num,4,AT
+	bne	AT,.Lexit
+
+	ldq	$hi0,0($ap)	# ap[0]
+	s8addq	$num,16,AT
+	ldq	$aj,8($ap)
+	subq	sp,AT,sp
+	ldq	$bi,0($bp)	# bp[0]
+	lda	AT,-4096(zero)	# mov	-4096,AT
+	ldq	$n0,0($n0)
+	and	sp,AT,sp
+
+	mulq	$hi0,$bi,$lo0
+	ldq	$hi1,0($np)	# np[0]
+	umulh	$hi0,$bi,$hi0
+	ldq	$nj,8($np)
+
+	mulq	$lo0,$n0,$m1
+
+	mulq	$hi1,$m1,$lo1
+	umulh	$hi1,$m1,$hi1
+
+	addq	$lo1,$lo0,$lo1
+	cmpult	$lo1,$lo0,AT
+	addq	$hi1,AT,$hi1
+
+	mulq	$aj,$bi,$alo
+	mov	2,$j
+	umulh	$aj,$bi,$ahi
+	mov	sp,$tp
+
+	mulq	$nj,$m1,$nlo
+	s8addq	$j,$ap,$aj
+	umulh	$nj,$m1,$nhi
+	s8addq	$j,$np,$nj
+.align	4
+.L1st:
+	.set	noreorder
+	ldq	$aj,0($aj)
+	addl	$j,1,$j
+	ldq	$nj,0($nj)
+	lda	$tp,8($tp)
+
+	addq	$alo,$hi0,$lo0
+	mulq	$aj,$bi,$alo
+	cmpult	$lo0,$hi0,AT
+	addq	$nlo,$hi1,$lo1
+
+	mulq	$nj,$m1,$nlo
+	addq	$ahi,AT,$hi0
+	cmpult	$lo1,$hi1,v0
+	cmplt	$j,$num,$tj
+
+	umulh	$aj,$bi,$ahi
+	addq	$nhi,v0,$hi1
+	addq	$lo1,$lo0,$lo1
+	s8addq	$j,$ap,$aj
+
+	umulh	$nj,$m1,$nhi
+	cmpult	$lo1,$lo0,v0
+	addq	$hi1,v0,$hi1
+	s8addq	$j,$np,$nj
+
+	stq	$lo1,-8($tp)
+	nop
+	unop
+	bne	$tj,.L1st
+	.set	reorder
+
+	addq	$alo,$hi0,$lo0
+	addq	$nlo,$hi1,$lo1
+	cmpult	$lo0,$hi0,AT
+	cmpult	$lo1,$hi1,v0
+	addq	$ahi,AT,$hi0
+	addq	$nhi,v0,$hi1
+
+	addq	$lo1,$lo0,$lo1
+	cmpult	$lo1,$lo0,v0
+	addq	$hi1,v0,$hi1
+
+	stq	$lo1,0($tp)
+
+	addq	$hi1,$hi0,$hi1
+	cmpult	$hi1,$hi0,AT
+	stq	$hi1,8($tp)
+	stq	AT,16($tp)
+
+	mov	1,$i
+.align	4
+.Louter:
+	s8addq	$i,$bp,$bi
+	ldq	$hi0,0($ap)
+	ldq	$aj,8($ap)
+	ldq	$bi,0($bi)
+	ldq	$hi1,0($np)
+	ldq	$nj,8($np)
+	ldq	$tj,0(sp)
+
+	mulq	$hi0,$bi,$lo0
+	umulh	$hi0,$bi,$hi0
+
+	addq	$lo0,$tj,$lo0
+	cmpult	$lo0,$tj,AT
+	addq	$hi0,AT,$hi0
+
+	mulq	$lo0,$n0,$m1
+
+	mulq	$hi1,$m1,$lo1
+	umulh	$hi1,$m1,$hi1
+
+	addq	$lo1,$lo0,$lo1
+	cmpult	$lo1,$lo0,AT
+	mov	2,$j
+	addq	$hi1,AT,$hi1
+
+	mulq	$aj,$bi,$alo
+	mov	sp,$tp
+	umulh	$aj,$bi,$ahi
+
+	mulq	$nj,$m1,$nlo
+	s8addq	$j,$ap,$aj
+	umulh	$nj,$m1,$nhi
+.align	4
+.Linner:
+	.set	noreorder
+	ldq	$tj,8($tp)	#L0
+	nop			#U1
+	ldq	$aj,0($aj)	#L1
+	s8addq	$j,$np,$nj	#U0
+
+	ldq	$nj,0($nj)	#L0
+	nop			#U1
+	addq	$alo,$hi0,$lo0	#L1
+	lda	$tp,8($tp)
+
+	mulq	$aj,$bi,$alo	#U1
+	cmpult	$lo0,$hi0,AT	#L0
+	addq	$nlo,$hi1,$lo1	#L1
+	addl	$j,1,$j
+
+	mulq	$nj,$m1,$nlo	#U1
+	addq	$ahi,AT,$hi0	#L0
+	addq	$lo0,$tj,$lo0	#L1
+	cmpult	$lo1,$hi1,v0	#U0
+
+	umulh	$aj,$bi,$ahi	#U1
+	cmpult	$lo0,$tj,AT	#L0
+	addq	$lo1,$lo0,$lo1	#L1
+	addq	$nhi,v0,$hi1	#U0
+
+	umulh	$nj,$m1,$nhi	#U1
+	s8addq	$j,$ap,$aj	#L0
+	cmpult	$lo1,$lo0,v0	#L1
+	cmplt	$j,$num,$tj	#U0	# borrow $tj
+
+	addq	$hi0,AT,$hi0	#L0
+	addq	$hi1,v0,$hi1	#U1
+	stq	$lo1,-8($tp)	#L1
+	bne	$tj,.Linner	#U0
+	.set	reorder
+
+	ldq	$tj,8($tp)
+	addq	$alo,$hi0,$lo0
+	addq	$nlo,$hi1,$lo1
+	cmpult	$lo0,$hi0,AT
+	cmpult	$lo1,$hi1,v0
+	addq	$ahi,AT,$hi0
+	addq	$nhi,v0,$hi1
+
+	addq	$lo0,$tj,$lo0
+	cmpult	$lo0,$tj,AT
+	addq	$hi0,AT,$hi0
+
+	ldq	$tj,16($tp)
+	addq	$lo1,$lo0,$j
+	cmpult	$j,$lo0,v0
+	addq	$hi1,v0,$hi1
+
+	addq	$hi1,$hi0,$lo1
+	stq	$j,0($tp)
+	cmpult	$lo1,$hi0,$hi1
+	addq	$lo1,$tj,$lo1
+	cmpult	$lo1,$tj,AT
+	addl	$i,1,$i
+	addq	$hi1,AT,$hi1
+	stq	$lo1,8($tp)
+	cmplt	$i,$num,$tj	# borrow $tj
+	stq	$hi1,16($tp)
+	bne	$tj,.Louter
+
+	s8addq	$num,sp,$tj	# &tp[num]
+	mov	$rp,$bp		# put rp aside
+	mov	sp,$tp
+	mov	sp,$ap
+	mov	0,$hi0		# clear borrow bit
+
+.align	4
+.Lsub:	ldq	$lo0,0($tp)
+	ldq	$lo1,0($np)
+	lda	$tp,8($tp)
+	lda	$np,8($np)
+	subq	$lo0,$lo1,$lo1	# tp[i]-np[i]
+	cmpult	$lo0,$lo1,AT
+	subq	$lo1,$hi0,$lo0
+	cmpult	$lo1,$lo0,$hi0
+	or	$hi0,AT,$hi0
+	stq	$lo0,0($rp)
+	cmpult	$tp,$tj,v0
+	lda	$rp,8($rp)
+	bne	v0,.Lsub
+
+	subq	$hi1,$hi0,$hi0	# handle upmost overflow bit
+	mov	sp,$tp
+	mov	$bp,$rp		# restore rp
+
+.align	4
+.Lcopy:	ldq	$aj,0($tp)	# conditional copy
+	ldq	$nj,0($rp)
+	lda	$tp,8($tp)
+	lda	$rp,8($rp)
+	cmoveq	$hi0,$nj,$aj
+	stq	zero,-8($tp)	# zap tp
+	cmpult	$tp,$tj,AT
+	stq	$aj,-8($rp)
+	bne	AT,.Lcopy
+	mov	1,v0
+
+.Lexit:
+	.set	noreorder
+	mov	fp,sp
+	/*ldq	ra,0(sp)*/
+	ldq	s3,8(sp)
+	ldq	s4,16(sp)
+	ldq	s5,24(sp)
+	ldq	fp,32(sp)
+	lda	sp,48(sp)
+	ret	(ra)
+.end	bn_mul_mont
+.ascii	"Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
+.align	2
+___
+
+print $code;
+close STDOUT;
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/armv4-gf2m.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/armv4-gf2m.pl
@ -0,0 +1,332 @@
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# May 2011
+#
+# The module implements bn_GF2m_mul_2x2 polynomial multiplication
+# used in bn_gf2m.c. It's kind of low-hanging mechanical port from
+# C for the time being... Except that it has two code paths: pure
+# integer code suitable for any ARMv4 and later CPU and NEON code
+# suitable for ARMv7. Pure integer 1x1 multiplication subroutine runs
+# in ~45 cycles on dual-issue core such as Cortex A8, which is ~50%
+# faster than compiler-generated code. For ECDH and ECDSA verify (but
+# not for ECDSA sign) it means 25%-45% improvement depending on key
+# length, more for longer keys. Even though NEON 1x1 multiplication
+# runs in even less cycles, ~30, improvement is measurable only on
+# longer keys. One has to optimize code elsewhere to get NEON glow...
+#
+# April 2014
+#
+# Double bn_GF2m_mul_2x2 performance by using algorithm from paper
+# referred below, which improves ECDH and ECDSA verify benchmarks
+# by 18-40%.
+#
+# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
+# Polynomial Multiplication on ARM Processors using the NEON Engine.
+#
+# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
+
+$flavour = shift;
+if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+    die "can't locate arm-xlate.pl";
+
+    open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+    open STDOUT,">$output";
+}
+
+$code=<<___;
+#include "arm_arch.h"
+
+.text
+#if defined(__thumb2__)
+.syntax	unified
+.thumb
+#else
+.code	32
+#endif
+___
+################
+# private interface to mul_1x1_ialu
+#
+$a="r1";
+$b="r0";
+
+($a0,$a1,$a2,$a12,$a4,$a14)=
+($hi,$lo,$t0,$t1, $i0,$i1 )=map("r$_",(4..9),12);
+
+$mask="r12";
+
+$code.=<<___;
+.type	mul_1x1_ialu,%function
+.align	5
+mul_1x1_ialu:
+	mov	$a0,#0
+	bic	$a1,$a,#3<<30		@ a1=a&0x3fffffff
+	str	$a0,[sp,#0]		@ tab[0]=0
+	add	$a2,$a1,$a1		@ a2=a1<<1
+	str	$a1,[sp,#4]		@ tab[1]=a1
+	eor	$a12,$a1,$a2		@ a1^a2
+	str	$a2,[sp,#8]		@ tab[2]=a2
+	mov	$a4,$a1,lsl#2		@ a4=a1<<2
+	str	$a12,[sp,#12]		@ tab[3]=a1^a2
+	eor	$a14,$a1,$a4		@ a1^a4
+	str	$a4,[sp,#16]		@ tab[4]=a4
+	eor	$a0,$a2,$a4		@ a2^a4
+	str	$a14,[sp,#20]		@ tab[5]=a1^a4
+	eor	$a12,$a12,$a4		@ a1^a2^a4
+	str	$a0,[sp,#24]		@ tab[6]=a2^a4
+	and	$i0,$mask,$b,lsl#2
+	str	$a12,[sp,#28]		@ tab[7]=a1^a2^a4
+
+	and	$i1,$mask,$b,lsr#1
+	ldr	$lo,[sp,$i0]		@ tab[b       & 0x7]
+	and	$i0,$mask,$b,lsr#4
+	ldr	$t1,[sp,$i1]		@ tab[b >>  3 & 0x7]
+	and	$i1,$mask,$b,lsr#7
+	ldr	$t0,[sp,$i0]		@ tab[b >>  6 & 0x7]
+	eor	$lo,$lo,$t1,lsl#3	@ stall
+	mov	$hi,$t1,lsr#29
+	ldr	$t1,[sp,$i1]		@ tab[b >>  9 & 0x7]
+
+	and	$i0,$mask,$b,lsr#10
+	eor	$lo,$lo,$t0,lsl#6
+	eor	$hi,$hi,$t0,lsr#26
+	ldr	$t0,[sp,$i0]		@ tab[b >> 12 & 0x7]
+
+	and	$i1,$mask,$b,lsr#13
+	eor	$lo,$lo,$t1,lsl#9
+	eor	$hi,$hi,$t1,lsr#23
+	ldr	$t1,[sp,$i1]		@ tab[b >> 15 & 0x7]
+
+	and	$i0,$mask,$b,lsr#16
+	eor	$lo,$lo,$t0,lsl#12
+	eor	$hi,$hi,$t0,lsr#20
+	ldr	$t0,[sp,$i0]		@ tab[b >> 18 & 0x7]
+
+	and	$i1,$mask,$b,lsr#19
+	eor	$lo,$lo,$t1,lsl#15
+	eor	$hi,$hi,$t1,lsr#17
+	ldr	$t1,[sp,$i1]		@ tab[b >> 21 & 0x7]
+
+	and	$i0,$mask,$b,lsr#22
+	eor	$lo,$lo,$t0,lsl#18
+	eor	$hi,$hi,$t0,lsr#14
+	ldr	$t0,[sp,$i0]		@ tab[b >> 24 & 0x7]
+
+	and	$i1,$mask,$b,lsr#25
+	eor	$lo,$lo,$t1,lsl#21
+	eor	$hi,$hi,$t1,lsr#11
+	ldr	$t1,[sp,$i1]		@ tab[b >> 27 & 0x7]
+
+	tst	$a,#1<<30
+	and	$i0,$mask,$b,lsr#28
+	eor	$lo,$lo,$t0,lsl#24
+	eor	$hi,$hi,$t0,lsr#8
+	ldr	$t0,[sp,$i0]		@ tab[b >> 30      ]
+
+#ifdef	__thumb2__
+	itt	ne
+#endif
+	eorne	$lo,$lo,$b,lsl#30
+	eorne	$hi,$hi,$b,lsr#2
+	tst	$a,#1<<31
+	eor	$lo,$lo,$t1,lsl#27
+	eor	$hi,$hi,$t1,lsr#5
+#ifdef	__thumb2__
+	itt	ne
+#endif
+	eorne	$lo,$lo,$b,lsl#31
+	eorne	$hi,$hi,$b,lsr#1
+	eor	$lo,$lo,$t0,lsl#30
+	eor	$hi,$hi,$t0,lsr#2
+
+	mov	pc,lr
+.size	mul_1x1_ialu,.-mul_1x1_ialu
+___
+################
+# void	bn_GF2m_mul_2x2(BN_ULONG *r,
+#	BN_ULONG a1,BN_ULONG a0,
+#	BN_ULONG b1,BN_ULONG b0);	# r[3..0]=a1a0·b1b0
+{
+$code.=<<___;
+.global	bn_GF2m_mul_2x2
+.type	bn_GF2m_mul_2x2,%function
+.align	5
+bn_GF2m_mul_2x2:
+#if __ARM_MAX_ARCH__>=7
+	stmdb	sp!,{r10,lr}
+	ldr	r12,.LOPENSSL_armcap
+	adr	r10,.LOPENSSL_armcap
+	ldr	r12,[r12,r10]
+#ifdef	__APPLE__
+	ldr	r12,[r12]
+#endif
+	tst	r12,#ARMV7_NEON
+	itt	ne
+	ldrne	r10,[sp],#8
+	bne	.LNEON
+	stmdb	sp!,{r4-r9}
+#else
+	stmdb	sp!,{r4-r10,lr}
+#endif
+___
+$ret="r10";	# reassigned 1st argument
+$code.=<<___;
+	mov	$ret,r0			@ reassign 1st argument
+	mov	$b,r3			@ $b=b1
+	sub	r7,sp,#36
+	mov	r8,sp
+	and	r7,r7,#-32
+	ldr	r3,[sp,#32]		@ load b0
+	mov	$mask,#7<<2
+	mov	sp,r7			@ allocate tab[8]
+	str	r8,[r7,#32]
+
+	bl	mul_1x1_ialu		@ a1·b1
+	str	$lo,[$ret,#8]
+	str	$hi,[$ret,#12]
+
+	eor	$b,$b,r3		@ flip b0 and b1
+	 eor	$a,$a,r2		@ flip a0 and a1
+	eor	r3,r3,$b
+	 eor	r2,r2,$a
+	eor	$b,$b,r3
+	 eor	$a,$a,r2
+	bl	mul_1x1_ialu		@ a0·b0
+	str	$lo,[$ret]
+	str	$hi,[$ret,#4]
+
+	eor	$a,$a,r2
+	eor	$b,$b,r3
+	bl	mul_1x1_ialu		@ (a1+a0)·(b1+b0)
+___
+@r=map("r$_",(6..9));
+$code.=<<___;
+	ldmia	$ret,{@r[0]-@r[3]}
+	eor	$lo,$lo,$hi
+	ldr	sp,[sp,#32]		@ destroy tab[8]
+	eor	$hi,$hi,@r[1]
+	eor	$lo,$lo,@r[0]
+	eor	$hi,$hi,@r[2]
+	eor	$lo,$lo,@r[3]
+	eor	$hi,$hi,@r[3]
+	str	$hi,[$ret,#8]
+	eor	$lo,$lo,$hi
+	str	$lo,[$ret,#4]
+
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r10,pc}
+#else
+	ldmia	sp!,{r4-r10,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	bx	lr			@ interoperable with Thumb ISA:-)
+#endif
+___
+}
+{
+my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
+my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch	armv7-a
+.fpu	neon
+
+.align	5
+.LNEON:
+	ldr		r12, [sp]		@ 5th argument
+	vmov		$a, r2, r1
+	vmov		$b, r12, r3
+	vmov.i64	$k48, #0x0000ffffffffffff
+	vmov.i64	$k32, #0x00000000ffffffff
+	vmov.i64	$k16, #0x000000000000ffff
+
+	vext.8		$t0#lo, $a, $a, #1	@ A1
+	vmull.p8	$t0, $t0#lo, $b		@ F = A1*B
+	vext.8		$r#lo, $b, $b, #1	@ B1
+	vmull.p8	$r, $a, $r#lo		@ E = A*B1
+	vext.8		$t1#lo, $a, $a, #2	@ A2
+	vmull.p8	$t1, $t1#lo, $b		@ H = A2*B
+	vext.8		$t3#lo, $b, $b, #2	@ B2
+	vmull.p8	$t3, $a, $t3#lo		@ G = A*B2
+	vext.8		$t2#lo, $a, $a, #3	@ A3
+	veor		$t0, $t0, $r		@ L = E + F
+	vmull.p8	$t2, $t2#lo, $b		@ J = A3*B
+	vext.8		$r#lo, $b, $b, #3	@ B3
+	veor		$t1, $t1, $t3		@ M = G + H
+	vmull.p8	$r, $a, $r#lo		@ I = A*B3
+	veor		$t0#lo, $t0#lo, $t0#hi	@ t0 = (L) (P0 + P1) << 8
+	vand		$t0#hi, $t0#hi, $k48
+	vext.8		$t3#lo, $b, $b, #4	@ B4
+	veor		$t1#lo, $t1#lo, $t1#hi	@ t1 = (M) (P2 + P3) << 16
+	vand		$t1#hi, $t1#hi, $k32
+	vmull.p8	$t3, $a, $t3#lo		@ K = A*B4
+	veor		$t2, $t2, $r		@ N = I + J
+	veor		$t0#lo, $t0#lo, $t0#hi
+	veor		$t1#lo, $t1#lo, $t1#hi
+	veor		$t2#lo, $t2#lo, $t2#hi	@ t2 = (N) (P4 + P5) << 24
+	vand		$t2#hi, $t2#hi, $k16
+	vext.8		$t0, $t0, $t0, #15
+	veor		$t3#lo, $t3#lo, $t3#hi	@ t3 = (K) (P6 + P7) << 32
+	vmov.i64	$t3#hi, #0
+	vext.8		$t1, $t1, $t1, #14
+	veor		$t2#lo, $t2#lo, $t2#hi
+	vmull.p8	$r, $a, $b		@ D = A*B
+	vext.8		$t3, $t3, $t3, #12
+	vext.8		$t2, $t2, $t2, #13
+	veor		$t0, $t0, $t1
+	veor		$t2, $t2, $t3
+	veor		$r, $r, $t0
+	veor		$r, $r, $t2
+
+	vst1.32		{$r}, [r0]
+	ret		@ bx lr
+#endif
+___
+}
+$code.=<<___;
+.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
+#if __ARM_MAX_ARCH__>=7
+.align	5
+.LOPENSSL_armcap:
+.word	OPENSSL_armcap_P-.
+#endif
+.asciz	"GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
+.align	5
+
+#if __ARM_MAX_ARCH__>=7
+.comm	OPENSSL_armcap_P,4,4
+#endif
+___
+
+foreach (split("\n",$code)) {
+	s/\`([^\`]*)\`/eval $1/geo;
+
+	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo	or
+	s/\bret\b/bx	lr/go		or
+	s/\bbx\s+lr\b/.word\t0xe12fff1e/go;    # make it possible to compile with -march=armv4
+
+	print $_,"\n";
+}
+close STDOUT;   # enforce flush
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/armv4-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/armv4-mont.pl
@ -0,0 +1,757 @@
+#! /usr/bin/env perl
+# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# January 2007.
+
+# Montgomery multiplication for ARMv4.
+#
+# Performance improvement naturally varies among CPU implementations
+# and compilers. The code was observed to provide +65-35% improvement
+# [depending on key length, less for longer keys] on ARM920T, and
+# +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code
+# base and compiler generated code with in-lined umull and even umlal
+# instructions. The latter means that this code didn't really have an
+# "advantage" of utilizing some "secret" instruction.
+#
+# The code is interoperable with Thumb ISA and is rather compact, less
+# than 1/2KB. Windows CE port would be trivial, as it's exclusively
+# about decorations, ABI and instruction syntax are identical.
+
+# November 2013
+#
+# Add NEON code path, which handles lengths divisible by 8. RSA/DSA
+# performance improvement on Cortex-A8 is ~45-100% depending on key
+# length, more for longer keys. On Cortex-A15 the span is ~10-105%.
+# On Snapdragon S4 improvement was measured to vary from ~70% to
+# incredible ~380%, yes, 4.8x faster, for RSA4096 sign. But this is
+# rather because original integer-only code seems to perform
+# suboptimally on S4. Situation on Cortex-A9 is unfortunately
+# different. It's being looked into, but the trouble is that
+# performance for vectors longer than 256 bits is actually couple
+# of percent worse than for integer-only code. The code is chosen
+# for execution on all NEON-capable processors, because gain on
+# others outweighs the marginal loss on Cortex-A9.
+
+# September 2015
+#
+# Align Cortex-A9 performance with November 2013 improvements, i.e.
+# NEON code is now ~20-105% faster than integer-only one on this
+# processor. But this optimization further improved performance even
+# on other processors: NEON code path is ~45-180% faster than original
+# integer-only on Cortex-A8, ~10-210% on Cortex-A15, ~70-450% on
+# Snapdragon S4.
+
+$flavour = shift;
+if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+    die "can't locate arm-xlate.pl";
+
+    open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+    open STDOUT,">$output";
+}
+
+$num="r0";	# starts as num argument, but holds &tp[num-1]
+$ap="r1";
+$bp="r2"; $bi="r2"; $rp="r2";
+$np="r3";
+$tp="r4";
+$aj="r5";
+$nj="r6";
+$tj="r7";
+$n0="r8";
+###########	# r9 is reserved by ELF as platform specific, e.g. TLS pointer
+$alo="r10";	# sl, gcc uses it to keep @GOT
+$ahi="r11";	# fp
+$nlo="r12";	# ip
+###########	# r13 is stack pointer
+$nhi="r14";	# lr
+###########	# r15 is program counter
+
+#### argument block layout relative to &tp[num-1], a.k.a. $num
+$_rp="$num,#12*4";
+# ap permanently resides in r1
+$_bp="$num,#13*4";
+# np permanently resides in r3
+$_n0="$num,#14*4";
+$_num="$num,#15*4";	$_bpend=$_num;
+
+$code=<<___;
+#include "arm_arch.h"
+
+.text
+#if defined(__thumb2__)
+.syntax	unified
+.thumb
+#else
+.code	32
+#endif
+
+#if __ARM_MAX_ARCH__>=7
+.align	5
+.LOPENSSL_armcap:
+.word	OPENSSL_armcap_P-.Lbn_mul_mont
+#endif
+
+.global	bn_mul_mont
+.type	bn_mul_mont,%function
+
+.align	5
+bn_mul_mont:
+.Lbn_mul_mont:
+	ldr	ip,[sp,#4]		@ load num
+	stmdb	sp!,{r0,r2}		@ sp points at argument block
+#if __ARM_MAX_ARCH__>=7
+	tst	ip,#7
+	bne	.Lialu
+	adr	r0,.Lbn_mul_mont
+	ldr	r2,.LOPENSSL_armcap
+	ldr	r0,[r0,r2]
+#ifdef	__APPLE__
+	ldr	r0,[r0]
+#endif
+	tst	r0,#ARMV7_NEON		@ NEON available?
+	ldmia	sp, {r0,r2}
+	beq	.Lialu
+	add	sp,sp,#8
+	b	bn_mul8x_mont_neon
+.align	4
+.Lialu:
+#endif
+	cmp	ip,#2
+	mov	$num,ip			@ load num
+#ifdef	__thumb2__
+	ittt	lt
+#endif
+	movlt	r0,#0
+	addlt	sp,sp,#2*4
+	blt	.Labrt
+
+	stmdb	sp!,{r4-r12,lr}		@ save 10 registers
+
+	mov	$num,$num,lsl#2		@ rescale $num for byte count
+	sub	sp,sp,$num		@ alloca(4*num)
+	sub	sp,sp,#4		@ +extra dword
+	sub	$num,$num,#4		@ "num=num-1"
+	add	$tp,$bp,$num		@ &bp[num-1]
+
+	add	$num,sp,$num		@ $num to point at &tp[num-1]
+	ldr	$n0,[$_n0]		@ &n0
+	ldr	$bi,[$bp]		@ bp[0]
+	ldr	$aj,[$ap],#4		@ ap[0],ap++
+	ldr	$nj,[$np],#4		@ np[0],np++
+	ldr	$n0,[$n0]		@ *n0
+	str	$tp,[$_bpend]		@ save &bp[num]
+
+	umull	$alo,$ahi,$aj,$bi	@ ap[0]*bp[0]
+	str	$n0,[$_n0]		@ save n0 value
+	mul	$n0,$alo,$n0		@ "tp[0]"*n0
+	mov	$nlo,#0
+	umlal	$alo,$nlo,$nj,$n0	@ np[0]*n0+"t[0]"
+	mov	$tp,sp
+
+.L1st:
+	ldr	$aj,[$ap],#4		@ ap[j],ap++
+	mov	$alo,$ahi
+	ldr	$nj,[$np],#4		@ np[j],np++
+	mov	$ahi,#0
+	umlal	$alo,$ahi,$aj,$bi	@ ap[j]*bp[0]
+	mov	$nhi,#0
+	umlal	$nlo,$nhi,$nj,$n0	@ np[j]*n0
+	adds	$nlo,$nlo,$alo
+	str	$nlo,[$tp],#4		@ tp[j-1]=,tp++
+	adc	$nlo,$nhi,#0
+	cmp	$tp,$num
+	bne	.L1st
+
+	adds	$nlo,$nlo,$ahi
+	ldr	$tp,[$_bp]		@ restore bp
+	mov	$nhi,#0
+	ldr	$n0,[$_n0]		@ restore n0
+	adc	$nhi,$nhi,#0
+	str	$nlo,[$num]		@ tp[num-1]=
+	mov	$tj,sp
+	str	$nhi,[$num,#4]		@ tp[num]=
+
+.Louter:
+	sub	$tj,$num,$tj		@ "original" $num-1 value
+	sub	$ap,$ap,$tj		@ "rewind" ap to &ap[1]
+	ldr	$bi,[$tp,#4]!		@ *(++bp)
+	sub	$np,$np,$tj		@ "rewind" np to &np[1]
+	ldr	$aj,[$ap,#-4]		@ ap[0]
+	ldr	$alo,[sp]		@ tp[0]
+	ldr	$nj,[$np,#-4]		@ np[0]
+	ldr	$tj,[sp,#4]		@ tp[1]
+
+	mov	$ahi,#0
+	umlal	$alo,$ahi,$aj,$bi	@ ap[0]*bp[i]+tp[0]
+	str	$tp,[$_bp]		@ save bp
+	mul	$n0,$alo,$n0
+	mov	$nlo,#0
+	umlal	$alo,$nlo,$nj,$n0	@ np[0]*n0+"tp[0]"
+	mov	$tp,sp
+
+.Linner:
+	ldr	$aj,[$ap],#4		@ ap[j],ap++
+	adds	$alo,$ahi,$tj		@ +=tp[j]
+	ldr	$nj,[$np],#4		@ np[j],np++
+	mov	$ahi,#0
+	umlal	$alo,$ahi,$aj,$bi	@ ap[j]*bp[i]
+	mov	$nhi,#0
+	umlal	$nlo,$nhi,$nj,$n0	@ np[j]*n0
+	adc	$ahi,$ahi,#0
+	ldr	$tj,[$tp,#8]		@ tp[j+1]
+	adds	$nlo,$nlo,$alo
+	str	$nlo,[$tp],#4		@ tp[j-1]=,tp++
+	adc	$nlo,$nhi,#0
+	cmp	$tp,$num
+	bne	.Linner
+
+	adds	$nlo,$nlo,$ahi
+	mov	$nhi,#0
+	ldr	$tp,[$_bp]		@ restore bp
+	adc	$nhi,$nhi,#0
+	ldr	$n0,[$_n0]		@ restore n0
+	adds	$nlo,$nlo,$tj
+	ldr	$tj,[$_bpend]		@ restore &bp[num]
+	adc	$nhi,$nhi,#0
+	str	$nlo,[$num]		@ tp[num-1]=
+	str	$nhi,[$num,#4]		@ tp[num]=
+
+	cmp	$tp,$tj
+#ifdef	__thumb2__
+	itt	ne
+#endif
+	movne	$tj,sp
+	bne	.Louter
+
+	ldr	$rp,[$_rp]		@ pull rp
+	mov	$aj,sp
+	add	$num,$num,#4		@ $num to point at &tp[num]
+	sub	$aj,$num,$aj		@ "original" num value
+	mov	$tp,sp			@ "rewind" $tp
+	mov	$ap,$tp			@ "borrow" $ap
+	sub	$np,$np,$aj		@ "rewind" $np to &np[0]
+
+	subs	$tj,$tj,$tj		@ "clear" carry flag
+.Lsub:	ldr	$tj,[$tp],#4
+	ldr	$nj,[$np],#4
+	sbcs	$tj,$tj,$nj		@ tp[j]-np[j]
+	str	$tj,[$rp],#4		@ rp[j]=
+	teq	$tp,$num		@ preserve carry
+	bne	.Lsub
+	sbcs	$nhi,$nhi,#0		@ upmost carry
+	mov	$tp,sp			@ "rewind" $tp
+	sub	$rp,$rp,$aj		@ "rewind" $rp
+
+.Lcopy:	ldr	$tj,[$tp]		@ conditional copy
+	ldr	$aj,[$rp]
+	str	sp,[$tp],#4		@ zap tp
+#ifdef	__thumb2__
+	it	cc
+#endif
+	movcc	$aj,$tj
+	str	$aj,[$rp],#4
+	teq	$tp,$num		@ preserve carry
+	bne	.Lcopy
+
+	mov	sp,$num
+	add	sp,sp,#4		@ skip over tp[num+1]
+	ldmia	sp!,{r4-r12,lr}		@ restore registers
+	add	sp,sp,#2*4		@ skip over {r0,r2}
+	mov	r0,#1
+.Labrt:
+#if __ARM_ARCH__>=5
+	ret				@ bx lr
+#else
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	bx	lr			@ interoperable with Thumb ISA:-)
+#endif
+.size	bn_mul_mont,.-bn_mul_mont
+___
+{
+my ($A0,$A1,$A2,$A3)=map("d$_",(0..3));
+my ($N0,$N1,$N2,$N3)=map("d$_",(4..7));
+my ($Z,$Temp)=("q4","q5");
+my @ACC=map("q$_",(6..13));
+my ($Bi,$Ni,$M0)=map("d$_",(28..31));
+my $zero="$Z#lo";
+my $temp="$Temp#lo";
+
+my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5));
+my ($tinptr,$toutptr,$inner,$outer,$bnptr)=map("r$_",(6..11));
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch	armv7-a
+.fpu	neon
+
+.type	bn_mul8x_mont_neon,%function
+.align	5
+bn_mul8x_mont_neon:
+	mov	ip,sp
+	stmdb	sp!,{r4-r11}
+	vstmdb	sp!,{d8-d15}		@ ABI specification says so
+	ldmia	ip,{r4-r5}		@ load rest of parameter block
+	mov	ip,sp
+
+	cmp	$num,#8
+	bhi	.LNEON_8n
+
+	@ special case for $num==8, everything is in register bank...
+
+	vld1.32		{${Bi}[0]}, [$bptr,:32]!
+	veor		$zero,$zero,$zero
+	sub		$toutptr,sp,$num,lsl#4
+	vld1.32		{$A0-$A3},  [$aptr]!		@ can't specify :32 :-(
+	and		$toutptr,$toutptr,#-64
+	vld1.32		{${M0}[0]}, [$n0,:32]
+	mov		sp,$toutptr			@ alloca
+	vzip.16		$Bi,$zero
+
+	vmull.u32	@ACC[0],$Bi,${A0}[0]
+	vmull.u32	@ACC[1],$Bi,${A0}[1]
+	vmull.u32	@ACC[2],$Bi,${A1}[0]
+	vshl.i64	$Ni,@ACC[0]#hi,#16
+	vmull.u32	@ACC[3],$Bi,${A1}[1]
+
+	vadd.u64	$Ni,$Ni,@ACC[0]#lo
+	veor		$zero,$zero,$zero
+	vmul.u32	$Ni,$Ni,$M0
+
+	vmull.u32	@ACC[4],$Bi,${A2}[0]
+	 vld1.32	{$N0-$N3}, [$nptr]!
+	vmull.u32	@ACC[5],$Bi,${A2}[1]
+	vmull.u32	@ACC[6],$Bi,${A3}[0]
+	vzip.16		$Ni,$zero
+	vmull.u32	@ACC[7],$Bi,${A3}[1]
+
+	vmlal.u32	@ACC[0],$Ni,${N0}[0]
+	sub		$outer,$num,#1
+	vmlal.u32	@ACC[1],$Ni,${N0}[1]
+	vmlal.u32	@ACC[2],$Ni,${N1}[0]
+	vmlal.u32	@ACC[3],$Ni,${N1}[1]
+
+	vmlal.u32	@ACC[4],$Ni,${N2}[0]
+	vmov		$Temp,@ACC[0]
+	vmlal.u32	@ACC[5],$Ni,${N2}[1]
+	vmov		@ACC[0],@ACC[1]
+	vmlal.u32	@ACC[6],$Ni,${N3}[0]
+	vmov		@ACC[1],@ACC[2]
+	vmlal.u32	@ACC[7],$Ni,${N3}[1]
+	vmov		@ACC[2],@ACC[3]
+	vmov		@ACC[3],@ACC[4]
+	vshr.u64	$temp,$temp,#16
+	vmov		@ACC[4],@ACC[5]
+	vmov		@ACC[5],@ACC[6]
+	vadd.u64	$temp,$temp,$Temp#hi
+	vmov		@ACC[6],@ACC[7]
+	veor		@ACC[7],@ACC[7]
+	vshr.u64	$temp,$temp,#16
+
+	b	.LNEON_outer8
+
+.align	4
+.LNEON_outer8:
+	vld1.32		{${Bi}[0]}, [$bptr,:32]!
+	veor		$zero,$zero,$zero
+	vzip.16		$Bi,$zero
+	vadd.u64	@ACC[0]#lo,@ACC[0]#lo,$temp
+
+	vmlal.u32	@ACC[0],$Bi,${A0}[0]
+	vmlal.u32	@ACC[1],$Bi,${A0}[1]
+	vmlal.u32	@ACC[2],$Bi,${A1}[0]
+	vshl.i64	$Ni,@ACC[0]#hi,#16
+	vmlal.u32	@ACC[3],$Bi,${A1}[1]
+
+	vadd.u64	$Ni,$Ni,@ACC[0]#lo
+	veor		$zero,$zero,$zero
+	subs		$outer,$outer,#1
+	vmul.u32	$Ni,$Ni,$M0
+
+	vmlal.u32	@ACC[4],$Bi,${A2}[0]
+	vmlal.u32	@ACC[5],$Bi,${A2}[1]
+	vmlal.u32	@ACC[6],$Bi,${A3}[0]
+	vzip.16		$Ni,$zero
+	vmlal.u32	@ACC[7],$Bi,${A3}[1]
+
+	vmlal.u32	@ACC[0],$Ni,${N0}[0]
+	vmlal.u32	@ACC[1],$Ni,${N0}[1]
+	vmlal.u32	@ACC[2],$Ni,${N1}[0]
+	vmlal.u32	@ACC[3],$Ni,${N1}[1]
+
+	vmlal.u32	@ACC[4],$Ni,${N2}[0]
+	vmov		$Temp,@ACC[0]
+	vmlal.u32	@ACC[5],$Ni,${N2}[1]
+	vmov		@ACC[0],@ACC[1]
+	vmlal.u32	@ACC[6],$Ni,${N3}[0]
+	vmov		@ACC[1],@ACC[2]
+	vmlal.u32	@ACC[7],$Ni,${N3}[1]
+	vmov		@ACC[2],@ACC[3]
+	vmov		@ACC[3],@ACC[4]
+	vshr.u64	$temp,$temp,#16
+	vmov		@ACC[4],@ACC[5]
+	vmov		@ACC[5],@ACC[6]
+	vadd.u64	$temp,$temp,$Temp#hi
+	vmov		@ACC[6],@ACC[7]
+	veor		@ACC[7],@ACC[7]
+	vshr.u64	$temp,$temp,#16
+
+	bne	.LNEON_outer8
+
+	vadd.u64	@ACC[0]#lo,@ACC[0]#lo,$temp
+	mov		$toutptr,sp
+	vshr.u64	$temp,@ACC[0]#lo,#16
+	mov		$inner,$num
+	vadd.u64	@ACC[0]#hi,@ACC[0]#hi,$temp
+	add		$tinptr,sp,#96
+	vshr.u64	$temp,@ACC[0]#hi,#16
+	vzip.16		@ACC[0]#lo,@ACC[0]#hi
+
+	b	.LNEON_tail_entry
+
+.align	4
+.LNEON_8n:
+	veor		@ACC[0],@ACC[0],@ACC[0]
+	 sub		$toutptr,sp,#128
+	veor		@ACC[1],@ACC[1],@ACC[1]
+	 sub		$toutptr,$toutptr,$num,lsl#4
+	veor		@ACC[2],@ACC[2],@ACC[2]
+	 and		$toutptr,$toutptr,#-64
+	veor		@ACC[3],@ACC[3],@ACC[3]
+	 mov		sp,$toutptr			@ alloca
+	veor		@ACC[4],@ACC[4],@ACC[4]
+	 add		$toutptr,$toutptr,#256
+	veor		@ACC[5],@ACC[5],@ACC[5]
+	 sub		$inner,$num,#8
+	veor		@ACC[6],@ACC[6],@ACC[6]
+	veor		@ACC[7],@ACC[7],@ACC[7]
+
+.LNEON_8n_init:
+	vst1.64		{@ACC[0]-@ACC[1]},[$toutptr,:256]!
+	subs		$inner,$inner,#8
+	vst1.64		{@ACC[2]-@ACC[3]},[$toutptr,:256]!
+	vst1.64		{@ACC[4]-@ACC[5]},[$toutptr,:256]!
+	vst1.64		{@ACC[6]-@ACC[7]},[$toutptr,:256]!
+	bne		.LNEON_8n_init
+
+	add		$tinptr,sp,#256
+	vld1.32		{$A0-$A3},[$aptr]!
+	add		$bnptr,sp,#8
+	vld1.32		{${M0}[0]},[$n0,:32]
+	mov		$outer,$num
+	b		.LNEON_8n_outer
+
+.align	4
+.LNEON_8n_outer:
+	vld1.32		{${Bi}[0]},[$bptr,:32]!	@ *b++
+	veor		$zero,$zero,$zero
+	vzip.16		$Bi,$zero
+	add		$toutptr,sp,#128
+	vld1.32		{$N0-$N3},[$nptr]!
+
+	vmlal.u32	@ACC[0],$Bi,${A0}[0]
+	vmlal.u32	@ACC[1],$Bi,${A0}[1]
+	 veor		$zero,$zero,$zero
+	vmlal.u32	@ACC[2],$Bi,${A1}[0]
+	 vshl.i64	$Ni,@ACC[0]#hi,#16
+	vmlal.u32	@ACC[3],$Bi,${A1}[1]
+	 vadd.u64	$Ni,$Ni,@ACC[0]#lo
+	vmlal.u32	@ACC[4],$Bi,${A2}[0]
+	 vmul.u32	$Ni,$Ni,$M0
+	vmlal.u32	@ACC[5],$Bi,${A2}[1]
+	vst1.32		{$Bi},[sp,:64]		@ put aside smashed b[8*i+0]
+	vmlal.u32	@ACC[6],$Bi,${A3}[0]
+	 vzip.16	$Ni,$zero
+	vmlal.u32	@ACC[7],$Bi,${A3}[1]
+___
+for ($i=0; $i<7;) {
+$code.=<<___;
+	vld1.32		{${Bi}[0]},[$bptr,:32]!	@ *b++
+	vmlal.u32	@ACC[0],$Ni,${N0}[0]
+	veor		$temp,$temp,$temp
+	vmlal.u32	@ACC[1],$Ni,${N0}[1]
+	vzip.16		$Bi,$temp
+	vmlal.u32	@ACC[2],$Ni,${N1}[0]
+	 vshr.u64	@ACC[0]#lo,@ACC[0]#lo,#16
+	vmlal.u32	@ACC[3],$Ni,${N1}[1]
+	vmlal.u32	@ACC[4],$Ni,${N2}[0]
+	 vadd.u64	@ACC[0]#lo,@ACC[0]#lo,@ACC[0]#hi
+	vmlal.u32	@ACC[5],$Ni,${N2}[1]
+	 vshr.u64	@ACC[0]#lo,@ACC[0]#lo,#16
+	vmlal.u32	@ACC[6],$Ni,${N3}[0]
+	vmlal.u32	@ACC[7],$Ni,${N3}[1]
+	 vadd.u64	@ACC[1]#lo,@ACC[1]#lo,@ACC[0]#lo
+	vst1.32		{$Ni},[$bnptr,:64]!	@ put aside smashed m[8*i+$i]
+___
+	push(@ACC,shift(@ACC));	$i++;
+$code.=<<___;
+	vmlal.u32	@ACC[0],$Bi,${A0}[0]
+	vld1.64		{@ACC[7]},[$tinptr,:128]!
+	vmlal.u32	@ACC[1],$Bi,${A0}[1]
+	 veor		$zero,$zero,$zero
+	vmlal.u32	@ACC[2],$Bi,${A1}[0]
+	 vshl.i64	$Ni,@ACC[0]#hi,#16
+	vmlal.u32	@ACC[3],$Bi,${A1}[1]
+	 vadd.u64	$Ni,$Ni,@ACC[0]#lo
+	vmlal.u32	@ACC[4],$Bi,${A2}[0]
+	 vmul.u32	$Ni,$Ni,$M0
+	vmlal.u32	@ACC[5],$Bi,${A2}[1]
+	vst1.32		{$Bi},[$bnptr,:64]!	@ put aside smashed b[8*i+$i]
+	vmlal.u32	@ACC[6],$Bi,${A3}[0]
+	 vzip.16	$Ni,$zero
+	vmlal.u32	@ACC[7],$Bi,${A3}[1]
+___
+}
+$code.=<<___;
+	vld1.32		{$Bi},[sp,:64]		@ pull smashed b[8*i+0]
+	vmlal.u32	@ACC[0],$Ni,${N0}[0]
+	vld1.32		{$A0-$A3},[$aptr]!
+	vmlal.u32	@ACC[1],$Ni,${N0}[1]
+	vmlal.u32	@ACC[2],$Ni,${N1}[0]
+	 vshr.u64	@ACC[0]#lo,@ACC[0]#lo,#16
+	vmlal.u32	@ACC[3],$Ni,${N1}[1]
+	vmlal.u32	@ACC[4],$Ni,${N2}[0]
+	 vadd.u64	@ACC[0]#lo,@ACC[0]#lo,@ACC[0]#hi
+	vmlal.u32	@ACC[5],$Ni,${N2}[1]
+	 vshr.u64	@ACC[0]#lo,@ACC[0]#lo,#16
+	vmlal.u32	@ACC[6],$Ni,${N3}[0]
+	vmlal.u32	@ACC[7],$Ni,${N3}[1]
+	 vadd.u64	@ACC[1]#lo,@ACC[1]#lo,@ACC[0]#lo
+	vst1.32		{$Ni},[$bnptr,:64]	@ put aside smashed m[8*i+$i]
+	add		$bnptr,sp,#8		@ rewind
+___
+	push(@ACC,shift(@ACC));
+$code.=<<___;
+	sub		$inner,$num,#8
+	b		.LNEON_8n_inner
+
+.align	4
+.LNEON_8n_inner:
+	subs		$inner,$inner,#8
+	vmlal.u32	@ACC[0],$Bi,${A0}[0]
+	vld1.64		{@ACC[7]},[$tinptr,:128]
+	vmlal.u32	@ACC[1],$Bi,${A0}[1]
+	vld1.32		{$Ni},[$bnptr,:64]!	@ pull smashed m[8*i+0]
+	vmlal.u32	@ACC[2],$Bi,${A1}[0]
+	vld1.32		{$N0-$N3},[$nptr]!
+	vmlal.u32	@ACC[3],$Bi,${A1}[1]
+	it		ne
+	addne		$tinptr,$tinptr,#16	@ don't advance in last iteration
+	vmlal.u32	@ACC[4],$Bi,${A2}[0]
+	vmlal.u32	@ACC[5],$Bi,${A2}[1]
+	vmlal.u32	@ACC[6],$Bi,${A3}[0]
+	vmlal.u32	@ACC[7],$Bi,${A3}[1]
+___
+for ($i=1; $i<8; $i++) {
+$code.=<<___;
+	vld1.32		{$Bi},[$bnptr,:64]!	@ pull smashed b[8*i+$i]
+	vmlal.u32	@ACC[0],$Ni,${N0}[0]
+	vmlal.u32	@ACC[1],$Ni,${N0}[1]
+	vmlal.u32	@ACC[2],$Ni,${N1}[0]
+	vmlal.u32	@ACC[3],$Ni,${N1}[1]
+	vmlal.u32	@ACC[4],$Ni,${N2}[0]
+	vmlal.u32	@ACC[5],$Ni,${N2}[1]
+	vmlal.u32	@ACC[6],$Ni,${N3}[0]
+	vmlal.u32	@ACC[7],$Ni,${N3}[1]
+	vst1.64		{@ACC[0]},[$toutptr,:128]!
+___
+	push(@ACC,shift(@ACC));
+$code.=<<___;
+	vmlal.u32	@ACC[0],$Bi,${A0}[0]
+	vld1.64		{@ACC[7]},[$tinptr,:128]
+	vmlal.u32	@ACC[1],$Bi,${A0}[1]
+	vld1.32		{$Ni},[$bnptr,:64]!	@ pull smashed m[8*i+$i]
+	vmlal.u32	@ACC[2],$Bi,${A1}[0]
+	it		ne
+	addne		$tinptr,$tinptr,#16	@ don't advance in last iteration
+	vmlal.u32	@ACC[3],$Bi,${A1}[1]
+	vmlal.u32	@ACC[4],$Bi,${A2}[0]
+	vmlal.u32	@ACC[5],$Bi,${A2}[1]
+	vmlal.u32	@ACC[6],$Bi,${A3}[0]
+	vmlal.u32	@ACC[7],$Bi,${A3}[1]
+___
+}
+$code.=<<___;
+	it		eq
+	subeq		$aptr,$aptr,$num,lsl#2	@ rewind
+	vmlal.u32	@ACC[0],$Ni,${N0}[0]
+	vld1.32		{$Bi},[sp,:64]		@ pull smashed b[8*i+0]
+	vmlal.u32	@ACC[1],$Ni,${N0}[1]
+	vld1.32		{$A0-$A3},[$aptr]!
+	vmlal.u32	@ACC[2],$Ni,${N1}[0]
+	add		$bnptr,sp,#8		@ rewind
+	vmlal.u32	@ACC[3],$Ni,${N1}[1]
+	vmlal.u32	@ACC[4],$Ni,${N2}[0]
+	vmlal.u32	@ACC[5],$Ni,${N2}[1]
+	vmlal.u32	@ACC[6],$Ni,${N3}[0]
+	vst1.64		{@ACC[0]},[$toutptr,:128]!
+	vmlal.u32	@ACC[7],$Ni,${N3}[1]
+
+	bne		.LNEON_8n_inner
+___
+	push(@ACC,shift(@ACC));
+$code.=<<___;
+	add		$tinptr,sp,#128
+	vst1.64		{@ACC[0]-@ACC[1]},[$toutptr,:256]!
+	veor		q2,q2,q2		@ $N0-$N1
+	vst1.64		{@ACC[2]-@ACC[3]},[$toutptr,:256]!
+	veor		q3,q3,q3		@ $N2-$N3
+	vst1.64		{@ACC[4]-@ACC[5]},[$toutptr,:256]!
+	vst1.64		{@ACC[6]},[$toutptr,:128]
+
+	subs		$outer,$outer,#8
+	vld1.64		{@ACC[0]-@ACC[1]},[$tinptr,:256]!
+	vld1.64		{@ACC[2]-@ACC[3]},[$tinptr,:256]!
+	vld1.64		{@ACC[4]-@ACC[5]},[$tinptr,:256]!
+	vld1.64		{@ACC[6]-@ACC[7]},[$tinptr,:256]!
+
+	itt		ne
+	subne		$nptr,$nptr,$num,lsl#2	@ rewind
+	bne		.LNEON_8n_outer
+
+	add		$toutptr,sp,#128
+	vst1.64		{q2-q3}, [sp,:256]!	@ start wiping stack frame
+	vshr.u64	$temp,@ACC[0]#lo,#16
+	vst1.64		{q2-q3},[sp,:256]!
+	vadd.u64	@ACC[0]#hi,@ACC[0]#hi,$temp
+	vst1.64		{q2-q3}, [sp,:256]!
+	vshr.u64	$temp,@ACC[0]#hi,#16
+	vst1.64		{q2-q3}, [sp,:256]!
+	vzip.16		@ACC[0]#lo,@ACC[0]#hi
+
+	mov		$inner,$num
+	b		.LNEON_tail_entry
+
+.align	4
+.LNEON_tail:
+	vadd.u64	@ACC[0]#lo,@ACC[0]#lo,$temp
+	vshr.u64	$temp,@ACC[0]#lo,#16
+	vld1.64		{@ACC[2]-@ACC[3]}, [$tinptr, :256]!
+	vadd.u64	@ACC[0]#hi,@ACC[0]#hi,$temp
+	vld1.64		{@ACC[4]-@ACC[5]}, [$tinptr, :256]!
+	vshr.u64	$temp,@ACC[0]#hi,#16
+	vld1.64		{@ACC[6]-@ACC[7]}, [$tinptr, :256]!
+	vzip.16		@ACC[0]#lo,@ACC[0]#hi
+
+.LNEON_tail_entry:
+___
+for ($i=1; $i<8; $i++) {
+$code.=<<___;
+	vadd.u64	@ACC[1]#lo,@ACC[1]#lo,$temp
+	vst1.32		{@ACC[0]#lo[0]}, [$toutptr, :32]!
+	vshr.u64	$temp,@ACC[1]#lo,#16
+	vadd.u64	@ACC[1]#hi,@ACC[1]#hi,$temp
+	vshr.u64	$temp,@ACC[1]#hi,#16
+	vzip.16		@ACC[1]#lo,@ACC[1]#hi
+___
+	push(@ACC,shift(@ACC));
+}
+	push(@ACC,shift(@ACC));
+$code.=<<___;
+	vld1.64		{@ACC[0]-@ACC[1]}, [$tinptr, :256]!
+	subs		$inner,$inner,#8
+	vst1.32		{@ACC[7]#lo[0]},   [$toutptr, :32]!
+	bne	.LNEON_tail
+
+	vst1.32	{${temp}[0]}, [$toutptr, :32]		@ top-most bit
+	sub	$nptr,$nptr,$num,lsl#2			@ rewind $nptr
+	subs	$aptr,sp,#0				@ clear carry flag
+	add	$bptr,sp,$num,lsl#2
+
+.LNEON_sub:
+	ldmia	$aptr!, {r4-r7}
+	ldmia	$nptr!, {r8-r11}
+	sbcs	r8, r4,r8
+	sbcs	r9, r5,r9
+	sbcs	r10,r6,r10
+	sbcs	r11,r7,r11
+	teq	$aptr,$bptr				@ preserves carry
+	stmia	$rptr!, {r8-r11}
+	bne	.LNEON_sub
+
+	ldr	r10, [$aptr]				@ load top-most bit
+	mov	r11,sp
+	veor	q0,q0,q0
+	sub	r11,$bptr,r11				@ this is num*4
+	veor	q1,q1,q1
+	mov	$aptr,sp
+	sub	$rptr,$rptr,r11				@ rewind $rptr
+	mov	$nptr,$bptr				@ second 3/4th of frame
+	sbcs	r10,r10,#0				@ result is carry flag
+
+.LNEON_copy_n_zap:
+	ldmia	$aptr!, {r4-r7}
+	ldmia	$rptr,  {r8-r11}
+	it	cc
+	movcc	r8, r4
+	vst1.64	{q0-q1}, [$nptr,:256]!			@ wipe
+	itt	cc
+	movcc	r9, r5
+	movcc	r10,r6
+	vst1.64	{q0-q1}, [$nptr,:256]!			@ wipe
+	it	cc
+	movcc	r11,r7
+	ldmia	$aptr, {r4-r7}
+	stmia	$rptr!, {r8-r11}
+	sub	$aptr,$aptr,#16
+	ldmia	$rptr, {r8-r11}
+	it	cc
+	movcc	r8, r4
+	vst1.64	{q0-q1}, [$aptr,:256]!			@ wipe
+	itt	cc
+	movcc	r9, r5
+	movcc	r10,r6
+	vst1.64	{q0-q1}, [$nptr,:256]!			@ wipe
+	it	cc
+	movcc	r11,r7
+	teq	$aptr,$bptr				@ preserves carry
+	stmia	$rptr!, {r8-r11}
+	bne	.LNEON_copy_n_zap
+
+	mov	sp,ip
+        vldmia  sp!,{d8-d15}
+        ldmia   sp!,{r4-r11}
+	ret						@ bx lr
+.size	bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
+#endif
+___
+}
+$code.=<<___;
+.asciz	"Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
+.align	2
+#if __ARM_MAX_ARCH__>=7
+.comm	OPENSSL_armcap_P,4,4
+#endif
+___
+
+foreach (split("\n",$code)) {
+	s/\`([^\`]*)\`/eval $1/ge;
+
+	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/ge	or
+	s/\bret\b/bx    lr/g						or
+	s/\bbx\s+lr\b/.word\t0xe12fff1e/g;	# make it possible to compile with -march=armv4
+
+	print $_,"\n";
+}
+
+close STDOUT;
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/armv8-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/armv8-mont.pl
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/bn-586.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/bn-586.pl
@ -0,0 +1,785 @@
+#! /usr/bin/env perl
+# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
+require "x86asm.pl";
+
+$output = pop;
+open STDOUT,">$output";
+
+&asm_init($ARGV[0]);
+
+$sse2=0;
+for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
+
+&external_label("OPENSSL_ia32cap_P") if ($sse2);
+
+&bn_mul_add_words("bn_mul_add_words");
+&bn_mul_words("bn_mul_words");
+&bn_sqr_words("bn_sqr_words");
+&bn_div_words("bn_div_words");
+&bn_add_words("bn_add_words");
+&bn_sub_words("bn_sub_words");
+&bn_sub_part_words("bn_sub_part_words");
+
+&asm_finish();
+
+close STDOUT;
+
+sub bn_mul_add_words
+	{
+	local($name)=@_;
+
+	&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+
+	$r="eax";
+	$a="edx";
+	$c="ecx";
+
+	if ($sse2) {
+		&picmeup("eax","OPENSSL_ia32cap_P");
+		&bt(&DWP(0,"eax"),26);
+		&jnc(&label("maw_non_sse2"));
+
+		&mov($r,&wparam(0));
+		&mov($a,&wparam(1));
+		&mov($c,&wparam(2));
+		&movd("mm0",&wparam(3));	# mm0 = w
+		&pxor("mm1","mm1");		# mm1 = carry_in
+		&jmp(&label("maw_sse2_entry"));
+
+	&set_label("maw_sse2_unrolled",16);
+		&movd("mm3",&DWP(0,$r,"",0));	# mm3 = r[0]
+		&paddq("mm1","mm3");		# mm1 = carry_in + r[0]
+		&movd("mm2",&DWP(0,$a,"",0));	# mm2 = a[0]
+		&pmuludq("mm2","mm0");		# mm2 = w*a[0]
+		&movd("mm4",&DWP(4,$a,"",0));	# mm4 = a[1]
+		&pmuludq("mm4","mm0");		# mm4 = w*a[1]
+		&movd("mm6",&DWP(8,$a,"",0));	# mm6 = a[2]
+		&pmuludq("mm6","mm0");		# mm6 = w*a[2]
+		&movd("mm7",&DWP(12,$a,"",0));	# mm7 = a[3]
+		&pmuludq("mm7","mm0");		# mm7 = w*a[3]
+		&paddq("mm1","mm2");		# mm1 = carry_in + r[0] + w*a[0]
+		&movd("mm3",&DWP(4,$r,"",0));	# mm3 = r[1]
+		&paddq("mm3","mm4");		# mm3 = r[1] + w*a[1]
+		&movd("mm5",&DWP(8,$r,"",0));	# mm5 = r[2]
+		&paddq("mm5","mm6");		# mm5 = r[2] + w*a[2]
+		&movd("mm4",&DWP(12,$r,"",0));	# mm4 = r[3]
+		&paddq("mm7","mm4");		# mm7 = r[3] + w*a[3]
+		&movd(&DWP(0,$r,"",0),"mm1");
+		&movd("mm2",&DWP(16,$a,"",0));	# mm2 = a[4]
+		&pmuludq("mm2","mm0");		# mm2 = w*a[4]
+		&psrlq("mm1",32);		# mm1 = carry0
+		&movd("mm4",&DWP(20,$a,"",0));	# mm4 = a[5]
+		&pmuludq("mm4","mm0");		# mm4 = w*a[5]
+		&paddq("mm1","mm3");		# mm1 = carry0 + r[1] + w*a[1]
+		&movd("mm6",&DWP(24,$a,"",0));	# mm6 = a[6]
+		&pmuludq("mm6","mm0");		# mm6 = w*a[6]
+		&movd(&DWP(4,$r,"",0),"mm1");
+		&psrlq("mm1",32);		# mm1 = carry1
+		&movd("mm3",&DWP(28,$a,"",0));	# mm3 = a[7]
+		&add($a,32);
+		&pmuludq("mm3","mm0");		# mm3 = w*a[7]
+		&paddq("mm1","mm5");		# mm1 = carry1 + r[2] + w*a[2]
+		&movd("mm5",&DWP(16,$r,"",0));	# mm5 = r[4]
+		&paddq("mm2","mm5");		# mm2 = r[4] + w*a[4]
+		&movd(&DWP(8,$r,"",0),"mm1");
+		&psrlq("mm1",32);		# mm1 = carry2
+		&paddq("mm1","mm7");		# mm1 = carry2 + r[3] + w*a[3]
+		&movd("mm5",&DWP(20,$r,"",0));	# mm5 = r[5]
+		&paddq("mm4","mm5");		# mm4 = r[5] + w*a[5]
+		&movd(&DWP(12,$r,"",0),"mm1");
+		&psrlq("mm1",32);		# mm1 = carry3
+		&paddq("mm1","mm2");		# mm1 = carry3 + r[4] + w*a[4]
+		&movd("mm5",&DWP(24,$r,"",0));	# mm5 = r[6]
+		&paddq("mm6","mm5");		# mm6 = r[6] + w*a[6]
+		&movd(&DWP(16,$r,"",0),"mm1");
+		&psrlq("mm1",32);		# mm1 = carry4
+		&paddq("mm1","mm4");		# mm1 = carry4 + r[5] + w*a[5]
+		&movd("mm5",&DWP(28,$r,"",0));	# mm5 = r[7]
+		&paddq("mm3","mm5");		# mm3 = r[7] + w*a[7]
+		&movd(&DWP(20,$r,"",0),"mm1");
+		&psrlq("mm1",32);		# mm1 = carry5
+		&paddq("mm1","mm6");		# mm1 = carry5 + r[6] + w*a[6]
+		&movd(&DWP(24,$r,"",0),"mm1");
+		&psrlq("mm1",32);		# mm1 = carry6
+		&paddq("mm1","mm3");		# mm1 = carry6 + r[7] + w*a[7]
+		&movd(&DWP(28,$r,"",0),"mm1");
+		&lea($r,&DWP(32,$r));
+		&psrlq("mm1",32);		# mm1 = carry_out
+
+		&sub($c,8);
+		&jz(&label("maw_sse2_exit"));
+	&set_label("maw_sse2_entry");
+		&test($c,0xfffffff8);
+		&jnz(&label("maw_sse2_unrolled"));
+
+	&set_label("maw_sse2_loop",4);
+		&movd("mm2",&DWP(0,$a));	# mm2 = a[i]
+		&movd("mm3",&DWP(0,$r));	# mm3 = r[i]
+		&pmuludq("mm2","mm0");		# a[i] *= w
+		&lea($a,&DWP(4,$a));
+		&paddq("mm1","mm3");		# carry += r[i]
+		&paddq("mm1","mm2");		# carry += a[i]*w
+		&movd(&DWP(0,$r),"mm1");	# r[i] = carry_low
+		&sub($c,1);
+		&psrlq("mm1",32);		# carry = carry_high
+		&lea($r,&DWP(4,$r));
+		&jnz(&label("maw_sse2_loop"));
+	&set_label("maw_sse2_exit");
+		&movd("eax","mm1");		# c = carry_out
+		&emms();
+		&ret();
+
+	&set_label("maw_non_sse2",16);
+	}
+
+	# function_begin prologue
+	&push("ebp");
+	&push("ebx");
+	&push("esi");
+	&push("edi");
+
+	&comment("");
+	$Low="eax";
+	$High="edx";
+	$a="ebx";
+	$w="ebp";
+	$r="edi";
+	$c="esi";
+
+	&xor($c,$c);		# clear carry
+	&mov($r,&wparam(0));	#
+
+	&mov("ecx",&wparam(2));	#
+	&mov($a,&wparam(1));	#
+
+	&and("ecx",0xfffffff8);	# num / 8
+	&mov($w,&wparam(3));	#
+
+	&push("ecx");		# Up the stack for a tmp variable
+
+	&jz(&label("maw_finish"));
+
+	&set_label("maw_loop",16);
+
+	for ($i=0; $i<32; $i+=4)
+		{
+		&comment("Round $i");
+
+		 &mov("eax",&DWP($i,$a)); 	# *a
+		&mul($w);			# *a * w
+		&add("eax",$c);			# L(t)+= c
+		&adc("edx",0);			# H(t)+=carry
+		 &add("eax",&DWP($i,$r));	# L(t)+= *r
+		&adc("edx",0);			# H(t)+=carry
+		 &mov(&DWP($i,$r),"eax");	# *r= L(t);
+		&mov($c,"edx");			# c=  H(t);
+		}
+
+	&comment("");
+	&sub("ecx",8);
+	&lea($a,&DWP(32,$a));
+	&lea($r,&DWP(32,$r));
+	&jnz(&label("maw_loop"));
+
+	&set_label("maw_finish",0);
+	&mov("ecx",&wparam(2));	# get num
+	&and("ecx",7);
+	&jnz(&label("maw_finish2"));	# helps branch prediction
+	&jmp(&label("maw_end"));
+
+	&set_label("maw_finish2",1);
+	for ($i=0; $i<7; $i++)
+		{
+		&comment("Tail Round $i");
+		 &mov("eax",&DWP($i*4,$a));	# *a
+		&mul($w);			# *a * w
+		&add("eax",$c);			# L(t)+=c
+		&adc("edx",0);			# H(t)+=carry
+		 &add("eax",&DWP($i*4,$r));	# L(t)+= *r
+		&adc("edx",0);			# H(t)+=carry
+		 &dec("ecx") if ($i != 7-1);
+		&mov(&DWP($i*4,$r),"eax");	# *r= L(t);
+		 &mov($c,"edx");		# c=  H(t);
+		&jz(&label("maw_end")) if ($i != 7-1);
+		}
+	&set_label("maw_end",0);
+	&mov("eax",$c);
+
+	&pop("ecx");	# clear variable from
+
+	&function_end($name);
+	}
+
+sub bn_mul_words
+	{
+	local($name)=@_;
+
+	&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+
+	$r="eax";
+	$a="edx";
+	$c="ecx";
+
+	if ($sse2) {
+		&picmeup("eax","OPENSSL_ia32cap_P");
+		&bt(&DWP(0,"eax"),26);
+		&jnc(&label("mw_non_sse2"));
+
+		&mov($r,&wparam(0));
+		&mov($a,&wparam(1));
+		&mov($c,&wparam(2));
+		&movd("mm0",&wparam(3));	# mm0 = w
+		&pxor("mm1","mm1");		# mm1 = carry = 0
+
+	&set_label("mw_sse2_loop",16);
+		&movd("mm2",&DWP(0,$a));	# mm2 = a[i]
+		&pmuludq("mm2","mm0");		# a[i] *= w
+		&lea($a,&DWP(4,$a));
+		&paddq("mm1","mm2");		# carry += a[i]*w
+		&movd(&DWP(0,$r),"mm1");	# r[i] = carry_low
+		&sub($c,1);
+		&psrlq("mm1",32);		# carry = carry_high
+		&lea($r,&DWP(4,$r));
+		&jnz(&label("mw_sse2_loop"));
+
+		&movd("eax","mm1");		# return carry
+		&emms();
+		&ret();
+	&set_label("mw_non_sse2",16);
+	}
+
+	# function_begin prologue
+	&push("ebp");
+	&push("ebx");
+	&push("esi");
+	&push("edi");
+
+	&comment("");
+	$Low="eax";
+	$High="edx";
+	$a="ebx";
+	$w="ecx";
+	$r="edi";
+	$c="esi";
+	$num="ebp";
+
+	&xor($c,$c);		# clear carry
+	&mov($r,&wparam(0));	#
+	&mov($a,&wparam(1));	#
+	&mov($num,&wparam(2));	#
+	&mov($w,&wparam(3));	#
+
+	&and($num,0xfffffff8);	# num / 8
+	&jz(&label("mw_finish"));
+
+	&set_label("mw_loop",0);
+	for ($i=0; $i<32; $i+=4)
+		{
+		&comment("Round $i");
+
+		 &mov("eax",&DWP($i,$a,"",0)); 	# *a
+		&mul($w);			# *a * w
+		&add("eax",$c);			# L(t)+=c
+		 # XXX
+
+		&adc("edx",0);			# H(t)+=carry
+		 &mov(&DWP($i,$r,"",0),"eax");	# *r= L(t);
+
+		&mov($c,"edx");			# c=  H(t);
+		}
+
+	&comment("");
+	&add($a,32);
+	&add($r,32);
+	&sub($num,8);
+	&jz(&label("mw_finish"));
+	&jmp(&label("mw_loop"));
+
+	&set_label("mw_finish",0);
+	&mov($num,&wparam(2));	# get num
+	&and($num,7);
+	&jnz(&label("mw_finish2"));
+	&jmp(&label("mw_end"));
+
+	&set_label("mw_finish2",1);
+	for ($i=0; $i<7; $i++)
+		{
+		&comment("Tail Round $i");
+		 &mov("eax",&DWP($i*4,$a,"",0));# *a
+		&mul($w);			# *a * w
+		&add("eax",$c);			# L(t)+=c
+		 # XXX
+		&adc("edx",0);			# H(t)+=carry
+		 &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
+		&mov($c,"edx");			# c=  H(t);
+		 &dec($num) if ($i != 7-1);
+		&jz(&label("mw_end")) if ($i != 7-1);
+		}
+	&set_label("mw_end",0);
+	&mov("eax",$c);
+
+	&function_end($name);
+	}
+
+sub bn_sqr_words
+	{
+	local($name)=@_;
+
+	&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+
+	$r="eax";
+	$a="edx";
+	$c="ecx";
+
+	if ($sse2) {
+		&picmeup("eax","OPENSSL_ia32cap_P");
+		&bt(&DWP(0,"eax"),26);
+		&jnc(&label("sqr_non_sse2"));
+
+		&mov($r,&wparam(0));
+		&mov($a,&wparam(1));
+		&mov($c,&wparam(2));
+
+	&set_label("sqr_sse2_loop",16);
+		&movd("mm0",&DWP(0,$a));	# mm0 = a[i]
+		&pmuludq("mm0","mm0");		# a[i] *= a[i]
+		&lea($a,&DWP(4,$a));		# a++
+		&movq(&QWP(0,$r),"mm0");	# r[i] = a[i]*a[i]
+		&sub($c,1);
+		&lea($r,&DWP(8,$r));		# r += 2
+		&jnz(&label("sqr_sse2_loop"));
+
+		&emms();
+		&ret();
+	&set_label("sqr_non_sse2",16);
+	}
+
+	# function_begin prologue
+	&push("ebp");
+	&push("ebx");
+	&push("esi");
+	&push("edi");
+
+	&comment("");
+	$r="esi";
+	$a="edi";
+	$num="ebx";
+
+	&mov($r,&wparam(0));	#
+	&mov($a,&wparam(1));	#
+	&mov($num,&wparam(2));	#
+
+	&and($num,0xfffffff8);	# num / 8
+	&jz(&label("sw_finish"));
+
+	&set_label("sw_loop",0);
+	for ($i=0; $i<32; $i+=4)
+		{
+		&comment("Round $i");
+		&mov("eax",&DWP($i,$a,"",0)); 	# *a
+		 # XXX
+		&mul("eax");			# *a * *a
+		&mov(&DWP($i*2,$r,"",0),"eax");	#
+		 &mov(&DWP($i*2+4,$r,"",0),"edx");#
+		}
+
+	&comment("");
+	&add($a,32);
+	&add($r,64);
+	&sub($num,8);
+	&jnz(&label("sw_loop"));
+
+	&set_label("sw_finish",0);
+	&mov($num,&wparam(2));	# get num
+	&and($num,7);
+	&jz(&label("sw_end"));
+
+	for ($i=0; $i<7; $i++)
+		{
+		&comment("Tail Round $i");
+		&mov("eax",&DWP($i*4,$a,"",0));	# *a
+		 # XXX
+		&mul("eax");			# *a * *a
+		&mov(&DWP($i*8,$r,"",0),"eax");	#
+		 &dec($num) if ($i != 7-1);
+		&mov(&DWP($i*8+4,$r,"",0),"edx");
+		 &jz(&label("sw_end")) if ($i != 7-1);
+		}
+	&set_label("sw_end",0);
+
+	&function_end($name);
+	}
+
+sub bn_div_words
+	{
+	local($name)=@_;
+
+	&function_begin_B($name,"");
+	&mov("edx",&wparam(0));	#
+	&mov("eax",&wparam(1));	#
+	&mov("ecx",&wparam(2));	#
+	&div("ecx");
+	&ret();
+	&function_end_B($name);
+	}
+
+sub bn_add_words
+	{
+	local($name)=@_;
+
+	&function_begin($name,"");
+
+	&comment("");
+	$a="esi";
+	$b="edi";
+	$c="eax";
+	$r="ebx";
+	$tmp1="ecx";
+	$tmp2="edx";
+	$num="ebp";
+
+	&mov($r,&wparam(0));	# get r
+	 &mov($a,&wparam(1));	# get a
+	&mov($b,&wparam(2));	# get b
+	 &mov($num,&wparam(3));	# get num
+	&xor($c,$c);		# clear carry
+	 &and($num,0xfffffff8);	# num / 8
+
+	&jz(&label("aw_finish"));
+
+	&set_label("aw_loop",0);
+	for ($i=0; $i<8; $i++)
+		{
+		&comment("Round $i");
+
+		&mov($tmp1,&DWP($i*4,$a,"",0)); 	# *a
+		 &mov($tmp2,&DWP($i*4,$b,"",0)); 	# *b
+		&add($tmp1,$c);
+		 &mov($c,0);
+		&adc($c,$c);
+		 &add($tmp1,$tmp2);
+		&adc($c,0);
+		 &mov(&DWP($i*4,$r,"",0),$tmp1); 	# *r
+		}
+
+	&comment("");
+	&add($a,32);
+	 &add($b,32);
+	&add($r,32);
+	 &sub($num,8);
+	&jnz(&label("aw_loop"));
+
+	&set_label("aw_finish",0);
+	&mov($num,&wparam(3));	# get num
+	&and($num,7);
+	 &jz(&label("aw_end"));
+
+	for ($i=0; $i<7; $i++)
+		{
+		&comment("Tail Round $i");
+		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
+		 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
+		&add($tmp1,$c);
+		 &mov($c,0);
+		&adc($c,$c);
+		 &add($tmp1,$tmp2);
+		&adc($c,0);
+		 &dec($num) if ($i != 6);
+		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
+		 &jz(&label("aw_end")) if ($i != 6);
+		}
+	&set_label("aw_end",0);
+
+#	&mov("eax",$c);		# $c is "eax"
+
+	&function_end($name);
+	}
+
+sub bn_sub_words
+	{
+	local($name)=@_;
+
+	&function_begin($name,"");
+
+	&comment("");
+	$a="esi";
+	$b="edi";
+	$c="eax";
+	$r="ebx";
+	$tmp1="ecx";
+	$tmp2="edx";
+	$num="ebp";
+
+	&mov($r,&wparam(0));	# get r
+	 &mov($a,&wparam(1));	# get a
+	&mov($b,&wparam(2));	# get b
+	 &mov($num,&wparam(3));	# get num
+	&xor($c,$c);		# clear carry
+	 &and($num,0xfffffff8);	# num / 8
+
+	&jz(&label("aw_finish"));
+
+	&set_label("aw_loop",0);
+	for ($i=0; $i<8; $i++)
+		{
+		&comment("Round $i");
+
+		&mov($tmp1,&DWP($i*4,$a,"",0)); 	# *a
+		 &mov($tmp2,&DWP($i*4,$b,"",0)); 	# *b
+		&sub($tmp1,$c);
+		 &mov($c,0);
+		&adc($c,$c);
+		 &sub($tmp1,$tmp2);
+		&adc($c,0);
+		 &mov(&DWP($i*4,$r,"",0),$tmp1); 	# *r
+		}
+
+	&comment("");
+	&add($a,32);
+	 &add($b,32);
+	&add($r,32);
+	 &sub($num,8);
+	&jnz(&label("aw_loop"));
+
+	&set_label("aw_finish",0);
+	&mov($num,&wparam(3));	# get num
+	&and($num,7);
+	 &jz(&label("aw_end"));
+
+	for ($i=0; $i<7; $i++)
+		{
+		&comment("Tail Round $i");
+		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
+		 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
+		&sub($tmp1,$c);
+		 &mov($c,0);
+		&adc($c,$c);
+		 &sub($tmp1,$tmp2);
+		&adc($c,0);
+		 &dec($num) if ($i != 6);
+		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
+		 &jz(&label("aw_end")) if ($i != 6);
+		}
+	&set_label("aw_end",0);
+
+#	&mov("eax",$c);		# $c is "eax"
+
+	&function_end($name);
+	}
+
+sub bn_sub_part_words
+	{
+	local($name)=@_;
+
+	&function_begin($name,"");
+
+	&comment("");
+	$a="esi";
+	$b="edi";
+	$c="eax";
+	$r="ebx";
+	$tmp1="ecx";
+	$tmp2="edx";
+	$num="ebp";
+
+	&mov($r,&wparam(0));	# get r
+	 &mov($a,&wparam(1));	# get a
+	&mov($b,&wparam(2));	# get b
+	 &mov($num,&wparam(3));	# get num
+	&xor($c,$c);		# clear carry
+	 &and($num,0xfffffff8);	# num / 8
+
+	&jz(&label("aw_finish"));
+
+	&set_label("aw_loop",0);
+	for ($i=0; $i<8; $i++)
+		{
+		&comment("Round $i");
+
+		&mov($tmp1,&DWP($i*4,$a,"",0)); 	# *a
+		 &mov($tmp2,&DWP($i*4,$b,"",0)); 	# *b
+		&sub($tmp1,$c);
+		 &mov($c,0);
+		&adc($c,$c);
+		 &sub($tmp1,$tmp2);
+		&adc($c,0);
+		 &mov(&DWP($i*4,$r,"",0),$tmp1); 	# *r
+		}
+
+	&comment("");
+	&add($a,32);
+	 &add($b,32);
+	&add($r,32);
+	 &sub($num,8);
+	&jnz(&label("aw_loop"));
+
+	&set_label("aw_finish",0);
+	&mov($num,&wparam(3));	# get num
+	&and($num,7);
+	 &jz(&label("aw_end"));
+
+	for ($i=0; $i<7; $i++)
+		{
+		&comment("Tail Round $i");
+		&mov($tmp1,&DWP(0,$a,"",0));	# *a
+		 &mov($tmp2,&DWP(0,$b,"",0));# *b
+		&sub($tmp1,$c);
+		 &mov($c,0);
+		&adc($c,$c);
+		 &sub($tmp1,$tmp2);
+		&adc($c,0);
+		&mov(&DWP(0,$r,"",0),$tmp1);	# *r
+		&add($a, 4);
+		&add($b, 4);
+		&add($r, 4);
+		 &dec($num) if ($i != 6);
+		 &jz(&label("aw_end")) if ($i != 6);
+		}
+	&set_label("aw_end",0);
+
+	&cmp(&wparam(4),0);
+	&je(&label("pw_end"));
+
+	&mov($num,&wparam(4));	# get dl
+	&cmp($num,0);
+	&je(&label("pw_end"));
+	&jge(&label("pw_pos"));
+
+	&comment("pw_neg");
+	&mov($tmp2,0);
+	&sub($tmp2,$num);
+	&mov($num,$tmp2);
+	&and($num,0xfffffff8);	# num / 8
+	&jz(&label("pw_neg_finish"));
+
+	&set_label("pw_neg_loop",0);
+	for ($i=0; $i<8; $i++)
+	{
+	    &comment("dl<0 Round $i");
+
+	    &mov($tmp1,0);
+	    &mov($tmp2,&DWP($i*4,$b,"",0)); 	# *b
+	    &sub($tmp1,$c);
+	    &mov($c,0);
+	    &adc($c,$c);
+	    &sub($tmp1,$tmp2);
+	    &adc($c,0);
+	    &mov(&DWP($i*4,$r,"",0),$tmp1); 	# *r
+	}
+
+	&comment("");
+	&add($b,32);
+	&add($r,32);
+	&sub($num,8);
+	&jnz(&label("pw_neg_loop"));
+
+	&set_label("pw_neg_finish",0);
+	&mov($tmp2,&wparam(4));	# get dl
+	&mov($num,0);
+	&sub($num,$tmp2);
+	&and($num,7);
+	&jz(&label("pw_end"));
+
+	for ($i=0; $i<7; $i++)
+	{
+	    &comment("dl<0 Tail Round $i");
+	    &mov($tmp1,0);
+	    &mov($tmp2,&DWP($i*4,$b,"",0));# *b
+	    &sub($tmp1,$c);
+	    &mov($c,0);
+	    &adc($c,$c);
+	    &sub($tmp1,$tmp2);
+	    &adc($c,0);
+	    &dec($num) if ($i != 6);
+	    &mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
+	    &jz(&label("pw_end")) if ($i != 6);
+	}
+
+	&jmp(&label("pw_end"));
+
+	&set_label("pw_pos",0);
+
+	&and($num,0xfffffff8);	# num / 8
+	&jz(&label("pw_pos_finish"));
+
+	&set_label("pw_pos_loop",0);
+
+	for ($i=0; $i<8; $i++)
+	{
+	    &comment("dl>0 Round $i");
+
+	    &mov($tmp1,&DWP($i*4,$a,"",0));	# *a
+	    &sub($tmp1,$c);
+	    &mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
+	    &jnc(&label("pw_nc".$i));
+	}
+
+	&comment("");
+	&add($a,32);
+	&add($r,32);
+	&sub($num,8);
+	&jnz(&label("pw_pos_loop"));
+
+	&set_label("pw_pos_finish",0);
+	&mov($num,&wparam(4));	# get dl
+	&and($num,7);
+	&jz(&label("pw_end"));
+
+	for ($i=0; $i<7; $i++)
+	{
+	    &comment("dl>0 Tail Round $i");
+	    &mov($tmp1,&DWP($i*4,$a,"",0));	# *a
+	    &sub($tmp1,$c);
+	    &mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
+	    &jnc(&label("pw_tail_nc".$i));
+	    &dec($num) if ($i != 6);
+	    &jz(&label("pw_end")) if ($i != 6);
+	}
+	&mov($c,1);
+	&jmp(&label("pw_end"));
+
+	&set_label("pw_nc_loop",0);
+	for ($i=0; $i<8; $i++)
+	{
+	    &mov($tmp1,&DWP($i*4,$a,"",0));	# *a
+	    &mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
+	    &set_label("pw_nc".$i,0);
+	}
+
+	&comment("");
+	&add($a,32);
+	&add($r,32);
+	&sub($num,8);
+	&jnz(&label("pw_nc_loop"));
+
+	&mov($num,&wparam(4));	# get dl
+	&and($num,7);
+	&jz(&label("pw_nc_end"));
+
+	for ($i=0; $i<7; $i++)
+	{
+	    &mov($tmp1,&DWP($i*4,$a,"",0));	# *a
+	    &mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
+	    &set_label("pw_tail_nc".$i,0);
+	    &dec($num) if ($i != 6);
+	    &jz(&label("pw_nc_end")) if ($i != 6);
+	}
+
+	&set_label("pw_nc_end",0);
+	&mov($c,0);
+
+	&set_label("pw_end",0);
+
+#	&mov("eax",$c);		# $c is "eax"
+
+	&function_end($name);
+	}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/bn-c64xplus.asm
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/bn-c64xplus.asm
@ -0,0 +1,382 @@
+;; Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+;;
+;; Licensed under the OpenSSL license (the "License").  You may not use
+;; this file except in compliance with the License.  You can obtain a copy
+;; in the file LICENSE in the source distribution or at
+;; https://www.openssl.org/source/license.html
+;;
+;;====================================================================
+;; Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+;; project.
+;;
+;; Rights for redistribution and usage in source and binary forms are
+;; granted according to the OpenSSL license. Warranty of any kind is
+;; disclaimed.
+;;====================================================================
+;; Compiler-generated multiply-n-add SPLOOP runs at 12*n cycles, n
+;; being the number of 32-bit words, addition - 8*n. Corresponding 4x
+;; unrolled SPLOOP-free loops - at ~8*n and ~5*n. Below assembler
+;; SPLOOPs spin at ... 2*n cycles [plus epilogue].
+;;====================================================================
+	.text
+
+	.if	.ASSEMBLER_VERSION<7000000
+	.asg	0,__TI_EABI__
+	.endif
+	.if	__TI_EABI__
+	.asg	bn_mul_add_words,_bn_mul_add_words
+	.asg	bn_mul_words,_bn_mul_words
+	.asg	bn_sqr_words,_bn_sqr_words
+	.asg	bn_add_words,_bn_add_words
+	.asg	bn_sub_words,_bn_sub_words
+	.asg	bn_div_words,_bn_div_words
+	.asg	bn_sqr_comba8,_bn_sqr_comba8
+	.asg	bn_mul_comba8,_bn_mul_comba8
+	.asg	bn_sqr_comba4,_bn_sqr_comba4
+	.asg	bn_mul_comba4,_bn_mul_comba4
+	.endif
+
+	.asg	B3,RA
+	.asg	A4,ARG0
+	.asg	B4,ARG1
+	.asg	A6,ARG2
+	.asg	B6,ARG3
+	.asg	A8,ARG4
+	.asg	B8,ARG5
+	.asg	A4,RET
+	.asg	A15,FP
+	.asg	B14,DP
+	.asg	B15,SP
+
+	.global	_bn_mul_add_words
+_bn_mul_add_words:
+	.asmfunc
+	MV	ARG2,B0
+  [!B0]	BNOP	RA
+||[!B0]	MVK	0,RET
+   [B0]	MVC	B0,ILC
+   [B0]	ZERO	A19		; high part of accumulator
+|| [B0]	MV	ARG0,A2
+|| [B0]	MV	ARG3,A3
+	NOP	3
+
+	SPLOOP	2		; 2*n+10
+;;====================================================================
+	LDW	*ARG1++,B7	; ap[i]
+	NOP	3
+	LDW	*ARG0++,A7	; rp[i]
+	MPY32U	B7,A3,A17:A16
+	NOP	3		; [2,0] in epilogue
+	ADDU	A16,A7,A21:A20
+	ADDU	A19,A21:A20,A19:A18
+||	MV.S	A17,A23
+	SPKERNEL 2,1		; leave slot for "return value"
+||	STW	A18,*A2++	; rp[i]
+||	ADD	A19,A23,A19
+;;====================================================================
+	BNOP	RA,4
+	MV	A19,RET		; return value
+	.endasmfunc
+
+	.global	_bn_mul_words
+_bn_mul_words:
+	.asmfunc
+	MV	ARG2,B0
+  [!B0]	BNOP	RA
+||[!B0]	MVK	0,RET
+   [B0]	MVC	B0,ILC
+   [B0]	ZERO	A19		; high part of accumulator
+	NOP	3
+
+	SPLOOP	2		; 2*n+10
+;;====================================================================
+	LDW	*ARG1++,A7	; ap[i]
+	NOP	4
+	MPY32U	A7,ARG3,A17:A16
+	NOP	4		; [2,0] in epiloque
+	ADDU	A19,A16,A19:A18
+||	MV.S	A17,A21
+	SPKERNEL 2,1		; leave slot for "return value"
+||	STW	A18,*ARG0++	; rp[i]
+||	ADD.L	A19,A21,A19
+;;====================================================================
+	BNOP	RA,4
+	MV	A19,RET		; return value
+	.endasmfunc
+
+	.global	_bn_sqr_words
+_bn_sqr_words:
+	.asmfunc
+	MV	ARG2,B0
+  [!B0]	BNOP	RA
+||[!B0]	MVK	0,RET
+   [B0]	MVC	B0,ILC
+   [B0]	MV	ARG0,B2
+|| [B0]	ADD	4,ARG0,ARG0
+	NOP	3
+
+	SPLOOP	2		; 2*n+10
+;;====================================================================
+	LDW	*ARG1++,B7	; ap[i]
+	NOP	4
+	MPY32U	B7,B7,B1:B0
+	NOP	3		; [2,0] in epilogue
+	STW	B0,*B2++(8)	; rp[2*i]
+	MV	B1,A1
+	SPKERNEL 2,0		; fully overlap BNOP RA,5
+||	STW	A1,*ARG0++(8)	; rp[2*i+1]
+;;====================================================================
+	BNOP	RA,5
+	.endasmfunc
+
+	.global	_bn_add_words
+_bn_add_words:
+	.asmfunc
+	MV	ARG3,B0
+  [!B0]	BNOP	RA
+||[!B0]	MVK	0,RET
+   [B0]	MVC	B0,ILC
+   [B0]	ZERO	A1		; carry flag
+|| [B0]	MV	ARG0,A3
+	NOP	3
+
+	SPLOOP	2		; 2*n+6
+;;====================================================================
+	LDW	*ARG2++,A7	; bp[i]
+||	LDW	*ARG1++,B7	; ap[i]
+	NOP	4
+	ADDU	A7,B7,A9:A8
+	ADDU	A1,A9:A8,A1:A0
+	SPKERNEL 0,0		; fully overlap BNOP RA,5
+||	STW	A0,*A3++	; write result
+||	MV	A1,RET		; keep carry flag in RET
+;;====================================================================
+	BNOP	RA,5
+	.endasmfunc
+
+	.global	_bn_sub_words
+_bn_sub_words:
+	.asmfunc
+	MV	ARG3,B0
+  [!B0]	BNOP	RA
+||[!B0]	MVK	0,RET
+   [B0]	MVC	B0,ILC
+   [B0]	ZERO	A2		; borrow flag
+|| [B0]	MV	ARG0,A3
+	NOP	3
+
+	SPLOOP	2		; 2*n+6
+;;====================================================================
+	LDW	*ARG2++,A7	; bp[i]
+||	LDW	*ARG1++,B7	; ap[i]
+	NOP	4
+	SUBU	B7,A7,A1:A0
+  [A2]	SUB	A1:A0,1,A1:A0
+	SPKERNEL 0,1		; leave slot for "return borrow flag"
+||	STW	A0,*A3++	; write result
+||	AND	1,A1,A2		; pass on borrow flag
+;;====================================================================
+	BNOP	RA,4
+	AND	1,A1,RET	; return borrow flag
+	.endasmfunc
+
+	.global	_bn_div_words
+_bn_div_words:
+	.asmfunc
+	LMBD	1,A6,A0		; leading zero bits in dv
+	LMBD	1,A4,A1		; leading zero bits in hi
+||	MVK	32,B0
+	CMPLTU	A1,A0,A2
+||	ADD	A0,B0,B0
+  [ A2]	BNOP	RA
+||[ A2]	MVK	-1,A4		; return overflow
+||[!A2]	MV	A4,A3		; reassign hi
+  [!A2]	MV	B4,A4		; reassign lo, will be quotient
+||[!A2]	MVC	B0,ILC
+  [!A2]	SHL	A6,A0,A6	; normalize dv
+||	MVK	1,A1
+
+  [!A2]	CMPLTU	A3,A6,A1	; hi<dv?
+||[!A2]	SHL	A4,1,A5:A4	; lo<<1
+  [!A1]	SUB	A3,A6,A3	; hi-=dv
+||[!A1]	OR	1,A4,A4
+  [!A2]	SHRU	A3,31,A1	; upper bit
+||[!A2]	ADDAH	A5,A3,A3	; hi<<1|lo>>31
+
+	SPLOOP	3
+  [!A1]	CMPLTU	A3,A6,A1	; hi<dv?
+||[ A1]	ZERO	A1
+||	SHL	A4,1,A5:A4	; lo<<1
+  [!A1]	SUB	A3,A6,A3	; hi-=dv
+||[!A1]	OR	1,A4,A4		; quotient
+	SHRU	A3,31,A1	; upper bit
+||	ADDAH	A5,A3,A3	; hi<<1|lo>>31
+	SPKERNEL
+
+	BNOP	RA,5
+	.endasmfunc
+
+;;====================================================================
+;; Not really Comba algorithm, just straightforward NxM... Dedicated
+;; fully unrolled real Comba implementations are asymptotically 2x
+;; faster, but naturally larger undertaking. Purpose of this exercise
+;; was rather to learn to master nested SPLOOPs...
+;;====================================================================
+	.global	_bn_sqr_comba8
+	.global	_bn_mul_comba8
+_bn_sqr_comba8:
+	MV	ARG1,ARG2
+_bn_mul_comba8:
+	.asmfunc
+	MVK	8,B0		; N, RILC
+||	MVK	8,A0		; M, outer loop counter
+||	MV	ARG1,A5		; copy ap
+||	MV	ARG0,B4		; copy rp
+||	ZERO	B19		; high part of accumulator
+	MVC	B0,RILC
+||	SUB	B0,2,B1		; N-2, initial ILC
+||	SUB	B0,1,B2		; const B2=N-1
+||	LDW	*A5++,B6	; ap[0]
+||	MV	A0,A3		; const A3=M
+sploopNxM?:			; for best performance arrange M<=N
+   [A0]	SPLOOPD	2		; 2*n+10
+||	MVC	B1,ILC
+||	ADDAW	B4,B0,B5
+||	ZERO	B7
+||	LDW	*A5++,A9	; pre-fetch ap[1]
+||	ZERO	A1
+||	SUB	A0,1,A0
+;;====================================================================
+;; SPLOOP from bn_mul_add_words, but with flipped A<>B register files.
+;; This is because of Advisory 15 from TI publication SPRZ247I.
+	LDW	*ARG2++,A7	; bp[i]
+	NOP	3
+   [A1]	LDW	*B5++,B7	; rp[i]
+	MPY32U	A7,B6,B17:B16
+	NOP	3
+	ADDU	B16,B7,B21:B20
+	ADDU	B19,B21:B20,B19:B18
+||	MV.S	B17,B23
+	SPKERNEL
+||	STW	B18,*B4++	; rp[i]
+||	ADD.S	B19,B23,B19
+;;====================================================================
+outer?:				; m*2*(n+1)+10
+	SUBAW	ARG2,A3,ARG2	; rewind bp to bp[0]
+	SPMASKR
+||	CMPGT	A0,1,A2		; done pre-fetching ap[i+1]?
+	MVD	A9,B6		; move through .M unit(*)
+   [A2]	LDW	*A5++,A9	; pre-fetch ap[i+1]
+	SUBAW	B5,B2,B5	; rewind rp to rp[1]
+	MVK	1,A1
+   [A0]	BNOP.S1	outer?,4
+|| [A0]	SUB.L	A0,1,A0
+	STW	B19,*B4--[B2]	; rewind rp tp rp[1]
+||	ZERO.S	B19		; high part of accumulator
+;; end of outer?
+	BNOP	RA,5		; return
+	.endasmfunc
+;; (*)	It should be noted that B6 is used as input to MPY32U in
+;;	chronologically next cycle in *preceding* SPLOOP iteration.
+;;	Normally such arrangement would require DINT, but at this
+;;	point SPLOOP is draining and interrupts are disabled
+;;	implicitly.
+
+	.global	_bn_sqr_comba4
+	.global	_bn_mul_comba4
+_bn_sqr_comba4:
+	MV	ARG1,ARG2
+_bn_mul_comba4:
+	.asmfunc
+	.if	0
+	BNOP	sploopNxM?,3
+	;; Above mentioned m*2*(n+1)+10 does not apply in n=m=4 case,
+	;; because of low-counter effect, when prologue phase finishes
+	;; before SPKERNEL instruction is reached. As result it's 25%
+	;; slower than expected...
+	MVK	4,B0		; N, RILC
+||	MVK	4,A0		; M, outer loop counter
+||	MV	ARG1,A5		; copy ap
+||	MV	ARG0,B4		; copy rp
+||	ZERO	B19		; high part of accumulator
+	MVC	B0,RILC
+||	SUB	B0,2,B1		; first ILC
+||	SUB	B0,1,B2		; const B2=N-1
+||	LDW	*A5++,B6	; ap[0]
+||	MV	A0,A3		; const A3=M
+	.else
+	;; This alternative is an exercise in fully unrolled Comba
+	;; algorithm implementation that operates at n*(n+1)+12, or
+	;; as little as 32 cycles...
+	LDW	*ARG1[0],B16	; a[0]
+||	LDW	*ARG2[0],A16	; b[0]
+	LDW	*ARG1[1],B17	; a[1]
+||	LDW	*ARG2[1],A17	; b[1]
+	LDW	*ARG1[2],B18	; a[2]
+||	LDW	*ARG2[2],A18	; b[2]
+	LDW	*ARG1[3],B19	; a[3]
+||	LDW	*ARG2[3],A19	; b[3]
+	NOP
+	MPY32U	A16,B16,A1:A0	; a[0]*b[0]
+	MPY32U	A17,B16,A23:A22	; a[0]*b[1]
+	MPY32U	A16,B17,A25:A24	; a[1]*b[0]
+	MPY32U	A16,B18,A27:A26	; a[2]*b[0]
+	STW	A0,*ARG0[0]
+||	MPY32U	A17,B17,A29:A28	; a[1]*b[1]
+	MPY32U	A18,B16,A31:A30	; a[0]*b[2]
+||	ADDU	A22,A1,A1:A0
+	MV	A23,B0
+||	MPY32U	A19,B16,A21:A20	; a[3]*b[0]
+||	ADDU	A24,A1:A0,A1:A0
+	ADDU	A25,B0,B1:B0
+||	STW	A0,*ARG0[1]
+||	MPY32U	A18,B17,A23:A22	; a[2]*b[1]
+||	ADDU	A26,A1,A9:A8
+	ADDU	A27,B1,B9:B8
+||	MPY32U	A17,B18,A25:A24	; a[1]*b[2]
+||	ADDU	A28,A9:A8,A9:A8
+	ADDU	A29,B9:B8,B9:B8
+||	MPY32U	A16,B19,A27:A26	; a[0]*b[3]
+||	ADDU	A30,A9:A8,A9:A8
+	ADDU	A31,B9:B8,B9:B8
+||	ADDU	B0,A9:A8,A9:A8
+	STW	A8,*ARG0[2]
+||	ADDU	A20,A9,A1:A0
+	ADDU	A21,B9,B1:B0
+||	MPY32U	A19,B17,A21:A20	; a[3]*b[1]
+||	ADDU	A22,A1:A0,A1:A0
+	ADDU	A23,B1:B0,B1:B0
+||	MPY32U	A18,B18,A23:A22	; a[2]*b[2]
+||	ADDU	A24,A1:A0,A1:A0
+	ADDU	A25,B1:B0,B1:B0
+||	MPY32U	A17,B19,A25:A24	; a[1]*b[3]
+||	ADDU	A26,A1:A0,A1:A0
+	ADDU	A27,B1:B0,B1:B0
+||	ADDU	B8,A1:A0,A1:A0
+	STW	A0,*ARG0[3]
+||	MPY32U	A19,B18,A27:A26	; a[3]*b[2]
+||	ADDU	A20,A1,A9:A8
+	ADDU	A21,B1,B9:B8
+||	MPY32U	A18,B19,A29:A28	; a[2]*b[3]
+||	ADDU	A22,A9:A8,A9:A8
+	ADDU	A23,B9:B8,B9:B8
+||	MPY32U	A19,B19,A31:A30	; a[3]*b[3]
+||	ADDU	A24,A9:A8,A9:A8
+	ADDU	A25,B9:B8,B9:B8
+||	ADDU	B0,A9:A8,A9:A8
+	STW	A8,*ARG0[4]
+||	ADDU	A26,A9,A1:A0
+	ADDU	A27,B9,B1:B0
+||	ADDU	A28,A1:A0,A1:A0
+	ADDU	A29,B1:B0,B1:B0
+||	BNOP	RA
+||	ADDU	B8,A1:A0,A1:A0
+	STW	A0,*ARG0[5]
+||	ADDU	A30,A1,A9:A8
+	ADD	A31,B1,B8
+	ADDU	B0,A9:A8,A9:A8	; removed || to avoid cross-path stall below
+	ADD	B8,A9,A9
+||	STW	A8,*ARG0[6]
+	STW	A9,*ARG0[7]
+	.endif
+	.endasmfunc
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/c64xplus-gf2m.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/c64xplus-gf2m.pl
@ -0,0 +1,160 @@
+#! /usr/bin/env perl
+# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# February 2012
+#
+# The module implements bn_GF2m_mul_2x2 polynomial multiplication
+# used in bn_gf2m.c. It's kind of low-hanging mechanical port from
+# C for the time being... The subroutine runs in 37 cycles, which is
+# 4.5x faster than compiler-generated code. Though comparison is
+# totally unfair, because this module utilizes Galois Field Multiply
+# instruction.
+
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+($rp,$a1,$a0,$b1,$b0)=("A4","B4","A6","B6","A8");   # argument vector
+
+($Alo,$Alox0,$Alox1,$Alox2,$Alox3)=map("A$_",(16..20));
+($Ahi,$Ahix0,$Ahix1,$Ahix2,$Ahix3)=map("B$_",(16..20));
+($B_0,$B_1,$B_2,$B_3)=("B5","A5","A7","B7");
+($A,$B)=($Alo,$B_1);
+$xFF="B1";
+
+sub mul_1x1_upper {
+my ($A,$B)=@_;
+$code.=<<___;
+	EXTU	$B,8,24,$B_2		; smash $B to 4 bytes
+||	AND	$B,$xFF,$B_0
+||	SHRU	$B,24,$B_3
+	SHRU	$A,16,   $Ahi		; smash $A to two halfwords
+||	EXTU	$A,16,16,$Alo
+
+	XORMPY	$Alo,$B_2,$Alox2	; 16x8 bits multiplication
+||	XORMPY	$Ahi,$B_2,$Ahix2
+||	EXTU	$B,16,24,$B_1
+	XORMPY	$Alo,$B_0,$Alox0
+||	XORMPY	$Ahi,$B_0,$Ahix0
+	XORMPY	$Alo,$B_3,$Alox3
+||	XORMPY	$Ahi,$B_3,$Ahix3
+	XORMPY	$Alo,$B_1,$Alox1
+||	XORMPY	$Ahi,$B_1,$Ahix1
+___
+}
+sub mul_1x1_merged {
+my ($OUTlo,$OUThi,$A,$B)=@_;
+$code.=<<___;
+	 EXTU	$B,8,24,$B_2		; smash $B to 4 bytes
+||	 AND	$B,$xFF,$B_0
+||	 SHRU	$B,24,$B_3
+	 SHRU	$A,16,   $Ahi		; smash $A to two halfwords
+||	 EXTU	$A,16,16,$Alo
+
+	XOR	$Ahix0,$Alox2,$Ahix0
+||	MV	$Ahix2,$OUThi
+||	 XORMPY	$Alo,$B_2,$Alox2
+	 XORMPY	$Ahi,$B_2,$Ahix2
+||	 EXTU	$B,16,24,$B_1
+||	 XORMPY	$Alo,$B_0,A1		; $Alox0
+	XOR	$Ahix1,$Alox3,$Ahix1
+||	SHL	$Ahix0,16,$OUTlo
+||	SHRU	$Ahix0,16,$Ahix0
+	XOR	$Alox0,$OUTlo,$OUTlo
+||	XOR	$Ahix0,$OUThi,$OUThi
+||	 XORMPY	$Ahi,$B_0,$Ahix0
+||	 XORMPY	$Alo,$B_3,$Alox3
+||	SHL	$Alox1,8,$Alox1
+||	SHL	$Ahix3,8,$Ahix3
+	XOR	$Alox1,$OUTlo,$OUTlo
+||	XOR	$Ahix3,$OUThi,$OUThi
+||	 XORMPY	$Ahi,$B_3,$Ahix3
+||	SHL	$Ahix1,24,$Alox1
+||	SHRU	$Ahix1,8, $Ahix1
+	XOR	$Alox1,$OUTlo,$OUTlo
+||	XOR	$Ahix1,$OUThi,$OUThi
+||	 XORMPY	$Alo,$B_1,$Alox1
+||	 XORMPY	$Ahi,$B_1,$Ahix1
+||	 MV	A1,$Alox0
+___
+}
+sub mul_1x1_lower {
+my ($OUTlo,$OUThi)=@_;
+$code.=<<___;
+	;NOP
+	XOR	$Ahix0,$Alox2,$Ahix0
+||	MV	$Ahix2,$OUThi
+	NOP
+	XOR	$Ahix1,$Alox3,$Ahix1
+||	SHL	$Ahix0,16,$OUTlo
+||	SHRU	$Ahix0,16,$Ahix0
+	XOR	$Alox0,$OUTlo,$OUTlo
+||	XOR	$Ahix0,$OUThi,$OUThi
+||	SHL	$Alox1,8,$Alox1
+||	SHL	$Ahix3,8,$Ahix3
+	XOR	$Alox1,$OUTlo,$OUTlo
+||	XOR	$Ahix3,$OUThi,$OUThi
+||	SHL	$Ahix1,24,$Alox1
+||	SHRU	$Ahix1,8, $Ahix1
+	XOR	$Alox1,$OUTlo,$OUTlo
+||	XOR	$Ahix1,$OUThi,$OUThi
+___
+}
+$code.=<<___;
+	.text
+
+	.if	.ASSEMBLER_VERSION<7000000
+	.asg	0,__TI_EABI__
+	.endif
+	.if	__TI_EABI__
+	.asg	bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2
+	.endif
+
+	.global	_bn_GF2m_mul_2x2
+_bn_GF2m_mul_2x2:
+	.asmfunc
+	MVK	0xFF,$xFF
+___
+	&mul_1x1_upper($a0,$b0);		# a0·b0
+$code.=<<___;
+||	MV	$b1,$B
+	MV	$a1,$A
+___
+	&mul_1x1_merged("A28","B28",$A,$B);	# a0·b0/a1·b1
+$code.=<<___;
+||	XOR	$b0,$b1,$B
+	XOR	$a0,$a1,$A
+___
+	&mul_1x1_merged("A31","B31",$A,$B);	# a1·b1/(a0+a1)·(b0+b1)
+$code.=<<___;
+	XOR	A28,A31,A29
+||	XOR	B28,B31,B29			; a0·b0+a1·b1
+___
+	&mul_1x1_lower("A30","B30");		# (a0+a1)·(b0+b1)
+$code.=<<___;
+||	BNOP	B3
+	XOR	A29,A30,A30
+||	XOR	B29,B30,B30			; (a0+a1)·(b0+b1)-a0·b0-a1·b1
+	XOR	B28,A30,A30
+||	STW	A28,*${rp}[0]
+	XOR	B30,A31,A31
+||	STW	A30,*${rp}[1]
+	STW	A31,*${rp}[2]
+	STW	B31,*${rp}[3]
+	.endasmfunc
+___
+
+print $code;
+close STDOUT;
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/co-586.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/co-586.pl
@ -0,0 +1,298 @@
+#! /usr/bin/env perl
+# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
+require "x86asm.pl";
+
+$output = pop;
+open STDOUT,">$output";
+
+&asm_init($ARGV[0]);
+
+&bn_mul_comba("bn_mul_comba8",8);
+&bn_mul_comba("bn_mul_comba4",4);
+&bn_sqr_comba("bn_sqr_comba8",8);
+&bn_sqr_comba("bn_sqr_comba4",4);
+
+&asm_finish();
+
+close STDOUT;
+
+sub mul_add_c
+	{
+	local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
+
+	# pos == -1 if eax and edx are pre-loaded, 0 to load from next
+	# words, and 1 if load return value
+
+	&comment("mul a[$ai]*b[$bi]");
+
+	# "eax" and "edx" will always be pre-loaded.
+	# &mov("eax",&DWP($ai*4,$a,"",0)) ;
+	# &mov("edx",&DWP($bi*4,$b,"",0));
+
+	&mul("edx");
+	&add($c0,"eax");
+	 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;	# laod next a
+	 &mov("eax",&wparam(0)) if $pos > 0;			# load r[]
+	 ###
+	&adc($c1,"edx");
+	 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0;	# laod next b
+	 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1;	# laod next b
+	 ###
+	&adc($c2,0);
+	 # is pos > 1, it means it is the last loop
+	 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0;		# save r[];
+	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;		# laod next a
+	}
+
+sub sqr_add_c
+	{
+	local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
+
+	# pos == -1 if eax and edx are pre-loaded, 0 to load from next
+	# words, and 1 if load return value
+
+	&comment("sqr a[$ai]*a[$bi]");
+
+	# "eax" and "edx" will always be pre-loaded.
+	# &mov("eax",&DWP($ai*4,$a,"",0)) ;
+	# &mov("edx",&DWP($bi*4,$b,"",0));
+
+	if ($ai == $bi)
+		{ &mul("eax");}
+	else
+		{ &mul("edx");}
+	&add($c0,"eax");
+	 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;	# load next a
+	 ###
+	&adc($c1,"edx");
+	 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
+	 ###
+	&adc($c2,0);
+	 # is pos > 1, it means it is the last loop
+	 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;		# save r[];
+	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;		# load next b
+	}
+
+sub sqr_add_c2
+	{
+	local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
+
+	# pos == -1 if eax and edx are pre-loaded, 0 to load from next
+	# words, and 1 if load return value
+
+	&comment("sqr a[$ai]*a[$bi]");
+
+	# "eax" and "edx" will always be pre-loaded.
+	# &mov("eax",&DWP($ai*4,$a,"",0)) ;
+	# &mov("edx",&DWP($bi*4,$a,"",0));
+
+	if ($ai == $bi)
+		{ &mul("eax");}
+	else
+		{ &mul("edx");}
+	&add("eax","eax");
+	 ###
+	&adc("edx","edx");
+	 ###
+	&adc($c2,0);
+	 &add($c0,"eax");
+	&adc($c1,"edx");
+	 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;	# load next a
+	 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;	# load next b
+	&adc($c2,0);
+	&mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;		# save r[];
+	 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
+	 ###
+	}
+
+sub bn_mul_comba
+	{
+	local($name,$num)=@_;
+	local($a,$b,$c0,$c1,$c2);
+	local($i,$as,$ae,$bs,$be,$ai,$bi);
+	local($tot,$end);
+
+	&function_begin_B($name,"");
+
+	$c0="ebx";
+	$c1="ecx";
+	$c2="ebp";
+	$a="esi";
+	$b="edi";
+
+	$as=0;
+	$ae=0;
+	$bs=0;
+	$be=0;
+	$tot=$num+$num-1;
+
+	&push("esi");
+	 &mov($a,&wparam(1));
+	&push("edi");
+	 &mov($b,&wparam(2));
+	&push("ebp");
+	 &push("ebx");
+
+	&xor($c0,$c0);
+	 &mov("eax",&DWP(0,$a,"",0));	# load the first word
+	&xor($c1,$c1);
+	 &mov("edx",&DWP(0,$b,"",0));	# load the first second
+
+	for ($i=0; $i<$tot; $i++)
+		{
+		$ai=$as;
+		$bi=$bs;
+		$end=$be+1;
+
+		&comment("################## Calculate word $i");
+
+		for ($j=$bs; $j<$end; $j++)
+			{
+			&xor($c2,$c2) if ($j == $bs);
+			if (($j+1) == $end)
+				{
+				$v=1;
+				$v=2 if (($i+1) == $tot);
+				}
+			else
+				{ $v=0; }
+			if (($j+1) != $end)
+				{
+				$na=($ai-1);
+				$nb=($bi+1);
+				}
+			else
+				{
+				$na=$as+($i < ($num-1));
+				$nb=$bs+($i >= ($num-1));
+				}
+#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
+			&mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
+			if ($v)
+				{
+				&comment("saved r[$i]");
+				# &mov("eax",&wparam(0));
+				# &mov(&DWP($i*4,"eax","",0),$c0);
+				($c0,$c1,$c2)=($c1,$c2,$c0);
+				}
+			$ai--;
+			$bi++;
+			}
+		$as++ if ($i < ($num-1));
+		$ae++ if ($i >= ($num-1));
+
+		$bs++ if ($i >= ($num-1));
+		$be++ if ($i < ($num-1));
+		}
+	&comment("save r[$i]");
+	# &mov("eax",&wparam(0));
+	&mov(&DWP($i*4,"eax","",0),$c0);
+
+	&pop("ebx");
+	&pop("ebp");
+	&pop("edi");
+	&pop("esi");
+	&ret();
+	&function_end_B($name);
+	}
+
+sub bn_sqr_comba
+	{
+	local($name,$num)=@_;
+	local($r,$a,$c0,$c1,$c2)=@_;
+	local($i,$as,$ae,$bs,$be,$ai,$bi);
+	local($b,$tot,$end,$half);
+
+	&function_begin_B($name,"");
+
+	$c0="ebx";
+	$c1="ecx";
+	$c2="ebp";
+	$a="esi";
+	$r="edi";
+
+	&push("esi");
+	 &push("edi");
+	&push("ebp");
+	 &push("ebx");
+	&mov($r,&wparam(0));
+	 &mov($a,&wparam(1));
+	&xor($c0,$c0);
+	 &xor($c1,$c1);
+	&mov("eax",&DWP(0,$a,"",0)); # load the first word
+
+	$as=0;
+	$ae=0;
+	$bs=0;
+	$be=0;
+	$tot=$num+$num-1;
+
+	for ($i=0; $i<$tot; $i++)
+		{
+		$ai=$as;
+		$bi=$bs;
+		$end=$be+1;
+
+		&comment("############### Calculate word $i");
+		for ($j=$bs; $j<$end; $j++)
+			{
+			&xor($c2,$c2) if ($j == $bs);
+			if (($ai-1) < ($bi+1))
+				{
+				$v=1;
+				$v=2 if ($i+1) == $tot;
+				}
+			else
+				{ $v=0; }
+			if (!$v)
+				{
+				$na=$ai-1;
+				$nb=$bi+1;
+				}
+			else
+				{
+				$na=$as+($i < ($num-1));
+				$nb=$bs+($i >= ($num-1));
+				}
+			if ($ai == $bi)
+				{
+				&sqr_add_c($r,$a,$ai,$bi,
+					$c0,$c1,$c2,$v,$i,$na,$nb);
+				}
+			else
+				{
+				&sqr_add_c2($r,$a,$ai,$bi,
+					$c0,$c1,$c2,$v,$i,$na,$nb);
+				}
+			if ($v)
+				{
+				&comment("saved r[$i]");
+				#&mov(&DWP($i*4,$r,"",0),$c0);
+				($c0,$c1,$c2)=($c1,$c2,$c0);
+				last;
+				}
+			$ai--;
+			$bi++;
+			}
+		$as++ if ($i < ($num-1));
+		$ae++ if ($i >= ($num-1));
+
+		$bs++ if ($i >= ($num-1));
+		$be++ if ($i < ($num-1));
+		}
+	&mov(&DWP($i*4,$r,"",0),$c0);
+	&pop("ebx");
+	&pop("ebp");
+	&pop("edi");
+	&pop("esi");
+	&ret();
+	&function_end_B($name);
+	}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ia64-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ia64-mont.pl
@ -0,0 +1,860 @@
+#! /usr/bin/env perl
+# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# January 2010
+#
+# "Teaser" Montgomery multiplication module for IA-64. There are
+# several possibilities for improvement:
+#
+# - modulo-scheduling outer loop would eliminate quite a number of
+#   stalls after ldf8, xma and getf.sig outside inner loop and
+#   improve shorter key performance;
+# - shorter vector support [with input vectors being fetched only
+#   once] should be added;
+# - 2x unroll with help of n0[1] would make the code scalable on
+#   "wider" IA-64, "wider" than Itanium 2 that is, which is not of
+#   acute interest, because upcoming Tukwila's individual cores are
+#   reportedly based on Itanium 2 design;
+# - dedicated squaring procedure(?);
+#
+# January 2010
+#
+# Shorter vector support is implemented by zero-padding ap and np
+# vectors up to 8 elements, or 512 bits. This means that 256-bit
+# inputs will be processed only 2 times faster than 512-bit inputs,
+# not 4 [as one would expect, because algorithm complexity is n^2].
+# The reason for padding is that inputs shorter than 512 bits won't
+# be processed faster anyway, because minimal critical path of the
+# core loop happens to match 512-bit timing. Either way, it resulted
+# in >100% improvement of 512-bit RSA sign benchmark and 50% - of
+# 1024-bit one [in comparison to original version of *this* module].
+#
+# So far 'openssl speed rsa dsa' output on 900MHz Itanium 2 *with*
+# this module is:
+#                   sign    verify    sign/s verify/s
+# rsa  512 bits 0.000290s 0.000024s   3452.8  42031.4
+# rsa 1024 bits 0.000793s 0.000058s   1261.7  17172.0
+# rsa 2048 bits 0.005908s 0.000148s    169.3   6754.0
+# rsa 4096 bits 0.033456s 0.000469s     29.9   2133.6
+# dsa  512 bits 0.000253s 0.000198s   3949.9   5057.0
+# dsa 1024 bits 0.000585s 0.000607s   1708.4   1647.4
+# dsa 2048 bits 0.001453s 0.001703s    688.1    587.4
+#
+# ... and *without* (but still with ia64.S):
+#
+# rsa  512 bits 0.000670s 0.000041s   1491.8  24145.5
+# rsa 1024 bits 0.001988s 0.000080s    502.9  12499.3
+# rsa 2048 bits 0.008702s 0.000189s    114.9   5293.9
+# rsa 4096 bits 0.043860s 0.000533s     22.8   1875.9
+# dsa  512 bits 0.000441s 0.000427s   2265.3   2340.6
+# dsa 1024 bits 0.000823s 0.000867s   1215.6   1153.2
+# dsa 2048 bits 0.001894s 0.002179s    528.1    458.9
+#
+# As it can be seen, RSA sign performance improves by 130-30%,
+# hereafter less for longer keys, while verify - by 74-13%.
+# DSA performance improves by 115-30%.
+
+$output=pop;
+
+if ($^O eq "hpux") {
+    $ADDP="addp4";
+    for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); }
+} else { $ADDP="add"; }
+
+$code=<<___;
+.explicit
+.text
+
+// int bn_mul_mont (BN_ULONG *rp,const BN_ULONG *ap,
+//		    const BN_ULONG *bp,const BN_ULONG *np,
+//		    const BN_ULONG *n0p,int num);
+.align	64
+.global	bn_mul_mont#
+.proc	bn_mul_mont#
+bn_mul_mont:
+	.prologue
+	.body
+{ .mmi;	cmp4.le		p6,p7=2,r37;;
+(p6)	cmp4.lt.unc	p8,p9=8,r37
+	mov		ret0=r0		};;
+{ .bbb;
+(p9)	br.cond.dptk.many	bn_mul_mont_8
+(p8)	br.cond.dpnt.many	bn_mul_mont_general
+(p7)	br.ret.spnt.many	b0	};;
+.endp	bn_mul_mont#
+
+prevfs=r2;	prevpr=r3;	prevlc=r10;	prevsp=r11;
+
+rptr=r8;	aptr=r9;	bptr=r14;	nptr=r15;
+tptr=r16;	// &tp[0]
+tp_1=r17;	// &tp[-1]
+num=r18;	len=r19;	lc=r20;
+topbit=r21;	// carry bit from tmp[num]
+
+n0=f6;
+m0=f7;
+bi=f8;
+
+.align	64
+.local	bn_mul_mont_general#
+.proc	bn_mul_mont_general#
+bn_mul_mont_general:
+	.prologue
+{ .mmi;	.save	ar.pfs,prevfs
+	alloc	prevfs=ar.pfs,6,2,0,8
+	$ADDP	aptr=0,in1
+	.save	ar.lc,prevlc
+	mov	prevlc=ar.lc		}
+{ .mmi;	.vframe	prevsp
+	mov	prevsp=sp
+	$ADDP	bptr=0,in2
+	.save	pr,prevpr
+	mov	prevpr=pr		};;
+
+	.body
+	.rotf		alo[6],nlo[4],ahi[8],nhi[6]
+	.rotr		a[3],n[3],t[2]
+
+{ .mmi;	ldf8		bi=[bptr],8		// (*bp++)
+	ldf8		alo[4]=[aptr],16	// ap[0]
+	$ADDP		r30=8,in1	};;
+{ .mmi;	ldf8		alo[3]=[r30],16		// ap[1]
+	ldf8		alo[2]=[aptr],16	// ap[2]
+	$ADDP		in4=0,in4	};;
+{ .mmi;	ldf8		alo[1]=[r30]		// ap[3]
+	ldf8		n0=[in4]		// n0
+	$ADDP		rptr=0,in0		}
+{ .mmi;	$ADDP		nptr=0,in3
+	mov		r31=16
+	zxt4		num=in5		};;
+{ .mmi;	ldf8		nlo[2]=[nptr],8		// np[0]
+	shladd		len=num,3,r0
+	shladd		r31=num,3,r31	};;
+{ .mmi;	ldf8		nlo[1]=[nptr],8		// np[1]
+	add		lc=-5,num
+	sub		r31=sp,r31	};;
+{ .mfb;	and		sp=-16,r31		// alloca
+	xmpy.hu		ahi[2]=alo[4],bi	// ap[0]*bp[0]
+	nop.b		0		}
+{ .mfb;	nop.m		0
+	xmpy.lu		alo[4]=alo[4],bi
+	brp.loop.imp	.L1st_ctop,.L1st_cend-16
+					};;
+{ .mfi;	nop.m		0
+	xma.hu		ahi[1]=alo[3],bi,ahi[2]	// ap[1]*bp[0]
+	add		tp_1=8,sp	}
+{ .mfi;	nop.m		0
+	xma.lu		alo[3]=alo[3],bi,ahi[2]
+	mov		pr.rot=0x20001f<<16
+			// ------^----- (p40) at first (p23)
+			// ----------^^ p[16:20]=1
+					};;
+{ .mfi;	nop.m		0
+	xmpy.lu		m0=alo[4],n0		// (ap[0]*bp[0])*n0
+	mov		ar.lc=lc	}
+{ .mfi;	nop.m		0
+	fcvt.fxu.s1	nhi[1]=f0
+	mov		ar.ec=8		};;
+
+.align	32
+.L1st_ctop:
+.pred.rel	"mutex",p40,p42
+{ .mfi;	(p16)	ldf8		alo[0]=[aptr],8		    // *(aptr++)
+	(p18)	xma.hu		ahi[0]=alo[2],bi,ahi[1]
+	(p40)	add		n[2]=n[2],a[2]		}   // (p23)					}
+{ .mfi;	(p18)	ldf8		nlo[0]=[nptr],8		    // *(nptr++)(p16)
+	(p18)	xma.lu		alo[2]=alo[2],bi,ahi[1]
+	(p42)	add		n[2]=n[2],a[2],1	};; // (p23)
+{ .mfi;	(p21)	getf.sig	a[0]=alo[5]
+	(p20)	xma.hu		nhi[0]=nlo[2],m0,nhi[1]
+	(p42)	cmp.leu		p41,p39=n[2],a[2]   	}   // (p23)
+{ .mfi;	(p23)	st8		[tp_1]=n[2],8
+	(p20)	xma.lu		nlo[2]=nlo[2],m0,nhi[1]
+	(p40)	cmp.ltu		p41,p39=n[2],a[2]	}   // (p23)
+{ .mmb;	(p21)	getf.sig	n[0]=nlo[3]
+	(p16)	nop.m		0
+	br.ctop.sptk	.L1st_ctop			};;
+.L1st_cend:
+
+{ .mmi;	getf.sig	a[0]=ahi[6]		// (p24)
+	getf.sig	n[0]=nhi[4]
+	add		num=-1,num	};;	// num--
+{ .mmi;	.pred.rel	"mutex",p40,p42
+(p40)	add		n[0]=n[0],a[0]
+(p42)	add		n[0]=n[0],a[0],1
+	sub		aptr=aptr,len	};;	// rewind
+{ .mmi;	.pred.rel	"mutex",p40,p42
+(p40)	cmp.ltu		p41,p39=n[0],a[0]
+(p42)	cmp.leu		p41,p39=n[0],a[0]
+	sub		nptr=nptr,len	};;
+{ .mmi;	.pred.rel	"mutex",p39,p41
+(p39)	add		topbit=r0,r0
+(p41)	add		topbit=r0,r0,1
+	nop.i		0		}
+{ .mmi;	st8		[tp_1]=n[0]
+	add		tptr=16,sp
+	add		tp_1=8,sp	};;
+
+.Louter:
+{ .mmi;	ldf8		bi=[bptr],8		// (*bp++)
+	ldf8		ahi[3]=[tptr]		// tp[0]
+	add		r30=8,aptr	};;
+{ .mmi;	ldf8		alo[4]=[aptr],16	// ap[0]
+	ldf8		alo[3]=[r30],16		// ap[1]
+	add		r31=8,nptr	};;
+{ .mfb;	ldf8		alo[2]=[aptr],16	// ap[2]
+	xma.hu		ahi[2]=alo[4],bi,ahi[3]	// ap[0]*bp[i]+tp[0]
+	brp.loop.imp	.Linner_ctop,.Linner_cend-16
+					}
+{ .mfb;	ldf8		alo[1]=[r30]		// ap[3]
+	xma.lu		alo[4]=alo[4],bi,ahi[3]
+	clrrrb.pr			};;
+{ .mfi;	ldf8		nlo[2]=[nptr],16	// np[0]
+	xma.hu		ahi[1]=alo[3],bi,ahi[2]	// ap[1]*bp[i]
+	nop.i		0		}
+{ .mfi;	ldf8		nlo[1]=[r31]		// np[1]
+	xma.lu		alo[3]=alo[3],bi,ahi[2]
+	mov		pr.rot=0x20101f<<16
+			// ------^----- (p40) at first (p23)
+			// --------^--- (p30) at first (p22)
+			// ----------^^ p[16:20]=1
+					};;
+{ .mfi;	st8		[tptr]=r0		// tp[0] is already accounted
+	xmpy.lu		m0=alo[4],n0		// (ap[0]*bp[i]+tp[0])*n0
+	mov		ar.lc=lc	}
+{ .mfi;
+	fcvt.fxu.s1	nhi[1]=f0
+	mov		ar.ec=8		};;
+
+// This loop spins in 4*(n+7) ticks on Itanium 2 and should spin in
+// 7*(n+7) ticks on Itanium (the one codenamed Merced). Factor of 7
+// in latter case accounts for two-tick pipeline stall, which means
+// that its performance would be ~20% lower than optimal one. No
+// attempt was made to address this, because original Itanium is
+// hardly represented out in the wild...
+.align	32
+.Linner_ctop:
+.pred.rel	"mutex",p40,p42
+.pred.rel	"mutex",p30,p32
+{ .mfi;	(p16)	ldf8		alo[0]=[aptr],8		    // *(aptr++)
+	(p18)	xma.hu		ahi[0]=alo[2],bi,ahi[1]
+	(p40)	add		n[2]=n[2],a[2]		}   // (p23)
+{ .mfi;	(p16)	nop.m		0
+	(p18)	xma.lu		alo[2]=alo[2],bi,ahi[1]
+	(p42)	add		n[2]=n[2],a[2],1	};; // (p23)
+{ .mfi;	(p21)	getf.sig	a[0]=alo[5]
+	(p16)	nop.f		0
+	(p40)	cmp.ltu		p41,p39=n[2],a[2]	}   // (p23)
+{ .mfi;	(p21)	ld8		t[0]=[tptr],8
+	(p16)	nop.f		0
+	(p42)	cmp.leu		p41,p39=n[2],a[2]	};; // (p23)
+{ .mfi;	(p18)	ldf8		nlo[0]=[nptr],8		    // *(nptr++)
+	(p20)	xma.hu		nhi[0]=nlo[2],m0,nhi[1]
+	(p30)	add		a[1]=a[1],t[1]		}   // (p22)
+{ .mfi;	(p16)	nop.m		0
+	(p20)	xma.lu		nlo[2]=nlo[2],m0,nhi[1]
+	(p32)	add		a[1]=a[1],t[1],1	};; // (p22)
+{ .mmi;	(p21)	getf.sig	n[0]=nlo[3]
+	(p16)	nop.m		0
+	(p30)	cmp.ltu		p31,p29=a[1],t[1]	}   // (p22)
+{ .mmb;	(p23)	st8		[tp_1]=n[2],8
+	(p32)	cmp.leu		p31,p29=a[1],t[1]	    // (p22)
+	br.ctop.sptk	.Linner_ctop			};;
+.Linner_cend:
+
+{ .mmi;	getf.sig	a[0]=ahi[6]		// (p24)
+	getf.sig	n[0]=nhi[4]
+	nop.i		0		};;
+
+{ .mmi;	.pred.rel	"mutex",p31,p33
+(p31)	add		a[0]=a[0],topbit
+(p33)	add		a[0]=a[0],topbit,1
+	mov		topbit=r0	};;
+{ .mfi; .pred.rel	"mutex",p31,p33
+(p31)	cmp.ltu		p32,p30=a[0],topbit
+(p33)	cmp.leu		p32,p30=a[0],topbit
+					}
+{ .mfi;	.pred.rel	"mutex",p40,p42
+(p40)	add		n[0]=n[0],a[0]
+(p42)	add		n[0]=n[0],a[0],1
+					};;
+{ .mmi;	.pred.rel	"mutex",p44,p46
+(p40)	cmp.ltu		p41,p39=n[0],a[0]
+(p42)	cmp.leu		p41,p39=n[0],a[0]
+(p32)	add		topbit=r0,r0,1	}
+
+{ .mmi;	st8		[tp_1]=n[0],8
+	cmp4.ne		p6,p0=1,num
+	sub		aptr=aptr,len	};;	// rewind
+{ .mmi;	sub		nptr=nptr,len
+(p41)	add		topbit=r0,r0,1
+	add		tptr=16,sp	}
+{ .mmb;	add		tp_1=8,sp
+	add		num=-1,num		// num--
+(p6)	br.cond.sptk.many	.Louter	};;
+
+{ .mbb;	add		lc=4,lc
+	brp.loop.imp	.Lsub_ctop,.Lsub_cend-16
+	clrrrb.pr			};;
+{ .mii;	nop.m		0
+	mov		pr.rot=0x10001<<16
+			// ------^---- (p33) at first (p17)
+	mov		ar.lc=lc	}
+{ .mii;	nop.m		0
+	mov		ar.ec=3
+	nop.i		0		};;
+
+.Lsub_ctop:
+.pred.rel	"mutex",p33,p35
+{ .mfi;	(p16)	ld8		t[0]=[tptr],8		    // t=*(tp++)
+	(p16)	nop.f		0
+	(p33)	sub		n[1]=t[1],n[1]		}   // (p17)
+{ .mfi;	(p16)	ld8		n[0]=[nptr],8		    // n=*(np++)
+	(p16)	nop.f		0
+	(p35)	sub		n[1]=t[1],n[1],1	};; // (p17)
+{ .mib;	(p18)	st8		[rptr]=n[2],8		    // *(rp++)=r
+	(p33)	cmp.gtu		p34,p32=n[1],t[1]	    // (p17)
+	(p18)	nop.b		0			}
+{ .mib;	(p18)	nop.m		0
+	(p35)	cmp.geu		p34,p32=n[1],t[1]	    // (p17)
+	br.ctop.sptk	.Lsub_ctop			};;
+.Lsub_cend:
+
+{ .mmb;	.pred.rel	"mutex",p34,p36
+(p34)	sub	topbit=topbit,r0	// (p19)
+(p36)	sub	topbit=topbit,r0,1
+	brp.loop.imp	.Lcopy_ctop,.Lcopy_cend-16
+					}
+{ .mmb;	sub	rptr=rptr,len		// rewind
+	sub	tptr=tptr,len
+	clrrrb.pr			};;
+{ .mmi;	mov	aptr=rptr
+	mov	bptr=tptr
+	mov	pr.rot=1<<16		};;
+{ .mii;	cmp.eq	p0,p6=topbit,r0
+	mov	ar.lc=lc
+	mov	ar.ec=2			};;
+
+.Lcopy_ctop:
+{ .mmi;	(p16)	ld8	a[0]=[aptr],8
+	(p16)	ld8	t[0]=[bptr],8
+	(p6)	mov	a[1]=t[1]	};;	// (p17)
+{ .mmb;	(p17)	st8	[rptr]=a[1],8
+	(p17)	st8	[tptr]=r0,8
+	br.ctop.sptk	.Lcopy_ctop	};;
+.Lcopy_cend:
+
+{ .mmi;	mov		ret0=1			// signal "handled"
+	rum		1<<5			// clear um.mfh
+	mov		ar.lc=prevlc	}
+{ .mib;	.restore	sp
+	mov		sp=prevsp
+	mov		pr=prevpr,0x1ffff
+	br.ret.sptk.many	b0	};;
+.endp	bn_mul_mont_general#
+
+a1=r16;  a2=r17;  a3=r18;  a4=r19;  a5=r20;  a6=r21;  a7=r22;  a8=r23;
+n1=r24;  n2=r25;  n3=r26;  n4=r27;  n5=r28;  n6=r29;  n7=r30;  n8=r31;
+t0=r15;
+
+ai0=f8;  ai1=f9;  ai2=f10; ai3=f11; ai4=f12; ai5=f13; ai6=f14; ai7=f15;
+ni0=f16; ni1=f17; ni2=f18; ni3=f19; ni4=f20; ni5=f21; ni6=f22; ni7=f23;
+
+.align	64
+.skip	48		// aligns loop body
+.local	bn_mul_mont_8#
+.proc	bn_mul_mont_8#
+bn_mul_mont_8:
+	.prologue
+{ .mmi;	.save		ar.pfs,prevfs
+	alloc		prevfs=ar.pfs,6,2,0,8
+	.vframe		prevsp
+	mov		prevsp=sp
+	.save		ar.lc,prevlc
+	mov		prevlc=ar.lc	}
+{ .mmi;	add		r17=-6*16,sp
+	add		sp=-7*16,sp
+	.save		pr,prevpr
+	mov		prevpr=pr	};;
+
+{ .mmi;	.save.gf	0,0x10
+	stf.spill	[sp]=f16,-16
+	.save.gf	0,0x20
+	stf.spill	[r17]=f17,32
+	add		r16=-5*16,prevsp};;
+{ .mmi;	.save.gf	0,0x40
+	stf.spill	[r16]=f18,32
+	.save.gf	0,0x80
+	stf.spill	[r17]=f19,32
+	$ADDP		aptr=0,in1	};;
+{ .mmi;	.save.gf	0,0x100
+	stf.spill	[r16]=f20,32
+	.save.gf	0,0x200
+	stf.spill	[r17]=f21,32
+	$ADDP		r29=8,in1	};;
+{ .mmi;	.save.gf	0,0x400
+	stf.spill	[r16]=f22
+	.save.gf	0,0x800
+	stf.spill	[r17]=f23
+	$ADDP		rptr=0,in0	};;
+
+	.body
+	.rotf		bj[8],mj[2],tf[2],alo[10],ahi[10],nlo[10],nhi[10]
+	.rotr		t[8]
+
+// load input vectors padding them to 8 elements
+{ .mmi;	ldf8		ai0=[aptr],16		// ap[0]
+	ldf8		ai1=[r29],16		// ap[1]
+	$ADDP		bptr=0,in2	}
+{ .mmi;	$ADDP		r30=8,in2
+	$ADDP		nptr=0,in3
+	$ADDP		r31=8,in3	};;
+{ .mmi;	ldf8		bj[7]=[bptr],16		// bp[0]
+	ldf8		bj[6]=[r30],16		// bp[1]
+	cmp4.le		p4,p5=3,in5	}
+{ .mmi;	ldf8		ni0=[nptr],16		// np[0]
+	ldf8		ni1=[r31],16		// np[1]
+	cmp4.le		p6,p7=4,in5	};;
+
+{ .mfi;	(p4)ldf8	ai2=[aptr],16		// ap[2]
+	(p5)fcvt.fxu	ai2=f0
+	cmp4.le		p8,p9=5,in5	}
+{ .mfi;	(p6)ldf8	ai3=[r29],16		// ap[3]
+	(p7)fcvt.fxu	ai3=f0
+	cmp4.le		p10,p11=6,in5	}
+{ .mfi;	(p4)ldf8	bj[5]=[bptr],16		// bp[2]
+	(p5)fcvt.fxu	bj[5]=f0
+	cmp4.le		p12,p13=7,in5	}
+{ .mfi;	(p6)ldf8	bj[4]=[r30],16		// bp[3]
+	(p7)fcvt.fxu	bj[4]=f0
+	cmp4.le		p14,p15=8,in5	}
+{ .mfi;	(p4)ldf8	ni2=[nptr],16		// np[2]
+	(p5)fcvt.fxu	ni2=f0
+	addp4		r28=-1,in5	}
+{ .mfi;	(p6)ldf8	ni3=[r31],16		// np[3]
+	(p7)fcvt.fxu	ni3=f0
+	$ADDP		in4=0,in4	};;
+
+{ .mfi;	ldf8		n0=[in4]
+	fcvt.fxu	tf[1]=f0
+	nop.i		0		}
+
+{ .mfi;	(p8)ldf8	ai4=[aptr],16		// ap[4]
+	(p9)fcvt.fxu	ai4=f0
+	mov		t[0]=r0		}
+{ .mfi;	(p10)ldf8	ai5=[r29],16		// ap[5]
+	(p11)fcvt.fxu	ai5=f0
+	mov		t[1]=r0		}
+{ .mfi;	(p8)ldf8	bj[3]=[bptr],16		// bp[4]
+	(p9)fcvt.fxu	bj[3]=f0
+	mov		t[2]=r0		}
+{ .mfi;	(p10)ldf8	bj[2]=[r30],16		// bp[5]
+	(p11)fcvt.fxu	bj[2]=f0
+	mov		t[3]=r0		}
+{ .mfi;	(p8)ldf8	ni4=[nptr],16		// np[4]
+	(p9)fcvt.fxu	ni4=f0
+	mov		t[4]=r0		}
+{ .mfi;	(p10)ldf8	ni5=[r31],16		// np[5]
+	(p11)fcvt.fxu	ni5=f0
+	mov		t[5]=r0		};;
+
+{ .mfi;	(p12)ldf8	ai6=[aptr],16		// ap[6]
+	(p13)fcvt.fxu	ai6=f0
+	mov		t[6]=r0		}
+{ .mfi;	(p14)ldf8	ai7=[r29],16		// ap[7]
+	(p15)fcvt.fxu	ai7=f0
+	mov		t[7]=r0		}
+{ .mfi;	(p12)ldf8	bj[1]=[bptr],16		// bp[6]
+	(p13)fcvt.fxu	bj[1]=f0
+	mov		ar.lc=r28	}
+{ .mfi;	(p14)ldf8	bj[0]=[r30],16		// bp[7]
+	(p15)fcvt.fxu	bj[0]=f0
+	mov		ar.ec=1		}
+{ .mfi;	(p12)ldf8	ni6=[nptr],16		// np[6]
+	(p13)fcvt.fxu	ni6=f0
+	mov		pr.rot=1<<16	}
+{ .mfb;	(p14)ldf8	ni7=[r31],16		// np[7]
+	(p15)fcvt.fxu	ni7=f0
+	brp.loop.imp	.Louter_8_ctop,.Louter_8_cend-16
+					};;
+
+// The loop is scheduled for 32*n ticks on Itanium 2. Actual attempt
+// to measure with help of Interval Time Counter indicated that the
+// factor is a tad higher: 33 or 34, if not 35. Exact measurement and
+// addressing the issue is problematic, because I don't have access
+// to platform-specific instruction-level profiler. On Itanium it
+// should run in 56*n ticks, because of higher xma latency...
+.Louter_8_ctop:
+	.pred.rel		"mutex",p40,p42
+	.pred.rel		"mutex",p48,p50
+{ .mfi;	(p16)	nop.m		0			// 0:
+	(p16)	xma.hu		ahi[0]=ai0,bj[7],tf[1]	//	ap[0]*b[i]+t[0]
+	(p40)	add		a3=a3,n3	}	//	(p17) a3+=n3
+{ .mfi;	(p42)	add		a3=a3,n3,1
+	(p16)	xma.lu		alo[0]=ai0,bj[7],tf[1]
+	(p16)	nop.i		0		};;
+{ .mii;	(p17)	getf.sig	a7=alo[8]		// 1:
+	(p48)	add		t[6]=t[6],a3		//	(p17) t[6]+=a3
+	(p50)	add		t[6]=t[6],a3,1	};;
+{ .mfi;	(p17)	getf.sig	a8=ahi[8]		// 2:
+	(p17)	xma.hu		nhi[7]=ni6,mj[1],nhi[6]	//	np[6]*m0
+	(p40)	cmp.ltu		p43,p41=a3,n3	}
+{ .mfi;	(p42)	cmp.leu		p43,p41=a3,n3
+	(p17)	xma.lu		nlo[7]=ni6,mj[1],nhi[6]
+	(p16)	nop.i		0		};;
+{ .mii;	(p17)	getf.sig	n5=nlo[6]		// 3:
+	(p48)	cmp.ltu		p51,p49=t[6],a3
+	(p50)	cmp.leu		p51,p49=t[6],a3	};;
+	.pred.rel		"mutex",p41,p43
+	.pred.rel		"mutex",p49,p51
+{ .mfi;	(p16)	nop.m		0			// 4:
+	(p16)	xma.hu		ahi[1]=ai1,bj[7],ahi[0]	//	ap[1]*b[i]
+	(p41)	add		a4=a4,n4	}	//	(p17) a4+=n4
+{ .mfi;	(p43)	add		a4=a4,n4,1
+	(p16)	xma.lu		alo[1]=ai1,bj[7],ahi[0]
+	(p16)	nop.i		0		};;
+{ .mfi;	(p49)	add		t[5]=t[5],a4		// 5:	(p17) t[5]+=a4
+	(p16)	xmpy.lu		mj[0]=alo[0],n0		//	(ap[0]*b[i]+t[0])*n0
+	(p51)	add		t[5]=t[5],a4,1	};;
+{ .mfi;	(p16)	nop.m		0			// 6:
+	(p17)	xma.hu		nhi[8]=ni7,mj[1],nhi[7]	//	np[7]*m0
+	(p41)	cmp.ltu		p42,p40=a4,n4	}
+{ .mfi;	(p43)	cmp.leu		p42,p40=a4,n4
+	(p17)	xma.lu		nlo[8]=ni7,mj[1],nhi[7]
+	(p16)	nop.i		0		};;
+{ .mii;	(p17)	getf.sig	n6=nlo[7]		// 7:
+	(p49)	cmp.ltu		p50,p48=t[5],a4
+	(p51)	cmp.leu		p50,p48=t[5],a4	};;
+	.pred.rel		"mutex",p40,p42
+	.pred.rel		"mutex",p48,p50
+{ .mfi;	(p16)	nop.m		0			// 8:
+	(p16)	xma.hu		ahi[2]=ai2,bj[7],ahi[1]	//	ap[2]*b[i]
+	(p40)	add		a5=a5,n5	}	//	(p17) a5+=n5
+{ .mfi;	(p42)	add		a5=a5,n5,1
+	(p16)	xma.lu		alo[2]=ai2,bj[7],ahi[1]
+	(p16)	nop.i		0		};;
+{ .mii;	(p16)	getf.sig	a1=alo[1]		// 9:
+	(p48)	add		t[4]=t[4],a5		//	p(17) t[4]+=a5
+	(p50)	add		t[4]=t[4],a5,1	};;
+{ .mfi;	(p16)	nop.m		0			// 10:
+	(p16)	xma.hu		nhi[0]=ni0,mj[0],alo[0]	//	np[0]*m0
+	(p40)	cmp.ltu		p43,p41=a5,n5	}
+{ .mfi;	(p42)	cmp.leu		p43,p41=a5,n5
+	(p16)	xma.lu		nlo[0]=ni0,mj[0],alo[0]
+	(p16)	nop.i		0		};;
+{ .mii;	(p17)	getf.sig	n7=nlo[8]		// 11:
+	(p48)	cmp.ltu		p51,p49=t[4],a5
+	(p50)	cmp.leu		p51,p49=t[4],a5	};;
+	.pred.rel		"mutex",p41,p43
+	.pred.rel		"mutex",p49,p51
+{ .mfi;	(p17)	getf.sig	n8=nhi[8]		// 12:
+	(p16)	xma.hu		ahi[3]=ai3,bj[7],ahi[2]	//	ap[3]*b[i]
+	(p41)	add		a6=a6,n6	}	//	(p17) a6+=n6
+{ .mfi;	(p43)	add		a6=a6,n6,1
+	(p16)	xma.lu		alo[3]=ai3,bj[7],ahi[2]
+	(p16)	nop.i		0		};;
+{ .mii;	(p16)	getf.sig	a2=alo[2]		// 13:
+	(p49)	add		t[3]=t[3],a6		//	(p17) t[3]+=a6
+	(p51)	add		t[3]=t[3],a6,1	};;
+{ .mfi;	(p16)	nop.m		0			// 14:
+	(p16)	xma.hu		nhi[1]=ni1,mj[0],nhi[0]	//	np[1]*m0
+	(p41)	cmp.ltu		p42,p40=a6,n6	}
+{ .mfi;	(p43)	cmp.leu		p42,p40=a6,n6
+	(p16)	xma.lu		nlo[1]=ni1,mj[0],nhi[0]
+	(p16)	nop.i		0		};;
+{ .mii;	(p16)	nop.m		0			// 15:
+	(p49)	cmp.ltu		p50,p48=t[3],a6
+	(p51)	cmp.leu		p50,p48=t[3],a6	};;
+	.pred.rel		"mutex",p40,p42
+	.pred.rel		"mutex",p48,p50
+{ .mfi;	(p16)	nop.m		0			// 16:
+	(p16)	xma.hu		ahi[4]=ai4,bj[7],ahi[3]	//	ap[4]*b[i]
+	(p40)	add		a7=a7,n7	}	//	(p17) a7+=n7
+{ .mfi;	(p42)	add		a7=a7,n7,1
+	(p16)	xma.lu		alo[4]=ai4,bj[7],ahi[3]
+	(p16)	nop.i		0		};;
+{ .mii;	(p16)	getf.sig	a3=alo[3]		// 17:
+	(p48)	add		t[2]=t[2],a7		//	(p17) t[2]+=a7
+	(p50)	add		t[2]=t[2],a7,1	};;
+{ .mfi;	(p16)	nop.m		0			// 18:
+	(p16)	xma.hu		nhi[2]=ni2,mj[0],nhi[1]	//	np[2]*m0
+	(p40)	cmp.ltu		p43,p41=a7,n7	}
+{ .mfi;	(p42)	cmp.leu		p43,p41=a7,n7
+	(p16)	xma.lu		nlo[2]=ni2,mj[0],nhi[1]
+	(p16)	nop.i		0		};;
+{ .mii;	(p16)	getf.sig	n1=nlo[1]		// 19:
+	(p48)	cmp.ltu		p51,p49=t[2],a7
+	(p50)	cmp.leu		p51,p49=t[2],a7	};;
+	.pred.rel		"mutex",p41,p43
+	.pred.rel		"mutex",p49,p51
+{ .mfi;	(p16)	nop.m		0			// 20:
+	(p16)	xma.hu		ahi[5]=ai5,bj[7],ahi[4]	//	ap[5]*b[i]
+	(p41)	add		a8=a8,n8	}	//	(p17) a8+=n8
+{ .mfi;	(p43)	add		a8=a8,n8,1
+	(p16)	xma.lu		alo[5]=ai5,bj[7],ahi[4]
+	(p16)	nop.i		0		};;
+{ .mii;	(p16)	getf.sig	a4=alo[4]		// 21:
+	(p49)	add		t[1]=t[1],a8		//	(p17) t[1]+=a8
+	(p51)	add		t[1]=t[1],a8,1	};;
+{ .mfi;	(p16)	nop.m		0			// 22:
+	(p16)	xma.hu		nhi[3]=ni3,mj[0],nhi[2]	//	np[3]*m0
+	(p41)	cmp.ltu		p42,p40=a8,n8	}
+{ .mfi;	(p43)	cmp.leu		p42,p40=a8,n8
+	(p16)	xma.lu		nlo[3]=ni3,mj[0],nhi[2]
+	(p16)	nop.i		0		};;
+{ .mii;	(p16)	getf.sig	n2=nlo[2]		// 23:
+	(p49)	cmp.ltu		p50,p48=t[1],a8
+	(p51)	cmp.leu		p50,p48=t[1],a8	};;
+{ .mfi;	(p16)	nop.m		0			// 24:
+	(p16)	xma.hu		ahi[6]=ai6,bj[7],ahi[5]	//	ap[6]*b[i]
+	(p16)	add		a1=a1,n1	}	//	(p16) a1+=n1
+{ .mfi;	(p16)	nop.m		0
+	(p16)	xma.lu		alo[6]=ai6,bj[7],ahi[5]
+	(p17)	mov		t[0]=r0		};;
+{ .mii;	(p16)	getf.sig	a5=alo[5]		// 25:
+	(p16)	add		t0=t[7],a1		//	(p16) t[7]+=a1
+	(p42)	add		t[0]=t[0],r0,1	};;
+{ .mfi;	(p16)	setf.sig	tf[0]=t0		// 26:
+	(p16)	xma.hu		nhi[4]=ni4,mj[0],nhi[3]	//	np[4]*m0
+	(p50)	add		t[0]=t[0],r0,1	}
+{ .mfi;	(p16)	cmp.ltu.unc	p42,p40=a1,n1
+	(p16)	xma.lu		nlo[4]=ni4,mj[0],nhi[3]
+	(p16)	nop.i		0		};;
+{ .mii;	(p16)	getf.sig	n3=nlo[3]		// 27:
+	(p16)	cmp.ltu.unc	p50,p48=t0,a1
+	(p16)	nop.i		0		};;
+	.pred.rel		"mutex",p40,p42
+	.pred.rel		"mutex",p48,p50
+{ .mfi;	(p16)	nop.m		0			// 28:
+	(p16)	xma.hu		ahi[7]=ai7,bj[7],ahi[6]	//	ap[7]*b[i]
+	(p40)	add		a2=a2,n2	}	//	(p16) a2+=n2
+{ .mfi;	(p42)	add		a2=a2,n2,1
+	(p16)	xma.lu		alo[7]=ai7,bj[7],ahi[6]
+	(p16)	nop.i		0		};;
+{ .mii;	(p16)	getf.sig	a6=alo[6]		// 29:
+	(p48)	add		t[6]=t[6],a2		//	(p16) t[6]+=a2
+	(p50)	add		t[6]=t[6],a2,1	};;
+{ .mfi;	(p16)	nop.m		0			// 30:
+	(p16)	xma.hu		nhi[5]=ni5,mj[0],nhi[4]	//	np[5]*m0
+	(p40)	cmp.ltu		p41,p39=a2,n2	}
+{ .mfi;	(p42)	cmp.leu		p41,p39=a2,n2
+	(p16)	xma.lu		nlo[5]=ni5,mj[0],nhi[4]
+	(p16)	nop.i		0		};;
+{ .mfi;	(p16)	getf.sig	n4=nlo[4]		// 31:
+	(p16)	nop.f		0
+	(p48)	cmp.ltu		p49,p47=t[6],a2	}
+{ .mfb;	(p50)	cmp.leu		p49,p47=t[6],a2
+	(p16)	nop.f		0
+	br.ctop.sptk.many	.Louter_8_ctop	};;
+.Louter_8_cend:
+
+// above loop has to execute one more time, without (p16), which is
+// replaced with merged move of np[8] to GPR bank
+	.pred.rel		"mutex",p40,p42
+	.pred.rel		"mutex",p48,p50
+{ .mmi;	(p0)	getf.sig	n1=ni0			// 0:
+	(p40)	add		a3=a3,n3		//	(p17) a3+=n3
+	(p42)	add		a3=a3,n3,1	};;
+{ .mii;	(p17)	getf.sig	a7=alo[8]		// 1:
+	(p48)	add		t[6]=t[6],a3		//	(p17) t[6]+=a3
+	(p50)	add		t[6]=t[6],a3,1	};;
+{ .mfi;	(p17)	getf.sig	a8=ahi[8]		// 2:
+	(p17)	xma.hu		nhi[7]=ni6,mj[1],nhi[6]	//	np[6]*m0
+	(p40)	cmp.ltu		p43,p41=a3,n3	}
+{ .mfi;	(p42)	cmp.leu		p43,p41=a3,n3
+	(p17)	xma.lu		nlo[7]=ni6,mj[1],nhi[6]
+	(p0)	nop.i		0		};;
+{ .mii;	(p17)	getf.sig	n5=nlo[6]		// 3:
+	(p48)	cmp.ltu		p51,p49=t[6],a3
+	(p50)	cmp.leu		p51,p49=t[6],a3	};;
+	.pred.rel		"mutex",p41,p43
+	.pred.rel		"mutex",p49,p51
+{ .mmi;	(p0)	getf.sig	n2=ni1			// 4:
+	(p41)	add		a4=a4,n4		//	(p17) a4+=n4
+	(p43)	add		a4=a4,n4,1	};;
+{ .mfi;	(p49)	add		t[5]=t[5],a4		// 5:	(p17) t[5]+=a4
+	(p0)	nop.f		0
+	(p51)	add		t[5]=t[5],a4,1	};;
+{ .mfi;	(p0)	getf.sig	n3=ni2			// 6:
+	(p17)	xma.hu		nhi[8]=ni7,mj[1],nhi[7]	//	np[7]*m0
+	(p41)	cmp.ltu		p42,p40=a4,n4	}
+{ .mfi;	(p43)	cmp.leu		p42,p40=a4,n4
+	(p17)	xma.lu		nlo[8]=ni7,mj[1],nhi[7]
+	(p0)	nop.i		0		};;
+{ .mii;	(p17)	getf.sig	n6=nlo[7]		// 7:
+	(p49)	cmp.ltu		p50,p48=t[5],a4
+	(p51)	cmp.leu		p50,p48=t[5],a4	};;
+	.pred.rel		"mutex",p40,p42
+	.pred.rel		"mutex",p48,p50
+{ .mii;	(p0)	getf.sig	n4=ni3			// 8:
+	(p40)	add		a5=a5,n5		//	(p17) a5+=n5
+	(p42)	add		a5=a5,n5,1	};;
+{ .mii;	(p0)	nop.m		0			// 9:
+	(p48)	add		t[4]=t[4],a5		//	p(17) t[4]+=a5
+	(p50)	add		t[4]=t[4],a5,1	};;
+{ .mii;	(p0)	nop.m		0			// 10:
+	(p40)	cmp.ltu		p43,p41=a5,n5
+	(p42)	cmp.leu		p43,p41=a5,n5	};;
+{ .mii;	(p17)	getf.sig	n7=nlo[8]		// 11:
+	(p48)	cmp.ltu		p51,p49=t[4],a5
+	(p50)	cmp.leu		p51,p49=t[4],a5	};;
+	.pred.rel		"mutex",p41,p43
+	.pred.rel		"mutex",p49,p51
+{ .mii;	(p17)	getf.sig	n8=nhi[8]		// 12:
+	(p41)	add		a6=a6,n6		//	(p17) a6+=n6
+	(p43)	add		a6=a6,n6,1	};;
+{ .mii;	(p0)	getf.sig	n5=ni4			// 13:
+	(p49)	add		t[3]=t[3],a6		//	(p17) t[3]+=a6
+	(p51)	add		t[3]=t[3],a6,1	};;
+{ .mii;	(p0)	nop.m		0			// 14:
+	(p41)	cmp.ltu		p42,p40=a6,n6
+	(p43)	cmp.leu		p42,p40=a6,n6	};;
+{ .mii;	(p0)	getf.sig	n6=ni5			// 15:
+	(p49)	cmp.ltu		p50,p48=t[3],a6
+	(p51)	cmp.leu		p50,p48=t[3],a6	};;
+	.pred.rel		"mutex",p40,p42
+	.pred.rel		"mutex",p48,p50
+{ .mii;	(p0)	nop.m		0			// 16:
+	(p40)	add		a7=a7,n7		//	(p17) a7+=n7
+	(p42)	add		a7=a7,n7,1	};;
+{ .mii;	(p0)	nop.m		0			// 17:
+	(p48)	add		t[2]=t[2],a7		//	(p17) t[2]+=a7
+	(p50)	add		t[2]=t[2],a7,1	};;
+{ .mii;	(p0)	nop.m		0			// 18:
+	(p40)	cmp.ltu		p43,p41=a7,n7
+	(p42)	cmp.leu		p43,p41=a7,n7	};;
+{ .mii;	(p0)	getf.sig	n7=ni6			// 19:
+	(p48)	cmp.ltu		p51,p49=t[2],a7
+	(p50)	cmp.leu		p51,p49=t[2],a7	};;
+	.pred.rel		"mutex",p41,p43
+	.pred.rel		"mutex",p49,p51
+{ .mii;	(p0)	nop.m		0			// 20:
+	(p41)	add		a8=a8,n8		//	(p17) a8+=n8
+	(p43)	add		a8=a8,n8,1	};;
+{ .mmi;	(p0)	nop.m		0			// 21:
+	(p49)	add		t[1]=t[1],a8		//	(p17) t[1]+=a8
+	(p51)	add		t[1]=t[1],a8,1	}
+{ .mmi;	(p17)	mov		t[0]=r0
+	(p41)	cmp.ltu		p42,p40=a8,n8
+	(p43)	cmp.leu		p42,p40=a8,n8	};;
+{ .mmi;	(p0)	getf.sig	n8=ni7			// 22:
+	(p49)	cmp.ltu		p50,p48=t[1],a8
+	(p51)	cmp.leu		p50,p48=t[1],a8	}
+{ .mmi;	(p42)	add		t[0]=t[0],r0,1
+	(p0)	add		r16=-7*16,prevsp
+	(p0)	add		r17=-6*16,prevsp	};;
+
+// subtract np[8] from carrybit|tmp[8]
+// carrybit|tmp[8] layout upon exit from above loop is:
+//	t[0]|t[1]|t[2]|t[3]|t[4]|t[5]|t[6]|t[7]|t0 (least significant)
+{ .mmi;	(p50)add	t[0]=t[0],r0,1
+	add		r18=-5*16,prevsp
+	sub		n1=t0,n1	};;
+{ .mmi;	cmp.gtu		p34,p32=n1,t0;;
+	.pred.rel	"mutex",p32,p34
+	(p32)sub	n2=t[7],n2
+	(p34)sub	n2=t[7],n2,1	};;
+{ .mii;	(p32)cmp.gtu	p35,p33=n2,t[7]
+	(p34)cmp.geu	p35,p33=n2,t[7];;
+	.pred.rel	"mutex",p33,p35
+	(p33)sub	n3=t[6],n3	}
+{ .mmi;	(p35)sub	n3=t[6],n3,1;;
+	(p33)cmp.gtu	p34,p32=n3,t[6]
+	(p35)cmp.geu	p34,p32=n3,t[6]	};;
+	.pred.rel	"mutex",p32,p34
+{ .mii;	(p32)sub	n4=t[5],n4
+	(p34)sub	n4=t[5],n4,1;;
+	(p32)cmp.gtu	p35,p33=n4,t[5]	}
+{ .mmi;	(p34)cmp.geu	p35,p33=n4,t[5];;
+	.pred.rel	"mutex",p33,p35
+	(p33)sub	n5=t[4],n5
+	(p35)sub	n5=t[4],n5,1	};;
+{ .mii;	(p33)cmp.gtu	p34,p32=n5,t[4]
+	(p35)cmp.geu	p34,p32=n5,t[4];;
+	.pred.rel	"mutex",p32,p34
+	(p32)sub	n6=t[3],n6	}
+{ .mmi;	(p34)sub	n6=t[3],n6,1;;
+	(p32)cmp.gtu	p35,p33=n6,t[3]
+	(p34)cmp.geu	p35,p33=n6,t[3]	};;
+	.pred.rel	"mutex",p33,p35
+{ .mii;	(p33)sub	n7=t[2],n7
+	(p35)sub	n7=t[2],n7,1;;
+	(p33)cmp.gtu	p34,p32=n7,t[2]	}
+{ .mmi;	(p35)cmp.geu	p34,p32=n7,t[2];;
+	.pred.rel	"mutex",p32,p34
+	(p32)sub	n8=t[1],n8
+	(p34)sub	n8=t[1],n8,1	};;
+{ .mii;	(p32)cmp.gtu	p35,p33=n8,t[1]
+	(p34)cmp.geu	p35,p33=n8,t[1];;
+	.pred.rel	"mutex",p33,p35
+	(p33)sub	a8=t[0],r0	}
+{ .mmi;	(p35)sub	a8=t[0],r0,1;;
+	(p33)cmp.gtu	p34,p32=a8,t[0]
+	(p35)cmp.geu	p34,p32=a8,t[0]	};;
+
+// save the result, either tmp[num] or tmp[num]-np[num]
+	.pred.rel	"mutex",p32,p34
+{ .mmi;	(p32)st8	[rptr]=n1,8
+	(p34)st8	[rptr]=t0,8
+	add		r19=-4*16,prevsp};;
+{ .mmb;	(p32)st8	[rptr]=n2,8
+	(p34)st8	[rptr]=t[7],8
+	(p5)br.cond.dpnt.few	.Ldone	};;
+{ .mmb;	(p32)st8	[rptr]=n3,8
+	(p34)st8	[rptr]=t[6],8
+	(p7)br.cond.dpnt.few	.Ldone	};;
+{ .mmb;	(p32)st8	[rptr]=n4,8
+	(p34)st8	[rptr]=t[5],8
+	(p9)br.cond.dpnt.few	.Ldone	};;
+{ .mmb;	(p32)st8	[rptr]=n5,8
+	(p34)st8	[rptr]=t[4],8
+	(p11)br.cond.dpnt.few	.Ldone	};;
+{ .mmb;	(p32)st8	[rptr]=n6,8
+	(p34)st8	[rptr]=t[3],8
+	(p13)br.cond.dpnt.few	.Ldone	};;
+{ .mmb;	(p32)st8	[rptr]=n7,8
+	(p34)st8	[rptr]=t[2],8
+	(p15)br.cond.dpnt.few	.Ldone	};;
+{ .mmb;	(p32)st8	[rptr]=n8,8
+	(p34)st8	[rptr]=t[1],8
+	nop.b		0		};;
+.Ldone:						// epilogue
+{ .mmi;	ldf.fill	f16=[r16],64
+	ldf.fill	f17=[r17],64
+	nop.i		0		}
+{ .mmi;	ldf.fill	f18=[r18],64
+	ldf.fill	f19=[r19],64
+	mov		pr=prevpr,0x1ffff	};;
+{ .mmi;	ldf.fill	f20=[r16]
+	ldf.fill	f21=[r17]
+	mov		ar.lc=prevlc	}
+{ .mmi;	ldf.fill	f22=[r18]
+	ldf.fill	f23=[r19]
+	mov		ret0=1		}	// signal "handled"
+{ .mib;	rum		1<<5
+	.restore	sp
+	mov		sp=prevsp
+	br.ret.sptk.many	b0	};;
+.endp	bn_mul_mont_8#
+
+.type	copyright#,\@object
+copyright:
+stringz	"Montgomery multiplication for IA-64, CRYPTOGAMS by <appro\@openssl.org>"
+___
+
+open STDOUT,">$output" if $output;
+print $code;
+close STDOUT;
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ia64.S
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ia64.S
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/mips-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/mips-mont.pl
@ -0,0 +1,433 @@
+#! /usr/bin/env perl
+# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# This module doesn't present direct interest for OpenSSL, because it
+# doesn't provide better performance for longer keys, at least not on
+# in-order-execution cores. While 512-bit RSA sign operations can be
+# 65% faster in 64-bit mode, 1024-bit ones are only 15% faster, and
+# 4096-bit ones are up to 15% slower. In 32-bit mode it varies from
+# 16% improvement for 512-bit RSA sign to -33% for 4096-bit RSA
+# verify:-( All comparisons are against bn_mul_mont-free assembler.
+# The module might be of interest to embedded system developers, as
+# the code is smaller than 1KB, yet offers >3x improvement on MIPS64
+# and 75-30% [less for longer keys] on MIPS32 over compiler-generated
+# code.
+
+######################################################################
+# There is a number of MIPS ABI in use, O32 and N32/64 are most
+# widely used. Then there is a new contender: NUBI. It appears that if
+# one picks the latter, it's possible to arrange code in ABI neutral
+# manner. Therefore let's stick to NUBI register layout:
+#
+($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
+($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
+#
+# The return value is placed in $a0. Following coding rules facilitate
+# interoperability:
+#
+# - never ever touch $tp, "thread pointer", former $gp;
+# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
+#   old code];
+# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
+#
+# For reference here is register layout for N32/64 MIPS ABIs:
+#
+# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
+# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
+# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
+# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
+# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
+#
+$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
+
+if ($flavour =~ /64|n32/i) {
+	$PTR_ADD="daddu";	# incidentally works even on n32
+	$PTR_SUB="dsubu";	# incidentally works even on n32
+	$REG_S="sd";
+	$REG_L="ld";
+	$SZREG=8;
+} else {
+	$PTR_ADD="addu";
+	$PTR_SUB="subu";
+	$REG_S="sw";
+	$REG_L="lw";
+	$SZREG=4;
+}
+$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000;
+#
+# <appro@openssl.org>
+#
+######################################################################
+
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+if ($flavour =~ /64|n32/i) {
+	$LD="ld";
+	$ST="sd";
+	$MULTU="dmultu";
+	$ADDU="daddu";
+	$SUBU="dsubu";
+	$BNSZ=8;
+} else {
+	$LD="lw";
+	$ST="sw";
+	$MULTU="multu";
+	$ADDU="addu";
+	$SUBU="subu";
+	$BNSZ=4;
+}
+
+# int bn_mul_mont(
+$rp=$a0;	# BN_ULONG *rp,
+$ap=$a1;	# const BN_ULONG *ap,
+$bp=$a2;	# const BN_ULONG *bp,
+$np=$a3;	# const BN_ULONG *np,
+$n0=$a4;	# const BN_ULONG *n0,
+$num=$a5;	# int num);
+
+$lo0=$a6;
+$hi0=$a7;
+$lo1=$t1;
+$hi1=$t2;
+$aj=$s0;
+$bi=$s1;
+$nj=$s2;
+$tp=$s3;
+$alo=$s4;
+$ahi=$s5;
+$nlo=$s6;
+$nhi=$s7;
+$tj=$s8;
+$i=$s9;
+$j=$s10;
+$m1=$s11;
+
+$FRAMESIZE=14;
+
+$code=<<___;
+#include "mips_arch.h"
+
+.text
+
+.set	noat
+.set	noreorder
+
+.align	5
+.globl	bn_mul_mont
+.ent	bn_mul_mont
+bn_mul_mont:
+___
+$code.=<<___ if ($flavour =~ /o32/i);
+	lw	$n0,16($sp)
+	lw	$num,20($sp)
+___
+$code.=<<___;
+	slt	$at,$num,4
+	bnez	$at,1f
+	li	$t0,0
+	slt	$at,$num,17	# on in-order CPU
+	bnez	$at,bn_mul_mont_internal
+	nop
+1:	jr	$ra
+	li	$a0,0
+.end	bn_mul_mont
+
+.align	5
+.ent	bn_mul_mont_internal
+bn_mul_mont_internal:
+	.frame	$fp,$FRAMESIZE*$SZREG,$ra
+	.mask	0x40000000|$SAVED_REGS_MASK,-$SZREG
+	$PTR_SUB $sp,$FRAMESIZE*$SZREG
+	$REG_S	$fp,($FRAMESIZE-1)*$SZREG($sp)
+	$REG_S	$s11,($FRAMESIZE-2)*$SZREG($sp)
+	$REG_S	$s10,($FRAMESIZE-3)*$SZREG($sp)
+	$REG_S	$s9,($FRAMESIZE-4)*$SZREG($sp)
+	$REG_S	$s8,($FRAMESIZE-5)*$SZREG($sp)
+	$REG_S	$s7,($FRAMESIZE-6)*$SZREG($sp)
+	$REG_S	$s6,($FRAMESIZE-7)*$SZREG($sp)
+	$REG_S	$s5,($FRAMESIZE-8)*$SZREG($sp)
+	$REG_S	$s4,($FRAMESIZE-9)*$SZREG($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i);
+	$REG_S	$s3,($FRAMESIZE-10)*$SZREG($sp)
+	$REG_S	$s2,($FRAMESIZE-11)*$SZREG($sp)
+	$REG_S	$s1,($FRAMESIZE-12)*$SZREG($sp)
+	$REG_S	$s0,($FRAMESIZE-13)*$SZREG($sp)
+___
+$code.=<<___;
+	move	$fp,$sp
+
+	.set	reorder
+	$LD	$n0,0($n0)
+	$LD	$bi,0($bp)	# bp[0]
+	$LD	$aj,0($ap)	# ap[0]
+	$LD	$nj,0($np)	# np[0]
+
+	$PTR_SUB $sp,2*$BNSZ	# place for two extra words
+	sll	$num,`log($BNSZ)/log(2)`
+	li	$at,-4096
+	$PTR_SUB $sp,$num
+	and	$sp,$at
+
+	$MULTU	($aj,$bi)
+	$LD	$ahi,$BNSZ($ap)
+	$LD	$nhi,$BNSZ($np)
+	mflo	($lo0,$aj,$bi)
+	mfhi	($hi0,$aj,$bi)
+	$MULTU	($lo0,$n0)
+	mflo	($m1,$lo0,$n0)
+
+	$MULTU	($ahi,$bi)
+	mflo	($alo,$ahi,$bi)
+	mfhi	($ahi,$ahi,$bi)
+
+	$MULTU	($nj,$m1)
+	mflo	($lo1,$nj,$m1)
+	mfhi	($hi1,$nj,$m1)
+	$MULTU	($nhi,$m1)
+	$ADDU	$lo1,$lo0
+	sltu	$at,$lo1,$lo0
+	$ADDU	$hi1,$at
+	mflo	($nlo,$nhi,$m1)
+	mfhi	($nhi,$nhi,$m1)
+
+	move	$tp,$sp
+	li	$j,2*$BNSZ
+.align	4
+.L1st:
+	.set	noreorder
+	$PTR_ADD $aj,$ap,$j
+	$PTR_ADD $nj,$np,$j
+	$LD	$aj,($aj)
+	$LD	$nj,($nj)
+
+	$MULTU	($aj,$bi)
+	$ADDU	$lo0,$alo,$hi0
+	$ADDU	$lo1,$nlo,$hi1
+	sltu	$at,$lo0,$hi0
+	sltu	$t0,$lo1,$hi1
+	$ADDU	$hi0,$ahi,$at
+	$ADDU	$hi1,$nhi,$t0
+	mflo	($alo,$aj,$bi)
+	mfhi	($ahi,$aj,$bi)
+
+	$ADDU	$lo1,$lo0
+	sltu	$at,$lo1,$lo0
+	$MULTU	($nj,$m1)
+	$ADDU	$hi1,$at
+	addu	$j,$BNSZ
+	$ST	$lo1,($tp)
+	sltu	$t0,$j,$num
+	mflo	($nlo,$nj,$m1)
+	mfhi	($nhi,$nj,$m1)
+
+	bnez	$t0,.L1st
+	$PTR_ADD $tp,$BNSZ
+	.set	reorder
+
+	$ADDU	$lo0,$alo,$hi0
+	sltu	$at,$lo0,$hi0
+	$ADDU	$hi0,$ahi,$at
+
+	$ADDU	$lo1,$nlo,$hi1
+	sltu	$t0,$lo1,$hi1
+	$ADDU	$hi1,$nhi,$t0
+	$ADDU	$lo1,$lo0
+	sltu	$at,$lo1,$lo0
+	$ADDU	$hi1,$at
+
+	$ST	$lo1,($tp)
+
+	$ADDU	$hi1,$hi0
+	sltu	$at,$hi1,$hi0
+	$ST	$hi1,$BNSZ($tp)
+	$ST	$at,2*$BNSZ($tp)
+
+	li	$i,$BNSZ
+.align	4
+.Louter:
+	$PTR_ADD $bi,$bp,$i
+	$LD	$bi,($bi)
+	$LD	$aj,($ap)
+	$LD	$ahi,$BNSZ($ap)
+	$LD	$tj,($sp)
+
+	$MULTU	($aj,$bi)
+	$LD	$nj,($np)
+	$LD	$nhi,$BNSZ($np)
+	mflo	($lo0,$aj,$bi)
+	mfhi	($hi0,$aj,$bi)
+	$ADDU	$lo0,$tj
+	$MULTU	($lo0,$n0)
+	sltu	$at,$lo0,$tj
+	$ADDU	$hi0,$at
+	mflo	($m1,$lo0,$n0)
+
+	$MULTU	($ahi,$bi)
+	mflo	($alo,$ahi,$bi)
+	mfhi	($ahi,$ahi,$bi)
+
+	$MULTU	($nj,$m1)
+	mflo	($lo1,$nj,$m1)
+	mfhi	($hi1,$nj,$m1)
+
+	$MULTU	($nhi,$m1)
+	$ADDU	$lo1,$lo0
+	sltu	$at,$lo1,$lo0
+	$ADDU	$hi1,$at
+	mflo	($nlo,$nhi,$m1)
+	mfhi	($nhi,$nhi,$m1)
+
+	move	$tp,$sp
+	li	$j,2*$BNSZ
+	$LD	$tj,$BNSZ($tp)
+.align	4
+.Linner:
+	.set	noreorder
+	$PTR_ADD $aj,$ap,$j
+	$PTR_ADD $nj,$np,$j
+	$LD	$aj,($aj)
+	$LD	$nj,($nj)
+
+	$MULTU	($aj,$bi)
+	$ADDU	$lo0,$alo,$hi0
+	$ADDU	$lo1,$nlo,$hi1
+	sltu	$at,$lo0,$hi0
+	sltu	$t0,$lo1,$hi1
+	$ADDU	$hi0,$ahi,$at
+	$ADDU	$hi1,$nhi,$t0
+	mflo	($alo,$aj,$bi)
+	mfhi	($ahi,$aj,$bi)
+
+	$ADDU	$lo0,$tj
+	addu	$j,$BNSZ
+	$MULTU	($nj,$m1)
+	sltu	$at,$lo0,$tj
+	$ADDU	$lo1,$lo0
+	$ADDU	$hi0,$at
+	sltu	$t0,$lo1,$lo0
+	$LD	$tj,2*$BNSZ($tp)
+	$ADDU	$hi1,$t0
+	sltu	$at,$j,$num
+	mflo	($nlo,$nj,$m1)
+	mfhi	($nhi,$nj,$m1)
+	$ST	$lo1,($tp)
+	bnez	$at,.Linner
+	$PTR_ADD $tp,$BNSZ
+	.set	reorder
+
+	$ADDU	$lo0,$alo,$hi0
+	sltu	$at,$lo0,$hi0
+	$ADDU	$hi0,$ahi,$at
+	$ADDU	$lo0,$tj
+	sltu	$t0,$lo0,$tj
+	$ADDU	$hi0,$t0
+
+	$LD	$tj,2*$BNSZ($tp)
+	$ADDU	$lo1,$nlo,$hi1
+	sltu	$at,$lo1,$hi1
+	$ADDU	$hi1,$nhi,$at
+	$ADDU	$lo1,$lo0
+	sltu	$t0,$lo1,$lo0
+	$ADDU	$hi1,$t0
+	$ST	$lo1,($tp)
+
+	$ADDU	$lo1,$hi1,$hi0
+	sltu	$hi1,$lo1,$hi0
+	$ADDU	$lo1,$tj
+	sltu	$at,$lo1,$tj
+	$ADDU	$hi1,$at
+	$ST	$lo1,$BNSZ($tp)
+	$ST	$hi1,2*$BNSZ($tp)
+
+	addu	$i,$BNSZ
+	sltu	$t0,$i,$num
+	bnez	$t0,.Louter
+
+	.set	noreorder
+	$PTR_ADD $tj,$sp,$num	# &tp[num]
+	move	$tp,$sp
+	move	$ap,$sp
+	li	$hi0,0		# clear borrow bit
+
+.align	4
+.Lsub:	$LD	$lo0,($tp)
+	$LD	$lo1,($np)
+	$PTR_ADD $tp,$BNSZ
+	$PTR_ADD $np,$BNSZ
+	$SUBU	$lo1,$lo0,$lo1	# tp[i]-np[i]
+	sgtu	$at,$lo1,$lo0
+	$SUBU	$lo0,$lo1,$hi0
+	sgtu	$hi0,$lo0,$lo1
+	$ST	$lo0,($rp)
+	or	$hi0,$at
+	sltu	$at,$tp,$tj
+	bnez	$at,.Lsub
+	$PTR_ADD $rp,$BNSZ
+
+	$SUBU	$hi0,$hi1,$hi0	# handle upmost overflow bit
+	move	$tp,$sp
+	$PTR_SUB $rp,$num	# restore rp
+	not	$hi1,$hi0
+
+.Lcopy:	$LD	$nj,($tp)	# conditional move
+	$LD	$aj,($rp)
+	$ST	$zero,($tp)
+	$PTR_ADD $tp,$BNSZ
+	and	$nj,$hi0
+	and	$aj,$hi1
+	or	$aj,$nj
+	sltu	$at,$tp,$tj
+	$ST	$aj,($rp)
+	bnez	$at,.Lcopy
+	$PTR_ADD $rp,$BNSZ
+
+	li	$a0,1
+	li	$t0,1
+
+	.set	noreorder
+	move	$sp,$fp
+	$REG_L	$fp,($FRAMESIZE-1)*$SZREG($sp)
+	$REG_L	$s11,($FRAMESIZE-2)*$SZREG($sp)
+	$REG_L	$s10,($FRAMESIZE-3)*$SZREG($sp)
+	$REG_L	$s9,($FRAMESIZE-4)*$SZREG($sp)
+	$REG_L	$s8,($FRAMESIZE-5)*$SZREG($sp)
+	$REG_L	$s7,($FRAMESIZE-6)*$SZREG($sp)
+	$REG_L	$s6,($FRAMESIZE-7)*$SZREG($sp)
+	$REG_L	$s5,($FRAMESIZE-8)*$SZREG($sp)
+	$REG_L	$s4,($FRAMESIZE-9)*$SZREG($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i);
+	$REG_L	$s3,($FRAMESIZE-10)*$SZREG($sp)
+	$REG_L	$s2,($FRAMESIZE-11)*$SZREG($sp)
+	$REG_L	$s1,($FRAMESIZE-12)*$SZREG($sp)
+	$REG_L	$s0,($FRAMESIZE-13)*$SZREG($sp)
+___
+$code.=<<___;
+	jr	$ra
+	$PTR_ADD $sp,$FRAMESIZE*$SZREG
+.end	bn_mul_mont_internal
+.rdata
+.asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
+___
+
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+
+print $code;
+close STDOUT;
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/mips.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/mips.pl
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/parisc-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/parisc-mont.pl
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ppc-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ppc-mont.pl
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ppc.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ppc.pl
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ppc64-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/ppc64-mont.pl
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/rsaz-avx2.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/rsaz-avx2.pl
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/rsaz-x86_64.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/rsaz-x86_64.pl
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/s390x-gf2m.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/s390x-gf2m.pl
@ -0,0 +1,228 @@
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# May 2011
+#
+# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
+# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
+# the time being... gcc 4.3 appeared to generate poor code, therefore
+# the effort. And indeed, the module delivers 55%-90%(*) improvement
+# on heaviest ECDSA verify and ECDH benchmarks for 163- and 571-bit
+# key lengths on z990, 30%-55%(*) - on z10, and 70%-110%(*) - on z196.
+# This is for 64-bit build. In 32-bit "highgprs" case improvement is
+# even higher, for example on z990 it was measured 80%-150%. ECDSA
+# sign is modest 9%-12% faster. Keep in mind that these coefficients
+# are not ones for bn_GF2m_mul_2x2 itself, as not all CPU time is
+# burnt in it...
+#
+# (*)	gcc 4.1 was observed to deliver better results than gcc 4.3,
+#	so that improvement coefficients can vary from one specific
+#	setup to another.
+
+$flavour = shift;
+
+if ($flavour =~ /3[12]/) {
+        $SIZE_T=4;
+        $g="";
+} else {
+        $SIZE_T=8;
+        $g="g";
+}
+
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+$stdframe=16*$SIZE_T+4*8;
+
+$rp="%r2";
+$a1="%r3";
+$a0="%r4";
+$b1="%r5";
+$b0="%r6";
+
+$ra="%r14";
+$sp="%r15";
+
+@T=("%r0","%r1");
+@i=("%r12","%r13");
+
+($a1,$a2,$a4,$a8,$a12,$a48)=map("%r$_",(6..11));
+($lo,$hi,$b)=map("%r$_",(3..5)); $a=$lo; $mask=$a8;
+
+$code.=<<___;
+.text
+
+.type	_mul_1x1,\@function
+.align	16
+_mul_1x1:
+	lgr	$a1,$a
+	sllg	$a2,$a,1
+	sllg	$a4,$a,2
+	sllg	$a8,$a,3
+
+	srag	$lo,$a1,63			# broadcast 63rd bit
+	nihh	$a1,0x1fff
+	srag	@i[0],$a2,63			# broadcast 62nd bit
+	nihh	$a2,0x3fff
+	srag	@i[1],$a4,63			# broadcast 61st bit
+	nihh	$a4,0x7fff
+	ngr	$lo,$b
+	ngr	@i[0],$b
+	ngr	@i[1],$b
+
+	lghi	@T[0],0
+	lgr	$a12,$a1
+	stg	@T[0],`$stdframe+0*8`($sp)	# tab[0]=0
+	xgr	$a12,$a2
+	stg	$a1,`$stdframe+1*8`($sp)	# tab[1]=a1
+	 lgr	$a48,$a4
+	stg	$a2,`$stdframe+2*8`($sp)	# tab[2]=a2
+	 xgr	$a48,$a8
+	stg	$a12,`$stdframe+3*8`($sp)	# tab[3]=a1^a2
+	 xgr	$a1,$a4
+
+	stg	$a4,`$stdframe+4*8`($sp)	# tab[4]=a4
+	xgr	$a2,$a4
+	stg	$a1,`$stdframe+5*8`($sp)	# tab[5]=a1^a4
+	xgr	$a12,$a4
+	stg	$a2,`$stdframe+6*8`($sp)	# tab[6]=a2^a4
+	 xgr	$a1,$a48
+	stg	$a12,`$stdframe+7*8`($sp)	# tab[7]=a1^a2^a4
+	 xgr	$a2,$a48
+
+	stg	$a8,`$stdframe+8*8`($sp)	# tab[8]=a8
+	xgr	$a12,$a48
+	stg	$a1,`$stdframe+9*8`($sp)	# tab[9]=a1^a8
+	 xgr	$a1,$a4
+	stg	$a2,`$stdframe+10*8`($sp)	# tab[10]=a2^a8
+	 xgr	$a2,$a4
+	stg	$a12,`$stdframe+11*8`($sp)	# tab[11]=a1^a2^a8
+
+	xgr	$a12,$a4
+	stg	$a48,`$stdframe+12*8`($sp)	# tab[12]=a4^a8
+	 srlg	$hi,$lo,1
+	stg	$a1,`$stdframe+13*8`($sp)	# tab[13]=a1^a4^a8
+	 sllg	$lo,$lo,63
+	stg	$a2,`$stdframe+14*8`($sp)	# tab[14]=a2^a4^a8
+	 srlg	@T[0],@i[0],2
+	stg	$a12,`$stdframe+15*8`($sp)	# tab[15]=a1^a2^a4^a8
+
+	lghi	$mask,`0xf<<3`
+	sllg	$a1,@i[0],62
+	 sllg	@i[0],$b,3
+	srlg	@T[1],@i[1],3
+	 ngr	@i[0],$mask
+	sllg	$a2,@i[1],61
+	 srlg	@i[1],$b,4-3
+	xgr	$hi,@T[0]
+	 ngr	@i[1],$mask
+	xgr	$lo,$a1
+	xgr	$hi,@T[1]
+	xgr	$lo,$a2
+
+	xg	$lo,$stdframe(@i[0],$sp)
+	srlg	@i[0],$b,8-3
+	ngr	@i[0],$mask
+___
+for($n=1;$n<14;$n++) {
+$code.=<<___;
+	lg	@T[1],$stdframe(@i[1],$sp)
+	srlg	@i[1],$b,`($n+2)*4`-3
+	sllg	@T[0],@T[1],`$n*4`
+	ngr	@i[1],$mask
+	srlg	@T[1],@T[1],`64-$n*4`
+	xgr	$lo,@T[0]
+	xgr	$hi,@T[1]
+___
+	push(@i,shift(@i)); push(@T,shift(@T));
+}
+$code.=<<___;
+	lg	@T[1],$stdframe(@i[1],$sp)
+	sllg	@T[0],@T[1],`$n*4`
+	srlg	@T[1],@T[1],`64-$n*4`
+	xgr	$lo,@T[0]
+	xgr	$hi,@T[1]
+
+	lg	@T[0],$stdframe(@i[0],$sp)
+	sllg	@T[1],@T[0],`($n+1)*4`
+	srlg	@T[0],@T[0],`64-($n+1)*4`
+	xgr	$lo,@T[1]
+	xgr	$hi,@T[0]
+
+	br	$ra
+.size	_mul_1x1,.-_mul_1x1
+
+.globl	bn_GF2m_mul_2x2
+.type	bn_GF2m_mul_2x2,\@function
+.align	16
+bn_GF2m_mul_2x2:
+	stm${g}	%r3,%r15,3*$SIZE_T($sp)
+
+	lghi	%r1,-$stdframe-128
+	la	%r0,0($sp)
+	la	$sp,0(%r1,$sp)			# alloca
+	st${g}	%r0,0($sp)			# back chain
+___
+if ($SIZE_T==8) {
+my @r=map("%r$_",(6..9));
+$code.=<<___;
+	bras	$ra,_mul_1x1			# a1·b1
+	stmg	$lo,$hi,16($rp)
+
+	lg	$a,`$stdframe+128+4*$SIZE_T`($sp)
+	lg	$b,`$stdframe+128+6*$SIZE_T`($sp)
+	bras	$ra,_mul_1x1			# a0·b0
+	stmg	$lo,$hi,0($rp)
+
+	lg	$a,`$stdframe+128+3*$SIZE_T`($sp)
+	lg	$b,`$stdframe+128+5*$SIZE_T`($sp)
+	xg	$a,`$stdframe+128+4*$SIZE_T`($sp)
+	xg	$b,`$stdframe+128+6*$SIZE_T`($sp)
+	bras	$ra,_mul_1x1			# (a0+a1)·(b0+b1)
+	lmg	@r[0],@r[3],0($rp)
+
+	xgr	$lo,$hi
+	xgr	$hi,@r[1]
+	xgr	$lo,@r[0]
+	xgr	$hi,@r[2]
+	xgr	$lo,@r[3]
+	xgr	$hi,@r[3]
+	xgr	$lo,$hi
+	stg	$hi,16($rp)
+	stg	$lo,8($rp)
+___
+} else {
+$code.=<<___;
+	sllg	%r3,%r3,32
+	sllg	%r5,%r5,32
+	or	%r3,%r4
+	or	%r5,%r6
+	bras	$ra,_mul_1x1
+	rllg	$lo,$lo,32
+	rllg	$hi,$hi,32
+	stmg	$lo,$hi,0($rp)
+___
+}
+$code.=<<___;
+	lm${g}	%r6,%r15,`$stdframe+128+6*$SIZE_T`($sp)
+	br	$ra
+.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
+.string	"GF(2^m) Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>"
+___
+
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;
+print $code;
+close STDOUT;
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/s390x-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/s390x-mont.pl
@ -0,0 +1,284 @@
+#! /usr/bin/env perl
+# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# April 2007.
+#
+# Performance improvement over vanilla C code varies from 85% to 45%
+# depending on key length and benchmark. Unfortunately in this context
+# these are not very impressive results [for code that utilizes "wide"
+# 64x64=128-bit multiplication, which is not commonly available to C
+# programmers], at least hand-coded bn_asm.c replacement is known to
+# provide 30-40% better results for longest keys. Well, on a second
+# thought it's not very surprising, because z-CPUs are single-issue
+# and _strictly_ in-order execution, while bn_mul_mont is more or less
+# dependent on CPU ability to pipe-line instructions and have several
+# of them "in-flight" at the same time. I mean while other methods,
+# for example Karatsuba, aim to minimize amount of multiplications at
+# the cost of other operations increase, bn_mul_mont aim to neatly
+# "overlap" multiplications and the other operations [and on most
+# platforms even minimize the amount of the other operations, in
+# particular references to memory]. But it's possible to improve this
+# module performance by implementing dedicated squaring code-path and
+# possibly by unrolling loops...
+
+# January 2009.
+#
+# Reschedule to minimize/avoid Address Generation Interlock hazard,
+# make inner loops counter-based.
+
+# November 2010.
+#
+# Adapt for -m31 build. If kernel supports what's called "highgprs"
+# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
+# instructions and achieve "64-bit" performance even in 31-bit legacy
+# application context. The feature is not specific to any particular
+# processor, as long as it's "z-CPU". Latter implies that the code
+# remains z/Architecture specific. Compatibility with 32-bit BN_ULONG
+# is achieved by swapping words after 64-bit loads, follow _dswap-s.
+# On z990 it was measured to perform 2.6-2.2 times better than
+# compiler-generated code, less for longer keys...
+
+$flavour = shift;
+
+if ($flavour =~ /3[12]/) {
+	$SIZE_T=4;
+	$g="";
+} else {
+	$SIZE_T=8;
+	$g="g";
+}
+
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+$stdframe=16*$SIZE_T+4*8;
+
+$mn0="%r0";
+$num="%r1";
+
+# int bn_mul_mont(
+$rp="%r2";		# BN_ULONG *rp,
+$ap="%r3";		# const BN_ULONG *ap,
+$bp="%r4";		# const BN_ULONG *bp,
+$np="%r5";		# const BN_ULONG *np,
+$n0="%r6";		# const BN_ULONG *n0,
+#$num="160(%r15)"	# int num);
+
+$bi="%r2";	# zaps rp
+$j="%r7";
+
+$ahi="%r8";
+$alo="%r9";
+$nhi="%r10";
+$nlo="%r11";
+$AHI="%r12";
+$NHI="%r13";
+$count="%r14";
+$sp="%r15";
+
+$code.=<<___;
+.text
+.globl	bn_mul_mont
+.type	bn_mul_mont,\@function
+bn_mul_mont:
+	lgf	$num,`$stdframe+$SIZE_T-4`($sp)	# pull $num
+	sla	$num,`log($SIZE_T)/log(2)`	# $num to enumerate bytes
+	la	$bp,0($num,$bp)
+
+	st${g}	%r2,2*$SIZE_T($sp)
+
+	cghi	$num,16		#
+	lghi	%r2,0		#
+	blr	%r14		# if($num<16) return 0;
+___
+$code.=<<___ if ($flavour =~ /3[12]/);
+	tmll	$num,4
+	bnzr	%r14		# if ($num&1) return 0;
+___
+$code.=<<___ if ($flavour !~ /3[12]/);
+	cghi	$num,96		#
+	bhr	%r14		# if($num>96) return 0;
+___
+$code.=<<___;
+	stm${g}	%r3,%r15,3*$SIZE_T($sp)
+
+	lghi	$rp,-$stdframe-8	# leave room for carry bit
+	lcgr	$j,$num		# -$num
+	lgr	%r0,$sp
+	la	$rp,0($rp,$sp)
+	la	$sp,0($j,$rp)	# alloca
+	st${g}	%r0,0($sp)	# back chain
+
+	sra	$num,3		# restore $num
+	la	$bp,0($j,$bp)	# restore $bp
+	ahi	$num,-1		# adjust $num for inner loop
+	lg	$n0,0($n0)	# pull n0
+	_dswap	$n0
+
+	lg	$bi,0($bp)
+	_dswap	$bi
+	lg	$alo,0($ap)
+	_dswap	$alo
+	mlgr	$ahi,$bi	# ap[0]*bp[0]
+	lgr	$AHI,$ahi
+
+	lgr	$mn0,$alo	# "tp[0]"*n0
+	msgr	$mn0,$n0
+
+	lg	$nlo,0($np)	#
+	_dswap	$nlo
+	mlgr	$nhi,$mn0	# np[0]*m1
+	algr	$nlo,$alo	# +="tp[0]"
+	lghi	$NHI,0
+	alcgr	$NHI,$nhi
+
+	la	$j,8(%r0)	# j=1
+	lr	$count,$num
+
+.align	16
+.L1st:
+	lg	$alo,0($j,$ap)
+	_dswap	$alo
+	mlgr	$ahi,$bi	# ap[j]*bp[0]
+	algr	$alo,$AHI
+	lghi	$AHI,0
+	alcgr	$AHI,$ahi
+
+	lg	$nlo,0($j,$np)
+	_dswap	$nlo
+	mlgr	$nhi,$mn0	# np[j]*m1
+	algr	$nlo,$NHI
+	lghi	$NHI,0
+	alcgr	$nhi,$NHI	# +="tp[j]"
+	algr	$nlo,$alo
+	alcgr	$NHI,$nhi
+
+	stg	$nlo,$stdframe-8($j,$sp)	# tp[j-1]=
+	la	$j,8($j)	# j++
+	brct	$count,.L1st
+
+	algr	$NHI,$AHI
+	lghi	$AHI,0
+	alcgr	$AHI,$AHI	# upmost overflow bit
+	stg	$NHI,$stdframe-8($j,$sp)
+	stg	$AHI,$stdframe($j,$sp)
+	la	$bp,8($bp)	# bp++
+
+.Louter:
+	lg	$bi,0($bp)	# bp[i]
+	_dswap	$bi
+	lg	$alo,0($ap)
+	_dswap	$alo
+	mlgr	$ahi,$bi	# ap[0]*bp[i]
+	alg	$alo,$stdframe($sp)	# +=tp[0]
+	lghi	$AHI,0
+	alcgr	$AHI,$ahi
+
+	lgr	$mn0,$alo
+	msgr	$mn0,$n0	# tp[0]*n0
+
+	lg	$nlo,0($np)	# np[0]
+	_dswap	$nlo
+	mlgr	$nhi,$mn0	# np[0]*m1
+	algr	$nlo,$alo	# +="tp[0]"
+	lghi	$NHI,0
+	alcgr	$NHI,$nhi
+
+	la	$j,8(%r0)	# j=1
+	lr	$count,$num
+
+.align	16
+.Linner:
+	lg	$alo,0($j,$ap)
+	_dswap	$alo
+	mlgr	$ahi,$bi	# ap[j]*bp[i]
+	algr	$alo,$AHI
+	lghi	$AHI,0
+	alcgr	$ahi,$AHI
+	alg	$alo,$stdframe($j,$sp)# +=tp[j]
+	alcgr	$AHI,$ahi
+
+	lg	$nlo,0($j,$np)
+	_dswap	$nlo
+	mlgr	$nhi,$mn0	# np[j]*m1
+	algr	$nlo,$NHI
+	lghi	$NHI,0
+	alcgr	$nhi,$NHI
+	algr	$nlo,$alo	# +="tp[j]"
+	alcgr	$NHI,$nhi
+
+	stg	$nlo,$stdframe-8($j,$sp)	# tp[j-1]=
+	la	$j,8($j)	# j++
+	brct	$count,.Linner
+
+	algr	$NHI,$AHI
+	lghi	$AHI,0
+	alcgr	$AHI,$AHI
+	alg	$NHI,$stdframe($j,$sp)# accumulate previous upmost overflow bit
+	lghi	$ahi,0
+	alcgr	$AHI,$ahi	# new upmost overflow bit
+	stg	$NHI,$stdframe-8($j,$sp)
+	stg	$AHI,$stdframe($j,$sp)
+
+	la	$bp,8($bp)	# bp++
+	cl${g}	$bp,`$stdframe+8+4*$SIZE_T`($j,$sp)	# compare to &bp[num]
+	jne	.Louter
+
+	l${g}	$rp,`$stdframe+8+2*$SIZE_T`($j,$sp)	# reincarnate rp
+	la	$ap,$stdframe($sp)
+	ahi	$num,1		# restore $num, incidentally clears "borrow"
+
+	la	$j,0(%r0)
+	lr	$count,$num
+.Lsub:	lg	$alo,0($j,$ap)
+	lg	$nlo,0($j,$np)
+	_dswap	$nlo
+	slbgr	$alo,$nlo
+	stg	$alo,0($j,$rp)
+	la	$j,8($j)
+	brct	$count,.Lsub
+	lghi	$ahi,0
+	slbgr	$AHI,$ahi	# handle upmost carry
+	lghi	$NHI,-1
+	xgr	$NHI,$AHI
+
+	la	$j,0(%r0)
+	lgr	$count,$num
+.Lcopy:	lg	$ahi,$stdframe($j,$sp)	# conditional copy
+	lg	$alo,0($j,$rp)
+	ngr	$ahi,$AHI
+	ngr	$alo,$NHI
+	ogr	$alo,$ahi
+	_dswap	$alo
+	stg	$j,$stdframe($j,$sp)	# zap tp
+	stg	$alo,0($j,$rp)
+	la	$j,8($j)
+	brct	$count,.Lcopy
+
+	la	%r1,`$stdframe+8+6*$SIZE_T`($j,$sp)
+	lm${g}	%r6,%r15,0(%r1)
+	lghi	%r2,1		# signal "processed"
+	br	%r14
+.size	bn_mul_mont,.-bn_mul_mont
+.string	"Montgomery Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>"
+___
+
+foreach (split("\n",$code)) {
+	s/\`([^\`]*)\`/eval $1/ge;
+	s/_dswap\s+(%r[0-9]+)/sprintf("rllg\t%s,%s,32",$1,$1) if($SIZE_T==4)/e;
+	print $_,"\n";
+}
+close STDOUT;
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/s390x.S
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/s390x.S
@ -0,0 +1,713 @@
+.ident "s390x.S, version 1.1"
+// ====================================================================
+// Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the OpenSSL license (the "License").  You may not use
+// this file except in compliance with the License.  You can obtain a copy
+// in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+// ====================================================================
+
+.text
+
+#define zero	%r0
+
+// BN_ULONG bn_mul_add_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
+.globl	bn_mul_add_words
+.type	bn_mul_add_words,@function
+.align	4
+bn_mul_add_words:
+	lghi	zero,0		// zero = 0
+	la	%r1,0(%r2)	// put rp aside [to give way to]
+	lghi	%r2,0		// return value
+	ltgfr	%r4,%r4
+	bler	%r14		// if (len<=0) return 0;
+
+	stmg	%r6,%r13,48(%r15)
+	lghi	%r2,3
+	lghi	%r12,0		// carry = 0
+	slgr	%r1,%r3		// rp-=ap
+	nr	%r2,%r4		// len%4
+	sra	%r4,2		// cnt=len/4
+	jz	.Loop1_madd	// carry is incidentally cleared if branch taken
+	algr	zero,zero	// clear carry
+
+	lg	%r7,0(%r3)	// ap[0]
+	lg	%r9,8(%r3)	// ap[1]
+	mlgr	%r6,%r5		// *=w
+	brct	%r4,.Loop4_madd
+	j	.Loop4_madd_tail
+
+.Loop4_madd:
+	mlgr	%r8,%r5
+	lg	%r11,16(%r3)	// ap[i+2]
+	alcgr	%r7,%r12	// +=carry
+	alcgr	%r6,zero
+	alg	%r7,0(%r3,%r1)	// +=rp[i]
+	stg	%r7,0(%r3,%r1)	// rp[i]=
+
+	mlgr	%r10,%r5
+	lg	%r13,24(%r3)
+	alcgr	%r9,%r6
+	alcgr	%r8,zero
+	alg	%r9,8(%r3,%r1)
+	stg	%r9,8(%r3,%r1)
+
+	mlgr	%r12,%r5
+	lg	%r7,32(%r3)
+	alcgr	%r11,%r8
+	alcgr	%r10,zero
+	alg	%r11,16(%r3,%r1)
+	stg	%r11,16(%r3,%r1)
+
+	mlgr	%r6,%r5
+	lg	%r9,40(%r3)
+	alcgr	%r13,%r10
+	alcgr	%r12,zero
+	alg	%r13,24(%r3,%r1)
+	stg	%r13,24(%r3,%r1)
+
+	la	%r3,32(%r3)	// i+=4
+	brct	%r4,.Loop4_madd
+
+.Loop4_madd_tail:
+	mlgr	%r8,%r5
+	lg	%r11,16(%r3)
+	alcgr	%r7,%r12	// +=carry
+	alcgr	%r6,zero
+	alg	%r7,0(%r3,%r1)	// +=rp[i]
+	stg	%r7,0(%r3,%r1)	// rp[i]=
+
+	mlgr	%r10,%r5
+	lg	%r13,24(%r3)
+	alcgr	%r9,%r6
+	alcgr	%r8,zero
+	alg	%r9,8(%r3,%r1)
+	stg	%r9,8(%r3,%r1)
+
+	mlgr	%r12,%r5
+	alcgr	%r11,%r8
+	alcgr	%r10,zero
+	alg	%r11,16(%r3,%r1)
+	stg	%r11,16(%r3,%r1)
+
+	alcgr	%r13,%r10
+	alcgr	%r12,zero
+	alg	%r13,24(%r3,%r1)
+	stg	%r13,24(%r3,%r1)
+
+	la	%r3,32(%r3)	// i+=4
+
+	la	%r2,1(%r2)	// see if len%4 is zero ...
+	brct	%r2,.Loop1_madd	// without touching condition code:-)
+
+.Lend_madd:
+	lgr	%r2,zero	// return value
+	alcgr	%r2,%r12	// collect even carry bit
+	lmg	%r6,%r13,48(%r15)
+	br	%r14
+
+.Loop1_madd:
+	lg	%r7,0(%r3)	// ap[i]
+	mlgr	%r6,%r5		// *=w
+	alcgr	%r7,%r12	// +=carry
+	alcgr	%r6,zero
+	alg	%r7,0(%r3,%r1)	// +=rp[i]
+	stg	%r7,0(%r3,%r1)	// rp[i]=
+
+	lgr	%r12,%r6
+	la	%r3,8(%r3)	// i++
+	brct	%r2,.Loop1_madd
+
+	j	.Lend_madd
+.size	bn_mul_add_words,.-bn_mul_add_words
+
+// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
+.globl	bn_mul_words
+.type	bn_mul_words,@function
+.align	4
+bn_mul_words:
+	lghi	zero,0		// zero = 0
+	la	%r1,0(%r2)	// put rp aside
+	lghi	%r2,0		// i=0;
+	ltgfr	%r4,%r4
+	bler	%r14		// if (len<=0) return 0;
+
+	stmg	%r6,%r10,48(%r15)
+	lghi	%r10,3
+	lghi	%r8,0		// carry = 0
+	nr	%r10,%r4	// len%4
+	sra	%r4,2		// cnt=len/4
+	jz	.Loop1_mul	// carry is incidentally cleared if branch taken
+	algr	zero,zero	// clear carry
+
+.Loop4_mul:
+	lg	%r7,0(%r2,%r3)	// ap[i]
+	mlgr	%r6,%r5		// *=w
+	alcgr	%r7,%r8		// +=carry
+	stg	%r7,0(%r2,%r1)	// rp[i]=
+
+	lg	%r9,8(%r2,%r3)
+	mlgr	%r8,%r5
+	alcgr	%r9,%r6
+	stg	%r9,8(%r2,%r1)
+
+	lg	%r7,16(%r2,%r3)
+	mlgr	%r6,%r5
+	alcgr	%r7,%r8
+	stg	%r7,16(%r2,%r1)
+
+	lg	%r9,24(%r2,%r3)
+	mlgr	%r8,%r5
+	alcgr	%r9,%r6
+	stg	%r9,24(%r2,%r1)
+
+	la	%r2,32(%r2)	// i+=4
+	brct	%r4,.Loop4_mul
+
+	la	%r10,1(%r10)		// see if len%4 is zero ...
+	brct	%r10,.Loop1_mul		// without touching condition code:-)
+
+.Lend_mul:
+	alcgr	%r8,zero	// collect carry bit
+	lgr	%r2,%r8
+	lmg	%r6,%r10,48(%r15)
+	br	%r14
+
+.Loop1_mul:
+	lg	%r7,0(%r2,%r3)	// ap[i]
+	mlgr	%r6,%r5		// *=w
+	alcgr	%r7,%r8		// +=carry
+	stg	%r7,0(%r2,%r1)	// rp[i]=
+
+	lgr	%r8,%r6
+	la	%r2,8(%r2)	// i++
+	brct	%r10,.Loop1_mul
+
+	j	.Lend_mul
+.size	bn_mul_words,.-bn_mul_words
+
+// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r2,int r4)
+.globl	bn_sqr_words
+.type	bn_sqr_words,@function
+.align	4
+bn_sqr_words:
+	ltgfr	%r4,%r4
+	bler	%r14
+
+	stmg	%r6,%r7,48(%r15)
+	srag	%r1,%r4,2	// cnt=len/4
+	jz	.Loop1_sqr
+
+.Loop4_sqr:
+	lg	%r7,0(%r3)
+	mlgr	%r6,%r7
+	stg	%r7,0(%r2)
+	stg	%r6,8(%r2)
+
+	lg	%r7,8(%r3)
+	mlgr	%r6,%r7
+	stg	%r7,16(%r2)
+	stg	%r6,24(%r2)
+
+	lg	%r7,16(%r3)
+	mlgr	%r6,%r7
+	stg	%r7,32(%r2)
+	stg	%r6,40(%r2)
+
+	lg	%r7,24(%r3)
+	mlgr	%r6,%r7
+	stg	%r7,48(%r2)
+	stg	%r6,56(%r2)
+
+	la	%r3,32(%r3)
+	la	%r2,64(%r2)
+	brct	%r1,.Loop4_sqr
+
+	lghi	%r1,3
+	nr	%r4,%r1		// cnt=len%4
+	jz	.Lend_sqr
+
+.Loop1_sqr:
+	lg	%r7,0(%r3)
+	mlgr	%r6,%r7
+	stg	%r7,0(%r2)
+	stg	%r6,8(%r2)
+
+	la	%r3,8(%r3)
+	la	%r2,16(%r2)
+	brct	%r4,.Loop1_sqr
+
+.Lend_sqr:
+	lmg	%r6,%r7,48(%r15)
+	br	%r14
+.size	bn_sqr_words,.-bn_sqr_words
+
+// BN_ULONG bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d);
+.globl	bn_div_words
+.type	bn_div_words,@function
+.align	4
+bn_div_words:
+	dlgr	%r2,%r4
+	lgr	%r2,%r3
+	br	%r14
+.size	bn_div_words,.-bn_div_words
+
+// BN_ULONG bn_add_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
+.globl	bn_add_words
+.type	bn_add_words,@function
+.align	4
+bn_add_words:
+	la	%r1,0(%r2)	// put rp aside
+	lghi	%r2,0		// i=0
+	ltgfr	%r5,%r5
+	bler	%r14		// if (len<=0) return 0;
+
+	stg	%r6,48(%r15)
+	lghi	%r6,3
+	nr	%r6,%r5		// len%4
+	sra	%r5,2		// len/4, use sra because it sets condition code
+	jz	.Loop1_add	// carry is incidentally cleared if branch taken
+	algr	%r2,%r2		// clear carry
+
+.Loop4_add:
+	lg	%r0,0(%r2,%r3)
+	alcg	%r0,0(%r2,%r4)
+	stg	%r0,0(%r2,%r1)
+	lg	%r0,8(%r2,%r3)
+	alcg	%r0,8(%r2,%r4)
+	stg	%r0,8(%r2,%r1)
+	lg	%r0,16(%r2,%r3)
+	alcg	%r0,16(%r2,%r4)
+	stg	%r0,16(%r2,%r1)
+	lg	%r0,24(%r2,%r3)
+	alcg	%r0,24(%r2,%r4)
+	stg	%r0,24(%r2,%r1)
+
+	la	%r2,32(%r2)	// i+=4
+	brct	%r5,.Loop4_add
+
+	la	%r6,1(%r6)	// see if len%4 is zero ...
+	brct	%r6,.Loop1_add	// without touching condition code:-)
+
+.Lexit_add:
+	lghi	%r2,0
+	alcgr	%r2,%r2
+	lg	%r6,48(%r15)
+	br	%r14
+
+.Loop1_add:
+	lg	%r0,0(%r2,%r3)
+	alcg	%r0,0(%r2,%r4)
+	stg	%r0,0(%r2,%r1)
+
+	la	%r2,8(%r2)	// i++
+	brct	%r6,.Loop1_add
+
+	j	.Lexit_add
+.size	bn_add_words,.-bn_add_words
+
+// BN_ULONG bn_sub_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
+.globl	bn_sub_words
+.type	bn_sub_words,@function
+.align	4
+bn_sub_words:
+	la	%r1,0(%r2)	// put rp aside
+	lghi	%r2,0		// i=0
+	ltgfr	%r5,%r5
+	bler	%r14		// if (len<=0) return 0;
+
+	stg	%r6,48(%r15)
+	lghi	%r6,3
+	nr	%r6,%r5		// len%4
+	sra	%r5,2		// len/4, use sra because it sets condition code
+	jnz	.Loop4_sub	// borrow is incidentally cleared if branch taken
+	slgr	%r2,%r2		// clear borrow
+
+.Loop1_sub:
+	lg	%r0,0(%r2,%r3)
+	slbg	%r0,0(%r2,%r4)
+	stg	%r0,0(%r2,%r1)
+
+	la	%r2,8(%r2)	// i++
+	brct	%r6,.Loop1_sub
+	j	.Lexit_sub
+
+.Loop4_sub:
+	lg	%r0,0(%r2,%r3)
+	slbg	%r0,0(%r2,%r4)
+	stg	%r0,0(%r2,%r1)
+	lg	%r0,8(%r2,%r3)
+	slbg	%r0,8(%r2,%r4)
+	stg	%r0,8(%r2,%r1)
+	lg	%r0,16(%r2,%r3)
+	slbg	%r0,16(%r2,%r4)
+	stg	%r0,16(%r2,%r1)
+	lg	%r0,24(%r2,%r3)
+	slbg	%r0,24(%r2,%r4)
+	stg	%r0,24(%r2,%r1)
+
+	la	%r2,32(%r2)	// i+=4
+	brct	%r5,.Loop4_sub
+
+	la	%r6,1(%r6)	// see if len%4 is zero ...
+	brct	%r6,.Loop1_sub	// without touching condition code:-)
+
+.Lexit_sub:
+	lghi	%r2,0
+	slbgr	%r2,%r2
+	lcgr	%r2,%r2
+	lg	%r6,48(%r15)
+	br	%r14
+.size	bn_sub_words,.-bn_sub_words
+
+#define c1	%r1
+#define c2	%r5
+#define c3	%r8
+
+#define mul_add_c(ai,bi,c1,c2,c3)	\
+	lg	%r7,ai*8(%r3);		\
+	mlg	%r6,bi*8(%r4);		\
+	algr	c1,%r7;			\
+	alcgr	c2,%r6;			\
+	alcgr	c3,zero
+
+// void bn_mul_comba8(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
+.globl	bn_mul_comba8
+.type	bn_mul_comba8,@function
+.align	4
+bn_mul_comba8:
+	stmg	%r6,%r8,48(%r15)
+
+	lghi	c1,0
+	lghi	c2,0
+	lghi	c3,0
+	lghi	zero,0
+
+	mul_add_c(0,0,c1,c2,c3);
+	stg	c1,0*8(%r2)
+	lghi	c1,0
+
+	mul_add_c(0,1,c2,c3,c1);
+	mul_add_c(1,0,c2,c3,c1);
+	stg	c2,1*8(%r2)
+	lghi	c2,0
+
+	mul_add_c(2,0,c3,c1,c2);
+	mul_add_c(1,1,c3,c1,c2);
+	mul_add_c(0,2,c3,c1,c2);
+	stg	c3,2*8(%r2)
+	lghi	c3,0
+
+	mul_add_c(0,3,c1,c2,c3);
+	mul_add_c(1,2,c1,c2,c3);
+	mul_add_c(2,1,c1,c2,c3);
+	mul_add_c(3,0,c1,c2,c3);
+	stg	c1,3*8(%r2)
+	lghi	c1,0
+
+	mul_add_c(4,0,c2,c3,c1);
+	mul_add_c(3,1,c2,c3,c1);
+	mul_add_c(2,2,c2,c3,c1);
+	mul_add_c(1,3,c2,c3,c1);
+	mul_add_c(0,4,c2,c3,c1);
+	stg	c2,4*8(%r2)
+	lghi	c2,0
+
+	mul_add_c(0,5,c3,c1,c2);
+	mul_add_c(1,4,c3,c1,c2);
+	mul_add_c(2,3,c3,c1,c2);
+	mul_add_c(3,2,c3,c1,c2);
+	mul_add_c(4,1,c3,c1,c2);
+	mul_add_c(5,0,c3,c1,c2);
+	stg	c3,5*8(%r2)
+	lghi	c3,0
+
+	mul_add_c(6,0,c1,c2,c3);
+	mul_add_c(5,1,c1,c2,c3);
+	mul_add_c(4,2,c1,c2,c3);
+	mul_add_c(3,3,c1,c2,c3);
+	mul_add_c(2,4,c1,c2,c3);
+	mul_add_c(1,5,c1,c2,c3);
+	mul_add_c(0,6,c1,c2,c3);
+	stg	c1,6*8(%r2)
+	lghi	c1,0
+
+	mul_add_c(0,7,c2,c3,c1);
+	mul_add_c(1,6,c2,c3,c1);
+	mul_add_c(2,5,c2,c3,c1);
+	mul_add_c(3,4,c2,c3,c1);
+	mul_add_c(4,3,c2,c3,c1);
+	mul_add_c(5,2,c2,c3,c1);
+	mul_add_c(6,1,c2,c3,c1);
+	mul_add_c(7,0,c2,c3,c1);
+	stg	c2,7*8(%r2)
+	lghi	c2,0
+
+	mul_add_c(7,1,c3,c1,c2);
+	mul_add_c(6,2,c3,c1,c2);
+	mul_add_c(5,3,c3,c1,c2);
+	mul_add_c(4,4,c3,c1,c2);
+	mul_add_c(3,5,c3,c1,c2);
+	mul_add_c(2,6,c3,c1,c2);
+	mul_add_c(1,7,c3,c1,c2);
+	stg	c3,8*8(%r2)
+	lghi	c3,0
+
+	mul_add_c(2,7,c1,c2,c3);
+	mul_add_c(3,6,c1,c2,c3);
+	mul_add_c(4,5,c1,c2,c3);
+	mul_add_c(5,4,c1,c2,c3);
+	mul_add_c(6,3,c1,c2,c3);
+	mul_add_c(7,2,c1,c2,c3);
+	stg	c1,9*8(%r2)
+	lghi	c1,0
+
+	mul_add_c(7,3,c2,c3,c1);
+	mul_add_c(6,4,c2,c3,c1);
+	mul_add_c(5,5,c2,c3,c1);
+	mul_add_c(4,6,c2,c3,c1);
+	mul_add_c(3,7,c2,c3,c1);
+	stg	c2,10*8(%r2)
+	lghi	c2,0
+
+	mul_add_c(4,7,c3,c1,c2);
+	mul_add_c(5,6,c3,c1,c2);
+	mul_add_c(6,5,c3,c1,c2);
+	mul_add_c(7,4,c3,c1,c2);
+	stg	c3,11*8(%r2)
+	lghi	c3,0
+
+	mul_add_c(7,5,c1,c2,c3);
+	mul_add_c(6,6,c1,c2,c3);
+	mul_add_c(5,7,c1,c2,c3);
+	stg	c1,12*8(%r2)
+	lghi	c1,0
+
+
+	mul_add_c(6,7,c2,c3,c1);
+	mul_add_c(7,6,c2,c3,c1);
+	stg	c2,13*8(%r2)
+	lghi	c2,0
+
+	mul_add_c(7,7,c3,c1,c2);
+	stg	c3,14*8(%r2)
+	stg	c1,15*8(%r2)
+
+	lmg	%r6,%r8,48(%r15)
+	br	%r14
+.size	bn_mul_comba8,.-bn_mul_comba8
+
+// void bn_mul_comba4(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
+.globl	bn_mul_comba4
+.type	bn_mul_comba4,@function
+.align	4
+bn_mul_comba4:
+	stmg	%r6,%r8,48(%r15)
+
+	lghi	c1,0
+	lghi	c2,0
+	lghi	c3,0
+	lghi	zero,0
+
+	mul_add_c(0,0,c1,c2,c3);
+	stg	c1,0*8(%r3)
+	lghi	c1,0
+
+	mul_add_c(0,1,c2,c3,c1);
+	mul_add_c(1,0,c2,c3,c1);
+	stg	c2,1*8(%r2)
+	lghi	c2,0
+
+	mul_add_c(2,0,c3,c1,c2);
+	mul_add_c(1,1,c3,c1,c2);
+	mul_add_c(0,2,c3,c1,c2);
+	stg	c3,2*8(%r2)
+	lghi	c3,0
+
+	mul_add_c(0,3,c1,c2,c3);
+	mul_add_c(1,2,c1,c2,c3);
+	mul_add_c(2,1,c1,c2,c3);
+	mul_add_c(3,0,c1,c2,c3);
+	stg	c1,3*8(%r2)
+	lghi	c1,0
+
+	mul_add_c(3,1,c2,c3,c1);
+	mul_add_c(2,2,c2,c3,c1);
+	mul_add_c(1,3,c2,c3,c1);
+	stg	c2,4*8(%r2)
+	lghi	c2,0
+
+	mul_add_c(2,3,c3,c1,c2);
+	mul_add_c(3,2,c3,c1,c2);
+	stg	c3,5*8(%r2)
+	lghi	c3,0
+
+	mul_add_c(3,3,c1,c2,c3);
+	stg	c1,6*8(%r2)
+	stg	c2,7*8(%r2)
+
+	stmg	%r6,%r8,48(%r15)
+	br	%r14
+.size	bn_mul_comba4,.-bn_mul_comba4
+
+#define sqr_add_c(ai,c1,c2,c3)		\
+	lg	%r7,ai*8(%r3);		\
+	mlgr	%r6,%r7;		\
+	algr	c1,%r7;			\
+	alcgr	c2,%r6;			\
+	alcgr	c3,zero
+
+#define sqr_add_c2(ai,aj,c1,c2,c3)	\
+	lg	%r7,ai*8(%r3);		\
+	mlg	%r6,aj*8(%r3);		\
+	algr	c1,%r7;			\
+	alcgr	c2,%r6;			\
+	alcgr	c3,zero;		\
+	algr	c1,%r7;			\
+	alcgr	c2,%r6;			\
+	alcgr	c3,zero
+
+// void bn_sqr_comba8(BN_ULONG *r2,BN_ULONG *r3);
+.globl	bn_sqr_comba8
+.type	bn_sqr_comba8,@function
+.align	4
+bn_sqr_comba8:
+	stmg	%r6,%r8,48(%r15)
+
+	lghi	c1,0
+	lghi	c2,0
+	lghi	c3,0
+	lghi	zero,0
+
+	sqr_add_c(0,c1,c2,c3);
+	stg	c1,0*8(%r2)
+	lghi	c1,0
+
+	sqr_add_c2(1,0,c2,c3,c1);
+	stg	c2,1*8(%r2)
+	lghi	c2,0
+
+	sqr_add_c(1,c3,c1,c2);
+	sqr_add_c2(2,0,c3,c1,c2);
+	stg	c3,2*8(%r2)
+	lghi	c3,0
+
+	sqr_add_c2(3,0,c1,c2,c3);
+	sqr_add_c2(2,1,c1,c2,c3);
+	stg	c1,3*8(%r2)
+	lghi	c1,0
+
+	sqr_add_c(2,c2,c3,c1);
+	sqr_add_c2(3,1,c2,c3,c1);
+	sqr_add_c2(4,0,c2,c3,c1);
+	stg	c2,4*8(%r2)
+	lghi	c2,0
+
+	sqr_add_c2(5,0,c3,c1,c2);
+	sqr_add_c2(4,1,c3,c1,c2);
+	sqr_add_c2(3,2,c3,c1,c2);
+	stg	c3,5*8(%r2)
+	lghi	c3,0
+
+	sqr_add_c(3,c1,c2,c3);
+	sqr_add_c2(4,2,c1,c2,c3);
+	sqr_add_c2(5,1,c1,c2,c3);
+	sqr_add_c2(6,0,c1,c2,c3);
+	stg	c1,6*8(%r2)
+	lghi	c1,0
+
+	sqr_add_c2(7,0,c2,c3,c1);
+	sqr_add_c2(6,1,c2,c3,c1);
+	sqr_add_c2(5,2,c2,c3,c1);
+	sqr_add_c2(4,3,c2,c3,c1);
+	stg	c2,7*8(%r2)
+	lghi	c2,0
+
+	sqr_add_c(4,c3,c1,c2);
+	sqr_add_c2(5,3,c3,c1,c2);
+	sqr_add_c2(6,2,c3,c1,c2);
+	sqr_add_c2(7,1,c3,c1,c2);
+	stg	c3,8*8(%r2)
+	lghi	c3,0
+
+	sqr_add_c2(7,2,c1,c2,c3);
+	sqr_add_c2(6,3,c1,c2,c3);
+	sqr_add_c2(5,4,c1,c2,c3);
+	stg	c1,9*8(%r2)
+	lghi	c1,0
+
+	sqr_add_c(5,c2,c3,c1);
+	sqr_add_c2(6,4,c2,c3,c1);
+	sqr_add_c2(7,3,c2,c3,c1);
+	stg	c2,10*8(%r2)
+	lghi	c2,0
+
+	sqr_add_c2(7,4,c3,c1,c2);
+	sqr_add_c2(6,5,c3,c1,c2);
+	stg	c3,11*8(%r2)
+	lghi	c3,0
+
+	sqr_add_c(6,c1,c2,c3);
+	sqr_add_c2(7,5,c1,c2,c3);
+	stg	c1,12*8(%r2)
+	lghi	c1,0
+
+	sqr_add_c2(7,6,c2,c3,c1);
+	stg	c2,13*8(%r2)
+	lghi	c2,0
+
+	sqr_add_c(7,c3,c1,c2);
+	stg	c3,14*8(%r2)
+	stg	c1,15*8(%r2)
+
+	lmg	%r6,%r8,48(%r15)
+	br	%r14
+.size	bn_sqr_comba8,.-bn_sqr_comba8
+
+// void bn_sqr_comba4(BN_ULONG *r2,BN_ULONG *r3);
+.globl bn_sqr_comba4
+.type	bn_sqr_comba4,@function
+.align	4
+bn_sqr_comba4:
+	stmg	%r6,%r8,48(%r15)
+
+	lghi	c1,0
+	lghi	c2,0
+	lghi	c3,0
+	lghi	zero,0
+
+	sqr_add_c(0,c1,c2,c3);
+	stg	c1,0*8(%r2)
+	lghi	c1,0
+
+	sqr_add_c2(1,0,c2,c3,c1);
+	stg	c2,1*8(%r2)
+	lghi	c2,0
+
+	sqr_add_c(1,c3,c1,c2);
+	sqr_add_c2(2,0,c3,c1,c2);
+	stg	c3,2*8(%r2)
+	lghi	c3,0
+
+	sqr_add_c2(3,0,c1,c2,c3);
+	sqr_add_c2(2,1,c1,c2,c3);
+	stg	c1,3*8(%r2)
+	lghi	c1,0
+
+	sqr_add_c(2,c2,c3,c1);
+	sqr_add_c2(3,1,c2,c3,c1);
+	stg	c2,4*8(%r2)
+	lghi	c2,0
+
+	sqr_add_c2(3,2,c3,c1,c2);
+	stg	c3,5*8(%r2)
+	lghi	c3,0
+
+	sqr_add_c(3,c1,c2,c3);
+	stg	c1,6*8(%r2)
+	stg	c2,7*8(%r2)
+
+	lmg	%r6,%r8,48(%r15)
+	br	%r14
+.size	bn_sqr_comba4,.-bn_sqr_comba4
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparct4-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparct4-mont.pl
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv8.S
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv8.S
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv8plus.S
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv8plus.S
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv9-gf2m.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv9-gf2m.pl
@ -0,0 +1,200 @@
+#! /usr/bin/env perl
+# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# October 2012
+#
+# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
+# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
+# the time being... Except that it has two code paths: one suitable
+# for all SPARCv9 processors and one for VIS3-capable ones. Former
+# delivers ~25-45% more, more for longer keys, heaviest DH and DSA
+# verify operations on venerable UltraSPARC II. On T4 VIS3 code is
+# ~100-230% faster than gcc-generated code and ~35-90% faster than
+# the pure SPARCv9 code path.
+
+$output = pop;
+open STDOUT,">$output";
+
+$locals=16*8;
+
+$tab="%l0";
+
+@T=("%g2","%g3");
+@i=("%g4","%g5");
+
+($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5));
+($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo;
+
+$code.=<<___;
+#include <sparc_arch.h>
+
+#ifdef __arch64__
+.register	%g2,#scratch
+.register	%g3,#scratch
+#endif
+
+#ifdef __PIC__
+SPARC_PIC_THUNK(%g1)
+#endif
+
+.globl	bn_GF2m_mul_2x2
+.align	16
+bn_GF2m_mul_2x2:
+        SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
+        ld	[%g1+0],%g1             	! OPENSSL_sparcv9cap_P[0]
+
+        andcc	%g1, SPARCV9_VIS3, %g0
+        bz,pn	%icc,.Lsoftware
+        nop
+
+	sllx	%o1, 32, %o1
+	sllx	%o3, 32, %o3
+	or	%o2, %o1, %o1
+	or	%o4, %o3, %o3
+	.word	0x95b262ab			! xmulx   %o1, %o3, %o2
+	.word	0x99b262cb			! xmulxhi %o1, %o3, %o4
+	srlx	%o2, 32, %o1			! 13 cycles later
+	st	%o2, [%o0+0]
+	st	%o1, [%o0+4]
+	srlx	%o4, 32, %o3
+	st	%o4, [%o0+8]
+	retl
+	st	%o3, [%o0+12]
+
+.align	16
+.Lsoftware:
+	save	%sp,-STACK_FRAME-$locals,%sp
+
+	sllx	%i1,32,$a
+	mov	-1,$a12
+	sllx	%i3,32,$b
+	or	%i2,$a,$a
+	srlx	$a12,1,$a48			! 0x7fff...
+	or	%i4,$b,$b
+	srlx	$a12,2,$a12			! 0x3fff...
+	add	%sp,STACK_BIAS+STACK_FRAME,$tab
+
+	sllx	$a,2,$a4
+	mov	$a,$a1
+	sllx	$a,1,$a2
+
+	srax	$a4,63,@i[1]			! broadcast 61st bit
+	and	$a48,$a4,$a4			! (a<<2)&0x7fff...
+	srlx	$a48,2,$a48
+	srax	$a2,63,@i[0]			! broadcast 62nd bit
+	and	$a12,$a2,$a2			! (a<<1)&0x3fff...
+	srax	$a1,63,$lo			! broadcast 63rd bit
+	and	$a48,$a1,$a1			! (a<<0)&0x1fff...
+
+	sllx	$a1,3,$a8
+	and	$b,$lo,$lo
+	and	$b,@i[0],@i[0]
+	and	$b,@i[1],@i[1]
+
+	stx	%g0,[$tab+0*8]			! tab[0]=0
+	xor	$a1,$a2,$a12
+	stx	$a1,[$tab+1*8]			! tab[1]=a1
+	stx	$a2,[$tab+2*8]			! tab[2]=a2
+	 xor	$a4,$a8,$a48
+	stx	$a12,[$tab+3*8]			! tab[3]=a1^a2
+	 xor	$a4,$a1,$a1
+
+	stx	$a4,[$tab+4*8]			! tab[4]=a4
+	xor	$a4,$a2,$a2
+	stx	$a1,[$tab+5*8]			! tab[5]=a1^a4
+	xor	$a4,$a12,$a12
+	stx	$a2,[$tab+6*8]			! tab[6]=a2^a4
+	 xor	$a48,$a1,$a1
+	stx	$a12,[$tab+7*8]			! tab[7]=a1^a2^a4
+	 xor	$a48,$a2,$a2
+
+	stx	$a8,[$tab+8*8]			! tab[8]=a8
+	xor	$a48,$a12,$a12
+	stx	$a1,[$tab+9*8]			! tab[9]=a1^a8
+	 xor	$a4,$a1,$a1
+	stx	$a2,[$tab+10*8]			! tab[10]=a2^a8
+	 xor	$a4,$a2,$a2
+	stx	$a12,[$tab+11*8]		! tab[11]=a1^a2^a8
+
+	xor	$a4,$a12,$a12
+	stx	$a48,[$tab+12*8]		! tab[12]=a4^a8
+	 srlx	$lo,1,$hi
+	stx	$a1,[$tab+13*8]			! tab[13]=a1^a4^a8
+	 sllx	$lo,63,$lo
+	stx	$a2,[$tab+14*8]			! tab[14]=a2^a4^a8
+	 srlx	@i[0],2,@T[0]
+	stx	$a12,[$tab+15*8]		! tab[15]=a1^a2^a4^a8
+
+	sllx	@i[0],62,$a1
+	 sllx	$b,3,@i[0]
+	srlx	@i[1],3,@T[1]
+	 and	@i[0],`0xf<<3`,@i[0]
+	sllx	@i[1],61,$a2
+	 ldx	[$tab+@i[0]],@i[0]
+	 srlx	$b,4-3,@i[1]
+	xor	@T[0],$hi,$hi
+	 and	@i[1],`0xf<<3`,@i[1]
+	xor	$a1,$lo,$lo
+	 ldx	[$tab+@i[1]],@i[1]
+	xor	@T[1],$hi,$hi
+
+	xor	@i[0],$lo,$lo
+	srlx	$b,8-3,@i[0]
+	 xor	$a2,$lo,$lo
+	and	@i[0],`0xf<<3`,@i[0]
+___
+for($n=1;$n<14;$n++) {
+$code.=<<___;
+	sllx	@i[1],`$n*4`,@T[0]
+	ldx	[$tab+@i[0]],@i[0]
+	srlx	@i[1],`64-$n*4`,@T[1]
+	xor	@T[0],$lo,$lo
+	srlx	$b,`($n+2)*4`-3,@i[1]
+	xor	@T[1],$hi,$hi
+	and	@i[1],`0xf<<3`,@i[1]
+___
+	push(@i,shift(@i)); push(@T,shift(@T));
+}
+$code.=<<___;
+	sllx	@i[1],`$n*4`,@T[0]
+	ldx	[$tab+@i[0]],@i[0]
+	srlx	@i[1],`64-$n*4`,@T[1]
+	xor	@T[0],$lo,$lo
+
+	sllx	@i[0],`($n+1)*4`,@T[0]
+	 xor	@T[1],$hi,$hi
+	srlx	@i[0],`64-($n+1)*4`,@T[1]
+	xor	@T[0],$lo,$lo
+	xor	@T[1],$hi,$hi
+
+	srlx	$lo,32,%i1
+	st	$lo,[%i0+0]
+	st	%i1,[%i0+4]
+	srlx	$hi,32,%i2
+	st	$hi,[%i0+8]
+	st	%i2,[%i0+12]
+
+	ret
+	restore
+.type	bn_GF2m_mul_2x2,#function
+.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
+.asciz	"GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
+.align	4
+___
+
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;
+print $code;
+close STDOUT;
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv9-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv9-mont.pl
@ -0,0 +1,620 @@
+#! /usr/bin/env perl
+# Copyright 2005-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# December 2005
+#
+# Pure SPARCv9/8+ and IALU-only bn_mul_mont implementation. The reasons
+# for undertaken effort are multiple. First of all, UltraSPARC is not
+# the whole SPARCv9 universe and other VIS-free implementations deserve
+# optimized code as much. Secondly, newly introduced UltraSPARC T1,
+# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive paths,
+# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with
+# several integrated RSA/DSA accelerator circuits accessible through
+# kernel driver [only(*)], but having decent user-land software
+# implementation is important too. Finally, reasons like desire to
+# experiment with dedicated squaring procedure. Yes, this module
+# implements one, because it was easiest to draft it in SPARCv9
+# instructions...
+
+# (*)	Engine accessing the driver in question is on my TODO list.
+#	For reference, accelerator is estimated to give 6 to 10 times
+#	improvement on single-threaded RSA sign. It should be noted
+#	that 6-10x improvement coefficient does not actually mean
+#	something extraordinary in terms of absolute [single-threaded]
+#	performance, as SPARCv9 instruction set is by all means least
+#	suitable for high performance crypto among other 64 bit
+#	platforms. 6-10x factor simply places T1 in same performance
+#	domain as say AMD64 and IA-64. Improvement of RSA verify don't
+#	appear impressive at all, but it's the sign operation which is
+#	far more critical/interesting.
+
+# You might notice that inner loops are modulo-scheduled:-) This has
+# essentially negligible impact on UltraSPARC performance, it's
+# Fujitsu SPARC64 V users who should notice and hopefully appreciate
+# the advantage... Currently this module surpasses sparcv9a-mont.pl
+# by ~20% on UltraSPARC-III and later cores, but recall that sparcv9a
+# module still have hidden potential [see TODO list there], which is
+# estimated to be larger than 20%...
+
+$output = pop;
+open STDOUT,">$output";
+
+# int bn_mul_mont(
+$rp="%i0";	# BN_ULONG *rp,
+$ap="%i1";	# const BN_ULONG *ap,
+$bp="%i2";	# const BN_ULONG *bp,
+$np="%i3";	# const BN_ULONG *np,
+$n0="%i4";	# const BN_ULONG *n0,
+$num="%i5";	# int num);
+
+$frame="STACK_FRAME";
+$bias="STACK_BIAS";
+
+$car0="%o0";
+$car1="%o1";
+$car2="%o2";	# 1 bit
+$acc0="%o3";
+$acc1="%o4";
+$mask="%g1";	# 32 bits, what a waste...
+$tmp0="%g4";
+$tmp1="%g5";
+
+$i="%l0";
+$j="%l1";
+$mul0="%l2";
+$mul1="%l3";
+$tp="%l4";
+$apj="%l5";
+$npj="%l6";
+$tpj="%l7";
+
+$fname="bn_mul_mont_int";
+
+$code=<<___;
+#include "sparc_arch.h"
+
+.section	".text",#alloc,#execinstr
+
+.global	$fname
+.align	32
+$fname:
+	cmp	%o5,4			! 128 bits minimum
+	bge,pt	%icc,.Lenter
+	sethi	%hi(0xffffffff),$mask
+	retl
+	clr	%o0
+.align	32
+.Lenter:
+	save	%sp,-$frame,%sp
+	sll	$num,2,$num		! num*=4
+	or	$mask,%lo(0xffffffff),$mask
+	ld	[$n0],$n0
+	cmp	$ap,$bp
+	and	$num,$mask,$num
+	ld	[$bp],$mul0		! bp[0]
+	nop
+
+	add	%sp,$bias,%o7		! real top of stack
+	ld	[$ap],$car0		! ap[0] ! redundant in squaring context
+	sub	%o7,$num,%o7
+	ld	[$ap+4],$apj		! ap[1]
+	and	%o7,-1024,%o7
+	ld	[$np],$car1		! np[0]
+	sub	%o7,$bias,%sp		! alloca
+	ld	[$np+4],$npj		! np[1]
+	be,pt	SIZE_T_CC,.Lbn_sqr_mont
+	mov	12,$j
+
+	mulx	$car0,$mul0,$car0	! ap[0]*bp[0]
+	mulx	$apj,$mul0,$tmp0	!prologue! ap[1]*bp[0]
+	and	$car0,$mask,$acc0
+	add	%sp,$bias+$frame,$tp
+	ld	[$ap+8],$apj		!prologue!
+
+	mulx	$n0,$acc0,$mul1		! "t[0]"*n0
+	and	$mul1,$mask,$mul1
+
+	mulx	$car1,$mul1,$car1	! np[0]*"t[0]"*n0
+	mulx	$npj,$mul1,$acc1	!prologue! np[1]*"t[0]"*n0
+	srlx	$car0,32,$car0
+	add	$acc0,$car1,$car1
+	ld	[$np+8],$npj		!prologue!
+	srlx	$car1,32,$car1
+	mov	$tmp0,$acc0		!prologue!
+
+.L1st:
+	mulx	$apj,$mul0,$tmp0
+	mulx	$npj,$mul1,$tmp1
+	add	$acc0,$car0,$car0
+	ld	[$ap+$j],$apj		! ap[j]
+	and	$car0,$mask,$acc0
+	add	$acc1,$car1,$car1
+	ld	[$np+$j],$npj		! np[j]
+	srlx	$car0,32,$car0
+	add	$acc0,$car1,$car1
+	add	$j,4,$j			! j++
+	mov	$tmp0,$acc0
+	st	$car1,[$tp]
+	cmp	$j,$num
+	mov	$tmp1,$acc1
+	srlx	$car1,32,$car1
+	bl	%icc,.L1st
+	add	$tp,4,$tp		! tp++
+!.L1st
+
+	mulx	$apj,$mul0,$tmp0	!epilogue!
+	mulx	$npj,$mul1,$tmp1
+	add	$acc0,$car0,$car0
+	and	$car0,$mask,$acc0
+	add	$acc1,$car1,$car1
+	srlx	$car0,32,$car0
+	add	$acc0,$car1,$car1
+	st	$car1,[$tp]
+	srlx	$car1,32,$car1
+
+	add	$tmp0,$car0,$car0
+	and	$car0,$mask,$acc0
+	add	$tmp1,$car1,$car1
+	srlx	$car0,32,$car0
+	add	$acc0,$car1,$car1
+	st	$car1,[$tp+4]
+	srlx	$car1,32,$car1
+
+	add	$car0,$car1,$car1
+	st	$car1,[$tp+8]
+	srlx	$car1,32,$car2
+
+	mov	4,$i			! i++
+	ld	[$bp+4],$mul0		! bp[1]
+.Louter:
+	add	%sp,$bias+$frame,$tp
+	ld	[$ap],$car0		! ap[0]
+	ld	[$ap+4],$apj		! ap[1]
+	ld	[$np],$car1		! np[0]
+	ld	[$np+4],$npj		! np[1]
+	ld	[$tp],$tmp1		! tp[0]
+	ld	[$tp+4],$tpj		! tp[1]
+	mov	12,$j
+
+	mulx	$car0,$mul0,$car0
+	mulx	$apj,$mul0,$tmp0	!prologue!
+	add	$tmp1,$car0,$car0
+	ld	[$ap+8],$apj		!prologue!
+	and	$car0,$mask,$acc0
+
+	mulx	$n0,$acc0,$mul1
+	and	$mul1,$mask,$mul1
+
+	mulx	$car1,$mul1,$car1
+	mulx	$npj,$mul1,$acc1	!prologue!
+	srlx	$car0,32,$car0
+	add	$acc0,$car1,$car1
+	ld	[$np+8],$npj		!prologue!
+	srlx	$car1,32,$car1
+	mov	$tmp0,$acc0		!prologue!
+
+.Linner:
+	mulx	$apj,$mul0,$tmp0
+	mulx	$npj,$mul1,$tmp1
+	add	$tpj,$car0,$car0
+	ld	[$ap+$j],$apj		! ap[j]
+	add	$acc0,$car0,$car0
+	add	$acc1,$car1,$car1
+	ld	[$np+$j],$npj		! np[j]
+	and	$car0,$mask,$acc0
+	ld	[$tp+8],$tpj		! tp[j]
+	srlx	$car0,32,$car0
+	add	$acc0,$car1,$car1
+	add	$j,4,$j			! j++
+	mov	$tmp0,$acc0
+	st	$car1,[$tp]		! tp[j-1]
+	srlx	$car1,32,$car1
+	mov	$tmp1,$acc1
+	cmp	$j,$num
+	bl	%icc,.Linner
+	add	$tp,4,$tp		! tp++
+!.Linner
+
+	mulx	$apj,$mul0,$tmp0	!epilogue!
+	mulx	$npj,$mul1,$tmp1
+	add	$tpj,$car0,$car0
+	add	$acc0,$car0,$car0
+	ld	[$tp+8],$tpj		! tp[j]
+	and	$car0,$mask,$acc0
+	add	$acc1,$car1,$car1
+	srlx	$car0,32,$car0
+	add	$acc0,$car1,$car1
+	st	$car1,[$tp]		! tp[j-1]
+	srlx	$car1,32,$car1
+
+	add	$tpj,$car0,$car0
+	add	$tmp0,$car0,$car0
+	and	$car0,$mask,$acc0
+	add	$tmp1,$car1,$car1
+	add	$acc0,$car1,$car1
+	st	$car1,[$tp+4]		! tp[j-1]
+	srlx	$car0,32,$car0
+	add	$i,4,$i			! i++
+	srlx	$car1,32,$car1
+
+	add	$car0,$car1,$car1
+	cmp	$i,$num
+	add	$car2,$car1,$car1
+	st	$car1,[$tp+8]
+
+	srlx	$car1,32,$car2
+	bl,a	%icc,.Louter
+	ld	[$bp+$i],$mul0		! bp[i]
+!.Louter
+
+	add	$tp,12,$tp
+
+.Ltail:
+	add	$np,$num,$np
+	add	$rp,$num,$rp
+	sub	%g0,$num,%o7		! k=-num
+	ba	.Lsub
+	subcc	%g0,%g0,%g0		! clear %icc.c
+.align	16
+.Lsub:
+	ld	[$tp+%o7],%o0
+	ld	[$np+%o7],%o1
+	subccc	%o0,%o1,%o1		! tp[j]-np[j]
+	add	$rp,%o7,$i
+	add	%o7,4,%o7
+	brnz	%o7,.Lsub
+	st	%o1,[$i]
+	subccc	$car2,0,$car2		! handle upmost overflow bit
+	sub	%g0,$num,%o7
+
+.Lcopy:
+	ld	[$tp+%o7],%o1		! conditional copy
+	ld	[$rp+%o7],%o0
+	st	%g0,[$tp+%o7]		! zap tp
+	movcs	%icc,%o1,%o0
+	st	%o0,[$rp+%o7]
+	add	%o7,4,%o7
+	brnz	%o7,.Lcopy
+	nop
+	mov	1,%i0
+	ret
+	restore
+___
+
+########
+######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
+######## code without following dedicated squaring procedure.
+########
+$sbit="%o5";
+
+$code.=<<___;
+.align	32
+.Lbn_sqr_mont:
+	mulx	$mul0,$mul0,$car0		! ap[0]*ap[0]
+	mulx	$apj,$mul0,$tmp0		!prologue!
+	and	$car0,$mask,$acc0
+	add	%sp,$bias+$frame,$tp
+	ld	[$ap+8],$apj			!prologue!
+
+	mulx	$n0,$acc0,$mul1			! "t[0]"*n0
+	srlx	$car0,32,$car0
+	and	$mul1,$mask,$mul1
+
+	mulx	$car1,$mul1,$car1		! np[0]*"t[0]"*n0
+	mulx	$npj,$mul1,$acc1		!prologue!
+	and	$car0,1,$sbit
+	ld	[$np+8],$npj			!prologue!
+	srlx	$car0,1,$car0
+	add	$acc0,$car1,$car1
+	srlx	$car1,32,$car1
+	mov	$tmp0,$acc0			!prologue!
+
+.Lsqr_1st:
+	mulx	$apj,$mul0,$tmp0
+	mulx	$npj,$mul1,$tmp1
+	add	$acc0,$car0,$car0		! ap[j]*a0+c0
+	add	$acc1,$car1,$car1
+	ld	[$ap+$j],$apj			! ap[j]
+	and	$car0,$mask,$acc0
+	ld	[$np+$j],$npj			! np[j]
+	srlx	$car0,32,$car0
+	add	$acc0,$acc0,$acc0
+	or	$sbit,$acc0,$acc0
+	mov	$tmp1,$acc1
+	srlx	$acc0,32,$sbit
+	add	$j,4,$j				! j++
+	and	$acc0,$mask,$acc0
+	cmp	$j,$num
+	add	$acc0,$car1,$car1
+	st	$car1,[$tp]
+	mov	$tmp0,$acc0
+	srlx	$car1,32,$car1
+	bl	%icc,.Lsqr_1st
+	add	$tp,4,$tp			! tp++
+!.Lsqr_1st
+
+	mulx	$apj,$mul0,$tmp0		! epilogue
+	mulx	$npj,$mul1,$tmp1
+	add	$acc0,$car0,$car0		! ap[j]*a0+c0
+	add	$acc1,$car1,$car1
+	and	$car0,$mask,$acc0
+	srlx	$car0,32,$car0
+	add	$acc0,$acc0,$acc0
+	or	$sbit,$acc0,$acc0
+	srlx	$acc0,32,$sbit
+	and	$acc0,$mask,$acc0
+	add	$acc0,$car1,$car1
+	st	$car1,[$tp]
+	srlx	$car1,32,$car1
+
+	add	$tmp0,$car0,$car0		! ap[j]*a0+c0
+	add	$tmp1,$car1,$car1
+	and	$car0,$mask,$acc0
+	srlx	$car0,32,$car0
+	add	$acc0,$acc0,$acc0
+	or	$sbit,$acc0,$acc0
+	srlx	$acc0,32,$sbit
+	and	$acc0,$mask,$acc0
+	add	$acc0,$car1,$car1
+	st	$car1,[$tp+4]
+	srlx	$car1,32,$car1
+
+	add	$car0,$car0,$car0
+	or	$sbit,$car0,$car0
+	add	$car0,$car1,$car1
+	st	$car1,[$tp+8]
+	srlx	$car1,32,$car2
+
+	ld	[%sp+$bias+$frame],$tmp0	! tp[0]
+	ld	[%sp+$bias+$frame+4],$tmp1	! tp[1]
+	ld	[%sp+$bias+$frame+8],$tpj	! tp[2]
+	ld	[$ap+4],$mul0			! ap[1]
+	ld	[$ap+8],$apj			! ap[2]
+	ld	[$np],$car1			! np[0]
+	ld	[$np+4],$npj			! np[1]
+	mulx	$n0,$tmp0,$mul1
+
+	mulx	$mul0,$mul0,$car0
+	and	$mul1,$mask,$mul1
+
+	mulx	$car1,$mul1,$car1
+	mulx	$npj,$mul1,$acc1
+	add	$tmp0,$car1,$car1
+	and	$car0,$mask,$acc0
+	ld	[$np+8],$npj			! np[2]
+	srlx	$car1,32,$car1
+	add	$tmp1,$car1,$car1
+	srlx	$car0,32,$car0
+	add	$acc0,$car1,$car1
+	and	$car0,1,$sbit
+	add	$acc1,$car1,$car1
+	srlx	$car0,1,$car0
+	mov	12,$j
+	st	$car1,[%sp+$bias+$frame]	! tp[0]=
+	srlx	$car1,32,$car1
+	add	%sp,$bias+$frame+4,$tp
+
+.Lsqr_2nd:
+	mulx	$apj,$mul0,$acc0
+	mulx	$npj,$mul1,$acc1
+	add	$acc0,$car0,$car0
+	add	$tpj,$sbit,$sbit
+	ld	[$ap+$j],$apj			! ap[j]
+	and	$car0,$mask,$acc0
+	ld	[$np+$j],$npj			! np[j]
+	srlx	$car0,32,$car0
+	add	$acc1,$car1,$car1
+	ld	[$tp+8],$tpj			! tp[j]
+	add	$acc0,$acc0,$acc0
+	add	$j,4,$j				! j++
+	add	$sbit,$acc0,$acc0
+	srlx	$acc0,32,$sbit
+	and	$acc0,$mask,$acc0
+	cmp	$j,$num
+	add	$acc0,$car1,$car1
+	st	$car1,[$tp]			! tp[j-1]
+	srlx	$car1,32,$car1
+	bl	%icc,.Lsqr_2nd
+	add	$tp,4,$tp			! tp++
+!.Lsqr_2nd
+
+	mulx	$apj,$mul0,$acc0
+	mulx	$npj,$mul1,$acc1
+	add	$acc0,$car0,$car0
+	add	$tpj,$sbit,$sbit
+	and	$car0,$mask,$acc0
+	srlx	$car0,32,$car0
+	add	$acc1,$car1,$car1
+	add	$acc0,$acc0,$acc0
+	add	$sbit,$acc0,$acc0
+	srlx	$acc0,32,$sbit
+	and	$acc0,$mask,$acc0
+	add	$acc0,$car1,$car1
+	st	$car1,[$tp]			! tp[j-1]
+	srlx	$car1,32,$car1
+
+	add	$car0,$car0,$car0
+	add	$sbit,$car0,$car0
+	add	$car0,$car1,$car1
+	add	$car2,$car1,$car1
+	st	$car1,[$tp+4]
+	srlx	$car1,32,$car2
+
+	ld	[%sp+$bias+$frame],$tmp1	! tp[0]
+	ld	[%sp+$bias+$frame+4],$tpj	! tp[1]
+	ld	[$ap+8],$mul0			! ap[2]
+	ld	[$np],$car1			! np[0]
+	ld	[$np+4],$npj			! np[1]
+	mulx	$n0,$tmp1,$mul1
+	and	$mul1,$mask,$mul1
+	mov	8,$i
+
+	mulx	$mul0,$mul0,$car0
+	mulx	$car1,$mul1,$car1
+	and	$car0,$mask,$acc0
+	add	$tmp1,$car1,$car1
+	srlx	$car0,32,$car0
+	add	%sp,$bias+$frame,$tp
+	srlx	$car1,32,$car1
+	and	$car0,1,$sbit
+	srlx	$car0,1,$car0
+	mov	4,$j
+
+.Lsqr_outer:
+.Lsqr_inner1:
+	mulx	$npj,$mul1,$acc1
+	add	$tpj,$car1,$car1
+	add	$j,4,$j
+	ld	[$tp+8],$tpj
+	cmp	$j,$i
+	add	$acc1,$car1,$car1
+	ld	[$np+$j],$npj
+	st	$car1,[$tp]
+	srlx	$car1,32,$car1
+	bl	%icc,.Lsqr_inner1
+	add	$tp,4,$tp
+!.Lsqr_inner1
+
+	add	$j,4,$j
+	ld	[$ap+$j],$apj			! ap[j]
+	mulx	$npj,$mul1,$acc1
+	add	$tpj,$car1,$car1
+	ld	[$np+$j],$npj			! np[j]
+	srlx	$car1,32,$tmp0
+	and	$car1,$mask,$car1
+	add	$tmp0,$sbit,$sbit
+	add	$acc0,$car1,$car1
+	ld	[$tp+8],$tpj			! tp[j]
+	add	$acc1,$car1,$car1
+	st	$car1,[$tp]
+	srlx	$car1,32,$car1
+
+	add	$j,4,$j
+	cmp	$j,$num
+	be,pn	%icc,.Lsqr_no_inner2
+	add	$tp,4,$tp
+
+.Lsqr_inner2:
+	mulx	$apj,$mul0,$acc0
+	mulx	$npj,$mul1,$acc1
+	add	$tpj,$sbit,$sbit
+	add	$acc0,$car0,$car0
+	ld	[$ap+$j],$apj			! ap[j]
+	and	$car0,$mask,$acc0
+	ld	[$np+$j],$npj			! np[j]
+	srlx	$car0,32,$car0
+	add	$acc0,$acc0,$acc0
+	ld	[$tp+8],$tpj			! tp[j]
+	add	$sbit,$acc0,$acc0
+	add	$j,4,$j				! j++
+	srlx	$acc0,32,$sbit
+	and	$acc0,$mask,$acc0
+	cmp	$j,$num
+	add	$acc0,$car1,$car1
+	add	$acc1,$car1,$car1
+	st	$car1,[$tp]			! tp[j-1]
+	srlx	$car1,32,$car1
+	bl	%icc,.Lsqr_inner2
+	add	$tp,4,$tp			! tp++
+
+.Lsqr_no_inner2:
+	mulx	$apj,$mul0,$acc0
+	mulx	$npj,$mul1,$acc1
+	add	$tpj,$sbit,$sbit
+	add	$acc0,$car0,$car0
+	and	$car0,$mask,$acc0
+	srlx	$car0,32,$car0
+	add	$acc0,$acc0,$acc0
+	add	$sbit,$acc0,$acc0
+	srlx	$acc0,32,$sbit
+	and	$acc0,$mask,$acc0
+	add	$acc0,$car1,$car1
+	add	$acc1,$car1,$car1
+	st	$car1,[$tp]			! tp[j-1]
+	srlx	$car1,32,$car1
+
+	add	$car0,$car0,$car0
+	add	$sbit,$car0,$car0
+	add	$car0,$car1,$car1
+	add	$car2,$car1,$car1
+	st	$car1,[$tp+4]
+	srlx	$car1,32,$car2
+
+	add	$i,4,$i				! i++
+	ld	[%sp+$bias+$frame],$tmp1	! tp[0]
+	ld	[%sp+$bias+$frame+4],$tpj	! tp[1]
+	ld	[$ap+$i],$mul0			! ap[j]
+	ld	[$np],$car1			! np[0]
+	ld	[$np+4],$npj			! np[1]
+	mulx	$n0,$tmp1,$mul1
+	and	$mul1,$mask,$mul1
+	add	$i,4,$tmp0
+
+	mulx	$mul0,$mul0,$car0
+	mulx	$car1,$mul1,$car1
+	and	$car0,$mask,$acc0
+	add	$tmp1,$car1,$car1
+	srlx	$car0,32,$car0
+	add	%sp,$bias+$frame,$tp
+	srlx	$car1,32,$car1
+	and	$car0,1,$sbit
+	srlx	$car0,1,$car0
+
+	cmp	$tmp0,$num			! i<num-1
+	bl	%icc,.Lsqr_outer
+	mov	4,$j
+
+.Lsqr_last:
+	mulx	$npj,$mul1,$acc1
+	add	$tpj,$car1,$car1
+	add	$j,4,$j
+	ld	[$tp+8],$tpj
+	cmp	$j,$i
+	add	$acc1,$car1,$car1
+	ld	[$np+$j],$npj
+	st	$car1,[$tp]
+	srlx	$car1,32,$car1
+	bl	%icc,.Lsqr_last
+	add	$tp,4,$tp
+!.Lsqr_last
+
+	mulx	$npj,$mul1,$acc1
+	add	$tpj,$acc0,$acc0
+	srlx	$acc0,32,$tmp0
+	and	$acc0,$mask,$acc0
+	add	$tmp0,$sbit,$sbit
+	add	$acc0,$car1,$car1
+	add	$acc1,$car1,$car1
+	st	$car1,[$tp]
+	srlx	$car1,32,$car1
+
+	add	$car0,$car0,$car0		! recover $car0
+	add	$sbit,$car0,$car0
+	add	$car0,$car1,$car1
+	add	$car2,$car1,$car1
+	st	$car1,[$tp+4]
+	srlx	$car1,32,$car2
+
+	ba	.Ltail
+	add	$tp,8,$tp
+.type	$fname,#function
+.size	$fname,(.-$fname)
+.asciz	"Montgomery Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
+.align	32
+___
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;
+print $code;
+close STDOUT;
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv9a-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/sparcv9a-mont.pl
@ -0,0 +1,887 @@
+#! /usr/bin/env perl
+# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# October 2005
+#
+# "Teaser" Montgomery multiplication module for UltraSPARC. Why FPU?
+# Because unlike integer multiplier, which simply stalls whole CPU,
+# FPU is fully pipelined and can effectively emit 48 bit partial
+# product every cycle. Why not blended SPARC v9? One can argue that
+# making this module dependent on UltraSPARC VIS extension limits its
+# binary compatibility. Well yes, it does exclude SPARC64 prior-V(!)
+# implementations from compatibility matrix. But the rest, whole Sun
+# UltraSPARC family and brand new Fujitsu's SPARC64 V, all support
+# VIS extension instructions used in this module. This is considered
+# good enough to not care about HAL SPARC64 users [if any] who have
+# integer-only pure SPARCv9 module to "fall down" to.
+
+# USI&II cores currently exhibit uniform 2x improvement [over pre-
+# bn_mul_mont codebase] for all key lengths and benchmarks. On USIII
+# performance improves few percents for shorter keys and worsens few
+# percents for longer keys. This is because USIII integer multiplier
+# is >3x faster than USI&II one, which is harder to match [but see
+# TODO list below]. It should also be noted that SPARC64 V features
+# out-of-order execution, which *might* mean that integer multiplier
+# is pipelined, which in turn *might* be impossible to match... On
+# additional note, SPARC64 V implements FP Multiply-Add instruction,
+# which is perfectly usable in this context... In other words, as far
+# as Fujitsu SPARC64 V goes, talk to the author:-)
+
+# The implementation implies following "non-natural" limitations on
+# input arguments:
+# - num may not be less than 4;
+# - num has to be even;
+# Failure to meet either condition has no fatal effects, simply
+# doesn't give any performance gain.
+
+# TODO:
+# - modulo-schedule inner loop for better performance (on in-order
+#   execution core such as UltraSPARC this shall result in further
+#   noticeable(!) improvement);
+# - dedicated squaring procedure[?];
+
+######################################################################
+# November 2006
+#
+# Modulo-scheduled inner loops allow to interleave floating point and
+# integer instructions and minimize Read-After-Write penalties. This
+# results in *further* 20-50% performance improvement [depending on
+# key length, more for longer keys] on USI&II cores and 30-80% - on
+# USIII&IV.
+
+$output = pop;
+open STDOUT,">$output";
+
+$fname="bn_mul_mont_fpu";
+
+$frame="STACK_FRAME";
+$bias="STACK_BIAS";
+$locals=64;
+
+# In order to provide for 32-/64-bit ABI duality, I keep integers wider
+# than 32 bit in %g1-%g4 and %o0-%o5. %l0-%l7 and %i0-%i5 are used
+# exclusively for pointers, indexes and other small values...
+# int bn_mul_mont(
+$rp="%i0";	# BN_ULONG *rp,
+$ap="%i1";	# const BN_ULONG *ap,
+$bp="%i2";	# const BN_ULONG *bp,
+$np="%i3";	# const BN_ULONG *np,
+$n0="%i4";	# const BN_ULONG *n0,
+$num="%i5";	# int num);
+
+$tp="%l0";	# t[num]
+$ap_l="%l1";	# a[num],n[num] are smashed to 32-bit words and saved
+$ap_h="%l2";	# to these four vectors as double-precision FP values.
+$np_l="%l3";	# This way a bunch of fxtods are eliminated in second
+$np_h="%l4";	# loop and L1-cache aliasing is minimized...
+$i="%l5";
+$j="%l6";
+$mask="%l7";	# 16-bit mask, 0xffff
+
+$n0="%g4";	# reassigned(!) to "64-bit" register
+$carry="%i4";	# %i4 reused(!) for a carry bit
+
+# FP register naming chart
+#
+#     ..HILO
+#       dcba
+#   --------
+#        LOa
+#       LOb
+#      LOc
+#     LOd
+#      HIa
+#     HIb
+#    HIc
+#   HId
+#    ..a
+#   ..b
+$ba="%f0";    $bb="%f2";    $bc="%f4";    $bd="%f6";
+$na="%f8";    $nb="%f10";   $nc="%f12";   $nd="%f14";
+$alo="%f16";  $alo_="%f17"; $ahi="%f18";  $ahi_="%f19";
+$nlo="%f20";  $nlo_="%f21"; $nhi="%f22";  $nhi_="%f23";
+
+$dota="%f24"; $dotb="%f26";
+
+$aloa="%f32"; $alob="%f34"; $aloc="%f36"; $alod="%f38";
+$ahia="%f40"; $ahib="%f42"; $ahic="%f44"; $ahid="%f46";
+$nloa="%f48"; $nlob="%f50"; $nloc="%f52"; $nlod="%f54";
+$nhia="%f56"; $nhib="%f58"; $nhic="%f60"; $nhid="%f62";
+
+$ASI_FL16_P=0xD2;	# magic ASI value to engage 16-bit FP load
+
+$code=<<___;
+#include "sparc_arch.h"
+
+.section	".text",#alloc,#execinstr
+
+.global $fname
+.align  32
+$fname:
+	save	%sp,-$frame-$locals,%sp
+
+	cmp	$num,4
+	bl,a,pn %icc,.Lret
+	clr	%i0
+	andcc	$num,1,%g0		! $num has to be even...
+	bnz,a,pn %icc,.Lret
+	clr	%i0			! signal "unsupported input value"
+
+	srl	$num,1,$num
+	sethi	%hi(0xffff),$mask
+	ld	[%i4+0],$n0		! $n0 reassigned, remember?
+	or	$mask,%lo(0xffff),$mask
+	ld	[%i4+4],%o0
+	sllx	%o0,32,%o0
+	or	%o0,$n0,$n0		! $n0=n0[1].n0[0]
+
+	sll	$num,3,$num		! num*=8
+
+	add	%sp,$bias,%o0		! real top of stack
+	sll	$num,2,%o1
+	add	%o1,$num,%o1		! %o1=num*5
+	sub	%o0,%o1,%o0
+	and	%o0,-2048,%o0		! optimize TLB utilization
+	sub	%o0,$bias,%sp		! alloca(5*num*8)
+
+	rd	%asi,%o7		! save %asi
+	add	%sp,$bias+$frame+$locals,$tp
+	add	$tp,$num,$ap_l
+	add	$ap_l,$num,$ap_l	! [an]p_[lh] point at the vectors' ends !
+	add	$ap_l,$num,$ap_h
+	add	$ap_h,$num,$np_l
+	add	$np_l,$num,$np_h
+
+	wr	%g0,$ASI_FL16_P,%asi	! setup %asi for 16-bit FP loads
+
+	add	$rp,$num,$rp		! readjust input pointers to point
+	add	$ap,$num,$ap		! at the ends too...
+	add	$bp,$num,$bp
+	add	$np,$num,$np
+
+	stx	%o7,[%sp+$bias+$frame+48]	! save %asi
+
+	sub	%g0,$num,$i		! i=-num
+	sub	%g0,$num,$j		! j=-num
+
+	add	$ap,$j,%o3
+	add	$bp,$i,%o4
+
+	ld	[%o3+4],%g1		! bp[0]
+	ld	[%o3+0],%o0
+	ld	[%o4+4],%g5		! ap[0]
+	sllx	%g1,32,%g1
+	ld	[%o4+0],%o1
+	sllx	%g5,32,%g5
+	or	%g1,%o0,%o0
+	or	%g5,%o1,%o1
+
+	add	$np,$j,%o5
+
+	mulx	%o1,%o0,%o0		! ap[0]*bp[0]
+	mulx	$n0,%o0,%o0		! ap[0]*bp[0]*n0
+	stx	%o0,[%sp+$bias+$frame+0]
+
+	ld	[%o3+0],$alo_	! load a[j] as pair of 32-bit words
+	fzeros	$alo
+	ld	[%o3+4],$ahi_
+	fzeros	$ahi
+	ld	[%o5+0],$nlo_	! load n[j] as pair of 32-bit words
+	fzeros	$nlo
+	ld	[%o5+4],$nhi_
+	fzeros	$nhi
+
+	! transfer b[i] to FPU as 4x16-bit values
+	ldda	[%o4+2]%asi,$ba
+	fxtod	$alo,$alo
+	ldda	[%o4+0]%asi,$bb
+	fxtod	$ahi,$ahi
+	ldda	[%o4+6]%asi,$bc
+	fxtod	$nlo,$nlo
+	ldda	[%o4+4]%asi,$bd
+	fxtod	$nhi,$nhi
+
+	! transfer ap[0]*b[0]*n0 to FPU as 4x16-bit values
+	ldda	[%sp+$bias+$frame+6]%asi,$na
+	fxtod	$ba,$ba
+	ldda	[%sp+$bias+$frame+4]%asi,$nb
+	fxtod	$bb,$bb
+	ldda	[%sp+$bias+$frame+2]%asi,$nc
+	fxtod	$bc,$bc
+	ldda	[%sp+$bias+$frame+0]%asi,$nd
+	fxtod	$bd,$bd
+
+	std	$alo,[$ap_l+$j]		! save smashed ap[j] in double format
+	fxtod	$na,$na
+	std	$ahi,[$ap_h+$j]
+	fxtod	$nb,$nb
+	std	$nlo,[$np_l+$j]		! save smashed np[j] in double format
+	fxtod	$nc,$nc
+	std	$nhi,[$np_h+$j]
+	fxtod	$nd,$nd
+
+		fmuld	$alo,$ba,$aloa
+		fmuld	$nlo,$na,$nloa
+		fmuld	$alo,$bb,$alob
+		fmuld	$nlo,$nb,$nlob
+		fmuld	$alo,$bc,$aloc
+	faddd	$aloa,$nloa,$nloa
+		fmuld	$nlo,$nc,$nloc
+		fmuld	$alo,$bd,$alod
+	faddd	$alob,$nlob,$nlob
+		fmuld	$nlo,$nd,$nlod
+		fmuld	$ahi,$ba,$ahia
+	faddd	$aloc,$nloc,$nloc
+		fmuld	$nhi,$na,$nhia
+		fmuld	$ahi,$bb,$ahib
+	faddd	$alod,$nlod,$nlod
+		fmuld	$nhi,$nb,$nhib
+		fmuld	$ahi,$bc,$ahic
+	faddd	$ahia,$nhia,$nhia
+		fmuld	$nhi,$nc,$nhic
+		fmuld	$ahi,$bd,$ahid
+	faddd	$ahib,$nhib,$nhib
+		fmuld	$nhi,$nd,$nhid
+
+	faddd	$ahic,$nhic,$dota	! $nhic
+	faddd	$ahid,$nhid,$dotb	! $nhid
+
+	faddd	$nloc,$nhia,$nloc
+	faddd	$nlod,$nhib,$nlod
+
+	fdtox	$nloa,$nloa
+	fdtox	$nlob,$nlob
+	fdtox	$nloc,$nloc
+	fdtox	$nlod,$nlod
+
+	std	$nloa,[%sp+$bias+$frame+0]
+	add	$j,8,$j
+	std	$nlob,[%sp+$bias+$frame+8]
+	add	$ap,$j,%o4
+	std	$nloc,[%sp+$bias+$frame+16]
+	add	$np,$j,%o5
+	std	$nlod,[%sp+$bias+$frame+24]
+
+	ld	[%o4+0],$alo_	! load a[j] as pair of 32-bit words
+	fzeros	$alo
+	ld	[%o4+4],$ahi_
+	fzeros	$ahi
+	ld	[%o5+0],$nlo_	! load n[j] as pair of 32-bit words
+	fzeros	$nlo
+	ld	[%o5+4],$nhi_
+	fzeros	$nhi
+
+	fxtod	$alo,$alo
+	fxtod	$ahi,$ahi
+	fxtod	$nlo,$nlo
+	fxtod	$nhi,$nhi
+
+	ldx	[%sp+$bias+$frame+0],%o0
+		fmuld	$alo,$ba,$aloa
+	ldx	[%sp+$bias+$frame+8],%o1
+		fmuld	$nlo,$na,$nloa
+	ldx	[%sp+$bias+$frame+16],%o2
+		fmuld	$alo,$bb,$alob
+	ldx	[%sp+$bias+$frame+24],%o3
+		fmuld	$nlo,$nb,$nlob
+
+	srlx	%o0,16,%o7
+	std	$alo,[$ap_l+$j]		! save smashed ap[j] in double format
+		fmuld	$alo,$bc,$aloc
+	add	%o7,%o1,%o1
+	std	$ahi,[$ap_h+$j]
+		faddd	$aloa,$nloa,$nloa
+		fmuld	$nlo,$nc,$nloc
+	srlx	%o1,16,%o7
+	std	$nlo,[$np_l+$j]		! save smashed np[j] in double format
+		fmuld	$alo,$bd,$alod
+	add	%o7,%o2,%o2
+	std	$nhi,[$np_h+$j]
+		faddd	$alob,$nlob,$nlob
+		fmuld	$nlo,$nd,$nlod
+	srlx	%o2,16,%o7
+		fmuld	$ahi,$ba,$ahia
+	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
+		faddd	$aloc,$nloc,$nloc
+		fmuld	$nhi,$na,$nhia
+	!and	%o0,$mask,%o0
+	!and	%o1,$mask,%o1
+	!and	%o2,$mask,%o2
+	!sllx	%o1,16,%o1
+	!sllx	%o2,32,%o2
+	!sllx	%o3,48,%o7
+	!or	%o1,%o0,%o0
+	!or	%o2,%o0,%o0
+	!or	%o7,%o0,%o0		! 64-bit result
+	srlx	%o3,16,%g1		! 34-bit carry
+		fmuld	$ahi,$bb,$ahib
+
+	faddd	$alod,$nlod,$nlod
+		fmuld	$nhi,$nb,$nhib
+		fmuld	$ahi,$bc,$ahic
+	faddd	$ahia,$nhia,$nhia
+		fmuld	$nhi,$nc,$nhic
+		fmuld	$ahi,$bd,$ahid
+	faddd	$ahib,$nhib,$nhib
+		fmuld	$nhi,$nd,$nhid
+
+	faddd	$dota,$nloa,$nloa
+	faddd	$dotb,$nlob,$nlob
+	faddd	$ahic,$nhic,$dota	! $nhic
+	faddd	$ahid,$nhid,$dotb	! $nhid
+
+	faddd	$nloc,$nhia,$nloc
+	faddd	$nlod,$nhib,$nlod
+
+	fdtox	$nloa,$nloa
+	fdtox	$nlob,$nlob
+	fdtox	$nloc,$nloc
+	fdtox	$nlod,$nlod
+
+	std	$nloa,[%sp+$bias+$frame+0]
+	std	$nlob,[%sp+$bias+$frame+8]
+	addcc	$j,8,$j
+	std	$nloc,[%sp+$bias+$frame+16]
+	bz,pn	%icc,.L1stskip
+	std	$nlod,[%sp+$bias+$frame+24]
+
+.align	32			! incidentally already aligned !
+.L1st:
+	add	$ap,$j,%o4
+	add	$np,$j,%o5
+	ld	[%o4+0],$alo_	! load a[j] as pair of 32-bit words
+	fzeros	$alo
+	ld	[%o4+4],$ahi_
+	fzeros	$ahi
+	ld	[%o5+0],$nlo_	! load n[j] as pair of 32-bit words
+	fzeros	$nlo
+	ld	[%o5+4],$nhi_
+	fzeros	$nhi
+
+	fxtod	$alo,$alo
+	fxtod	$ahi,$ahi
+	fxtod	$nlo,$nlo
+	fxtod	$nhi,$nhi
+
+	ldx	[%sp+$bias+$frame+0],%o0
+		fmuld	$alo,$ba,$aloa
+	ldx	[%sp+$bias+$frame+8],%o1
+		fmuld	$nlo,$na,$nloa
+	ldx	[%sp+$bias+$frame+16],%o2
+		fmuld	$alo,$bb,$alob
+	ldx	[%sp+$bias+$frame+24],%o3
+		fmuld	$nlo,$nb,$nlob
+
+	srlx	%o0,16,%o7
+	std	$alo,[$ap_l+$j]		! save smashed ap[j] in double format
+		fmuld	$alo,$bc,$aloc
+	add	%o7,%o1,%o1
+	std	$ahi,[$ap_h+$j]
+		faddd	$aloa,$nloa,$nloa
+		fmuld	$nlo,$nc,$nloc
+	srlx	%o1,16,%o7
+	std	$nlo,[$np_l+$j]		! save smashed np[j] in double format
+		fmuld	$alo,$bd,$alod
+	add	%o7,%o2,%o2
+	std	$nhi,[$np_h+$j]
+		faddd	$alob,$nlob,$nlob
+		fmuld	$nlo,$nd,$nlod
+	srlx	%o2,16,%o7
+		fmuld	$ahi,$ba,$ahia
+	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
+	and	%o0,$mask,%o0
+		faddd	$aloc,$nloc,$nloc
+		fmuld	$nhi,$na,$nhia
+	and	%o1,$mask,%o1
+	and	%o2,$mask,%o2
+		fmuld	$ahi,$bb,$ahib
+	sllx	%o1,16,%o1
+		faddd	$alod,$nlod,$nlod
+		fmuld	$nhi,$nb,$nhib
+	sllx	%o2,32,%o2
+		fmuld	$ahi,$bc,$ahic
+	sllx	%o3,48,%o7
+	or	%o1,%o0,%o0
+		faddd	$ahia,$nhia,$nhia
+		fmuld	$nhi,$nc,$nhic
+	or	%o2,%o0,%o0
+		fmuld	$ahi,$bd,$ahid
+	or	%o7,%o0,%o0		! 64-bit result
+		faddd	$ahib,$nhib,$nhib
+		fmuld	$nhi,$nd,$nhid
+	addcc	%g1,%o0,%o0
+		faddd	$dota,$nloa,$nloa
+	srlx	%o3,16,%g1		! 34-bit carry
+		faddd	$dotb,$nlob,$nlob
+	bcs,a	%xcc,.+8
+	add	%g1,1,%g1
+
+	stx	%o0,[$tp]		! tp[j-1]=
+
+	faddd	$ahic,$nhic,$dota	! $nhic
+	faddd	$ahid,$nhid,$dotb	! $nhid
+
+	faddd	$nloc,$nhia,$nloc
+	faddd	$nlod,$nhib,$nlod
+
+	fdtox	$nloa,$nloa
+	fdtox	$nlob,$nlob
+	fdtox	$nloc,$nloc
+	fdtox	$nlod,$nlod
+
+	std	$nloa,[%sp+$bias+$frame+0]
+	std	$nlob,[%sp+$bias+$frame+8]
+	std	$nloc,[%sp+$bias+$frame+16]
+	std	$nlod,[%sp+$bias+$frame+24]
+
+	addcc	$j,8,$j
+	bnz,pt	%icc,.L1st
+	add	$tp,8,$tp
+
+.L1stskip:
+	fdtox	$dota,$dota
+	fdtox	$dotb,$dotb
+
+	ldx	[%sp+$bias+$frame+0],%o0
+	ldx	[%sp+$bias+$frame+8],%o1
+	ldx	[%sp+$bias+$frame+16],%o2
+	ldx	[%sp+$bias+$frame+24],%o3
+
+	srlx	%o0,16,%o7
+	std	$dota,[%sp+$bias+$frame+32]
+	add	%o7,%o1,%o1
+	std	$dotb,[%sp+$bias+$frame+40]
+	srlx	%o1,16,%o7
+	add	%o7,%o2,%o2
+	srlx	%o2,16,%o7
+	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
+	and	%o0,$mask,%o0
+	and	%o1,$mask,%o1
+	and	%o2,$mask,%o2
+	sllx	%o1,16,%o1
+	sllx	%o2,32,%o2
+	sllx	%o3,48,%o7
+	or	%o1,%o0,%o0
+	or	%o2,%o0,%o0
+	or	%o7,%o0,%o0		! 64-bit result
+	ldx	[%sp+$bias+$frame+32],%o4
+	addcc	%g1,%o0,%o0
+	ldx	[%sp+$bias+$frame+40],%o5
+	srlx	%o3,16,%g1		! 34-bit carry
+	bcs,a	%xcc,.+8
+	add	%g1,1,%g1
+
+	stx	%o0,[$tp]		! tp[j-1]=
+	add	$tp,8,$tp
+
+	srlx	%o4,16,%o7
+	add	%o7,%o5,%o5
+	and	%o4,$mask,%o4
+	sllx	%o5,16,%o7
+	or	%o7,%o4,%o4
+	addcc	%g1,%o4,%o4
+	srlx	%o5,48,%g1
+	bcs,a	%xcc,.+8
+	add	%g1,1,%g1
+
+	mov	%g1,$carry
+	stx	%o4,[$tp]		! tp[num-1]=
+
+	ba	.Louter
+	add	$i,8,$i
+.align	32
+.Louter:
+	sub	%g0,$num,$j		! j=-num
+	add	%sp,$bias+$frame+$locals,$tp
+
+	add	$ap,$j,%o3
+	add	$bp,$i,%o4
+
+	ld	[%o3+4],%g1		! bp[i]
+	ld	[%o3+0],%o0
+	ld	[%o4+4],%g5		! ap[0]
+	sllx	%g1,32,%g1
+	ld	[%o4+0],%o1
+	sllx	%g5,32,%g5
+	or	%g1,%o0,%o0
+	or	%g5,%o1,%o1
+
+	ldx	[$tp],%o2		! tp[0]
+	mulx	%o1,%o0,%o0
+	addcc	%o2,%o0,%o0
+	mulx	$n0,%o0,%o0		! (ap[0]*bp[i]+t[0])*n0
+	stx	%o0,[%sp+$bias+$frame+0]
+
+	! transfer b[i] to FPU as 4x16-bit values
+	ldda	[%o4+2]%asi,$ba
+	ldda	[%o4+0]%asi,$bb
+	ldda	[%o4+6]%asi,$bc
+	ldda	[%o4+4]%asi,$bd
+
+	! transfer (ap[0]*b[i]+t[0])*n0 to FPU as 4x16-bit values
+	ldda	[%sp+$bias+$frame+6]%asi,$na
+	fxtod	$ba,$ba
+	ldda	[%sp+$bias+$frame+4]%asi,$nb
+	fxtod	$bb,$bb
+	ldda	[%sp+$bias+$frame+2]%asi,$nc
+	fxtod	$bc,$bc
+	ldda	[%sp+$bias+$frame+0]%asi,$nd
+	fxtod	$bd,$bd
+	ldd	[$ap_l+$j],$alo		! load a[j] in double format
+	fxtod	$na,$na
+	ldd	[$ap_h+$j],$ahi
+	fxtod	$nb,$nb
+	ldd	[$np_l+$j],$nlo		! load n[j] in double format
+	fxtod	$nc,$nc
+	ldd	[$np_h+$j],$nhi
+	fxtod	$nd,$nd
+
+		fmuld	$alo,$ba,$aloa
+		fmuld	$nlo,$na,$nloa
+		fmuld	$alo,$bb,$alob
+		fmuld	$nlo,$nb,$nlob
+		fmuld	$alo,$bc,$aloc
+	faddd	$aloa,$nloa,$nloa
+		fmuld	$nlo,$nc,$nloc
+		fmuld	$alo,$bd,$alod
+	faddd	$alob,$nlob,$nlob
+		fmuld	$nlo,$nd,$nlod
+		fmuld	$ahi,$ba,$ahia
+	faddd	$aloc,$nloc,$nloc
+		fmuld	$nhi,$na,$nhia
+		fmuld	$ahi,$bb,$ahib
+	faddd	$alod,$nlod,$nlod
+		fmuld	$nhi,$nb,$nhib
+		fmuld	$ahi,$bc,$ahic
+	faddd	$ahia,$nhia,$nhia
+		fmuld	$nhi,$nc,$nhic
+		fmuld	$ahi,$bd,$ahid
+	faddd	$ahib,$nhib,$nhib
+		fmuld	$nhi,$nd,$nhid
+
+	faddd	$ahic,$nhic,$dota	! $nhic
+	faddd	$ahid,$nhid,$dotb	! $nhid
+
+	faddd	$nloc,$nhia,$nloc
+	faddd	$nlod,$nhib,$nlod
+
+	fdtox	$nloa,$nloa
+	fdtox	$nlob,$nlob
+	fdtox	$nloc,$nloc
+	fdtox	$nlod,$nlod
+
+	std	$nloa,[%sp+$bias+$frame+0]
+	std	$nlob,[%sp+$bias+$frame+8]
+	std	$nloc,[%sp+$bias+$frame+16]
+	add	$j,8,$j
+	std	$nlod,[%sp+$bias+$frame+24]
+
+	ldd	[$ap_l+$j],$alo		! load a[j] in double format
+	ldd	[$ap_h+$j],$ahi
+	ldd	[$np_l+$j],$nlo		! load n[j] in double format
+	ldd	[$np_h+$j],$nhi
+
+		fmuld	$alo,$ba,$aloa
+		fmuld	$nlo,$na,$nloa
+		fmuld	$alo,$bb,$alob
+		fmuld	$nlo,$nb,$nlob
+		fmuld	$alo,$bc,$aloc
+	ldx	[%sp+$bias+$frame+0],%o0
+		faddd	$aloa,$nloa,$nloa
+		fmuld	$nlo,$nc,$nloc
+	ldx	[%sp+$bias+$frame+8],%o1
+		fmuld	$alo,$bd,$alod
+	ldx	[%sp+$bias+$frame+16],%o2
+		faddd	$alob,$nlob,$nlob
+		fmuld	$nlo,$nd,$nlod
+	ldx	[%sp+$bias+$frame+24],%o3
+		fmuld	$ahi,$ba,$ahia
+
+	srlx	%o0,16,%o7
+		faddd	$aloc,$nloc,$nloc
+		fmuld	$nhi,$na,$nhia
+	add	%o7,%o1,%o1
+		fmuld	$ahi,$bb,$ahib
+	srlx	%o1,16,%o7
+		faddd	$alod,$nlod,$nlod
+		fmuld	$nhi,$nb,$nhib
+	add	%o7,%o2,%o2
+		fmuld	$ahi,$bc,$ahic
+	srlx	%o2,16,%o7
+		faddd	$ahia,$nhia,$nhia
+		fmuld	$nhi,$nc,$nhic
+	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
+	! why?
+	and	%o0,$mask,%o0
+		fmuld	$ahi,$bd,$ahid
+	and	%o1,$mask,%o1
+	and	%o2,$mask,%o2
+		faddd	$ahib,$nhib,$nhib
+		fmuld	$nhi,$nd,$nhid
+	sllx	%o1,16,%o1
+		faddd	$dota,$nloa,$nloa
+	sllx	%o2,32,%o2
+		faddd	$dotb,$nlob,$nlob
+	sllx	%o3,48,%o7
+	or	%o1,%o0,%o0
+		faddd	$ahic,$nhic,$dota	! $nhic
+	or	%o2,%o0,%o0
+		faddd	$ahid,$nhid,$dotb	! $nhid
+	or	%o7,%o0,%o0		! 64-bit result
+	ldx	[$tp],%o7
+		faddd	$nloc,$nhia,$nloc
+	addcc	%o7,%o0,%o0
+	! end-of-why?
+		faddd	$nlod,$nhib,$nlod
+	srlx	%o3,16,%g1		! 34-bit carry
+		fdtox	$nloa,$nloa
+	bcs,a	%xcc,.+8
+	add	%g1,1,%g1
+
+	fdtox	$nlob,$nlob
+	fdtox	$nloc,$nloc
+	fdtox	$nlod,$nlod
+
+	std	$nloa,[%sp+$bias+$frame+0]
+	std	$nlob,[%sp+$bias+$frame+8]
+	addcc	$j,8,$j
+	std	$nloc,[%sp+$bias+$frame+16]
+	bz,pn	%icc,.Linnerskip
+	std	$nlod,[%sp+$bias+$frame+24]
+
+	ba	.Linner
+	nop
+.align	32
+.Linner:
+	ldd	[$ap_l+$j],$alo		! load a[j] in double format
+	ldd	[$ap_h+$j],$ahi
+	ldd	[$np_l+$j],$nlo		! load n[j] in double format
+	ldd	[$np_h+$j],$nhi
+
+		fmuld	$alo,$ba,$aloa
+		fmuld	$nlo,$na,$nloa
+		fmuld	$alo,$bb,$alob
+		fmuld	$nlo,$nb,$nlob
+		fmuld	$alo,$bc,$aloc
+	ldx	[%sp+$bias+$frame+0],%o0
+		faddd	$aloa,$nloa,$nloa
+		fmuld	$nlo,$nc,$nloc
+	ldx	[%sp+$bias+$frame+8],%o1
+		fmuld	$alo,$bd,$alod
+	ldx	[%sp+$bias+$frame+16],%o2
+		faddd	$alob,$nlob,$nlob
+		fmuld	$nlo,$nd,$nlod
+	ldx	[%sp+$bias+$frame+24],%o3
+		fmuld	$ahi,$ba,$ahia
+
+	srlx	%o0,16,%o7
+		faddd	$aloc,$nloc,$nloc
+		fmuld	$nhi,$na,$nhia
+	add	%o7,%o1,%o1
+		fmuld	$ahi,$bb,$ahib
+	srlx	%o1,16,%o7
+		faddd	$alod,$nlod,$nlod
+		fmuld	$nhi,$nb,$nhib
+	add	%o7,%o2,%o2
+		fmuld	$ahi,$bc,$ahic
+	srlx	%o2,16,%o7
+		faddd	$ahia,$nhia,$nhia
+		fmuld	$nhi,$nc,$nhic
+	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
+	and	%o0,$mask,%o0
+		fmuld	$ahi,$bd,$ahid
+	and	%o1,$mask,%o1
+	and	%o2,$mask,%o2
+		faddd	$ahib,$nhib,$nhib
+		fmuld	$nhi,$nd,$nhid
+	sllx	%o1,16,%o1
+		faddd	$dota,$nloa,$nloa
+	sllx	%o2,32,%o2
+		faddd	$dotb,$nlob,$nlob
+	sllx	%o3,48,%o7
+	or	%o1,%o0,%o0
+		faddd	$ahic,$nhic,$dota	! $nhic
+	or	%o2,%o0,%o0
+		faddd	$ahid,$nhid,$dotb	! $nhid
+	or	%o7,%o0,%o0		! 64-bit result
+		faddd	$nloc,$nhia,$nloc
+	addcc	%g1,%o0,%o0
+	ldx	[$tp+8],%o7		! tp[j]
+		faddd	$nlod,$nhib,$nlod
+	srlx	%o3,16,%g1		! 34-bit carry
+		fdtox	$nloa,$nloa
+	bcs,a	%xcc,.+8
+	add	%g1,1,%g1
+		fdtox	$nlob,$nlob
+	addcc	%o7,%o0,%o0
+		fdtox	$nloc,$nloc
+	bcs,a	%xcc,.+8
+	add	%g1,1,%g1
+
+	stx	%o0,[$tp]		! tp[j-1]
+		fdtox	$nlod,$nlod
+
+	std	$nloa,[%sp+$bias+$frame+0]
+	std	$nlob,[%sp+$bias+$frame+8]
+	std	$nloc,[%sp+$bias+$frame+16]
+	addcc	$j,8,$j
+	std	$nlod,[%sp+$bias+$frame+24]
+	bnz,pt	%icc,.Linner
+	add	$tp,8,$tp
+
+.Linnerskip:
+	fdtox	$dota,$dota
+	fdtox	$dotb,$dotb
+
+	ldx	[%sp+$bias+$frame+0],%o0
+	ldx	[%sp+$bias+$frame+8],%o1
+	ldx	[%sp+$bias+$frame+16],%o2
+	ldx	[%sp+$bias+$frame+24],%o3
+
+	srlx	%o0,16,%o7
+	std	$dota,[%sp+$bias+$frame+32]
+	add	%o7,%o1,%o1
+	std	$dotb,[%sp+$bias+$frame+40]
+	srlx	%o1,16,%o7
+	add	%o7,%o2,%o2
+	srlx	%o2,16,%o7
+	add	%o7,%o3,%o3		! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
+	and	%o0,$mask,%o0
+	and	%o1,$mask,%o1
+	and	%o2,$mask,%o2
+	sllx	%o1,16,%o1
+	sllx	%o2,32,%o2
+	sllx	%o3,48,%o7
+	or	%o1,%o0,%o0
+	or	%o2,%o0,%o0
+	ldx	[%sp+$bias+$frame+32],%o4
+	or	%o7,%o0,%o0		! 64-bit result
+	ldx	[%sp+$bias+$frame+40],%o5
+	addcc	%g1,%o0,%o0
+	ldx	[$tp+8],%o7		! tp[j]
+	srlx	%o3,16,%g1		! 34-bit carry
+	bcs,a	%xcc,.+8
+	add	%g1,1,%g1
+
+	addcc	%o7,%o0,%o0
+	bcs,a	%xcc,.+8
+	add	%g1,1,%g1
+
+	stx	%o0,[$tp]		! tp[j-1]
+	add	$tp,8,$tp
+
+	srlx	%o4,16,%o7
+	add	%o7,%o5,%o5
+	and	%o4,$mask,%o4
+	sllx	%o5,16,%o7
+	or	%o7,%o4,%o4
+	addcc	%g1,%o4,%o4
+	srlx	%o5,48,%g1
+	bcs,a	%xcc,.+8
+	add	%g1,1,%g1
+
+	addcc	$carry,%o4,%o4
+	stx	%o4,[$tp]		! tp[num-1]
+	mov	%g1,$carry
+	bcs,a	%xcc,.+8
+	add	$carry,1,$carry
+
+	addcc	$i,8,$i
+	bnz	%icc,.Louter
+	nop
+
+	add	$tp,8,$tp		! adjust tp to point at the end
+	orn	%g0,%g0,%g4
+	sub	%g0,$num,%o7		! n=-num
+	ba	.Lsub
+	subcc	%g0,%g0,%g0		! clear %icc.c
+
+.align	32
+.Lsub:
+	ldx	[$tp+%o7],%o0
+	add	$np,%o7,%g1
+	ld	[%g1+0],%o2
+	ld	[%g1+4],%o3
+	srlx	%o0,32,%o1
+	subccc	%o0,%o2,%o2
+	add	$rp,%o7,%g1
+	subccc	%o1,%o3,%o3
+	st	%o2,[%g1+0]
+	add	%o7,8,%o7
+	brnz,pt	%o7,.Lsub
+	st	%o3,[%g1+4]
+	subc	$carry,0,%g4
+	sub	%g0,$num,%o7		! n=-num
+	ba	.Lcopy
+	nop
+
+.align	32
+.Lcopy:
+	ldx	[$tp+%o7],%o0
+	add	$rp,%o7,%g1
+	ld	[%g1+0],%o2
+	ld	[%g1+4],%o3
+	stx	%g0,[$tp+%o7]
+	and	%o0,%g4,%o0
+	srlx	%o0,32,%o1
+	andn	%o2,%g4,%o2
+	andn	%o3,%g4,%o3
+	or	%o2,%o0,%o0
+	or	%o3,%o1,%o1
+	st	%o0,[%g1+0]
+	add	%o7,8,%o7
+	brnz,pt	%o7,.Lcopy
+	st	%o1,[%g1+4]
+	sub	%g0,$num,%o7		! n=-num
+
+.Lzap:
+	stx	%g0,[$ap_l+%o7]
+	stx	%g0,[$ap_h+%o7]
+	stx	%g0,[$np_l+%o7]
+	stx	%g0,[$np_h+%o7]
+	add	%o7,8,%o7
+	brnz,pt	%o7,.Lzap
+	nop
+
+	ldx	[%sp+$bias+$frame+48],%o7
+	wr	%g0,%o7,%asi		! restore %asi
+
+	mov	1,%i0
+.Lret:
+	ret
+	restore
+.type   $fname,#function
+.size	$fname,(.-$fname)
+.asciz	"Montgomery Multiplication for UltraSPARC, CRYPTOGAMS by <appro\@openssl.org>"
+.align	32
+___
+
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;
+
+# Below substitution makes it possible to compile without demanding
+# VIS extensions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I
+# dare to do this, because VIS capability is detected at run-time now
+# and this routine is not called on CPU not capable to execute it. Do
+# note that fzeros is not the only VIS dependency! Another dependency
+# is implicit and is just _a_ numerical value loaded to %asi register,
+# which assembler can't recognize as VIS specific...
+$code =~ s/fzeros\s+%f([0-9]+)/
+	   sprintf(".word\t0x%x\t! fzeros %%f%d",0x81b00c20|($1<<25),$1)
+	  /gem;
+
+print $code;
+# flush
+close STDOUT;
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/via-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/via-mont.pl
@ -0,0 +1,251 @@
+#! /usr/bin/env perl
+# Copyright 2006-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# Wrapper around 'rep montmul', VIA-specific instruction accessing
+# PadLock Montgomery Multiplier. The wrapper is designed as drop-in
+# replacement for OpenSSL bn_mul_mont [first implemented in 0.9.9].
+#
+# Below are interleaved outputs from 'openssl speed rsa dsa' for 4
+# different software configurations on 1.5GHz VIA Esther processor.
+# Lines marked with "software integer" denote performance of hand-
+# coded integer-only assembler found in OpenSSL 0.9.7. "Software SSE2"
+# refers to hand-coded SSE2 Montgomery multiplication procedure found
+# OpenSSL 0.9.9. "Hardware VIA SDK" refers to padlock_pmm routine from
+# Padlock SDK 2.0.1 available for download from VIA, which naturally
+# utilizes the magic 'repz montmul' instruction. And finally "hardware
+# this" refers to *this* implementation which also uses 'repz montmul'
+#
+#                   sign    verify    sign/s verify/s
+# rsa  512 bits 0.001720s 0.000140s    581.4   7149.7	software integer
+# rsa  512 bits 0.000690s 0.000086s   1450.3  11606.0	software SSE2
+# rsa  512 bits 0.006136s 0.000201s    163.0   4974.5	hardware VIA SDK
+# rsa  512 bits 0.000712s 0.000050s   1404.9  19858.5	hardware this
+#
+# rsa 1024 bits 0.008518s 0.000413s    117.4   2420.8	software integer
+# rsa 1024 bits 0.004275s 0.000277s    233.9   3609.7	software SSE2
+# rsa 1024 bits 0.012136s 0.000260s     82.4   3844.5	hardware VIA SDK
+# rsa 1024 bits 0.002522s 0.000116s    396.5   8650.9	hardware this
+#
+# rsa 2048 bits 0.050101s 0.001371s     20.0    729.6	software integer
+# rsa 2048 bits 0.030273s 0.001008s     33.0    991.9	software SSE2
+# rsa 2048 bits 0.030833s 0.000976s     32.4   1025.1	hardware VIA SDK
+# rsa 2048 bits 0.011879s 0.000342s     84.2   2921.7	hardware this
+#
+# rsa 4096 bits 0.327097s 0.004859s      3.1    205.8	software integer
+# rsa 4096 bits 0.229318s 0.003859s      4.4    259.2	software SSE2
+# rsa 4096 bits 0.233953s 0.003274s      4.3    305.4	hardware VIA SDK
+# rsa 4096 bits 0.070493s 0.001166s     14.2    857.6	hardware this
+#
+# dsa  512 bits 0.001342s 0.001651s    745.2    605.7	software integer
+# dsa  512 bits 0.000844s 0.000987s   1185.3   1013.1	software SSE2
+# dsa  512 bits 0.001902s 0.002247s    525.6    444.9	hardware VIA SDK
+# dsa  512 bits 0.000458s 0.000524s   2182.2   1909.1	hardware this
+#
+# dsa 1024 bits 0.003964s 0.004926s    252.3    203.0	software integer
+# dsa 1024 bits 0.002686s 0.003166s    372.3    315.8	software SSE2
+# dsa 1024 bits 0.002397s 0.002823s    417.1    354.3	hardware VIA SDK
+# dsa 1024 bits 0.000978s 0.001170s   1022.2    855.0	hardware this
+#
+# dsa 2048 bits 0.013280s 0.016518s     75.3     60.5	software integer
+# dsa 2048 bits 0.009911s 0.011522s    100.9     86.8	software SSE2
+# dsa 2048 bits 0.009542s 0.011763s    104.8     85.0	hardware VIA SDK
+# dsa 2048 bits 0.002884s 0.003352s    346.8    298.3	hardware this
+#
+# To give you some other reference point here is output for 2.4GHz P4
+# running hand-coded SSE2 bn_mul_mont found in 0.9.9, i.e. "software
+# SSE2" in above terms.
+#
+# rsa  512 bits 0.000407s 0.000047s   2454.2  21137.0
+# rsa 1024 bits 0.002426s 0.000141s    412.1   7100.0
+# rsa 2048 bits 0.015046s 0.000491s     66.5   2034.9
+# rsa 4096 bits 0.109770s 0.002379s      9.1    420.3
+# dsa  512 bits 0.000438s 0.000525s   2281.1   1904.1
+# dsa 1024 bits 0.001346s 0.001595s    742.7    627.0
+# dsa 2048 bits 0.004745s 0.005582s    210.7    179.1
+#
+# Conclusions:
+# - VIA SDK leaves a *lot* of room for improvement (which this
+#   implementation successfully fills:-);
+# - 'rep montmul' gives up to >3x performance improvement depending on
+#   key length;
+# - in terms of absolute performance it delivers approximately as much
+#   as modern out-of-order 32-bit cores [again, for longer keys].
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
+require "x86asm.pl";
+
+$output = pop;
+open STDOUT,">$output";
+
+&asm_init($ARGV[0]);
+
+# int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
+$func="bn_mul_mont_padlock";
+
+$pad=16*1;	# amount of reserved bytes on top of every vector
+
+# stack layout
+$mZeroPrime=&DWP(0,"esp");		# these are specified by VIA
+$A=&DWP(4,"esp");
+$B=&DWP(8,"esp");
+$T=&DWP(12,"esp");
+$M=&DWP(16,"esp");
+$scratch=&DWP(20,"esp");
+$rp=&DWP(24,"esp");			# these are mine
+$sp=&DWP(28,"esp");
+# &DWP(32,"esp")			# 32 byte scratch area
+# &DWP(64+(4*$num+$pad)*0,"esp")	# padded tp[num]
+# &DWP(64+(4*$num+$pad)*1,"esp")	# padded copy of ap[num]
+# &DWP(64+(4*$num+$pad)*2,"esp")	# padded copy of bp[num]
+# &DWP(64+(4*$num+$pad)*3,"esp")	# padded copy of np[num]
+# Note that SDK suggests to unconditionally allocate 2K per vector. This
+# has quite an impact on performance. It naturally depends on key length,
+# but to give an example 1024 bit private RSA key operations suffer >30%
+# penalty. I allocate only as much as actually required...
+
+&function_begin($func);
+	&xor	("eax","eax");
+	&mov	("ecx",&wparam(5));	# num
+	# meet VIA's limitations for num [note that the specification
+	# expresses them in bits, while we work with amount of 32-bit words]
+	&test	("ecx",3);
+	&jnz	(&label("leave"));	# num % 4 != 0
+	&cmp	("ecx",8);
+	&jb	(&label("leave"));	# num < 8
+	&cmp	("ecx",1024);
+	&ja	(&label("leave"));	# num > 1024
+
+	&pushf	();
+	&cld	();
+
+	&mov	("edi",&wparam(0));	# rp
+	&mov	("eax",&wparam(1));	# ap
+	&mov	("ebx",&wparam(2));	# bp
+	&mov	("edx",&wparam(3));	# np
+	&mov	("esi",&wparam(4));	# n0
+	&mov	("esi",&DWP(0,"esi"));	# *n0
+
+	&lea	("ecx",&DWP($pad,"","ecx",4));	# ecx becomes vector size in bytes
+	&lea	("ebp",&DWP(64,"","ecx",4));	# allocate 4 vectors + 64 bytes
+	&neg	("ebp");
+	&add	("ebp","esp");
+	&and	("ebp",-64);		# align to cache-line
+	&xchg	("ebp","esp");		# alloca
+
+	&mov	($rp,"edi");		# save rp
+	&mov	($sp,"ebp");		# save esp
+
+	&mov	($mZeroPrime,"esi");
+	&lea	("esi",&DWP(64,"esp"));	# tp
+	&mov	($T,"esi");
+	&lea	("edi",&DWP(32,"esp"));	# scratch area
+	&mov	($scratch,"edi");
+	&mov	("esi","eax");
+
+	&lea	("ebp",&DWP(-$pad,"ecx"));
+	&shr	("ebp",2);		# restore original num value in ebp
+
+	&xor	("eax","eax");
+
+	&mov	("ecx","ebp");
+	&lea	("ecx",&DWP((32+$pad)/4,"ecx"));# padded tp + scratch
+	&data_byte(0xf3,0xab);		# rep stosl, bzero
+
+	&mov	("ecx","ebp");
+	&lea	("edi",&DWP(64+$pad,"esp","ecx",4));# pointer to ap copy
+	&mov	($A,"edi");
+	&data_byte(0xf3,0xa5);		# rep movsl, memcpy
+	&mov	("ecx",$pad/4);
+	&data_byte(0xf3,0xab);		# rep stosl, bzero pad
+	# edi points at the end of padded ap copy...
+
+	&mov	("ecx","ebp");
+	&mov	("esi","ebx");
+	&mov	($B,"edi");
+	&data_byte(0xf3,0xa5);		# rep movsl, memcpy
+	&mov	("ecx",$pad/4);
+	&data_byte(0xf3,0xab);		# rep stosl, bzero pad
+	# edi points at the end of padded bp copy...
+
+	&mov	("ecx","ebp");
+	&mov	("esi","edx");
+	&mov	($M,"edi");
+	&data_byte(0xf3,0xa5);		# rep movsl, memcpy
+	&mov	("ecx",$pad/4);
+	&data_byte(0xf3,0xab);		# rep stosl, bzero pad
+	# edi points at the end of padded np copy...
+
+	# let magic happen...
+	&mov	("ecx","ebp");
+	&mov	("esi","esp");
+	&shl	("ecx",5);		# convert word counter to bit counter
+	&align	(4);
+	&data_byte(0xf3,0x0f,0xa6,0xc0);# rep montmul
+
+	&mov	("ecx","ebp");
+	&lea	("esi",&DWP(64,"esp"));		# tp
+	# edi still points at the end of padded np copy...
+	&neg	("ebp");
+	&lea	("ebp",&DWP(-$pad,"edi","ebp",4));	# so just "rewind"
+	&mov	("edi",$rp);			# restore rp
+	&xor	("edx","edx");			# i=0 and clear CF
+
+&set_label("sub",8);
+	&mov	("eax",&DWP(0,"esi","edx",4));
+	&sbb	("eax",&DWP(0,"ebp","edx",4));
+	&mov	(&DWP(0,"edi","edx",4),"eax");	# rp[i]=tp[i]-np[i]
+	&lea	("edx",&DWP(1,"edx"));		# i++
+	&loop	(&label("sub"));		# doesn't affect CF!
+
+	&mov	("eax",&DWP(0,"esi","edx",4));	# upmost overflow bit
+	&sbb	("eax",0);
+
+	&mov	("ecx","edx");			# num
+	&mov	("edx",0);			# i=0
+
+&set_label("copy",8);
+	&mov	("ebx",&DWP(0,"esi","edx",4));
+	&mov	("eax",&DWP(0,"edi","edx",4));
+	&mov	(&DWP(0,"esi","edx",4),"ecx");	# zap tp
+	&cmovc	("eax","ebx");
+	&mov	(&DWP(0,"edi","edx",4),"eax");
+	&lea	("edx",&DWP(1,"edx"));		# i++
+	&loop	(&label("copy"));
+
+	&mov	("ebp",$sp);
+	&xor	("eax","eax");
+
+	&mov	("ecx",64/4);
+	&mov	("edi","esp");		# zap frame including scratch area
+	&data_byte(0xf3,0xab);		# rep stosl, bzero
+
+	# zap copies of ap, bp and np
+	&lea	("edi",&DWP(64+$pad,"esp","edx",4));# pointer to ap
+	&lea	("ecx",&DWP(3*$pad/4,"edx","edx",2));
+	&data_byte(0xf3,0xab);		# rep stosl, bzero
+
+	&mov	("esp","ebp");
+	&inc	("eax");		# signal "done"
+	&popf	();
+&set_label("leave");
+&function_end($func);
+
+&asciz("Padlock Montgomery Multiplication, CRYPTOGAMS by <appro\@openssl.org>");
+
+&asm_finish();
+
+close STDOUT;
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/vis3-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/vis3-mont.pl
@ -0,0 +1,384 @@
+#! /usr/bin/env perl
+# Copyright 2012-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# October 2012.
+#
+# SPARCv9 VIS3 Montgomery multiplication procedure suitable for T3 and
+# onward. There are three new instructions used here: umulxhi,
+# addxc[cc] and initializing store. On T3 RSA private key operations
+# are 1.54/1.87/2.11/2.26 times faster for 512/1024/2048/4096-bit key
+# lengths. This is without dedicated squaring procedure. On T4
+# corresponding coefficients are 1.47/2.10/2.80/2.90x, which is mostly
+# for reference purposes, because T4 has dedicated Montgomery
+# multiplication and squaring *instructions* that deliver even more.
+
+$output = pop;
+open STDOUT,">$output";
+
+$frame = "STACK_FRAME";
+$bias = "STACK_BIAS";
+
+$code.=<<___;
+#include "sparc_arch.h"
+
+#ifdef	__arch64__
+.register	%g2,#scratch
+.register	%g3,#scratch
+#endif
+
+.section	".text",#alloc,#execinstr
+___
+
+($n0,$m0,$m1,$lo0,$hi0, $lo1,$hi1,$aj,$alo,$nj,$nlo,$tj)=
+	(map("%g$_",(1..5)),map("%o$_",(0..5,7)));
+
+# int bn_mul_mont(
+$rp="%o0";	# BN_ULONG *rp,
+$ap="%o1";	# const BN_ULONG *ap,
+$bp="%o2";	# const BN_ULONG *bp,
+$np="%o3";	# const BN_ULONG *np,
+$n0p="%o4";	# const BN_ULONG *n0,
+$num="%o5";	# int num);	# caller ensures that num is even
+				# and >=6
+$code.=<<___;
+.globl	bn_mul_mont_vis3
+.align	32
+bn_mul_mont_vis3:
+	add	%sp,	$bias,	%g4	! real top of stack
+	sll	$num,	2,	$num	! size in bytes
+	add	$num,	63,	%g5
+	andn	%g5,	63,	%g5	! buffer size rounded up to 64 bytes
+	add	%g5,	%g5,	%g1
+	add	%g5,	%g1,	%g1	! 3*buffer size
+	sub	%g4,	%g1,	%g1
+	andn	%g1,	63,	%g1	! align at 64 byte
+	sub	%g1,	$frame,	%g1	! new top of stack
+	sub	%g1,	%g4,	%g1
+
+	save	%sp,	%g1,	%sp
+___
+
+#	+-------------------------------+<-----	%sp
+#	.				.
+#	+-------------------------------+<-----	aligned at 64 bytes
+#	| __int64 tmp[0]		|
+#	+-------------------------------+
+#	.				.
+#	.				.
+#	+-------------------------------+<----- aligned at 64 bytes
+#	| __int64 ap[1..0]		|	converted ap[]
+#	+-------------------------------+
+#	| __int64 np[1..0]		|	converted np[]
+#	+-------------------------------+
+#	| __int64 ap[3..2]		|
+#	.				.
+#	.				.
+#	+-------------------------------+
+($rp,$ap,$bp,$np,$n0p,$num)=map("%i$_",(0..5));
+($t0,$t1,$t2,$t3,$cnt,$tp,$bufsz,$anp)=map("%l$_",(0..7));
+($ovf,$i)=($t0,$t1);
+$code.=<<___;
+	ld	[$n0p+0],	$t0	! pull n0[0..1] value
+	add	%sp, $bias+$frame, $tp
+	ld	[$n0p+4],	$t1
+	add	$tp,	%g5,	$anp
+	ld	[$bp+0],	$t2	! m0=bp[0]
+	sllx	$t1,	32,	$n0
+	ld	[$bp+4],	$t3
+	or	$t0,	$n0,	$n0
+	add	$bp,	8,	$bp
+
+	ld	[$ap+0],	$t0	! ap[0]
+	sllx	$t3,	32,	$m0
+	ld	[$ap+4],	$t1
+	or	$t2,	$m0,	$m0
+
+	ld	[$ap+8],	$t2	! ap[1]
+	sllx	$t1,	32,	$aj
+	ld	[$ap+12],	$t3
+	or	$t0,	$aj,	$aj
+	add	$ap,	16,	$ap
+	stx	$aj,	[$anp]		! converted ap[0]
+
+	mulx	$aj,	$m0,	$lo0	! ap[0]*bp[0]
+	umulxhi	$aj,	$m0,	$hi0
+
+	ld	[$np+0],	$t0	! np[0]
+	sllx	$t3,	32,	$aj
+	ld	[$np+4],	$t1
+	or	$t2,	$aj,	$aj
+
+	ld	[$np+8],	$t2	! np[1]
+	sllx	$t1,	32,	$nj
+	ld	[$np+12],	$t3
+	or	$t0, $nj,	$nj
+	add	$np,	16,	$np
+	stx	$nj,	[$anp+8]	! converted np[0]
+
+	mulx	$lo0,	$n0,	$m1	! "tp[0]"*n0
+	stx	$aj,	[$anp+16]	! converted ap[1]
+
+	mulx	$aj,	$m0,	$alo	! ap[1]*bp[0]
+	umulxhi	$aj,	$m0,	$aj	! ahi=aj
+
+	mulx	$nj,	$m1,	$lo1	! np[0]*m1
+	umulxhi	$nj,	$m1,	$hi1
+
+	sllx	$t3,	32,	$nj
+	or	$t2,	$nj,	$nj
+	stx	$nj,	[$anp+24]	! converted np[1]
+	add	$anp,	32,	$anp
+
+	addcc	$lo0,	$lo1,	$lo1
+	addxc	%g0,	$hi1,	$hi1
+
+	mulx	$nj,	$m1,	$nlo	! np[1]*m1
+	umulxhi	$nj,	$m1,	$nj	! nhi=nj
+
+	ba	.L1st
+	sub	$num,	24,	$cnt	! cnt=num-3
+
+.align	16
+.L1st:
+	ld	[$ap+0],	$t0	! ap[j]
+	addcc	$alo,	$hi0,	$lo0
+	ld	[$ap+4],	$t1
+	addxc	$aj,	%g0,	$hi0
+
+	sllx	$t1,	32,	$aj
+	add	$ap,	8,	$ap
+	or	$t0,	$aj,	$aj
+	stx	$aj,	[$anp]		! converted ap[j]
+
+	ld	[$np+0],	$t2	! np[j]
+	addcc	$nlo,	$hi1,	$lo1
+	ld	[$np+4],	$t3
+	addxc	$nj,	%g0,	$hi1	! nhi=nj
+
+	sllx	$t3,	32,	$nj
+	add	$np,	8,	$np
+	mulx	$aj,	$m0,	$alo	! ap[j]*bp[0]
+	or	$t2,	$nj,	$nj
+	umulxhi	$aj,	$m0,	$aj	! ahi=aj
+	stx	$nj,	[$anp+8]	! converted np[j]
+	add	$anp,	16,	$anp	! anp++
+
+	mulx	$nj,	$m1,	$nlo	! np[j]*m1
+	addcc	$lo0,	$lo1,	$lo1	! np[j]*m1+ap[j]*bp[0]
+	umulxhi	$nj,	$m1,	$nj	! nhi=nj
+	addxc	%g0,	$hi1,	$hi1
+	stx	$lo1,	[$tp]		! tp[j-1]
+	add	$tp,	8,	$tp	! tp++
+
+	brnz,pt	$cnt,	.L1st
+	sub	$cnt,	8,	$cnt	! j--
+!.L1st
+	addcc	$alo,	$hi0,	$lo0
+	addxc	$aj,	%g0,	$hi0	! ahi=aj
+
+	addcc	$nlo,	$hi1,	$lo1
+	addxc	$nj,	%g0,	$hi1
+	addcc	$lo0,	$lo1,	$lo1	! np[j]*m1+ap[j]*bp[0]
+	addxc	%g0,	$hi1,	$hi1
+	stx	$lo1,	[$tp]		! tp[j-1]
+	add	$tp,	8,	$tp
+
+	addcc	$hi0,	$hi1,	$hi1
+	addxc	%g0,	%g0,	$ovf	! upmost overflow bit
+	stx	$hi1,	[$tp]
+	add	$tp,	8,	$tp
+
+	ba	.Louter
+	sub	$num,	16,	$i	! i=num-2
+
+.align	16
+.Louter:
+	ld	[$bp+0],	$t2	! m0=bp[i]
+	ld	[$bp+4],	$t3
+
+	sub	$anp,	$num,	$anp	! rewind
+	sub	$tp,	$num,	$tp
+	sub	$anp,	$num,	$anp
+
+	add	$bp,	8,	$bp
+	sllx	$t3,	32,	$m0
+	ldx	[$anp+0],	$aj	! ap[0]
+	or	$t2,	$m0,	$m0
+	ldx	[$anp+8],	$nj	! np[0]
+
+	mulx	$aj,	$m0,	$lo0	! ap[0]*bp[i]
+	ldx	[$tp],		$tj	! tp[0]
+	umulxhi	$aj,	$m0,	$hi0
+	ldx	[$anp+16],	$aj	! ap[1]
+	addcc	$lo0,	$tj,	$lo0	! ap[0]*bp[i]+tp[0]
+	mulx	$aj,	$m0,	$alo	! ap[1]*bp[i]
+	addxc	%g0,	$hi0,	$hi0
+	mulx	$lo0,	$n0,	$m1	! tp[0]*n0
+	umulxhi	$aj,	$m0,	$aj	! ahi=aj
+	mulx	$nj,	$m1,	$lo1	! np[0]*m1
+	umulxhi	$nj,	$m1,	$hi1
+	ldx	[$anp+24],	$nj	! np[1]
+	add	$anp,	32,	$anp
+	addcc	$lo1,	$lo0,	$lo1
+	mulx	$nj,	$m1,	$nlo	! np[1]*m1
+	addxc	%g0,	$hi1,	$hi1
+	umulxhi	$nj,	$m1,	$nj	! nhi=nj
+
+	ba	.Linner
+	sub	$num,	24,	$cnt	! cnt=num-3
+.align	16
+.Linner:
+	addcc	$alo,	$hi0,	$lo0
+	ldx	[$tp+8],	$tj	! tp[j]
+	addxc	$aj,	%g0,	$hi0	! ahi=aj
+	ldx	[$anp+0],	$aj	! ap[j]
+	addcc	$nlo,	$hi1,	$lo1
+	mulx	$aj,	$m0,	$alo	! ap[j]*bp[i]
+	addxc	$nj,	%g0,	$hi1	! nhi=nj
+	ldx	[$anp+8],	$nj	! np[j]
+	add	$anp,	16,	$anp
+	umulxhi	$aj,	$m0,	$aj	! ahi=aj
+	addcc	$lo0,	$tj,	$lo0	! ap[j]*bp[i]+tp[j]
+	mulx	$nj,	$m1,	$nlo	! np[j]*m1
+	addxc	%g0,	$hi0,	$hi0
+	umulxhi	$nj,	$m1,	$nj	! nhi=nj
+	addcc	$lo1,	$lo0,	$lo1	! np[j]*m1+ap[j]*bp[i]+tp[j]
+	addxc	%g0,	$hi1,	$hi1
+	stx	$lo1,	[$tp]		! tp[j-1]
+	add	$tp,	8,	$tp
+	brnz,pt	$cnt,	.Linner
+	sub	$cnt,	8,	$cnt
+!.Linner
+	ldx	[$tp+8],	$tj	! tp[j]
+	addcc	$alo,	$hi0,	$lo0
+	addxc	$aj,	%g0,	$hi0	! ahi=aj
+	addcc	$lo0,	$tj,	$lo0	! ap[j]*bp[i]+tp[j]
+	addxc	%g0,	$hi0,	$hi0
+
+	addcc	$nlo,	$hi1,	$lo1
+	addxc	$nj,	%g0,	$hi1	! nhi=nj
+	addcc	$lo1,	$lo0,	$lo1	! np[j]*m1+ap[j]*bp[i]+tp[j]
+	addxc	%g0,	$hi1,	$hi1
+	stx	$lo1,	[$tp]		! tp[j-1]
+
+	subcc	%g0,	$ovf,	%g0	! move upmost overflow to CCR.xcc
+	addxccc	$hi1,	$hi0,	$hi1
+	addxc	%g0,	%g0,	$ovf
+	stx	$hi1,	[$tp+8]
+	add	$tp,	16,	$tp
+
+	brnz,pt	$i,	.Louter
+	sub	$i,	8,	$i
+
+	sub	$anp,	$num,	$anp	! rewind
+	sub	$tp,	$num,	$tp
+	sub	$anp,	$num,	$anp
+	ba	.Lsub
+	subcc	$num,	8,	$cnt	! cnt=num-1 and clear CCR.xcc
+
+.align	16
+.Lsub:
+	ldx	[$tp],		$tj
+	add	$tp,	8,	$tp
+	ldx	[$anp+8],	$nj
+	add	$anp,	16,	$anp
+	subccc	$tj,	$nj,	$t2	! tp[j]-np[j]
+	srlx	$tj,	32,	$tj
+	srlx	$nj,	32,	$nj
+	subccc	$tj,	$nj,	$t3
+	add	$rp,	8,	$rp
+	st	$t2,	[$rp-4]		! reverse order
+	st	$t3,	[$rp-8]
+	brnz,pt	$cnt,	.Lsub
+	sub	$cnt,	8,	$cnt
+
+	sub	$anp,	$num,	$anp	! rewind
+	sub	$tp,	$num,	$tp
+	sub	$anp,	$num,	$anp
+	sub	$rp,	$num,	$rp
+
+	subccc	$ovf,	%g0,	$ovf	! handle upmost overflow bit
+	ba	.Lcopy
+	sub	$num,	8,	$cnt
+
+.align	16
+.Lcopy:					! conditional copy
+	ld	[$tp+0],	$t0
+	ld	[$tp+4],	$t1
+	ld	[$rp+0],	$t2
+	ld	[$rp+4],	$t3
+	stx	%g0,	[$tp]		! zap
+	add	$tp,	8,	$tp
+	stx	%g0,	[$anp]		! zap
+	stx	%g0,	[$anp+8]
+	add	$anp,	16,	$anp
+	movcs	%icc,	$t0,	$t2
+	movcs	%icc,	$t1,	$t3
+	st	$t3,	[$rp+0]		! flip order
+	st	$t2,	[$rp+4]
+	add	$rp,	8,	$rp
+	brnz	$cnt,	.Lcopy
+	sub	$cnt,	8,	$cnt
+
+	mov	1,	%o0
+	ret
+	restore
+.type	bn_mul_mont_vis3, #function
+.size	bn_mul_mont_vis3, .-bn_mul_mont_vis3
+.asciz  "Montgomery Multiplication for SPARCv9 VIS3, CRYPTOGAMS by <appro\@openssl.org>"
+.align	4
+___
+
+# Purpose of these subroutines is to explicitly encode VIS instructions,
+# so that one can compile the module without having to specify VIS
+# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
+# Idea is to reserve for option to produce "universal" binary and let
+# programmer detect if current CPU is VIS capable at run-time.
+sub unvis3 {
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
+my ($ref,$opf);
+my %visopf = (	"addxc"		=> 0x011,
+		"addxccc"	=> 0x013,
+		"umulxhi"	=> 0x016	);
+
+    $ref = "$mnemonic\t$rs1,$rs2,$rd";
+
+    if ($opf=$visopf{$mnemonic}) {
+	foreach ($rs1,$rs2,$rd) {
+	    return $ref if (!/%([goli])([0-9])/);
+	    $_=$bias{$1}+$2;
+	}
+
+	return	sprintf ".word\t0x%08x !%s",
+			0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
+			$ref;
+    } else {
+	return $ref;
+    }
+}
+
+foreach (split("\n",$code)) {
+	s/\`([^\`]*)\`/eval $1/ge;
+
+	s/\b(umulxhi|addxc[c]{0,2})\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
+		&unvis3($1,$2,$3,$4)
+	 /ge;
+
+	print $_,"\n";
+}
+
+close STDOUT;
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86-gf2m.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86-gf2m.pl
@ -0,0 +1,325 @@
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# May 2011
+#
+# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
+# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
+# the time being... Except that it has three code paths: pure integer
+# code suitable for any x86 CPU, MMX code suitable for PIII and later
+# and PCLMULQDQ suitable for Westmere and later. Improvement varies
+# from one benchmark and µ-arch to another. Below are interval values
+# for 163- and 571-bit ECDH benchmarks relative to compiler-generated
+# code:
+#
+# PIII		16%-30%
+# P4		12%-12%
+# Opteron	18%-40%
+# Core2		19%-44%
+# Atom		38%-64%
+# Westmere	53%-121%(PCLMULQDQ)/20%-32%(MMX)
+# Sandy Bridge	72%-127%(PCLMULQDQ)/27%-23%(MMX)
+#
+# Note that above improvement coefficients are not coefficients for
+# bn_GF2m_mul_2x2 itself. For example 120% ECDH improvement is result
+# of bn_GF2m_mul_2x2 being >4x faster. As it gets faster, benchmark
+# is more and more dominated by other subroutines, most notably by
+# BN_GF2m_mod[_mul]_arr...
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
+require "x86asm.pl";
+
+$output = pop;
+open STDOUT,">$output";
+
+&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386");
+
+$sse2=0;
+for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
+
+&external_label("OPENSSL_ia32cap_P") if ($sse2);
+
+$a="eax";
+$b="ebx";
+($a1,$a2,$a4)=("ecx","edx","ebp");
+
+$R="mm0";
+@T=("mm1","mm2");
+($A,$B,$B30,$B31)=("mm2","mm3","mm4","mm5");
+@i=("esi","edi");
+
+					if (!$x86only) {
+&function_begin_B("_mul_1x1_mmx");
+	&sub	("esp",32+4);
+	 &mov	($a1,$a);
+	 &lea	($a2,&DWP(0,$a,$a));
+	 &and	($a1,0x3fffffff);
+	 &lea	($a4,&DWP(0,$a2,$a2));
+	 &mov	(&DWP(0*4,"esp"),0);
+	 &and	($a2,0x7fffffff);
+	&movd	($A,$a);
+	&movd	($B,$b);
+	 &mov	(&DWP(1*4,"esp"),$a1);	# a1
+	 &xor	($a1,$a2);		# a1^a2
+	&pxor	($B31,$B31);
+	&pxor	($B30,$B30);
+	 &mov	(&DWP(2*4,"esp"),$a2);	# a2
+	 &xor	($a2,$a4);		# a2^a4
+	 &mov	(&DWP(3*4,"esp"),$a1);	# a1^a2
+	&pcmpgtd($B31,$A);		# broadcast 31st bit
+	&paddd	($A,$A);		# $A<<=1
+	 &xor	($a1,$a2);		# a1^a4=a1^a2^a2^a4
+	 &mov	(&DWP(4*4,"esp"),$a4);	# a4
+	 &xor	($a4,$a2);		# a2=a4^a2^a4
+	&pand	($B31,$B);
+	&pcmpgtd($B30,$A);		# broadcast 30th bit
+	 &mov	(&DWP(5*4,"esp"),$a1);	# a1^a4
+	 &xor	($a4,$a1);		# a1^a2^a4
+	&psllq	($B31,31);
+	&pand	($B30,$B);
+	 &mov	(&DWP(6*4,"esp"),$a2);	# a2^a4
+	&mov	(@i[0],0x7);
+	 &mov	(&DWP(7*4,"esp"),$a4);	# a1^a2^a4
+	 &mov	($a4,@i[0]);
+	&and	(@i[0],$b);
+	&shr	($b,3);
+	&mov	(@i[1],$a4);
+	&psllq	($B30,30);
+	&and	(@i[1],$b);
+	&shr	($b,3);
+	&movd	($R,&DWP(0,"esp",@i[0],4));
+	&mov	(@i[0],$a4);
+	&and	(@i[0],$b);
+	&shr	($b,3);
+	for($n=1;$n<9;$n++) {
+		&movd	(@T[1],&DWP(0,"esp",@i[1],4));
+		&mov	(@i[1],$a4);
+		&psllq	(@T[1],3*$n);
+		&and	(@i[1],$b);
+		&shr	($b,3);
+		&pxor	($R,@T[1]);
+
+		push(@i,shift(@i)); push(@T,shift(@T));
+	}
+	&movd	(@T[1],&DWP(0,"esp",@i[1],4));
+	&pxor	($R,$B30);
+	&psllq	(@T[1],3*$n++);
+	&pxor	($R,@T[1]);
+
+	&movd	(@T[0],&DWP(0,"esp",@i[0],4));
+	&pxor	($R,$B31);
+	&psllq	(@T[0],3*$n);
+	&add	("esp",32+4);
+	&pxor	($R,@T[0]);
+	&ret	();
+&function_end_B("_mul_1x1_mmx");
+					}
+
+($lo,$hi)=("eax","edx");
+@T=("ecx","ebp");
+
+&function_begin_B("_mul_1x1_ialu");
+	&sub	("esp",32+4);
+	 &mov	($a1,$a);
+	 &lea	($a2,&DWP(0,$a,$a));
+	 &lea	($a4,&DWP(0,"",$a,4));
+	 &and	($a1,0x3fffffff);
+	&lea	(@i[1],&DWP(0,$lo,$lo));
+	&sar	($lo,31);		# broadcast 31st bit
+	 &mov	(&DWP(0*4,"esp"),0);
+	 &and	($a2,0x7fffffff);
+	 &mov	(&DWP(1*4,"esp"),$a1);	# a1
+	 &xor	($a1,$a2);		# a1^a2
+	 &mov	(&DWP(2*4,"esp"),$a2);	# a2
+	 &xor	($a2,$a4);		# a2^a4
+	 &mov	(&DWP(3*4,"esp"),$a1);	# a1^a2
+	 &xor	($a1,$a2);		# a1^a4=a1^a2^a2^a4
+	 &mov	(&DWP(4*4,"esp"),$a4);	# a4
+	 &xor	($a4,$a2);		# a2=a4^a2^a4
+	 &mov	(&DWP(5*4,"esp"),$a1);	# a1^a4
+	 &xor	($a4,$a1);		# a1^a2^a4
+	&sar	(@i[1],31);		# broadcast 30th bit
+	&and	($lo,$b);
+	 &mov	(&DWP(6*4,"esp"),$a2);	# a2^a4
+	&and	(@i[1],$b);
+	 &mov	(&DWP(7*4,"esp"),$a4);	# a1^a2^a4
+	&mov	($hi,$lo);
+	&shl	($lo,31);
+	&mov	(@T[0],@i[1]);
+	&shr	($hi,1);
+
+	 &mov	(@i[0],0x7);
+	&shl	(@i[1],30);
+	 &and	(@i[0],$b);
+	&shr	(@T[0],2);
+	&xor	($lo,@i[1]);
+
+	&shr	($b,3);
+	&mov	(@i[1],0x7);		# 5-byte instruction!?
+	&and	(@i[1],$b);
+	&shr	($b,3);
+	 &xor	($hi,@T[0]);
+	&xor	($lo,&DWP(0,"esp",@i[0],4));
+	&mov	(@i[0],0x7);
+	&and	(@i[0],$b);
+	&shr	($b,3);
+	for($n=1;$n<9;$n++) {
+		&mov	(@T[1],&DWP(0,"esp",@i[1],4));
+		&mov	(@i[1],0x7);
+		&mov	(@T[0],@T[1]);
+		&shl	(@T[1],3*$n);
+		&and	(@i[1],$b);
+		&shr	(@T[0],32-3*$n);
+		&xor	($lo,@T[1]);
+		&shr	($b,3);
+		&xor	($hi,@T[0]);
+
+		push(@i,shift(@i)); push(@T,shift(@T));
+	}
+	&mov	(@T[1],&DWP(0,"esp",@i[1],4));
+	&mov	(@T[0],@T[1]);
+	&shl	(@T[1],3*$n);
+	&mov	(@i[1],&DWP(0,"esp",@i[0],4));
+	&shr	(@T[0],32-3*$n);	$n++;
+	&mov	(@i[0],@i[1]);
+	&xor	($lo,@T[1]);
+	&shl	(@i[1],3*$n);
+	&xor	($hi,@T[0]);
+	&shr	(@i[0],32-3*$n);
+	&xor	($lo,@i[1]);
+	&xor	($hi,@i[0]);
+
+	&add	("esp",32+4);
+	&ret	();
+&function_end_B("_mul_1x1_ialu");
+
+# void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0);
+&function_begin_B("bn_GF2m_mul_2x2");
+if (!$x86only) {
+	&picmeup("edx","OPENSSL_ia32cap_P");
+	&mov	("eax",&DWP(0,"edx"));
+	&mov	("edx",&DWP(4,"edx"));
+	&test	("eax",1<<23);		# check MMX bit
+	&jz	(&label("ialu"));
+if ($sse2) {
+	&test	("eax",1<<24);		# check FXSR bit
+	&jz	(&label("mmx"));
+	&test	("edx",1<<1);		# check PCLMULQDQ bit
+	&jz	(&label("mmx"));
+
+	&movups		("xmm0",&QWP(8,"esp"));
+	&shufps		("xmm0","xmm0",0b10110001);
+	&pclmulqdq	("xmm0","xmm0",1);
+	&mov		("eax",&DWP(4,"esp"));
+	&movups		(&QWP(0,"eax"),"xmm0");
+	&ret	();
+
+&set_label("mmx",16);
+}
+	&push	("ebp");
+	&push	("ebx");
+	&push	("esi");
+	&push	("edi");
+	&mov	($a,&wparam(1));
+	&mov	($b,&wparam(3));
+	&call	("_mul_1x1_mmx");	# a1·b1
+	&movq	("mm7",$R);
+
+	&mov	($a,&wparam(2));
+	&mov	($b,&wparam(4));
+	&call	("_mul_1x1_mmx");	# a0·b0
+	&movq	("mm6",$R);
+
+	&mov	($a,&wparam(1));
+	&mov	($b,&wparam(3));
+	&xor	($a,&wparam(2));
+	&xor	($b,&wparam(4));
+	&call	("_mul_1x1_mmx");	# (a0+a1)·(b0+b1)
+	&pxor	($R,"mm7");
+	&mov	($a,&wparam(0));
+	&pxor	($R,"mm6");		# (a0+a1)·(b0+b1)-a1·b1-a0·b0
+
+	&movq	($A,$R);
+	&psllq	($R,32);
+	&pop	("edi");
+	&psrlq	($A,32);
+	&pop	("esi");
+	&pxor	($R,"mm6");
+	&pop	("ebx");
+	&pxor	($A,"mm7");
+	&movq	(&QWP(0,$a),$R);
+	&pop	("ebp");
+	&movq	(&QWP(8,$a),$A);
+	&emms	();
+	&ret	();
+&set_label("ialu",16);
+}
+	&push	("ebp");
+	&push	("ebx");
+	&push	("esi");
+	&push	("edi");
+	&stack_push(4+1);
+
+	&mov	($a,&wparam(1));
+	&mov	($b,&wparam(3));
+	&call	("_mul_1x1_ialu");	# a1·b1
+	&mov	(&DWP(8,"esp"),$lo);
+	&mov	(&DWP(12,"esp"),$hi);
+
+	&mov	($a,&wparam(2));
+	&mov	($b,&wparam(4));
+	&call	("_mul_1x1_ialu");	# a0·b0
+	&mov	(&DWP(0,"esp"),$lo);
+	&mov	(&DWP(4,"esp"),$hi);
+
+	&mov	($a,&wparam(1));
+	&mov	($b,&wparam(3));
+	&xor	($a,&wparam(2));
+	&xor	($b,&wparam(4));
+	&call	("_mul_1x1_ialu");	# (a0+a1)·(b0+b1)
+
+	&mov	("ebp",&wparam(0));
+		 @r=("ebx","ecx","edi","esi");
+	&mov	(@r[0],&DWP(0,"esp"));
+	&mov	(@r[1],&DWP(4,"esp"));
+	&mov	(@r[2],&DWP(8,"esp"));
+	&mov	(@r[3],&DWP(12,"esp"));
+
+	&xor	($lo,$hi);
+	&xor	($hi,@r[1]);
+	&xor	($lo,@r[0]);
+	&mov	(&DWP(0,"ebp"),@r[0]);
+	&xor	($hi,@r[2]);
+	&mov	(&DWP(12,"ebp"),@r[3]);
+	&xor	($lo,@r[3]);
+	&stack_pop(4+1);
+	&xor	($hi,@r[3]);
+	&pop	("edi");
+	&xor	($lo,$hi);
+	&pop	("esi");
+	&mov	(&DWP(8,"ebp"),$hi);
+	&pop	("ebx");
+	&mov	(&DWP(4,"ebp"),$lo);
+	&pop	("ebp");
+	&ret	();
+&function_end_B("bn_GF2m_mul_2x2");
+
+&asciz	("GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
+
+&asm_finish();
+
+close STDOUT;
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86-mont.pl
@ -0,0 +1,631 @@
+#! /usr/bin/env perl
+# Copyright 2005-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# October 2005
+#
+# This is a "teaser" code, as it can be improved in several ways...
+# First of all non-SSE2 path should be implemented (yes, for now it
+# performs Montgomery multiplication/convolution only on SSE2-capable
+# CPUs such as P4, others fall down to original code). Then inner loop
+# can be unrolled and modulo-scheduled to improve ILP and possibly
+# moved to 128-bit XMM register bank (though it would require input
+# rearrangement and/or increase bus bandwidth utilization). Dedicated
+# squaring procedure should give further performance improvement...
+# Yet, for being draft, the code improves rsa512 *sign* benchmark by
+# 110%(!), rsa1024 one - by 70% and rsa4096 - by 20%:-)
+
+# December 2006
+#
+# Modulo-scheduling SSE2 loops results in further 15-20% improvement.
+# Integer-only code [being equipped with dedicated squaring procedure]
+# gives ~40% on rsa512 sign benchmark...
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
+require "x86asm.pl";
+
+$output = pop;
+open STDOUT,">$output";
+
+&asm_init($ARGV[0]);
+
+$sse2=0;
+for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
+
+&external_label("OPENSSL_ia32cap_P") if ($sse2);
+
+&function_begin("bn_mul_mont");
+
+$i="edx";
+$j="ecx";
+$ap="esi";	$tp="esi";		# overlapping variables!!!
+$rp="edi";	$bp="edi";		# overlapping variables!!!
+$np="ebp";
+$num="ebx";
+
+$_num=&DWP(4*0,"esp");			# stack top layout
+$_rp=&DWP(4*1,"esp");
+$_ap=&DWP(4*2,"esp");
+$_bp=&DWP(4*3,"esp");
+$_np=&DWP(4*4,"esp");
+$_n0=&DWP(4*5,"esp");	$_n0q=&QWP(4*5,"esp");
+$_sp=&DWP(4*6,"esp");
+$_bpend=&DWP(4*7,"esp");
+$frame=32;				# size of above frame rounded up to 16n
+
+	&xor	("eax","eax");
+	&mov	("edi",&wparam(5));	# int num
+	&cmp	("edi",4);
+	&jl	(&label("just_leave"));
+
+	&lea	("esi",&wparam(0));	# put aside pointer to argument block
+	&lea	("edx",&wparam(1));	# load ap
+	&add	("edi",2);		# extra two words on top of tp
+	&neg	("edi");
+	&lea	("ebp",&DWP(-$frame,"esp","edi",4));	# future alloca($frame+4*(num+2))
+	&neg	("edi");
+
+	# minimize cache contention by arranging 2K window between stack
+	# pointer and ap argument [np is also position sensitive vector,
+	# but it's assumed to be near ap, as it's allocated at ~same
+	# time].
+	&mov	("eax","ebp");
+	&sub	("eax","edx");
+	&and	("eax",2047);
+	&sub	("ebp","eax");		# this aligns sp and ap modulo 2048
+
+	&xor	("edx","ebp");
+	&and	("edx",2048);
+	&xor	("edx",2048);
+	&sub	("ebp","edx");		# this splits them apart modulo 4096
+
+	&and	("ebp",-64);		# align to cache line
+
+	# An OS-agnostic version of __chkstk.
+	#
+	# Some OSes (Windows) insist on stack being "wired" to
+	# physical memory in strictly sequential manner, i.e. if stack
+	# allocation spans two pages, then reference to farmost one can
+	# be punishable by SEGV. But page walking can do good even on
+	# other OSes, because it guarantees that villain thread hits
+	# the guard page before it can make damage to innocent one...
+	&mov	("eax","esp");
+	&sub	("eax","ebp");
+	&and	("eax",-4096);
+	&mov	("edx","esp");		# saved stack pointer!
+	&lea	("esp",&DWP(0,"ebp","eax"));
+	&mov	("eax",&DWP(0,"esp"));
+	&cmp	("esp","ebp");
+	&ja	(&label("page_walk"));
+	&jmp	(&label("page_walk_done"));
+
+&set_label("page_walk",16);
+	&lea	("esp",&DWP(-4096,"esp"));
+	&mov	("eax",&DWP(0,"esp"));
+	&cmp	("esp","ebp");
+	&ja	(&label("page_walk"));
+&set_label("page_walk_done");
+
+	################################# load argument block...
+	&mov	("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
+	&mov	("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
+	&mov	("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp
+	&mov	("ebp",&DWP(3*4,"esi"));# const BN_ULONG *np
+	&mov	("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0
+	#&mov	("edi",&DWP(5*4,"esi"));# int num
+
+	&mov	("esi",&DWP(0,"esi"));	# pull n0[0]
+	&mov	($_rp,"eax");		# ... save a copy of argument block
+	&mov	($_ap,"ebx");
+	&mov	($_bp,"ecx");
+	&mov	($_np,"ebp");
+	&mov	($_n0,"esi");
+	&lea	($num,&DWP(-3,"edi"));	# num=num-1 to assist modulo-scheduling
+	#&mov	($_num,$num);		# redundant as $num is not reused
+	&mov	($_sp,"edx");		# saved stack pointer!
+
+if($sse2) {
+$acc0="mm0";	# mmx register bank layout
+$acc1="mm1";
+$car0="mm2";
+$car1="mm3";
+$mul0="mm4";
+$mul1="mm5";
+$temp="mm6";
+$mask="mm7";
+
+	&picmeup("eax","OPENSSL_ia32cap_P");
+	&bt	(&DWP(0,"eax"),26);
+	&jnc	(&label("non_sse2"));
+
+	&mov	("eax",-1);
+	&movd	($mask,"eax");		# mask 32 lower bits
+
+	&mov	($ap,$_ap);		# load input pointers
+	&mov	($bp,$_bp);
+	&mov	($np,$_np);
+
+	&xor	($i,$i);		# i=0
+	&xor	($j,$j);		# j=0
+
+	&movd	($mul0,&DWP(0,$bp));		# bp[0]
+	&movd	($mul1,&DWP(0,$ap));		# ap[0]
+	&movd	($car1,&DWP(0,$np));		# np[0]
+
+	&pmuludq($mul1,$mul0);			# ap[0]*bp[0]
+	&movq	($car0,$mul1);
+	&movq	($acc0,$mul1);			# I wish movd worked for
+	&pand	($acc0,$mask);			# inter-register transfers
+
+	&pmuludq($mul1,$_n0q);			# *=n0
+
+	&pmuludq($car1,$mul1);			# "t[0]"*np[0]*n0
+	&paddq	($car1,$acc0);
+
+	&movd	($acc1,&DWP(4,$np));		# np[1]
+	&movd	($acc0,&DWP(4,$ap));		# ap[1]
+
+	&psrlq	($car0,32);
+	&psrlq	($car1,32);
+
+	&inc	($j);				# j++
+&set_label("1st",16);
+	&pmuludq($acc0,$mul0);			# ap[j]*bp[0]
+	&pmuludq($acc1,$mul1);			# np[j]*m1
+	&paddq	($car0,$acc0);			# +=c0
+	&paddq	($car1,$acc1);			# +=c1
+
+	&movq	($acc0,$car0);
+	&pand	($acc0,$mask);
+	&movd	($acc1,&DWP(4,$np,$j,4));	# np[j+1]
+	&paddq	($car1,$acc0);			# +=ap[j]*bp[0];
+	&movd	($acc0,&DWP(4,$ap,$j,4));	# ap[j+1]
+	&psrlq	($car0,32);
+	&movd	(&DWP($frame-4,"esp",$j,4),$car1);	# tp[j-1]=
+	&psrlq	($car1,32);
+
+	&lea	($j,&DWP(1,$j));
+	&cmp	($j,$num);
+	&jl	(&label("1st"));
+
+	&pmuludq($acc0,$mul0);			# ap[num-1]*bp[0]
+	&pmuludq($acc1,$mul1);			# np[num-1]*m1
+	&paddq	($car0,$acc0);			# +=c0
+	&paddq	($car1,$acc1);			# +=c1
+
+	&movq	($acc0,$car0);
+	&pand	($acc0,$mask);
+	&paddq	($car1,$acc0);			# +=ap[num-1]*bp[0];
+	&movd	(&DWP($frame-4,"esp",$j,4),$car1);	# tp[num-2]=
+
+	&psrlq	($car0,32);
+	&psrlq	($car1,32);
+
+	&paddq	($car1,$car0);
+	&movq	(&QWP($frame,"esp",$num,4),$car1);	# tp[num].tp[num-1]
+
+	&inc	($i);				# i++
+&set_label("outer");
+	&xor	($j,$j);			# j=0
+
+	&movd	($mul0,&DWP(0,$bp,$i,4));	# bp[i]
+	&movd	($mul1,&DWP(0,$ap));		# ap[0]
+	&movd	($temp,&DWP($frame,"esp"));	# tp[0]
+	&movd	($car1,&DWP(0,$np));		# np[0]
+	&pmuludq($mul1,$mul0);			# ap[0]*bp[i]
+
+	&paddq	($mul1,$temp);			# +=tp[0]
+	&movq	($acc0,$mul1);
+	&movq	($car0,$mul1);
+	&pand	($acc0,$mask);
+
+	&pmuludq($mul1,$_n0q);			# *=n0
+
+	&pmuludq($car1,$mul1);
+	&paddq	($car1,$acc0);
+
+	&movd	($temp,&DWP($frame+4,"esp"));	# tp[1]
+	&movd	($acc1,&DWP(4,$np));		# np[1]
+	&movd	($acc0,&DWP(4,$ap));		# ap[1]
+
+	&psrlq	($car0,32);
+	&psrlq	($car1,32);
+	&paddq	($car0,$temp);			# +=tp[1]
+
+	&inc	($j);				# j++
+	&dec	($num);
+&set_label("inner");
+	&pmuludq($acc0,$mul0);			# ap[j]*bp[i]
+	&pmuludq($acc1,$mul1);			# np[j]*m1
+	&paddq	($car0,$acc0);			# +=c0
+	&paddq	($car1,$acc1);			# +=c1
+
+	&movq	($acc0,$car0);
+	&movd	($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1]
+	&pand	($acc0,$mask);
+	&movd	($acc1,&DWP(4,$np,$j,4));	# np[j+1]
+	&paddq	($car1,$acc0);			# +=ap[j]*bp[i]+tp[j]
+	&movd	($acc0,&DWP(4,$ap,$j,4));	# ap[j+1]
+	&psrlq	($car0,32);
+	&movd	(&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]=
+	&psrlq	($car1,32);
+	&paddq	($car0,$temp);			# +=tp[j+1]
+
+	&dec	($num);
+	&lea	($j,&DWP(1,$j));		# j++
+	&jnz	(&label("inner"));
+
+	&mov	($num,$j);
+	&pmuludq($acc0,$mul0);			# ap[num-1]*bp[i]
+	&pmuludq($acc1,$mul1);			# np[num-1]*m1
+	&paddq	($car0,$acc0);			# +=c0
+	&paddq	($car1,$acc1);			# +=c1
+
+	&movq	($acc0,$car0);
+	&pand	($acc0,$mask);
+	&paddq	($car1,$acc0);			# +=ap[num-1]*bp[i]+tp[num-1]
+	&movd	(&DWP($frame-4,"esp",$j,4),$car1);	# tp[num-2]=
+	&psrlq	($car0,32);
+	&psrlq	($car1,32);
+
+	&movd	($temp,&DWP($frame+4,"esp",$num,4));	# += tp[num]
+	&paddq	($car1,$car0);
+	&paddq	($car1,$temp);
+	&movq	(&QWP($frame,"esp",$num,4),$car1);	# tp[num].tp[num-1]
+
+	&lea	($i,&DWP(1,$i));		# i++
+	&cmp	($i,$num);
+	&jle	(&label("outer"));
+
+	&emms	();				# done with mmx bank
+	&jmp	(&label("common_tail"));
+
+&set_label("non_sse2",16);
+}
+
+if (0) {
+	&mov	("esp",$_sp);
+	&xor	("eax","eax");	# signal "not fast enough [yet]"
+	&jmp	(&label("just_leave"));
+	# While the below code provides competitive performance for
+	# all key lengths on modern Intel cores, it's still more
+	# than 10% slower for 4096-bit key elsewhere:-( "Competitive"
+	# means compared to the original integer-only assembler.
+	# 512-bit RSA sign is better by ~40%, but that's about all
+	# one can say about all CPUs...
+} else {
+$inp="esi";	# integer path uses these registers differently
+$word="edi";
+$carry="ebp";
+
+	&mov	($inp,$_ap);
+	&lea	($carry,&DWP(1,$num));
+	&mov	($word,$_bp);
+	&xor	($j,$j);				# j=0
+	&mov	("edx",$inp);
+	&and	($carry,1);				# see if num is even
+	&sub	("edx",$word);				# see if ap==bp
+	&lea	("eax",&DWP(4,$word,$num,4));		# &bp[num]
+	&or	($carry,"edx");
+	&mov	($word,&DWP(0,$word));			# bp[0]
+	&jz	(&label("bn_sqr_mont"));
+	&mov	($_bpend,"eax");
+	&mov	("eax",&DWP(0,$inp));
+	&xor	("edx","edx");
+
+&set_label("mull",16);
+	&mov	($carry,"edx");
+	&mul	($word);				# ap[j]*bp[0]
+	&add	($carry,"eax");
+	&lea	($j,&DWP(1,$j));
+	&adc	("edx",0);
+	&mov	("eax",&DWP(0,$inp,$j,4));		# ap[j+1]
+	&cmp	($j,$num);
+	&mov	(&DWP($frame-4,"esp",$j,4),$carry);	# tp[j]=
+	&jl	(&label("mull"));
+
+	&mov	($carry,"edx");
+	&mul	($word);				# ap[num-1]*bp[0]
+	 &mov	($word,$_n0);
+	&add	("eax",$carry);
+	 &mov	($inp,$_np);
+	&adc	("edx",0);
+	 &imul	($word,&DWP($frame,"esp"));		# n0*tp[0]
+
+	&mov	(&DWP($frame,"esp",$num,4),"eax");	# tp[num-1]=
+	&xor	($j,$j);
+	&mov	(&DWP($frame+4,"esp",$num,4),"edx");	# tp[num]=
+	&mov	(&DWP($frame+8,"esp",$num,4),$j);	# tp[num+1]=
+
+	&mov	("eax",&DWP(0,$inp));			# np[0]
+	&mul	($word);				# np[0]*m
+	&add	("eax",&DWP($frame,"esp"));		# +=tp[0]
+	&mov	("eax",&DWP(4,$inp));			# np[1]
+	&adc	("edx",0);
+	&inc	($j);
+
+	&jmp	(&label("2ndmadd"));
+
+&set_label("1stmadd",16);
+	&mov	($carry,"edx");
+	&mul	($word);				# ap[j]*bp[i]
+	&add	($carry,&DWP($frame,"esp",$j,4));	# +=tp[j]
+	&lea	($j,&DWP(1,$j));
+	&adc	("edx",0);
+	&add	($carry,"eax");
+	&mov	("eax",&DWP(0,$inp,$j,4));		# ap[j+1]
+	&adc	("edx",0);
+	&cmp	($j,$num);
+	&mov	(&DWP($frame-4,"esp",$j,4),$carry);	# tp[j]=
+	&jl	(&label("1stmadd"));
+
+	&mov	($carry,"edx");
+	&mul	($word);				# ap[num-1]*bp[i]
+	&add	("eax",&DWP($frame,"esp",$num,4));	# +=tp[num-1]
+	 &mov	($word,$_n0);
+	&adc	("edx",0);
+	 &mov	($inp,$_np);
+	&add	($carry,"eax");
+	&adc	("edx",0);
+	 &imul	($word,&DWP($frame,"esp"));		# n0*tp[0]
+
+	&xor	($j,$j);
+	&add	("edx",&DWP($frame+4,"esp",$num,4));	# carry+=tp[num]
+	&mov	(&DWP($frame,"esp",$num,4),$carry);	# tp[num-1]=
+	&adc	($j,0);
+	 &mov	("eax",&DWP(0,$inp));			# np[0]
+	&mov	(&DWP($frame+4,"esp",$num,4),"edx");	# tp[num]=
+	&mov	(&DWP($frame+8,"esp",$num,4),$j);	# tp[num+1]=
+
+	&mul	($word);				# np[0]*m
+	&add	("eax",&DWP($frame,"esp"));		# +=tp[0]
+	&mov	("eax",&DWP(4,$inp));			# np[1]
+	&adc	("edx",0);
+	&mov	($j,1);
+
+&set_label("2ndmadd",16);
+	&mov	($carry,"edx");
+	&mul	($word);				# np[j]*m
+	&add	($carry,&DWP($frame,"esp",$j,4));	# +=tp[j]
+	&lea	($j,&DWP(1,$j));
+	&adc	("edx",0);
+	&add	($carry,"eax");
+	&mov	("eax",&DWP(0,$inp,$j,4));		# np[j+1]
+	&adc	("edx",0);
+	&cmp	($j,$num);
+	&mov	(&DWP($frame-8,"esp",$j,4),$carry);	# tp[j-1]=
+	&jl	(&label("2ndmadd"));
+
+	&mov	($carry,"edx");
+	&mul	($word);				# np[j]*m
+	&add	($carry,&DWP($frame,"esp",$num,4));	# +=tp[num-1]
+	&adc	("edx",0);
+	&add	($carry,"eax");
+	&adc	("edx",0);
+	&mov	(&DWP($frame-4,"esp",$num,4),$carry);	# tp[num-2]=
+
+	&xor	("eax","eax");
+	 &mov	($j,$_bp);				# &bp[i]
+	&add	("edx",&DWP($frame+4,"esp",$num,4));	# carry+=tp[num]
+	&adc	("eax",&DWP($frame+8,"esp",$num,4));	# +=tp[num+1]
+	 &lea	($j,&DWP(4,$j));
+	&mov	(&DWP($frame,"esp",$num,4),"edx");	# tp[num-1]=
+	 &cmp	($j,$_bpend);
+	&mov	(&DWP($frame+4,"esp",$num,4),"eax");	# tp[num]=
+	&je	(&label("common_tail"));
+
+	&mov	($word,&DWP(0,$j));			# bp[i+1]
+	&mov	($inp,$_ap);
+	&mov	($_bp,$j);				# &bp[++i]
+	&xor	($j,$j);
+	&xor	("edx","edx");
+	&mov	("eax",&DWP(0,$inp));
+	&jmp	(&label("1stmadd"));
+
+&set_label("bn_sqr_mont",16);
+$sbit=$num;
+	&mov	($_num,$num);
+	&mov	($_bp,$j);				# i=0
+
+	&mov	("eax",$word);				# ap[0]
+	&mul	($word);				# ap[0]*ap[0]
+	&mov	(&DWP($frame,"esp"),"eax");		# tp[0]=
+	&mov	($sbit,"edx");
+	&shr	("edx",1);
+	&and	($sbit,1);
+	&inc	($j);
+&set_label("sqr",16);
+	&mov	("eax",&DWP(0,$inp,$j,4));		# ap[j]
+	&mov	($carry,"edx");
+	&mul	($word);				# ap[j]*ap[0]
+	&add	("eax",$carry);
+	&lea	($j,&DWP(1,$j));
+	&adc	("edx",0);
+	&lea	($carry,&DWP(0,$sbit,"eax",2));
+	&shr	("eax",31);
+	&cmp	($j,$_num);
+	&mov	($sbit,"eax");
+	&mov	(&DWP($frame-4,"esp",$j,4),$carry);	# tp[j]=
+	&jl	(&label("sqr"));
+
+	&mov	("eax",&DWP(0,$inp,$j,4));		# ap[num-1]
+	&mov	($carry,"edx");
+	&mul	($word);				# ap[num-1]*ap[0]
+	&add	("eax",$carry);
+	 &mov	($word,$_n0);
+	&adc	("edx",0);
+	 &mov	($inp,$_np);
+	&lea	($carry,&DWP(0,$sbit,"eax",2));
+	 &imul	($word,&DWP($frame,"esp"));		# n0*tp[0]
+	&shr	("eax",31);
+	&mov	(&DWP($frame,"esp",$j,4),$carry);	# tp[num-1]=
+
+	&lea	($carry,&DWP(0,"eax","edx",2));
+	 &mov	("eax",&DWP(0,$inp));			# np[0]
+	&shr	("edx",31);
+	&mov	(&DWP($frame+4,"esp",$j,4),$carry);	# tp[num]=
+	&mov	(&DWP($frame+8,"esp",$j,4),"edx");	# tp[num+1]=
+
+	&mul	($word);				# np[0]*m
+	&add	("eax",&DWP($frame,"esp"));		# +=tp[0]
+	&mov	($num,$j);
+	&adc	("edx",0);
+	&mov	("eax",&DWP(4,$inp));			# np[1]
+	&mov	($j,1);
+
+&set_label("3rdmadd",16);
+	&mov	($carry,"edx");
+	&mul	($word);				# np[j]*m
+	&add	($carry,&DWP($frame,"esp",$j,4));	# +=tp[j]
+	&adc	("edx",0);
+	&add	($carry,"eax");
+	&mov	("eax",&DWP(4,$inp,$j,4));		# np[j+1]
+	&adc	("edx",0);
+	&mov	(&DWP($frame-4,"esp",$j,4),$carry);	# tp[j-1]=
+
+	&mov	($carry,"edx");
+	&mul	($word);				# np[j+1]*m
+	&add	($carry,&DWP($frame+4,"esp",$j,4));	# +=tp[j+1]
+	&lea	($j,&DWP(2,$j));
+	&adc	("edx",0);
+	&add	($carry,"eax");
+	&mov	("eax",&DWP(0,$inp,$j,4));		# np[j+2]
+	&adc	("edx",0);
+	&cmp	($j,$num);
+	&mov	(&DWP($frame-8,"esp",$j,4),$carry);	# tp[j]=
+	&jl	(&label("3rdmadd"));
+
+	&mov	($carry,"edx");
+	&mul	($word);				# np[j]*m
+	&add	($carry,&DWP($frame,"esp",$num,4));	# +=tp[num-1]
+	&adc	("edx",0);
+	&add	($carry,"eax");
+	&adc	("edx",0);
+	&mov	(&DWP($frame-4,"esp",$num,4),$carry);	# tp[num-2]=
+
+	&mov	($j,$_bp);				# i
+	&xor	("eax","eax");
+	&mov	($inp,$_ap);
+	&add	("edx",&DWP($frame+4,"esp",$num,4));	# carry+=tp[num]
+	&adc	("eax",&DWP($frame+8,"esp",$num,4));	# +=tp[num+1]
+	&mov	(&DWP($frame,"esp",$num,4),"edx");	# tp[num-1]=
+	&cmp	($j,$num);
+	&mov	(&DWP($frame+4,"esp",$num,4),"eax");	# tp[num]=
+	&je	(&label("common_tail"));
+
+	&mov	($word,&DWP(4,$inp,$j,4));		# ap[i]
+	&lea	($j,&DWP(1,$j));
+	&mov	("eax",$word);
+	&mov	($_bp,$j);				# ++i
+	&mul	($word);				# ap[i]*ap[i]
+	&add	("eax",&DWP($frame,"esp",$j,4));	# +=tp[i]
+	&adc	("edx",0);
+	&mov	(&DWP($frame,"esp",$j,4),"eax");	# tp[i]=
+	&xor	($carry,$carry);
+	&cmp	($j,$num);
+	&lea	($j,&DWP(1,$j));
+	&je	(&label("sqrlast"));
+
+	&mov	($sbit,"edx");				# zaps $num
+	&shr	("edx",1);
+	&and	($sbit,1);
+&set_label("sqradd",16);
+	&mov	("eax",&DWP(0,$inp,$j,4));		# ap[j]
+	&mov	($carry,"edx");
+	&mul	($word);				# ap[j]*ap[i]
+	&add	("eax",$carry);
+	&lea	($carry,&DWP(0,"eax","eax"));
+	&adc	("edx",0);
+	&shr	("eax",31);
+	&add	($carry,&DWP($frame,"esp",$j,4));	# +=tp[j]
+	&lea	($j,&DWP(1,$j));
+	&adc	("eax",0);
+	&add	($carry,$sbit);
+	&adc	("eax",0);
+	&cmp	($j,$_num);
+	&mov	(&DWP($frame-4,"esp",$j,4),$carry);	# tp[j]=
+	&mov	($sbit,"eax");
+	&jle	(&label("sqradd"));
+
+	&mov	($carry,"edx");
+	&add	("edx","edx");
+	&shr	($carry,31);
+	&add	("edx",$sbit);
+	&adc	($carry,0);
+&set_label("sqrlast");
+	&mov	($word,$_n0);
+	&mov	($inp,$_np);
+	&imul	($word,&DWP($frame,"esp"));		# n0*tp[0]
+
+	&add	("edx",&DWP($frame,"esp",$j,4));	# +=tp[num]
+	&mov	("eax",&DWP(0,$inp));			# np[0]
+	&adc	($carry,0);
+	&mov	(&DWP($frame,"esp",$j,4),"edx");	# tp[num]=
+	&mov	(&DWP($frame+4,"esp",$j,4),$carry);	# tp[num+1]=
+
+	&mul	($word);				# np[0]*m
+	&add	("eax",&DWP($frame,"esp"));		# +=tp[0]
+	&lea	($num,&DWP(-1,$j));
+	&adc	("edx",0);
+	&mov	($j,1);
+	&mov	("eax",&DWP(4,$inp));			# np[1]
+
+	&jmp	(&label("3rdmadd"));
+}
+
+&set_label("common_tail",16);
+	&mov	($np,$_np);			# load modulus pointer
+	&mov	($rp,$_rp);			# load result pointer
+	&lea	($tp,&DWP($frame,"esp"));	# [$ap and $bp are zapped]
+
+	&mov	("eax",&DWP(0,$tp));		# tp[0]
+	&mov	($j,$num);			# j=num-1
+	&xor	($i,$i);			# i=0 and clear CF!
+
+&set_label("sub",16);
+	&sbb	("eax",&DWP(0,$np,$i,4));
+	&mov	(&DWP(0,$rp,$i,4),"eax");	# rp[i]=tp[i]-np[i]
+	&dec	($j);				# doesn't affect CF!
+	&mov	("eax",&DWP(4,$tp,$i,4));	# tp[i+1]
+	&lea	($i,&DWP(1,$i));		# i++
+	&jge	(&label("sub"));
+
+	&sbb	("eax",0);			# handle upmost overflow bit
+	&mov	("edx",-1);
+	&xor	("edx","eax");
+	&jmp	(&label("copy"));
+
+&set_label("copy",16);				# conditional copy
+	&mov	($tp,&DWP($frame,"esp",$num,4));
+	&mov	($np,&DWP(0,$rp,$num,4));
+	&mov	(&DWP($frame,"esp",$num,4),$j);	# zap temporary vector
+	&and	($tp,"eax");
+	&and	($np,"edx");
+	&or	($np,$tp);
+	&mov	(&DWP(0,$rp,$num,4),$np);
+	&dec	($num);
+	&jge	(&label("copy"));
+
+	&mov	("esp",$_sp);		# pull saved stack pointer
+	&mov	("eax",1);
+&set_label("just_leave");
+&function_end("bn_mul_mont");
+
+&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
+
+&asm_finish();
+
+close STDOUT;
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-gcc.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-gcc.c
@ -0,0 +1,643 @@
+/*
+ * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "../bn_lcl.h"
+#if !(defined(__GNUC__) && __GNUC__>=2)
+# include "../bn_asm.c"         /* kind of dirty hack for Sun Studio */
+#else
+/*-
+ * x86_64 BIGNUM accelerator version 0.1, December 2002.
+ *
+ * Implemented by Andy Polyakov <appro@openssl.org> for the OpenSSL
+ * project.
+ *
+ * Rights for redistribution and usage in source and binary forms are
+ * granted according to the OpenSSL license. Warranty of any kind is
+ * disclaimed.
+ *
+ * Q. Version 0.1? It doesn't sound like Andy, he used to assign real
+ *    versions, like 1.0...
+ * A. Well, that's because this code is basically a quick-n-dirty
+ *    proof-of-concept hack. As you can see it's implemented with
+ *    inline assembler, which means that you're bound to GCC and that
+ *    there might be enough room for further improvement.
+ *
+ * Q. Why inline assembler?
+ * A. x86_64 features own ABI which I'm not familiar with. This is
+ *    why I decided to let the compiler take care of subroutine
+ *    prologue/epilogue as well as register allocation. For reference.
+ *    Win64 implements different ABI for AMD64, different from Linux.
+ *
+ * Q. How much faster does it get?
+ * A. 'apps/openssl speed rsa dsa' output with no-asm:
+ *
+ *                        sign    verify    sign/s verify/s
+ *      rsa  512 bits   0.0006s   0.0001s   1683.8  18456.2
+ *      rsa 1024 bits   0.0028s   0.0002s    356.0   6407.0
+ *      rsa 2048 bits   0.0172s   0.0005s     58.0   1957.8
+ *      rsa 4096 bits   0.1155s   0.0018s      8.7    555.6
+ *                        sign    verify    sign/s verify/s
+ *      dsa  512 bits   0.0005s   0.0006s   2100.8   1768.3
+ *      dsa 1024 bits   0.0014s   0.0018s    692.3    559.2
+ *      dsa 2048 bits   0.0049s   0.0061s    204.7    165.0
+ *
+ *    'apps/openssl speed rsa dsa' output with this module:
+ *
+ *                        sign    verify    sign/s verify/s
+ *      rsa  512 bits   0.0004s   0.0000s   2767.1  33297.9
+ *      rsa 1024 bits   0.0012s   0.0001s    867.4  14674.7
+ *      rsa 2048 bits   0.0061s   0.0002s    164.0   5270.0
+ *      rsa 4096 bits   0.0384s   0.0006s     26.1   1650.8
+ *                        sign    verify    sign/s verify/s
+ *      dsa  512 bits   0.0002s   0.0003s   4442.2   3786.3
+ *      dsa 1024 bits   0.0005s   0.0007s   1835.1   1497.4
+ *      dsa 2048 bits   0.0016s   0.0020s    620.4    504.6
+ *
+ *    For the reference. IA-32 assembler implementation performs
+ *    very much like 64-bit code compiled with no-asm on the same
+ *    machine.
+ */
+
+# undef mul
+# undef mul_add
+
+/*-
+ * "m"(a), "+m"(r)      is the way to favor DirectPath µ-code;
+ * "g"(0)               let the compiler to decide where does it
+ *                      want to keep the value of zero;
+ */
+# define mul_add(r,a,word,carry) do {   \
+        register BN_ULONG high,low;     \
+        asm ("mulq %3"                  \
+                : "=a"(low),"=d"(high)  \
+                : "a"(word),"m"(a)      \
+                : "cc");                \
+        asm ("addq %2,%0; adcq %3,%1"   \
+                : "+r"(carry),"+d"(high)\
+                : "a"(low),"g"(0)       \
+                : "cc");                \
+        asm ("addq %2,%0; adcq %3,%1"   \
+                : "+m"(r),"+d"(high)    \
+                : "r"(carry),"g"(0)     \
+                : "cc");                \
+        carry=high;                     \
+        } while (0)
+
+# define mul(r,a,word,carry) do {       \
+        register BN_ULONG high,low;     \
+        asm ("mulq %3"                  \
+                : "=a"(low),"=d"(high)  \
+                : "a"(word),"g"(a)      \
+                : "cc");                \
+        asm ("addq %2,%0; adcq %3,%1"   \
+                : "+r"(carry),"+d"(high)\
+                : "a"(low),"g"(0)       \
+                : "cc");                \
+        (r)=carry, carry=high;          \
+        } while (0)
+# undef sqr
+# define sqr(r0,r1,a)                   \
+        asm ("mulq %2"                  \
+                : "=a"(r0),"=d"(r1)     \
+                : "a"(a)                \
+                : "cc");
+
+BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
+                          BN_ULONG w)
+{
+    BN_ULONG c1 = 0;
+
+    if (num <= 0)
+        return c1;
+
+    while (num & ~3) {
+        mul_add(rp[0], ap[0], w, c1);
+        mul_add(rp[1], ap[1], w, c1);
+        mul_add(rp[2], ap[2], w, c1);
+        mul_add(rp[3], ap[3], w, c1);
+        ap += 4;
+        rp += 4;
+        num -= 4;
+    }
+    if (num) {
+        mul_add(rp[0], ap[0], w, c1);
+        if (--num == 0)
+            return c1;
+        mul_add(rp[1], ap[1], w, c1);
+        if (--num == 0)
+            return c1;
+        mul_add(rp[2], ap[2], w, c1);
+        return c1;
+    }
+
+    return c1;
+}
+
+BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
+{
+    BN_ULONG c1 = 0;
+
+    if (num <= 0)
+        return c1;
+
+    while (num & ~3) {
+        mul(rp[0], ap[0], w, c1);
+        mul(rp[1], ap[1], w, c1);
+        mul(rp[2], ap[2], w, c1);
+        mul(rp[3], ap[3], w, c1);
+        ap += 4;
+        rp += 4;
+        num -= 4;
+    }
+    if (num) {
+        mul(rp[0], ap[0], w, c1);
+        if (--num == 0)
+            return c1;
+        mul(rp[1], ap[1], w, c1);
+        if (--num == 0)
+            return c1;
+        mul(rp[2], ap[2], w, c1);
+    }
+    return c1;
+}
+
+void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
+{
+    if (n <= 0)
+        return;
+
+    while (n & ~3) {
+        sqr(r[0], r[1], a[0]);
+        sqr(r[2], r[3], a[1]);
+        sqr(r[4], r[5], a[2]);
+        sqr(r[6], r[7], a[3]);
+        a += 4;
+        r += 8;
+        n -= 4;
+    }
+    if (n) {
+        sqr(r[0], r[1], a[0]);
+        if (--n == 0)
+            return;
+        sqr(r[2], r[3], a[1]);
+        if (--n == 0)
+            return;
+        sqr(r[4], r[5], a[2]);
+    }
+}
+
+BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
+{
+    BN_ULONG ret, waste;
+
+ asm("divq      %4":"=a"(ret), "=d"(waste)
+ :     "a"(l), "d"(h), "r"(d)
+ :     "cc");
+
+    return ret;
+}
+
+BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                      int n)
+{
+    BN_ULONG ret;
+    size_t i = 0;
+
+    if (n <= 0)
+        return 0;
+
+    asm volatile ("       subq    %0,%0           \n" /* clear carry */
+                  "       jmp     1f              \n"
+                  ".p2align 4                     \n"
+                  "1:     movq    (%4,%2,8),%0    \n"
+                  "       adcq    (%5,%2,8),%0    \n"
+                  "       movq    %0,(%3,%2,8)    \n"
+                  "       lea     1(%2),%2        \n"
+                  "       dec     %1              \n"
+                  "       jnz     1b              \n"
+                  "       sbbq    %0,%0           \n"
+                  :"=&r" (ret), "+c"(n), "+r"(i)
+                  :"r"(rp), "r"(ap), "r"(bp)
+                  :"cc", "memory");
+
+    return ret & 1;
+}
+
+# ifndef SIMICS
+BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                      int n)
+{
+    BN_ULONG ret;
+    size_t i = 0;
+
+    if (n <= 0)
+        return 0;
+
+    asm volatile ("       subq    %0,%0           \n" /* clear borrow */
+                  "       jmp     1f              \n"
+                  ".p2align 4                     \n"
+                  "1:     movq    (%4,%2,8),%0    \n"
+                  "       sbbq    (%5,%2,8),%0    \n"
+                  "       movq    %0,(%3,%2,8)    \n"
+                  "       lea     1(%2),%2        \n"
+                  "       dec     %1              \n"
+                  "       jnz     1b              \n"
+                  "       sbbq    %0,%0           \n"
+                  :"=&r" (ret), "+c"(n), "+r"(i)
+                  :"r"(rp), "r"(ap), "r"(bp)
+                  :"cc", "memory");
+
+    return ret & 1;
+}
+# else
+/* Simics 1.4<7 has buggy sbbq:-( */
+#  define BN_MASK2 0xffffffffffffffffL
+BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
+{
+    BN_ULONG t1, t2;
+    int c = 0;
+
+    if (n <= 0)
+        return (BN_ULONG)0;
+
+    for (;;) {
+        t1 = a[0];
+        t2 = b[0];
+        r[0] = (t1 - t2 - c) & BN_MASK2;
+        if (t1 != t2)
+            c = (t1 < t2);
+        if (--n <= 0)
+            break;
+
+        t1 = a[1];
+        t2 = b[1];
+        r[1] = (t1 - t2 - c) & BN_MASK2;
+        if (t1 != t2)
+            c = (t1 < t2);
+        if (--n <= 0)
+            break;
+
+        t1 = a[2];
+        t2 = b[2];
+        r[2] = (t1 - t2 - c) & BN_MASK2;
+        if (t1 != t2)
+            c = (t1 < t2);
+        if (--n <= 0)
+            break;
+
+        t1 = a[3];
+        t2 = b[3];
+        r[3] = (t1 - t2 - c) & BN_MASK2;
+        if (t1 != t2)
+            c = (t1 < t2);
+        if (--n <= 0)
+            break;
+
+        a += 4;
+        b += 4;
+        r += 4;
+    }
+    return c;
+}
+# endif
+
+/* mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0) */
+/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
+/* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
+/*
+ * sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number
+ * c=(c2,c1,c0)
+ */
+
+/*
+ * Keep in mind that carrying into high part of multiplication result
+ * can not overflow, because it cannot be all-ones.
+ */
+# if 0
+/* original macros are kept for reference purposes */
+#  define mul_add_c(a,b,c0,c1,c2)       do {    \
+        BN_ULONG ta = (a), tb = (b);            \
+        BN_ULONG lo, hi;                        \
+        BN_UMULT_LOHI(lo,hi,ta,tb);             \
+        c0 += lo; hi += (c0<lo)?1:0;            \
+        c1 += hi; c2 += (c1<hi)?1:0;            \
+        } while(0)
+
+#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
+        BN_ULONG ta = (a), tb = (b);            \
+        BN_ULONG lo, hi, tt;                    \
+        BN_UMULT_LOHI(lo,hi,ta,tb);             \
+        c0 += lo; tt = hi+((c0<lo)?1:0);        \
+        c1 += tt; c2 += (c1<tt)?1:0;            \
+        c0 += lo; hi += (c0<lo)?1:0;            \
+        c1 += hi; c2 += (c1<hi)?1:0;            \
+        } while(0)
+
+#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
+        BN_ULONG ta = (a)[i];                   \
+        BN_ULONG lo, hi;                        \
+        BN_UMULT_LOHI(lo,hi,ta,ta);             \
+        c0 += lo; hi += (c0<lo)?1:0;            \
+        c1 += hi; c2 += (c1<hi)?1:0;            \
+        } while(0)
+# else
+#  define mul_add_c(a,b,c0,c1,c2) do {  \
+        BN_ULONG t1,t2;                 \
+        asm ("mulq %3"                  \
+                : "=a"(t1),"=d"(t2)     \
+                : "a"(a),"m"(b)         \
+                : "cc");                \
+        asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
+                : "+r"(c0),"+r"(c1),"+r"(c2)            \
+                : "r"(t1),"r"(t2),"g"(0)                \
+                : "cc");                                \
+        } while (0)
+
+#  define sqr_add_c(a,i,c0,c1,c2) do {  \
+        BN_ULONG t1,t2;                 \
+        asm ("mulq %2"                  \
+                : "=a"(t1),"=d"(t2)     \
+                : "a"(a[i])             \
+                : "cc");                \
+        asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
+                : "+r"(c0),"+r"(c1),"+r"(c2)            \
+                : "r"(t1),"r"(t2),"g"(0)                \
+                : "cc");                                \
+        } while (0)
+
+#  define mul_add_c2(a,b,c0,c1,c2) do { \
+        BN_ULONG t1,t2;                 \
+        asm ("mulq %3"                  \
+                : "=a"(t1),"=d"(t2)     \
+                : "a"(a),"m"(b)         \
+                : "cc");                \
+        asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
+                : "+r"(c0),"+r"(c1),"+r"(c2)            \
+                : "r"(t1),"r"(t2),"g"(0)                \
+                : "cc");                                \
+        asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
+                : "+r"(c0),"+r"(c1),"+r"(c2)            \
+                : "r"(t1),"r"(t2),"g"(0)                \
+                : "cc");                                \
+        } while (0)
+# endif
+
+# define sqr_add_c2(a,i,j,c0,c1,c2)      \
+        mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+
+void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
+{
+    BN_ULONG c1, c2, c3;
+
+    c1 = 0;
+    c2 = 0;
+    c3 = 0;
+    mul_add_c(a[0], b[0], c1, c2, c3);
+    r[0] = c1;
+    c1 = 0;
+    mul_add_c(a[0], b[1], c2, c3, c1);
+    mul_add_c(a[1], b[0], c2, c3, c1);
+    r[1] = c2;
+    c2 = 0;
+    mul_add_c(a[2], b[0], c3, c1, c2);
+    mul_add_c(a[1], b[1], c3, c1, c2);
+    mul_add_c(a[0], b[2], c3, c1, c2);
+    r[2] = c3;
+    c3 = 0;
+    mul_add_c(a[0], b[3], c1, c2, c3);
+    mul_add_c(a[1], b[2], c1, c2, c3);
+    mul_add_c(a[2], b[1], c1, c2, c3);
+    mul_add_c(a[3], b[0], c1, c2, c3);
+    r[3] = c1;
+    c1 = 0;
+    mul_add_c(a[4], b[0], c2, c3, c1);
+    mul_add_c(a[3], b[1], c2, c3, c1);
+    mul_add_c(a[2], b[2], c2, c3, c1);
+    mul_add_c(a[1], b[3], c2, c3, c1);
+    mul_add_c(a[0], b[4], c2, c3, c1);
+    r[4] = c2;
+    c2 = 0;
+    mul_add_c(a[0], b[5], c3, c1, c2);
+    mul_add_c(a[1], b[4], c3, c1, c2);
+    mul_add_c(a[2], b[3], c3, c1, c2);
+    mul_add_c(a[3], b[2], c3, c1, c2);
+    mul_add_c(a[4], b[1], c3, c1, c2);
+    mul_add_c(a[5], b[0], c3, c1, c2);
+    r[5] = c3;
+    c3 = 0;
+    mul_add_c(a[6], b[0], c1, c2, c3);
+    mul_add_c(a[5], b[1], c1, c2, c3);
+    mul_add_c(a[4], b[2], c1, c2, c3);
+    mul_add_c(a[3], b[3], c1, c2, c3);
+    mul_add_c(a[2], b[4], c1, c2, c3);
+    mul_add_c(a[1], b[5], c1, c2, c3);
+    mul_add_c(a[0], b[6], c1, c2, c3);
+    r[6] = c1;
+    c1 = 0;
+    mul_add_c(a[0], b[7], c2, c3, c1);
+    mul_add_c(a[1], b[6], c2, c3, c1);
+    mul_add_c(a[2], b[5], c2, c3, c1);
+    mul_add_c(a[3], b[4], c2, c3, c1);
+    mul_add_c(a[4], b[3], c2, c3, c1);
+    mul_add_c(a[5], b[2], c2, c3, c1);
+    mul_add_c(a[6], b[1], c2, c3, c1);
+    mul_add_c(a[7], b[0], c2, c3, c1);
+    r[7] = c2;
+    c2 = 0;
+    mul_add_c(a[7], b[1], c3, c1, c2);
+    mul_add_c(a[6], b[2], c3, c1, c2);
+    mul_add_c(a[5], b[3], c3, c1, c2);
+    mul_add_c(a[4], b[4], c3, c1, c2);
+    mul_add_c(a[3], b[5], c3, c1, c2);
+    mul_add_c(a[2], b[6], c3, c1, c2);
+    mul_add_c(a[1], b[7], c3, c1, c2);
+    r[8] = c3;
+    c3 = 0;
+    mul_add_c(a[2], b[7], c1, c2, c3);
+    mul_add_c(a[3], b[6], c1, c2, c3);
+    mul_add_c(a[4], b[5], c1, c2, c3);
+    mul_add_c(a[5], b[4], c1, c2, c3);
+    mul_add_c(a[6], b[3], c1, c2, c3);
+    mul_add_c(a[7], b[2], c1, c2, c3);
+    r[9] = c1;
+    c1 = 0;
+    mul_add_c(a[7], b[3], c2, c3, c1);
+    mul_add_c(a[6], b[4], c2, c3, c1);
+    mul_add_c(a[5], b[5], c2, c3, c1);
+    mul_add_c(a[4], b[6], c2, c3, c1);
+    mul_add_c(a[3], b[7], c2, c3, c1);
+    r[10] = c2;
+    c2 = 0;
+    mul_add_c(a[4], b[7], c3, c1, c2);
+    mul_add_c(a[5], b[6], c3, c1, c2);
+    mul_add_c(a[6], b[5], c3, c1, c2);
+    mul_add_c(a[7], b[4], c3, c1, c2);
+    r[11] = c3;
+    c3 = 0;
+    mul_add_c(a[7], b[5], c1, c2, c3);
+    mul_add_c(a[6], b[6], c1, c2, c3);
+    mul_add_c(a[5], b[7], c1, c2, c3);
+    r[12] = c1;
+    c1 = 0;
+    mul_add_c(a[6], b[7], c2, c3, c1);
+    mul_add_c(a[7], b[6], c2, c3, c1);
+    r[13] = c2;
+    c2 = 0;
+    mul_add_c(a[7], b[7], c3, c1, c2);
+    r[14] = c3;
+    r[15] = c1;
+}
+
+void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
+{
+    BN_ULONG c1, c2, c3;
+
+    c1 = 0;
+    c2 = 0;
+    c3 = 0;
+    mul_add_c(a[0], b[0], c1, c2, c3);
+    r[0] = c1;
+    c1 = 0;
+    mul_add_c(a[0], b[1], c2, c3, c1);
+    mul_add_c(a[1], b[0], c2, c3, c1);
+    r[1] = c2;
+    c2 = 0;
+    mul_add_c(a[2], b[0], c3, c1, c2);
+    mul_add_c(a[1], b[1], c3, c1, c2);
+    mul_add_c(a[0], b[2], c3, c1, c2);
+    r[2] = c3;
+    c3 = 0;
+    mul_add_c(a[0], b[3], c1, c2, c3);
+    mul_add_c(a[1], b[2], c1, c2, c3);
+    mul_add_c(a[2], b[1], c1, c2, c3);
+    mul_add_c(a[3], b[0], c1, c2, c3);
+    r[3] = c1;
+    c1 = 0;
+    mul_add_c(a[3], b[1], c2, c3, c1);
+    mul_add_c(a[2], b[2], c2, c3, c1);
+    mul_add_c(a[1], b[3], c2, c3, c1);
+    r[4] = c2;
+    c2 = 0;
+    mul_add_c(a[2], b[3], c3, c1, c2);
+    mul_add_c(a[3], b[2], c3, c1, c2);
+    r[5] = c3;
+    c3 = 0;
+    mul_add_c(a[3], b[3], c1, c2, c3);
+    r[6] = c1;
+    r[7] = c2;
+}
+
+void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
+{
+    BN_ULONG c1, c2, c3;
+
+    c1 = 0;
+    c2 = 0;
+    c3 = 0;
+    sqr_add_c(a, 0, c1, c2, c3);
+    r[0] = c1;
+    c1 = 0;
+    sqr_add_c2(a, 1, 0, c2, c3, c1);
+    r[1] = c2;
+    c2 = 0;
+    sqr_add_c(a, 1, c3, c1, c2);
+    sqr_add_c2(a, 2, 0, c3, c1, c2);
+    r[2] = c3;
+    c3 = 0;
+    sqr_add_c2(a, 3, 0, c1, c2, c3);
+    sqr_add_c2(a, 2, 1, c1, c2, c3);
+    r[3] = c1;
+    c1 = 0;
+    sqr_add_c(a, 2, c2, c3, c1);
+    sqr_add_c2(a, 3, 1, c2, c3, c1);
+    sqr_add_c2(a, 4, 0, c2, c3, c1);
+    r[4] = c2;
+    c2 = 0;
+    sqr_add_c2(a, 5, 0, c3, c1, c2);
+    sqr_add_c2(a, 4, 1, c3, c1, c2);
+    sqr_add_c2(a, 3, 2, c3, c1, c2);
+    r[5] = c3;
+    c3 = 0;
+    sqr_add_c(a, 3, c1, c2, c3);
+    sqr_add_c2(a, 4, 2, c1, c2, c3);
+    sqr_add_c2(a, 5, 1, c1, c2, c3);
+    sqr_add_c2(a, 6, 0, c1, c2, c3);
+    r[6] = c1;
+    c1 = 0;
+    sqr_add_c2(a, 7, 0, c2, c3, c1);
+    sqr_add_c2(a, 6, 1, c2, c3, c1);
+    sqr_add_c2(a, 5, 2, c2, c3, c1);
+    sqr_add_c2(a, 4, 3, c2, c3, c1);
+    r[7] = c2;
+    c2 = 0;
+    sqr_add_c(a, 4, c3, c1, c2);
+    sqr_add_c2(a, 5, 3, c3, c1, c2);
+    sqr_add_c2(a, 6, 2, c3, c1, c2);
+    sqr_add_c2(a, 7, 1, c3, c1, c2);
+    r[8] = c3;
+    c3 = 0;
+    sqr_add_c2(a, 7, 2, c1, c2, c3);
+    sqr_add_c2(a, 6, 3, c1, c2, c3);
+    sqr_add_c2(a, 5, 4, c1, c2, c3);
+    r[9] = c1;
+    c1 = 0;
+    sqr_add_c(a, 5, c2, c3, c1);
+    sqr_add_c2(a, 6, 4, c2, c3, c1);
+    sqr_add_c2(a, 7, 3, c2, c3, c1);
+    r[10] = c2;
+    c2 = 0;
+    sqr_add_c2(a, 7, 4, c3, c1, c2);
+    sqr_add_c2(a, 6, 5, c3, c1, c2);
+    r[11] = c3;
+    c3 = 0;
+    sqr_add_c(a, 6, c1, c2, c3);
+    sqr_add_c2(a, 7, 5, c1, c2, c3);
+    r[12] = c1;
+    c1 = 0;
+    sqr_add_c2(a, 7, 6, c2, c3, c1);
+    r[13] = c2;
+    c2 = 0;
+    sqr_add_c(a, 7, c3, c1, c2);
+    r[14] = c3;
+    r[15] = c1;
+}
+
+void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
+{
+    BN_ULONG c1, c2, c3;
+
+    c1 = 0;
+    c2 = 0;
+    c3 = 0;
+    sqr_add_c(a, 0, c1, c2, c3);
+    r[0] = c1;
+    c1 = 0;
+    sqr_add_c2(a, 1, 0, c2, c3, c1);
+    r[1] = c2;
+    c2 = 0;
+    sqr_add_c(a, 1, c3, c1, c2);
+    sqr_add_c2(a, 2, 0, c3, c1, c2);
+    r[2] = c3;
+    c3 = 0;
+    sqr_add_c2(a, 3, 0, c1, c2, c3);
+    sqr_add_c2(a, 2, 1, c1, c2, c3);
+    r[3] = c1;
+    c1 = 0;
+    sqr_add_c(a, 2, c2, c3, c1);
+    sqr_add_c2(a, 3, 1, c2, c3, c1);
+    r[4] = c2;
+    c2 = 0;
+    sqr_add_c2(a, 3, 2, c3, c1, c2);
+    r[5] = c3;
+    c3 = 0;
+    sqr_add_c(a, 3, c1, c2, c3);
+    r[6] = c1;
+    r[7] = c2;
+}
+#endif
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-gf2m.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-gf2m.pl
@ -0,0 +1,424 @@
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# May 2011
+#
+# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
+# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
+# the time being... Except that it has two code paths: code suitable
+# for any x86_64 CPU and PCLMULQDQ one suitable for Westmere and
+# later. Improvement varies from one benchmark and µ-arch to another.
+# Vanilla code path is at most 20% faster than compiler-generated code
+# [not very impressive], while PCLMULQDQ - whole 85%-160% better on
+# 163- and 571-bit ECDH benchmarks on Intel CPUs. Keep in mind that
+# these coefficients are not ones for bn_GF2m_mul_2x2 itself, as not
+# all CPU time is burnt in it...
+
+$flavour = shift;
+$output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
+*STDOUT=*OUT;
+
+($lo,$hi)=("%rax","%rdx");	$a=$lo;
+($i0,$i1)=("%rsi","%rdi");
+($t0,$t1)=("%rbx","%rcx");
+($b,$mask)=("%rbp","%r8");
+($a1,$a2,$a4,$a8,$a12,$a48)=map("%r$_",(9..15));
+($R,$Tx)=("%xmm0","%xmm1");
+
+$code.=<<___;
+.text
+
+.type	_mul_1x1,\@abi-omnipotent
+.align	16
+_mul_1x1:
+.cfi_startproc
+	sub	\$128+8,%rsp
+.cfi_adjust_cfa_offset	128+8
+	mov	\$-1,$a1
+	lea	($a,$a),$i0
+	shr	\$3,$a1
+	lea	(,$a,4),$i1
+	and	$a,$a1			# a1=a&0x1fffffffffffffff
+	lea	(,$a,8),$a8
+	sar	\$63,$a			# broadcast 63rd bit
+	lea	($a1,$a1),$a2
+	sar	\$63,$i0		# broadcast 62nd bit
+	lea	(,$a1,4),$a4
+	and	$b,$a
+	sar	\$63,$i1		# broadcast 61st bit
+	mov	$a,$hi			# $a is $lo
+	shl	\$63,$lo
+	and	$b,$i0
+	shr	\$1,$hi
+	mov	$i0,$t1
+	shl	\$62,$i0
+	and	$b,$i1
+	shr	\$2,$t1
+	xor	$i0,$lo
+	mov	$i1,$t0
+	shl	\$61,$i1
+	xor	$t1,$hi
+	shr	\$3,$t0
+	xor	$i1,$lo
+	xor	$t0,$hi
+
+	mov	$a1,$a12
+	movq	\$0,0(%rsp)		# tab[0]=0
+	xor	$a2,$a12		# a1^a2
+	mov	$a1,8(%rsp)		# tab[1]=a1
+	 mov	$a4,$a48
+	mov	$a2,16(%rsp)		# tab[2]=a2
+	 xor	$a8,$a48		# a4^a8
+	mov	$a12,24(%rsp)		# tab[3]=a1^a2
+
+	xor	$a4,$a1
+	mov	$a4,32(%rsp)		# tab[4]=a4
+	xor	$a4,$a2
+	mov	$a1,40(%rsp)		# tab[5]=a1^a4
+	xor	$a4,$a12
+	mov	$a2,48(%rsp)		# tab[6]=a2^a4
+	 xor	$a48,$a1		# a1^a4^a4^a8=a1^a8
+	mov	$a12,56(%rsp)		# tab[7]=a1^a2^a4
+	 xor	$a48,$a2		# a2^a4^a4^a8=a1^a8
+
+	mov	$a8,64(%rsp)		# tab[8]=a8
+	xor	$a48,$a12		# a1^a2^a4^a4^a8=a1^a2^a8
+	mov	$a1,72(%rsp)		# tab[9]=a1^a8
+	 xor	$a4,$a1			# a1^a8^a4
+	mov	$a2,80(%rsp)		# tab[10]=a2^a8
+	 xor	$a4,$a2			# a2^a8^a4
+	mov	$a12,88(%rsp)		# tab[11]=a1^a2^a8
+
+	xor	$a4,$a12		# a1^a2^a8^a4
+	mov	$a48,96(%rsp)		# tab[12]=a4^a8
+	 mov	$mask,$i0
+	mov	$a1,104(%rsp)		# tab[13]=a1^a4^a8
+	 and	$b,$i0
+	mov	$a2,112(%rsp)		# tab[14]=a2^a4^a8
+	 shr	\$4,$b
+	mov	$a12,120(%rsp)		# tab[15]=a1^a2^a4^a8
+	 mov	$mask,$i1
+	 and	$b,$i1
+	 shr	\$4,$b
+
+	movq	(%rsp,$i0,8),$R		# half of calculations is done in SSE2
+	mov	$mask,$i0
+	and	$b,$i0
+	shr	\$4,$b
+___
+    for ($n=1;$n<8;$n++) {
+	$code.=<<___;
+	mov	(%rsp,$i1,8),$t1
+	mov	$mask,$i1
+	mov	$t1,$t0
+	shl	\$`8*$n-4`,$t1
+	and	$b,$i1
+	 movq	(%rsp,$i0,8),$Tx
+	shr	\$`64-(8*$n-4)`,$t0
+	xor	$t1,$lo
+	 pslldq	\$$n,$Tx
+	 mov	$mask,$i0
+	shr	\$4,$b
+	xor	$t0,$hi
+	 and	$b,$i0
+	 shr	\$4,$b
+	 pxor	$Tx,$R
+___
+    }
+$code.=<<___;
+	mov	(%rsp,$i1,8),$t1
+	mov	$t1,$t0
+	shl	\$`8*$n-4`,$t1
+	movq	$R,$i0
+	shr	\$`64-(8*$n-4)`,$t0
+	xor	$t1,$lo
+	psrldq	\$8,$R
+	xor	$t0,$hi
+	movq	$R,$i1
+	xor	$i0,$lo
+	xor	$i1,$hi
+
+	add	\$128+8,%rsp
+.cfi_adjust_cfa_offset	-128-8
+	ret
+.Lend_mul_1x1:
+.cfi_endproc
+.size	_mul_1x1,.-_mul_1x1
+___
+
+($rp,$a1,$a0,$b1,$b0) = $win64?	("%rcx","%rdx","%r8", "%r9","%r10") :	# Win64 order
+				("%rdi","%rsi","%rdx","%rcx","%r8");	# Unix order
+
+$code.=<<___;
+.extern	OPENSSL_ia32cap_P
+.globl	bn_GF2m_mul_2x2
+.type	bn_GF2m_mul_2x2,\@abi-omnipotent
+.align	16
+bn_GF2m_mul_2x2:
+.cfi_startproc
+	mov	%rsp,%rax
+	mov	OPENSSL_ia32cap_P(%rip),%r10
+	bt	\$33,%r10
+	jnc	.Lvanilla_mul_2x2
+
+	movq		$a1,%xmm0
+	movq		$b1,%xmm1
+	movq		$a0,%xmm2
+___
+$code.=<<___ if ($win64);
+	movq		40(%rsp),%xmm3
+___
+$code.=<<___ if (!$win64);
+	movq		$b0,%xmm3
+___
+$code.=<<___;
+	movdqa		%xmm0,%xmm4
+	movdqa		%xmm1,%xmm5
+	pclmulqdq	\$0,%xmm1,%xmm0	# a1·b1
+	pxor		%xmm2,%xmm4
+	pxor		%xmm3,%xmm5
+	pclmulqdq	\$0,%xmm3,%xmm2	# a0·b0
+	pclmulqdq	\$0,%xmm5,%xmm4	# (a0+a1)·(b0+b1)
+	xorps		%xmm0,%xmm4
+	xorps		%xmm2,%xmm4	# (a0+a1)·(b0+b1)-a0·b0-a1·b1
+	movdqa		%xmm4,%xmm5
+	pslldq		\$8,%xmm4
+	psrldq		\$8,%xmm5
+	pxor		%xmm4,%xmm2
+	pxor		%xmm5,%xmm0
+	movdqu		%xmm2,0($rp)
+	movdqu		%xmm0,16($rp)
+	ret
+
+.align	16
+.Lvanilla_mul_2x2:
+	lea	-8*17(%rsp),%rsp
+.cfi_adjust_cfa_offset	8*17
+___
+$code.=<<___ if ($win64);
+	mov	`8*17+40`(%rsp),$b0
+	mov	%rdi,8*15(%rsp)
+	mov	%rsi,8*16(%rsp)
+___
+$code.=<<___;
+	mov	%r14,8*10(%rsp)
+.cfi_rel_offset	%r14,8*10
+	mov	%r13,8*11(%rsp)
+.cfi_rel_offset	%r13,8*11
+	mov	%r12,8*12(%rsp)
+.cfi_rel_offset	%r12,8*12
+	mov	%rbp,8*13(%rsp)
+.cfi_rel_offset	%rbp,8*13
+	mov	%rbx,8*14(%rsp)
+.cfi_rel_offset	%rbx,8*14
+.Lbody_mul_2x2:
+	mov	$rp,32(%rsp)		# save the arguments
+	mov	$a1,40(%rsp)
+	mov	$a0,48(%rsp)
+	mov	$b1,56(%rsp)
+	mov	$b0,64(%rsp)
+
+	mov	\$0xf,$mask
+	mov	$a1,$a
+	mov	$b1,$b
+	call	_mul_1x1		# a1·b1
+	mov	$lo,16(%rsp)
+	mov	$hi,24(%rsp)
+
+	mov	48(%rsp),$a
+	mov	64(%rsp),$b
+	call	_mul_1x1		# a0·b0
+	mov	$lo,0(%rsp)
+	mov	$hi,8(%rsp)
+
+	mov	40(%rsp),$a
+	mov	56(%rsp),$b
+	xor	48(%rsp),$a
+	xor	64(%rsp),$b
+	call	_mul_1x1		# (a0+a1)·(b0+b1)
+___
+	@r=("%rbx","%rcx","%rdi","%rsi");
+$code.=<<___;
+	mov	0(%rsp),@r[0]
+	mov	8(%rsp),@r[1]
+	mov	16(%rsp),@r[2]
+	mov	24(%rsp),@r[3]
+	mov	32(%rsp),%rbp
+
+	xor	$hi,$lo
+	xor	@r[1],$hi
+	xor	@r[0],$lo
+	mov	@r[0],0(%rbp)
+	xor	@r[2],$hi
+	mov	@r[3],24(%rbp)
+	xor	@r[3],$lo
+	xor	@r[3],$hi
+	xor	$hi,$lo
+	mov	$hi,16(%rbp)
+	mov	$lo,8(%rbp)
+
+	mov	8*10(%rsp),%r14
+.cfi_restore	%r14
+	mov	8*11(%rsp),%r13
+.cfi_restore	%r13
+	mov	8*12(%rsp),%r12
+.cfi_restore	%r12
+	mov	8*13(%rsp),%rbp
+.cfi_restore	%rbp
+	mov	8*14(%rsp),%rbx
+.cfi_restore	%rbx
+___
+$code.=<<___ if ($win64);
+	mov	8*15(%rsp),%rdi
+	mov	8*16(%rsp),%rsi
+___
+$code.=<<___;
+	lea	8*17(%rsp),%rsp
+.cfi_adjust_cfa_offset	-8*17
+.Lepilogue_mul_2x2:
+	ret
+.Lend_mul_2x2:
+.cfi_endproc
+.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
+.asciz	"GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
+.align	16
+___
+
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+#               CONTEXT *context,DISPATCHER_CONTEXT *disp)
+if ($win64) {
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+$code.=<<___;
+.extern __imp_RtlVirtualUnwind
+
+.type	se_handler,\@abi-omnipotent
+.align	16
+se_handler:
+	push	%rsi
+	push	%rdi
+	push	%rbx
+	push	%rbp
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+	pushfq
+	sub	\$64,%rsp
+
+	mov	120($context),%rax	# pull context->Rax
+	mov	248($context),%rbx	# pull context->Rip
+
+	lea	.Lbody_mul_2x2(%rip),%r10
+	cmp	%r10,%rbx		# context->Rip<"prologue" label
+	jb	.Lin_prologue
+
+	mov	152($context),%rax	# pull context->Rsp
+
+	lea	.Lepilogue_mul_2x2(%rip),%r10
+	cmp	%r10,%rbx		# context->Rip>="epilogue" label
+	jae	.Lin_prologue
+
+	mov	8*10(%rax),%r14		# mimic epilogue
+	mov	8*11(%rax),%r13
+	mov	8*12(%rax),%r12
+	mov	8*13(%rax),%rbp
+	mov	8*14(%rax),%rbx
+	mov	8*15(%rax),%rdi
+	mov	8*16(%rax),%rsi
+
+	mov	%rbx,144($context)	# restore context->Rbx
+	mov	%rbp,160($context)	# restore context->Rbp
+	mov	%rsi,168($context)	# restore context->Rsi
+	mov	%rdi,176($context)	# restore context->Rdi
+	mov	%r12,216($context)	# restore context->R12
+	mov	%r13,224($context)	# restore context->R13
+	mov	%r14,232($context)	# restore context->R14
+
+	lea	8*17(%rax),%rax
+
+.Lin_prologue:
+	mov	%rax,152($context)	# restore context->Rsp
+
+	mov	40($disp),%rdi		# disp->ContextRecord
+	mov	$context,%rsi		# context
+	mov	\$154,%ecx		# sizeof(CONTEXT)
+	.long	0xa548f3fc		# cld; rep movsq
+
+	mov	$disp,%rsi
+	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
+	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
+	mov	0(%rsi),%r8		# arg3, disp->ControlPc
+	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
+	mov	40(%rsi),%r10		# disp->ContextRecord
+	lea	56(%rsi),%r11		# &disp->HandlerData
+	lea	24(%rsi),%r12		# &disp->EstablisherFrame
+	mov	%r10,32(%rsp)		# arg5
+	mov	%r11,40(%rsp)		# arg6
+	mov	%r12,48(%rsp)		# arg7
+	mov	%rcx,56(%rsp)		# arg8, (NULL)
+	call	*__imp_RtlVirtualUnwind(%rip)
+
+	mov	\$1,%eax		# ExceptionContinueSearch
+	add	\$64,%rsp
+	popfq
+	pop	%r15
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	pop	%rbp
+	pop	%rbx
+	pop	%rdi
+	pop	%rsi
+	ret
+.size	se_handler,.-se_handler
+
+.section	.pdata
+.align	4
+	.rva	_mul_1x1
+	.rva	.Lend_mul_1x1
+	.rva	.LSEH_info_1x1
+
+	.rva	.Lvanilla_mul_2x2
+	.rva	.Lend_mul_2x2
+	.rva	.LSEH_info_2x2
+.section	.xdata
+.align	8
+.LSEH_info_1x1:
+	.byte	0x01,0x07,0x02,0x00
+	.byte	0x07,0x01,0x11,0x00	# sub rsp,128+8
+.LSEH_info_2x2:
+	.byte	9,0,0,0
+	.rva	se_handler
+___
+}
+
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;
+print $code;
+close STDOUT;
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-mont.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-mont.pl
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-mont5.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/asm/x86_64-mont5.pl
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_add.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_add.c
@ -0,0 +1,171 @@
+/*
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+/* signed add of b to a. */
+int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
+{
+    int ret, r_neg, cmp_res;
+
+    bn_check_top(a);
+    bn_check_top(b);
+
+    if (a->neg == b->neg) {
+        r_neg = a->neg;
+        ret = BN_uadd(r, a, b);
+    } else {
+        cmp_res = BN_ucmp(a, b);
+        if (cmp_res > 0) {
+            r_neg = a->neg;
+            ret = BN_usub(r, a, b);
+        } else if (cmp_res < 0) {
+            r_neg = b->neg;
+            ret = BN_usub(r, b, a);
+        } else {
+            r_neg = 0;
+            BN_zero(r);
+            ret = 1;
+        }
+    }
+
+    r->neg = r_neg;
+    bn_check_top(r);
+    return ret;
+}
+
+/* signed sub of b from a. */
+int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
+{
+    int ret, r_neg, cmp_res;
+
+    bn_check_top(a);
+    bn_check_top(b);
+
+    if (a->neg != b->neg) {
+        r_neg = a->neg;
+        ret = BN_uadd(r, a, b);
+    } else {
+        cmp_res = BN_ucmp(a, b);
+        if (cmp_res > 0) {
+            r_neg = a->neg;
+            ret = BN_usub(r, a, b);
+        } else if (cmp_res < 0) {
+            r_neg = !b->neg;
+            ret = BN_usub(r, b, a);
+        } else {
+            r_neg = 0;
+            BN_zero(r);
+            ret = 1;
+        }
+    }
+
+    r->neg = r_neg;
+    bn_check_top(r);
+    return ret;
+}
+
+/* unsigned add of b to a, r can be equal to a or b. */
+int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
+{
+    int max, min, dif;
+    const BN_ULONG *ap, *bp;
+    BN_ULONG *rp, carry, t1, t2;
+
+    bn_check_top(a);
+    bn_check_top(b);
+
+    if (a->top < b->top) {
+        const BIGNUM *tmp;
+
+        tmp = a;
+        a = b;
+        b = tmp;
+    }
+    max = a->top;
+    min = b->top;
+    dif = max - min;
+
+    if (bn_wexpand(r, max + 1) == NULL)
+        return 0;
+
+    r->top = max;
+
+    ap = a->d;
+    bp = b->d;
+    rp = r->d;
+
+    carry = bn_add_words(rp, ap, bp, min);
+    rp += min;
+    ap += min;
+
+    while (dif) {
+        dif--;
+        t1 = *(ap++);
+        t2 = (t1 + carry) & BN_MASK2;
+        *(rp++) = t2;
+        carry &= (t2 == 0);
+    }
+    *rp = carry;
+    r->top += carry;
+
+    r->neg = 0;
+    bn_check_top(r);
+    return 1;
+}
+
+/* unsigned subtraction of b from a, a must be larger than b. */
+int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
+{
+    int max, min, dif;
+    BN_ULONG t1, t2, borrow, *rp;
+    const BN_ULONG *ap, *bp;
+
+    bn_check_top(a);
+    bn_check_top(b);
+
+    max = a->top;
+    min = b->top;
+    dif = max - min;
+
+    if (dif < 0) {              /* hmm... should not be happening */
+        BNerr(BN_F_BN_USUB, BN_R_ARG2_LT_ARG3);
+        return 0;
+    }
+
+    if (bn_wexpand(r, max) == NULL)
+        return 0;
+
+    ap = a->d;
+    bp = b->d;
+    rp = r->d;
+
+    borrow = bn_sub_words(rp, ap, bp, min);
+    ap += min;
+    rp += min;
+
+    while (dif) {
+        dif--;
+        t1 = *(ap++);
+        t2 = (t1 - borrow) & BN_MASK2;
+        *(rp++) = t2;
+        borrow &= (t1 == 0);
+    }
+
+    while (max && *--rp == 0)
+        max--;
+
+    r->top = max;
+    r->neg = 0;
+    bn_pollute(r);
+
+    return 1;
+}
+
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_asm.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_asm.c
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_blind.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_blind.c
@ -0,0 +1,312 @@
+/*
+ * Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <openssl/opensslconf.h>
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+#define BN_BLINDING_COUNTER     32
+
+struct bn_blinding_st {
+    BIGNUM *A;
+    BIGNUM *Ai;
+    BIGNUM *e;
+    BIGNUM *mod;                /* just a reference */
+    CRYPTO_THREAD_ID tid;
+    int counter;
+    unsigned long flags;
+    BN_MONT_CTX *m_ctx;
+    int (*bn_mod_exp) (BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
+                       const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
+    CRYPTO_RWLOCK *lock;
+};
+
+BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod)
+{
+    BN_BLINDING *ret = NULL;
+
+    bn_check_top(mod);
+
+    if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) {
+        BNerr(BN_F_BN_BLINDING_NEW, ERR_R_MALLOC_FAILURE);
+        return NULL;
+    }
+
+    ret->lock = CRYPTO_THREAD_lock_new();
+    if (ret->lock == NULL) {
+        BNerr(BN_F_BN_BLINDING_NEW, ERR_R_MALLOC_FAILURE);
+        OPENSSL_free(ret);
+        return NULL;
+    }
+
+    BN_BLINDING_set_current_thread(ret);
+
+    if (A != NULL) {
+        if ((ret->A = BN_dup(A)) == NULL)
+            goto err;
+    }
+
+    if (Ai != NULL) {
+        if ((ret->Ai = BN_dup(Ai)) == NULL)
+            goto err;
+    }
+
+    /* save a copy of mod in the BN_BLINDING structure */
+    if ((ret->mod = BN_dup(mod)) == NULL)
+        goto err;
+
+    if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
+        BN_set_flags(ret->mod, BN_FLG_CONSTTIME);
+
+    /*
+     * Set the counter to the special value -1 to indicate that this is
+     * never-used fresh blinding that does not need updating before first
+     * use.
+     */
+    ret->counter = -1;
+
+    return ret;
+
+ err:
+    BN_BLINDING_free(ret);
+    return NULL;
+}
+
+void BN_BLINDING_free(BN_BLINDING *r)
+{
+    if (r == NULL)
+        return;
+    BN_free(r->A);
+    BN_free(r->Ai);
+    BN_free(r->e);
+    BN_free(r->mod);
+    CRYPTO_THREAD_lock_free(r->lock);
+    OPENSSL_free(r);
+}
+
+int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx)
+{
+    int ret = 0;
+
+    if ((b->A == NULL) || (b->Ai == NULL)) {
+        BNerr(BN_F_BN_BLINDING_UPDATE, BN_R_NOT_INITIALIZED);
+        goto err;
+    }
+
+    if (b->counter == -1)
+        b->counter = 0;
+
+    if (++b->counter == BN_BLINDING_COUNTER && b->e != NULL &&
+        !(b->flags & BN_BLINDING_NO_RECREATE)) {
+        /* re-create blinding parameters */
+        if (!BN_BLINDING_create_param(b, NULL, NULL, ctx, NULL, NULL))
+            goto err;
+    } else if (!(b->flags & BN_BLINDING_NO_UPDATE)) {
+        if (b->m_ctx != NULL) {
+            if (!bn_mul_mont_fixed_top(b->Ai, b->Ai, b->Ai, b->m_ctx, ctx)
+                || !bn_mul_mont_fixed_top(b->A, b->A, b->A, b->m_ctx, ctx))
+                goto err;
+        } else {
+            if (!BN_mod_mul(b->Ai, b->Ai, b->Ai, b->mod, ctx)
+                || !BN_mod_mul(b->A, b->A, b->A, b->mod, ctx))
+                goto err;
+        }
+    }
+
+    ret = 1;
+ err:
+    if (b->counter == BN_BLINDING_COUNTER)
+        b->counter = 0;
+    return ret;
+}
+
+int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
+{
+    return BN_BLINDING_convert_ex(n, NULL, b, ctx);
+}
+
+int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *ctx)
+{
+    int ret = 1;
+
+    bn_check_top(n);
+
+    if ((b->A == NULL) || (b->Ai == NULL)) {
+        BNerr(BN_F_BN_BLINDING_CONVERT_EX, BN_R_NOT_INITIALIZED);
+        return 0;
+    }
+
+    if (b->counter == -1)
+        /* Fresh blinding, doesn't need updating. */
+        b->counter = 0;
+    else if (!BN_BLINDING_update(b, ctx))
+        return 0;
+
+    if (r != NULL && (BN_copy(r, b->Ai) == NULL))
+        return 0;
+
+    if (b->m_ctx != NULL)
+        ret = BN_mod_mul_montgomery(n, n, b->A, b->m_ctx, ctx);
+    else
+        ret = BN_mod_mul(n, n, b->A, b->mod, ctx);
+
+    return ret;
+}
+
+int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
+{
+    return BN_BLINDING_invert_ex(n, NULL, b, ctx);
+}
+
+int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b,
+                          BN_CTX *ctx)
+{
+    int ret;
+
+    bn_check_top(n);
+
+    if (r == NULL && (r = b->Ai) == NULL) {
+        BNerr(BN_F_BN_BLINDING_INVERT_EX, BN_R_NOT_INITIALIZED);
+        return 0;
+    }
+
+    if (b->m_ctx != NULL) {
+        /* ensure that BN_mod_mul_montgomery takes pre-defined path */
+        if (n->dmax >= r->top) {
+            size_t i, rtop = r->top, ntop = n->top;
+            BN_ULONG mask;
+
+            for (i = 0; i < rtop; i++) {
+                mask = (BN_ULONG)0 - ((i - ntop) >> (8 * sizeof(i) - 1));
+                n->d[i] &= mask;
+            }
+            mask = (BN_ULONG)0 - ((rtop - ntop) >> (8 * sizeof(ntop) - 1));
+            /* always true, if (rtop >= ntop) n->top = r->top; */
+            n->top = (int)(rtop & ~mask) | (ntop & mask);
+            n->flags |= (BN_FLG_FIXED_TOP & ~mask);
+        }
+        ret = BN_mod_mul_montgomery(n, n, r, b->m_ctx, ctx);
+    } else {
+        ret = BN_mod_mul(n, n, r, b->mod, ctx);
+    }
+
+    bn_check_top(n);
+    return ret;
+}
+
+int BN_BLINDING_is_current_thread(BN_BLINDING *b)
+{
+    return CRYPTO_THREAD_compare_id(CRYPTO_THREAD_get_current_id(), b->tid);
+}
+
+void BN_BLINDING_set_current_thread(BN_BLINDING *b)
+{
+    b->tid = CRYPTO_THREAD_get_current_id();
+}
+
+int BN_BLINDING_lock(BN_BLINDING *b)
+{
+    return CRYPTO_THREAD_write_lock(b->lock);
+}
+
+int BN_BLINDING_unlock(BN_BLINDING *b)
+{
+    return CRYPTO_THREAD_unlock(b->lock);
+}
+
+unsigned long BN_BLINDING_get_flags(const BN_BLINDING *b)
+{
+    return b->flags;
+}
+
+void BN_BLINDING_set_flags(BN_BLINDING *b, unsigned long flags)
+{
+    b->flags = flags;
+}
+
+BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b,
+                                      const BIGNUM *e, BIGNUM *m, BN_CTX *ctx,
+                                      int (*bn_mod_exp) (BIGNUM *r,
+                                                         const BIGNUM *a,
+                                                         const BIGNUM *p,
+                                                         const BIGNUM *m,
+                                                         BN_CTX *ctx,
+                                                         BN_MONT_CTX *m_ctx),
+                                      BN_MONT_CTX *m_ctx)
+{
+    int retry_counter = 32;
+    BN_BLINDING *ret = NULL;
+
+    if (b == NULL)
+        ret = BN_BLINDING_new(NULL, NULL, m);
+    else
+        ret = b;
+
+    if (ret == NULL)
+        goto err;
+
+    if (ret->A == NULL && (ret->A = BN_new()) == NULL)
+        goto err;
+    if (ret->Ai == NULL && (ret->Ai = BN_new()) == NULL)
+        goto err;
+
+    if (e != NULL) {
+        BN_free(ret->e);
+        ret->e = BN_dup(e);
+    }
+    if (ret->e == NULL)
+        goto err;
+
+    if (bn_mod_exp != NULL)
+        ret->bn_mod_exp = bn_mod_exp;
+    if (m_ctx != NULL)
+        ret->m_ctx = m_ctx;
+
+    do {
+        int rv;
+        if (!BN_priv_rand_range(ret->A, ret->mod))
+            goto err;
+        if (int_bn_mod_inverse(ret->Ai, ret->A, ret->mod, ctx, &rv))
+            break;
+
+        /*
+         * this should almost never happen for good RSA keys
+         */
+        if (!rv)
+            goto err;
+
+        if (retry_counter-- == 0) {
+            BNerr(BN_F_BN_BLINDING_CREATE_PARAM, BN_R_TOO_MANY_ITERATIONS);
+            goto err;
+        }
+    } while (1);
+
+    if (ret->bn_mod_exp != NULL && ret->m_ctx != NULL) {
+        if (!ret->bn_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx, ret->m_ctx))
+            goto err;
+    } else {
+        if (!BN_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx))
+            goto err;
+    }
+
+    if (ret->m_ctx != NULL) {
+        if (!bn_to_mont_fixed_top(ret->Ai, ret->Ai, ret->m_ctx, ctx)
+            || !bn_to_mont_fixed_top(ret->A, ret->A, ret->m_ctx, ctx))
+            goto err;
+    }
+
+    return ret;
+ err:
+    if (b == NULL) {
+        BN_BLINDING_free(ret);
+        ret = NULL;
+    }
+
+    return ret;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_const.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_const.c
@ -0,0 +1,553 @@
+/*
+ * Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <openssl/bn.h>
+
+/*-
+ * "First Oakley Default Group" from RFC2409, section 6.1.
+ *
+ * The prime is: 2^768 - 2 ^704 - 1 + 2^64 * { [2^638 pi] + 149686 }
+ *
+ * RFC2409 specifies a generator of 2.
+ * RFC2412 specifies a generator of of 22.
+ */
+
+BIGNUM *BN_get_rfc2409_prime_768(BIGNUM *bn)
+{
+    static const unsigned char RFC2409_PRIME_768[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
+        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
+        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
+        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
+        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
+        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
+        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
+        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
+        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
+        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x3A, 0x36, 0x20,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    };
+    return BN_bin2bn(RFC2409_PRIME_768, sizeof(RFC2409_PRIME_768), bn);
+}
+
+/*-
+ * "Second Oakley Default Group" from RFC2409, section 6.2.
+ *
+ * The prime is: 2^1024 - 2^960 - 1 + 2^64 * { [2^894 pi] + 129093 }.
+ *
+ * RFC2409 specifies a generator of 2.
+ * RFC2412 specifies a generator of 22.
+ */
+
+BIGNUM *BN_get_rfc2409_prime_1024(BIGNUM *bn)
+{
+    static const unsigned char RFC2409_PRIME_1024[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
+        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
+        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
+        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
+        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
+        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
+        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
+        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
+        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
+        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
+        0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
+        0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
+        0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
+        0x49, 0x28, 0x66, 0x51, 0xEC, 0xE6, 0x53, 0x81,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    };
+    return BN_bin2bn(RFC2409_PRIME_1024, sizeof(RFC2409_PRIME_1024), bn);
+}
+
+/*-
+ * "1536-bit MODP Group" from RFC3526, Section 2.
+ *
+ * The prime is: 2^1536 - 2^1472 - 1 + 2^64 * { [2^1406 pi] + 741804 }
+ *
+ * RFC3526 specifies a generator of 2.
+ * RFC2312 specifies a generator of 22.
+ */
+
+BIGNUM *BN_get_rfc3526_prime_1536(BIGNUM *bn)
+{
+    static const unsigned char RFC3526_PRIME_1536[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
+        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
+        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
+        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
+        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
+        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
+        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
+        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
+        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
+        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
+        0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
+        0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
+        0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
+        0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
+        0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
+        0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
+        0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
+        0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
+        0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
+        0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
+        0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
+        0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x23, 0x73, 0x27,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    };
+    return BN_bin2bn(RFC3526_PRIME_1536, sizeof(RFC3526_PRIME_1536), bn);
+}
+
+/*-
+ * "2048-bit MODP Group" from RFC3526, Section 3.
+ *
+ * The prime is: 2^2048 - 2^1984 - 1 + 2^64 * { [2^1918 pi] + 124476 }
+ *
+ * RFC3526 specifies a generator of 2.
+ */
+
+BIGNUM *BN_get_rfc3526_prime_2048(BIGNUM *bn)
+{
+    static const unsigned char RFC3526_PRIME_2048[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
+        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
+        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
+        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
+        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
+        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
+        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
+        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
+        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
+        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
+        0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
+        0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
+        0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
+        0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
+        0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
+        0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
+        0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
+        0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
+        0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
+        0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
+        0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
+        0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C,
+        0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
+        0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03,
+        0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F,
+        0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
+        0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18,
+        0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5,
+        0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
+        0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAC, 0xAA, 0x68,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    };
+    return BN_bin2bn(RFC3526_PRIME_2048, sizeof(RFC3526_PRIME_2048), bn);
+}
+
+/*-
+ * "3072-bit MODP Group" from RFC3526, Section 4.
+ *
+ * The prime is: 2^3072 - 2^3008 - 1 + 2^64 * { [2^2942 pi] + 1690314 }
+ *
+ * RFC3526 specifies a generator of 2.
+ */
+
+BIGNUM *BN_get_rfc3526_prime_3072(BIGNUM *bn)
+{
+    static const unsigned char RFC3526_PRIME_3072[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
+        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
+        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
+        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
+        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
+        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
+        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
+        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
+        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
+        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
+        0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
+        0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
+        0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
+        0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
+        0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
+        0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
+        0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
+        0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
+        0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
+        0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
+        0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
+        0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C,
+        0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
+        0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03,
+        0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F,
+        0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
+        0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18,
+        0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5,
+        0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
+        0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D,
+        0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33,
+        0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
+        0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A,
+        0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D,
+        0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
+        0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7,
+        0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D,
+        0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
+        0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64,
+        0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64,
+        0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
+        0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C,
+        0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2,
+        0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
+        0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E,
+        0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x3A, 0xD2, 0xCA,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    };
+    return BN_bin2bn(RFC3526_PRIME_3072, sizeof(RFC3526_PRIME_3072), bn);
+}
+
+/*-
+ * "4096-bit MODP Group" from RFC3526, Section 5.
+ *
+ * The prime is: 2^4096 - 2^4032 - 1 + 2^64 * { [2^3966 pi] + 240904 }
+ *
+ * RFC3526 specifies a generator of 2.
+ */
+
+BIGNUM *BN_get_rfc3526_prime_4096(BIGNUM *bn)
+{
+    static const unsigned char RFC3526_PRIME_4096[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
+        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
+        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
+        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
+        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
+        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
+        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
+        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
+        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
+        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
+        0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
+        0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
+        0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
+        0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
+        0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
+        0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
+        0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
+        0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
+        0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
+        0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
+        0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
+        0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C,
+        0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
+        0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03,
+        0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F,
+        0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
+        0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18,
+        0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5,
+        0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
+        0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D,
+        0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33,
+        0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
+        0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A,
+        0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D,
+        0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
+        0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7,
+        0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D,
+        0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
+        0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64,
+        0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64,
+        0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
+        0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C,
+        0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2,
+        0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
+        0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E,
+        0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x21, 0x08, 0x01,
+        0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7,
+        0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26,
+        0x99, 0xC3, 0x27, 0x18, 0x6A, 0xF4, 0xE2, 0x3C,
+        0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA,
+        0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8,
+        0xDB, 0xBB, 0xC2, 0xDB, 0x04, 0xDE, 0x8E, 0xF9,
+        0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6,
+        0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D,
+        0x99, 0xB2, 0x96, 0x4F, 0xA0, 0x90, 0xC3, 0xA2,
+        0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED,
+        0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF,
+        0xB8, 0x1B, 0xDD, 0x76, 0x21, 0x70, 0x48, 0x1C,
+        0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9,
+        0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1,
+        0x86, 0xFF, 0xB7, 0xDC, 0x90, 0xA6, 0xC0, 0x8F,
+        0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x06, 0x31, 0x99,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    };
+    return BN_bin2bn(RFC3526_PRIME_4096, sizeof(RFC3526_PRIME_4096), bn);
+}
+
+/*-
+ * "6144-bit MODP Group" from RFC3526, Section 6.
+ *
+ * The prime is: 2^6144 - 2^6080 - 1 + 2^64 * { [2^6014 pi] + 929484 }
+ *
+ * RFC3526 specifies a generator of 2.
+ */
+
+BIGNUM *BN_get_rfc3526_prime_6144(BIGNUM *bn)
+{
+    static const unsigned char RFC3526_PRIME_6144[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
+        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
+        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
+        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
+        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
+        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
+        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
+        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
+        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
+        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
+        0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
+        0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
+        0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
+        0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
+        0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
+        0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
+        0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
+        0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
+        0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
+        0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
+        0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
+        0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C,
+        0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
+        0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03,
+        0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F,
+        0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
+        0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18,
+        0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5,
+        0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
+        0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D,
+        0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33,
+        0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
+        0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A,
+        0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D,
+        0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
+        0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7,
+        0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D,
+        0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
+        0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64,
+        0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64,
+        0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
+        0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C,
+        0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2,
+        0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
+        0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E,
+        0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x21, 0x08, 0x01,
+        0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7,
+        0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26,
+        0x99, 0xC3, 0x27, 0x18, 0x6A, 0xF4, 0xE2, 0x3C,
+        0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA,
+        0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8,
+        0xDB, 0xBB, 0xC2, 0xDB, 0x04, 0xDE, 0x8E, 0xF9,
+        0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6,
+        0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D,
+        0x99, 0xB2, 0x96, 0x4F, 0xA0, 0x90, 0xC3, 0xA2,
+        0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED,
+        0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF,
+        0xB8, 0x1B, 0xDD, 0x76, 0x21, 0x70, 0x48, 0x1C,
+        0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9,
+        0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1,
+        0x86, 0xFF, 0xB7, 0xDC, 0x90, 0xA6, 0xC0, 0x8F,
+        0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x02, 0x84, 0x92,
+        0x36, 0xC3, 0xFA, 0xB4, 0xD2, 0x7C, 0x70, 0x26,
+        0xC1, 0xD4, 0xDC, 0xB2, 0x60, 0x26, 0x46, 0xDE,
+        0xC9, 0x75, 0x1E, 0x76, 0x3D, 0xBA, 0x37, 0xBD,
+        0xF8, 0xFF, 0x94, 0x06, 0xAD, 0x9E, 0x53, 0x0E,
+        0xE5, 0xDB, 0x38, 0x2F, 0x41, 0x30, 0x01, 0xAE,
+        0xB0, 0x6A, 0x53, 0xED, 0x90, 0x27, 0xD8, 0x31,
+        0x17, 0x97, 0x27, 0xB0, 0x86, 0x5A, 0x89, 0x18,
+        0xDA, 0x3E, 0xDB, 0xEB, 0xCF, 0x9B, 0x14, 0xED,
+        0x44, 0xCE, 0x6C, 0xBA, 0xCE, 0xD4, 0xBB, 0x1B,
+        0xDB, 0x7F, 0x14, 0x47, 0xE6, 0xCC, 0x25, 0x4B,
+        0x33, 0x20, 0x51, 0x51, 0x2B, 0xD7, 0xAF, 0x42,
+        0x6F, 0xB8, 0xF4, 0x01, 0x37, 0x8C, 0xD2, 0xBF,
+        0x59, 0x83, 0xCA, 0x01, 0xC6, 0x4B, 0x92, 0xEC,
+        0xF0, 0x32, 0xEA, 0x15, 0xD1, 0x72, 0x1D, 0x03,
+        0xF4, 0x82, 0xD7, 0xCE, 0x6E, 0x74, 0xFE, 0xF6,
+        0xD5, 0x5E, 0x70, 0x2F, 0x46, 0x98, 0x0C, 0x82,
+        0xB5, 0xA8, 0x40, 0x31, 0x90, 0x0B, 0x1C, 0x9E,
+        0x59, 0xE7, 0xC9, 0x7F, 0xBE, 0xC7, 0xE8, 0xF3,
+        0x23, 0xA9, 0x7A, 0x7E, 0x36, 0xCC, 0x88, 0xBE,
+        0x0F, 0x1D, 0x45, 0xB7, 0xFF, 0x58, 0x5A, 0xC5,
+        0x4B, 0xD4, 0x07, 0xB2, 0x2B, 0x41, 0x54, 0xAA,
+        0xCC, 0x8F, 0x6D, 0x7E, 0xBF, 0x48, 0xE1, 0xD8,
+        0x14, 0xCC, 0x5E, 0xD2, 0x0F, 0x80, 0x37, 0xE0,
+        0xA7, 0x97, 0x15, 0xEE, 0xF2, 0x9B, 0xE3, 0x28,
+        0x06, 0xA1, 0xD5, 0x8B, 0xB7, 0xC5, 0xDA, 0x76,
+        0xF5, 0x50, 0xAA, 0x3D, 0x8A, 0x1F, 0xBF, 0xF0,
+        0xEB, 0x19, 0xCC, 0xB1, 0xA3, 0x13, 0xD5, 0x5C,
+        0xDA, 0x56, 0xC9, 0xEC, 0x2E, 0xF2, 0x96, 0x32,
+        0x38, 0x7F, 0xE8, 0xD7, 0x6E, 0x3C, 0x04, 0x68,
+        0x04, 0x3E, 0x8F, 0x66, 0x3F, 0x48, 0x60, 0xEE,
+        0x12, 0xBF, 0x2D, 0x5B, 0x0B, 0x74, 0x74, 0xD6,
+        0xE6, 0x94, 0xF9, 0x1E, 0x6D, 0xCC, 0x40, 0x24,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    };
+    return BN_bin2bn(RFC3526_PRIME_6144, sizeof(RFC3526_PRIME_6144), bn);
+}
+
+/*-
+ * "8192-bit MODP Group" from RFC3526, Section 7.
+ *
+ * The prime is: 2^8192 - 2^8128 - 1 + 2^64 * { [2^8062 pi] + 4743158 }
+ *
+ * RFC3526 specifies a generator of 2.
+ */
+
+BIGNUM *BN_get_rfc3526_prime_8192(BIGNUM *bn)
+{
+    static const unsigned char RFC3526_PRIME_8192[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
+        0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
+        0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
+        0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
+        0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
+        0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
+        0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
+        0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
+        0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
+        0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
+        0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
+        0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
+        0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
+        0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
+        0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
+        0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
+        0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
+        0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
+        0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
+        0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
+        0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
+        0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C,
+        0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
+        0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03,
+        0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F,
+        0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
+        0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18,
+        0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5,
+        0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
+        0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D,
+        0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33,
+        0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
+        0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A,
+        0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D,
+        0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
+        0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7,
+        0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D,
+        0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
+        0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64,
+        0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64,
+        0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
+        0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C,
+        0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2,
+        0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
+        0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E,
+        0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x21, 0x08, 0x01,
+        0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7,
+        0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26,
+        0x99, 0xC3, 0x27, 0x18, 0x6A, 0xF4, 0xE2, 0x3C,
+        0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA,
+        0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8,
+        0xDB, 0xBB, 0xC2, 0xDB, 0x04, 0xDE, 0x8E, 0xF9,
+        0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6,
+        0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D,
+        0x99, 0xB2, 0x96, 0x4F, 0xA0, 0x90, 0xC3, 0xA2,
+        0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED,
+        0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF,
+        0xB8, 0x1B, 0xDD, 0x76, 0x21, 0x70, 0x48, 0x1C,
+        0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9,
+        0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1,
+        0x86, 0xFF, 0xB7, 0xDC, 0x90, 0xA6, 0xC0, 0x8F,
+        0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x02, 0x84, 0x92,
+        0x36, 0xC3, 0xFA, 0xB4, 0xD2, 0x7C, 0x70, 0x26,
+        0xC1, 0xD4, 0xDC, 0xB2, 0x60, 0x26, 0x46, 0xDE,
+        0xC9, 0x75, 0x1E, 0x76, 0x3D, 0xBA, 0x37, 0xBD,
+        0xF8, 0xFF, 0x94, 0x06, 0xAD, 0x9E, 0x53, 0x0E,
+        0xE5, 0xDB, 0x38, 0x2F, 0x41, 0x30, 0x01, 0xAE,
+        0xB0, 0x6A, 0x53, 0xED, 0x90, 0x27, 0xD8, 0x31,
+        0x17, 0x97, 0x27, 0xB0, 0x86, 0x5A, 0x89, 0x18,
+        0xDA, 0x3E, 0xDB, 0xEB, 0xCF, 0x9B, 0x14, 0xED,
+        0x44, 0xCE, 0x6C, 0xBA, 0xCE, 0xD4, 0xBB, 0x1B,
+        0xDB, 0x7F, 0x14, 0x47, 0xE6, 0xCC, 0x25, 0x4B,
+        0x33, 0x20, 0x51, 0x51, 0x2B, 0xD7, 0xAF, 0x42,
+        0x6F, 0xB8, 0xF4, 0x01, 0x37, 0x8C, 0xD2, 0xBF,
+        0x59, 0x83, 0xCA, 0x01, 0xC6, 0x4B, 0x92, 0xEC,
+        0xF0, 0x32, 0xEA, 0x15, 0xD1, 0x72, 0x1D, 0x03,
+        0xF4, 0x82, 0xD7, 0xCE, 0x6E, 0x74, 0xFE, 0xF6,
+        0xD5, 0x5E, 0x70, 0x2F, 0x46, 0x98, 0x0C, 0x82,
+        0xB5, 0xA8, 0x40, 0x31, 0x90, 0x0B, 0x1C, 0x9E,
+        0x59, 0xE7, 0xC9, 0x7F, 0xBE, 0xC7, 0xE8, 0xF3,
+        0x23, 0xA9, 0x7A, 0x7E, 0x36, 0xCC, 0x88, 0xBE,
+        0x0F, 0x1D, 0x45, 0xB7, 0xFF, 0x58, 0x5A, 0xC5,
+        0x4B, 0xD4, 0x07, 0xB2, 0x2B, 0x41, 0x54, 0xAA,
+        0xCC, 0x8F, 0x6D, 0x7E, 0xBF, 0x48, 0xE1, 0xD8,
+        0x14, 0xCC, 0x5E, 0xD2, 0x0F, 0x80, 0x37, 0xE0,
+        0xA7, 0x97, 0x15, 0xEE, 0xF2, 0x9B, 0xE3, 0x28,
+        0x06, 0xA1, 0xD5, 0x8B, 0xB7, 0xC5, 0xDA, 0x76,
+        0xF5, 0x50, 0xAA, 0x3D, 0x8A, 0x1F, 0xBF, 0xF0,
+        0xEB, 0x19, 0xCC, 0xB1, 0xA3, 0x13, 0xD5, 0x5C,
+        0xDA, 0x56, 0xC9, 0xEC, 0x2E, 0xF2, 0x96, 0x32,
+        0x38, 0x7F, 0xE8, 0xD7, 0x6E, 0x3C, 0x04, 0x68,
+        0x04, 0x3E, 0x8F, 0x66, 0x3F, 0x48, 0x60, 0xEE,
+        0x12, 0xBF, 0x2D, 0x5B, 0x0B, 0x74, 0x74, 0xD6,
+        0xE6, 0x94, 0xF9, 0x1E, 0x6D, 0xBE, 0x11, 0x59,
+        0x74, 0xA3, 0x92, 0x6F, 0x12, 0xFE, 0xE5, 0xE4,
+        0x38, 0x77, 0x7C, 0xB6, 0xA9, 0x32, 0xDF, 0x8C,
+        0xD8, 0xBE, 0xC4, 0xD0, 0x73, 0xB9, 0x31, 0xBA,
+        0x3B, 0xC8, 0x32, 0xB6, 0x8D, 0x9D, 0xD3, 0x00,
+        0x74, 0x1F, 0xA7, 0xBF, 0x8A, 0xFC, 0x47, 0xED,
+        0x25, 0x76, 0xF6, 0x93, 0x6B, 0xA4, 0x24, 0x66,
+        0x3A, 0xAB, 0x63, 0x9C, 0x5A, 0xE4, 0xF5, 0x68,
+        0x34, 0x23, 0xB4, 0x74, 0x2B, 0xF1, 0xC9, 0x78,
+        0x23, 0x8F, 0x16, 0xCB, 0xE3, 0x9D, 0x65, 0x2D,
+        0xE3, 0xFD, 0xB8, 0xBE, 0xFC, 0x84, 0x8A, 0xD9,
+        0x22, 0x22, 0x2E, 0x04, 0xA4, 0x03, 0x7C, 0x07,
+        0x13, 0xEB, 0x57, 0xA8, 0x1A, 0x23, 0xF0, 0xC7,
+        0x34, 0x73, 0xFC, 0x64, 0x6C, 0xEA, 0x30, 0x6B,
+        0x4B, 0xCB, 0xC8, 0x86, 0x2F, 0x83, 0x85, 0xDD,
+        0xFA, 0x9D, 0x4B, 0x7F, 0xA2, 0xC0, 0x87, 0xE8,
+        0x79, 0x68, 0x33, 0x03, 0xED, 0x5B, 0xDD, 0x3A,
+        0x06, 0x2B, 0x3C, 0xF5, 0xB3, 0xA2, 0x78, 0xA6,
+        0x6D, 0x2A, 0x13, 0xF8, 0x3F, 0x44, 0xF8, 0x2D,
+        0xDF, 0x31, 0x0E, 0xE0, 0x74, 0xAB, 0x6A, 0x36,
+        0x45, 0x97, 0xE8, 0x99, 0xA0, 0x25, 0x5D, 0xC1,
+        0x64, 0xF3, 0x1C, 0xC5, 0x08, 0x46, 0x85, 0x1D,
+        0xF9, 0xAB, 0x48, 0x19, 0x5D, 0xED, 0x7E, 0xA1,
+        0xB1, 0xD5, 0x10, 0xBD, 0x7E, 0xE7, 0x4D, 0x73,
+        0xFA, 0xF3, 0x6B, 0xC3, 0x1E, 0xCF, 0xA2, 0x68,
+        0x35, 0x90, 0x46, 0xF4, 0xEB, 0x87, 0x9F, 0x92,
+        0x40, 0x09, 0x43, 0x8B, 0x48, 0x1C, 0x6C, 0xD7,
+        0x88, 0x9A, 0x00, 0x2E, 0xD5, 0xEE, 0x38, 0x2B,
+        0xC9, 0x19, 0x0D, 0xA6, 0xFC, 0x02, 0x6E, 0x47,
+        0x95, 0x58, 0xE4, 0x47, 0x56, 0x77, 0xE9, 0xAA,
+        0x9E, 0x30, 0x50, 0xE2, 0x76, 0x56, 0x94, 0xDF,
+        0xC8, 0x1F, 0x56, 0xE8, 0x80, 0xB9, 0x6E, 0x71,
+        0x60, 0xC9, 0x80, 0xDD, 0x98, 0xED, 0xD3, 0xDF,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    };
+    return BN_bin2bn(RFC3526_PRIME_8192, sizeof(RFC3526_PRIME_8192), bn);
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_ctx.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_ctx.c
@ -0,0 +1,361 @@
+/*
+ * Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+/*-
+ * TODO list
+ *
+ * 1. Check a bunch of "(words+1)" type hacks in various bignum functions and
+ * check they can be safely removed.
+ *  - Check +1 and other ugliness in BN_from_montgomery()
+ *
+ * 2. Consider allowing a BN_new_ex() that, at least, lets you specify an
+ * appropriate 'block' size that will be honoured by bn_expand_internal() to
+ * prevent piddly little reallocations. OTOH, profiling bignum expansions in
+ * BN_CTX doesn't show this to be a big issue.
+ */
+
+/* How many bignums are in each "pool item"; */
+#define BN_CTX_POOL_SIZE        16
+/* The stack frame info is resizing, set a first-time expansion size; */
+#define BN_CTX_START_FRAMES     32
+
+/***********/
+/* BN_POOL */
+/***********/
+
+/* A bundle of bignums that can be linked with other bundles */
+typedef struct bignum_pool_item {
+    /* The bignum values */
+    BIGNUM vals[BN_CTX_POOL_SIZE];
+    /* Linked-list admin */
+    struct bignum_pool_item *prev, *next;
+} BN_POOL_ITEM;
+/* A linked-list of bignums grouped in bundles */
+typedef struct bignum_pool {
+    /* Linked-list admin */
+    BN_POOL_ITEM *head, *current, *tail;
+    /* Stack depth and allocation size */
+    unsigned used, size;
+} BN_POOL;
+static void BN_POOL_init(BN_POOL *);
+static void BN_POOL_finish(BN_POOL *);
+static BIGNUM *BN_POOL_get(BN_POOL *, int);
+static void BN_POOL_release(BN_POOL *, unsigned int);
+
+/************/
+/* BN_STACK */
+/************/
+
+/* A wrapper to manage the "stack frames" */
+typedef struct bignum_ctx_stack {
+    /* Array of indexes into the bignum stack */
+    unsigned int *indexes;
+    /* Number of stack frames, and the size of the allocated array */
+    unsigned int depth, size;
+} BN_STACK;
+static void BN_STACK_init(BN_STACK *);
+static void BN_STACK_finish(BN_STACK *);
+static int BN_STACK_push(BN_STACK *, unsigned int);
+static unsigned int BN_STACK_pop(BN_STACK *);
+
+/**********/
+/* BN_CTX */
+/**********/
+
+/* The opaque BN_CTX type */
+struct bignum_ctx {
+    /* The bignum bundles */
+    BN_POOL pool;
+    /* The "stack frames", if you will */
+    BN_STACK stack;
+    /* The number of bignums currently assigned */
+    unsigned int used;
+    /* Depth of stack overflow */
+    int err_stack;
+    /* Block "gets" until an "end" (compatibility behaviour) */
+    int too_many;
+    /* Flags. */
+    int flags;
+};
+
+/* Enable this to find BN_CTX bugs */
+#ifdef BN_CTX_DEBUG
+static const char *ctxdbg_cur = NULL;
+static void ctxdbg(BN_CTX *ctx)
+{
+    unsigned int bnidx = 0, fpidx = 0;
+    BN_POOL_ITEM *item = ctx->pool.head;
+    BN_STACK *stack = &ctx->stack;
+    fprintf(stderr, "(%16p): ", ctx);
+    while (bnidx < ctx->used) {
+        fprintf(stderr, "%03x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax);
+        if (!(bnidx % BN_CTX_POOL_SIZE))
+            item = item->next;
+    }
+    fprintf(stderr, "\n");
+    bnidx = 0;
+    fprintf(stderr, "          : ");
+    while (fpidx < stack->depth) {
+        while (bnidx++ < stack->indexes[fpidx])
+            fprintf(stderr, "    ");
+        fprintf(stderr, "^^^ ");
+        bnidx++;
+        fpidx++;
+    }
+    fprintf(stderr, "\n");
+}
+
+# define CTXDBG_ENTRY(str, ctx)  do { \
+                                ctxdbg_cur = (str); \
+                                fprintf(stderr,"Starting %s\n", ctxdbg_cur); \
+                                ctxdbg(ctx); \
+                                } while(0)
+# define CTXDBG_EXIT(ctx)        do { \
+                                fprintf(stderr,"Ending %s\n", ctxdbg_cur); \
+                                ctxdbg(ctx); \
+                                } while(0)
+# define CTXDBG_RET(ctx,ret)
+#else
+# define CTXDBG_ENTRY(str, ctx)
+# define CTXDBG_EXIT(ctx)
+# define CTXDBG_RET(ctx,ret)
+#endif
+
+
+BN_CTX *BN_CTX_new(void)
+{
+    BN_CTX *ret;
+
+    if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) {
+        BNerr(BN_F_BN_CTX_NEW, ERR_R_MALLOC_FAILURE);
+        return NULL;
+    }
+    /* Initialise the structure */
+    BN_POOL_init(&ret->pool);
+    BN_STACK_init(&ret->stack);
+    return ret;
+}
+
+BN_CTX *BN_CTX_secure_new(void)
+{
+    BN_CTX *ret = BN_CTX_new();
+
+    if (ret != NULL)
+        ret->flags = BN_FLG_SECURE;
+    return ret;
+}
+
+void BN_CTX_free(BN_CTX *ctx)
+{
+    if (ctx == NULL)
+        return;
+#ifdef BN_CTX_DEBUG
+    {
+        BN_POOL_ITEM *pool = ctx->pool.head;
+        fprintf(stderr, "BN_CTX_free, stack-size=%d, pool-bignums=%d\n",
+                ctx->stack.size, ctx->pool.size);
+        fprintf(stderr, "dmaxs: ");
+        while (pool) {
+            unsigned loop = 0;
+            while (loop < BN_CTX_POOL_SIZE)
+                fprintf(stderr, "%02x ", pool->vals[loop++].dmax);
+            pool = pool->next;
+        }
+        fprintf(stderr, "\n");
+    }
+#endif
+    BN_STACK_finish(&ctx->stack);
+    BN_POOL_finish(&ctx->pool);
+    OPENSSL_free(ctx);
+}
+
+void BN_CTX_start(BN_CTX *ctx)
+{
+    CTXDBG_ENTRY("BN_CTX_start", ctx);
+    /* If we're already overflowing ... */
+    if (ctx->err_stack || ctx->too_many)
+        ctx->err_stack++;
+    /* (Try to) get a new frame pointer */
+    else if (!BN_STACK_push(&ctx->stack, ctx->used)) {
+        BNerr(BN_F_BN_CTX_START, BN_R_TOO_MANY_TEMPORARY_VARIABLES);
+        ctx->err_stack++;
+    }
+    CTXDBG_EXIT(ctx);
+}
+
+void BN_CTX_end(BN_CTX *ctx)
+{
+    CTXDBG_ENTRY("BN_CTX_end", ctx);
+    if (ctx->err_stack)
+        ctx->err_stack--;
+    else {
+        unsigned int fp = BN_STACK_pop(&ctx->stack);
+        /* Does this stack frame have anything to release? */
+        if (fp < ctx->used)
+            BN_POOL_release(&ctx->pool, ctx->used - fp);
+        ctx->used = fp;
+        /* Unjam "too_many" in case "get" had failed */
+        ctx->too_many = 0;
+    }
+    CTXDBG_EXIT(ctx);
+}
+
+BIGNUM *BN_CTX_get(BN_CTX *ctx)
+{
+    BIGNUM *ret;
+
+    CTXDBG_ENTRY("BN_CTX_get", ctx);
+    if (ctx->err_stack || ctx->too_many)
+        return NULL;
+    if ((ret = BN_POOL_get(&ctx->pool, ctx->flags)) == NULL) {
+        /*
+         * Setting too_many prevents repeated "get" attempts from cluttering
+         * the error stack.
+         */
+        ctx->too_many = 1;
+        BNerr(BN_F_BN_CTX_GET, BN_R_TOO_MANY_TEMPORARY_VARIABLES);
+        return NULL;
+    }
+    /* OK, make sure the returned bignum is "zero" */
+    BN_zero(ret);
+    /* clear BN_FLG_CONSTTIME if leaked from previous frames */
+    ret->flags &= (~BN_FLG_CONSTTIME);
+    ctx->used++;
+    CTXDBG_RET(ctx, ret);
+    return ret;
+}
+
+/************/
+/* BN_STACK */
+/************/
+
+static void BN_STACK_init(BN_STACK *st)
+{
+    st->indexes = NULL;
+    st->depth = st->size = 0;
+}
+
+static void BN_STACK_finish(BN_STACK *st)
+{
+    OPENSSL_free(st->indexes);
+    st->indexes = NULL;
+}
+
+
+static int BN_STACK_push(BN_STACK *st, unsigned int idx)
+{
+    if (st->depth == st->size) {
+        /* Need to expand */
+        unsigned int newsize =
+            st->size ? (st->size * 3 / 2) : BN_CTX_START_FRAMES;
+        unsigned int *newitems;
+
+        if ((newitems = OPENSSL_malloc(sizeof(*newitems) * newsize)) == NULL) {
+            BNerr(BN_F_BN_STACK_PUSH, ERR_R_MALLOC_FAILURE);
+            return 0;
+        }
+        if (st->depth)
+            memcpy(newitems, st->indexes, sizeof(*newitems) * st->depth);
+        OPENSSL_free(st->indexes);
+        st->indexes = newitems;
+        st->size = newsize;
+    }
+    st->indexes[(st->depth)++] = idx;
+    return 1;
+}
+
+static unsigned int BN_STACK_pop(BN_STACK *st)
+{
+    return st->indexes[--(st->depth)];
+}
+
+/***********/
+/* BN_POOL */
+/***********/
+
+static void BN_POOL_init(BN_POOL *p)
+{
+    p->head = p->current = p->tail = NULL;
+    p->used = p->size = 0;
+}
+
+static void BN_POOL_finish(BN_POOL *p)
+{
+    unsigned int loop;
+    BIGNUM *bn;
+
+    while (p->head) {
+        for (loop = 0, bn = p->head->vals; loop++ < BN_CTX_POOL_SIZE; bn++)
+            if (bn->d)
+                BN_clear_free(bn);
+        p->current = p->head->next;
+        OPENSSL_free(p->head);
+        p->head = p->current;
+    }
+}
+
+
+static BIGNUM *BN_POOL_get(BN_POOL *p, int flag)
+{
+    BIGNUM *bn;
+    unsigned int loop;
+
+    /* Full; allocate a new pool item and link it in. */
+    if (p->used == p->size) {
+        BN_POOL_ITEM *item;
+
+        if ((item = OPENSSL_malloc(sizeof(*item))) == NULL) {
+            BNerr(BN_F_BN_POOL_GET, ERR_R_MALLOC_FAILURE);
+            return NULL;
+        }
+        for (loop = 0, bn = item->vals; loop++ < BN_CTX_POOL_SIZE; bn++) {
+            bn_init(bn);
+            if ((flag & BN_FLG_SECURE) != 0)
+                BN_set_flags(bn, BN_FLG_SECURE);
+        }
+        item->prev = p->tail;
+        item->next = NULL;
+
+        if (p->head == NULL)
+            p->head = p->current = p->tail = item;
+        else {
+            p->tail->next = item;
+            p->tail = item;
+            p->current = item;
+        }
+        p->size += BN_CTX_POOL_SIZE;
+        p->used++;
+        /* Return the first bignum from the new pool */
+        return item->vals;
+    }
+
+    if (!p->used)
+        p->current = p->head;
+    else if ((p->used % BN_CTX_POOL_SIZE) == 0)
+        p->current = p->current->next;
+    return p->current->vals + ((p->used++) % BN_CTX_POOL_SIZE);
+}
+
+static void BN_POOL_release(BN_POOL *p, unsigned int num)
+{
+    unsigned int offset = (p->used - 1) % BN_CTX_POOL_SIZE;
+
+    p->used -= num;
+    while (num--) {
+        bn_check_top(p->current->vals + offset);
+        if (offset == 0) {
+            offset = BN_CTX_POOL_SIZE - 1;
+            p->current = p->current->prev;
+        } else
+            offset--;
+    }
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_depr.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_depr.c
@ -0,0 +1,68 @@
+/*
+ * Copyright 2002-2019 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+/*
+ * Support for deprecated functions goes here - static linkage will only
+ * slurp this code if applications are using them directly.
+ */
+
+#include <openssl/opensslconf.h>
+#if OPENSSL_API_COMPAT >= 0x00908000L
+NON_EMPTY_TRANSLATION_UNIT
+#else
+
+# include <stdio.h>
+# include <time.h>
+# include "internal/cryptlib.h"
+# include "bn_lcl.h"
+
+BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe,
+                          const BIGNUM *add, const BIGNUM *rem,
+                          void (*callback) (int, int, void *), void *cb_arg)
+{
+    BN_GENCB cb;
+    BIGNUM *rnd = NULL;
+
+    BN_GENCB_set_old(&cb, callback, cb_arg);
+
+    if (ret == NULL) {
+        if ((rnd = BN_new()) == NULL)
+            goto err;
+    } else
+        rnd = ret;
+    if (!BN_generate_prime_ex(rnd, bits, safe, add, rem, &cb))
+        goto err;
+
+    /* we have a prime :-) */
+    return rnd;
+ err:
+    BN_free(rnd);
+    return NULL;
+}
+
+int BN_is_prime(const BIGNUM *a, int checks,
+                void (*callback) (int, int, void *), BN_CTX *ctx_passed,
+                void *cb_arg)
+{
+    BN_GENCB cb;
+    BN_GENCB_set_old(&cb, callback, cb_arg);
+    return BN_is_prime_ex(a, checks, ctx_passed, &cb);
+}
+
+int BN_is_prime_fasttest(const BIGNUM *a, int checks,
+                         void (*callback) (int, int, void *),
+                         BN_CTX *ctx_passed, void *cb_arg,
+                         int do_trial_division)
+{
+    BN_GENCB cb;
+    BN_GENCB_set_old(&cb, callback, cb_arg);
+    return BN_is_prime_fasttest_ex(a, checks, ctx_passed,
+                                   do_trial_division, &cb);
+}
+#endif
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_dh.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_dh.c
@ -0,0 +1,512 @@
+/*
+ * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "bn_lcl.h"
+#include "internal/nelem.h"
+
+#ifndef OPENSSL_NO_DH
+#include <openssl/dh.h>
+#include "internal/bn_dh.h"
+/* DH parameters from RFC5114 */
+
+# if BN_BITS2 == 64
+static const BN_ULONG dh1024_160_p[] = {
+    0xDF1FB2BC2E4A4371ULL, 0xE68CFDA76D4DA708ULL, 0x45BF37DF365C1A65ULL,
+    0xA151AF5F0DC8B4BDULL, 0xFAA31A4FF55BCCC0ULL, 0x4EFFD6FAE5644738ULL,
+    0x98488E9C219A7372ULL, 0xACCBDD7D90C4BD70ULL, 0x24975C3CD49B83BFULL,
+    0x13ECB4AEA9061123ULL, 0x9838EF1E2EE652C0ULL, 0x6073E28675A23D18ULL,
+    0x9A6A9DCA52D23B61ULL, 0x52C99FBCFB06A3C6ULL, 0xDE92DE5EAE5D54ECULL,
+    0xB10B8F96A080E01DULL
+};
+
+static const BN_ULONG dh1024_160_g[] = {
+    0x855E6EEB22B3B2E5ULL, 0x858F4DCEF97C2A24ULL, 0x2D779D5918D08BC8ULL,
+    0xD662A4D18E73AFA3ULL, 0x1DBF0A0169B6A28AULL, 0xA6A24C087A091F53ULL,
+    0x909D0D2263F80A76ULL, 0xD7FBD7D3B9A92EE1ULL, 0x5E91547F9E2749F4ULL,
+    0x160217B4B01B886AULL, 0x777E690F5504F213ULL, 0x266FEA1E5C41564BULL,
+    0xD6406CFF14266D31ULL, 0xF8104DD258AC507FULL, 0x6765A442EFB99905ULL,
+    0xA4D1CBD5C3FD3412ULL
+};
+
+static const BN_ULONG dh1024_160_q[] = {
+    0x64B7CB9D49462353ULL, 0x81A8DF278ABA4E7DULL, 0x00000000F518AA87ULL
+};
+
+static const BN_ULONG dh2048_224_p[] = {
+    0x0AC4DFFE0C10E64FULL, 0xCF9DE5384E71B81CULL, 0x7EF363E2FFA31F71ULL,
+    0xE3FB73C16B8E75B9ULL, 0xC9B53DCF4BA80A29ULL, 0x23F10B0E16E79763ULL,
+    0xC52172E413042E9BULL, 0xBE60E69CC928B2B9ULL, 0x80CD86A1B9E587E8ULL,
+    0x315D75E198C641A4ULL, 0xCDF93ACC44328387ULL, 0x15987D9ADC0A486DULL,
+    0x7310F7121FD5A074ULL, 0x278273C7DE31EFDCULL, 0x1602E714415D9330ULL,
+    0x81286130BC8985DBULL, 0xB3BF8A3170918836ULL, 0x6A00E0A0B9C49708ULL,
+    0xC6BA0B2C8BBC27BEULL, 0xC9F98D11ED34DBF6ULL, 0x7AD5B7D0B6C12207ULL,
+    0xD91E8FEF55B7394BULL, 0x9037C9EDEFDA4DF8ULL, 0x6D3F8152AD6AC212ULL,
+    0x1DE6B85A1274A0A6ULL, 0xEB3D688A309C180EULL, 0xAF9A3C407BA1DF15ULL,
+    0xE6FA141DF95A56DBULL, 0xB54B1597B61D0A75ULL, 0xA20D64E5683B9FD1ULL,
+    0xD660FAA79559C51FULL, 0xAD107E1E9123A9D0ULL
+};
+
+static const BN_ULONG dh2048_224_g[] = {
+    0x84B890D3191F2BFAULL, 0x81BC087F2A7065B3ULL, 0x19C418E1F6EC0179ULL,
+    0x7B5A0F1C71CFFF4CULL, 0xEDFE72FE9B6AA4BDULL, 0x81E1BCFE94B30269ULL,
+    0x566AFBB48D6C0191ULL, 0xB539CCE3409D13CDULL, 0x6AA21E7F5F2FF381ULL,
+    0xD9E263E4770589EFULL, 0x10E183EDD19963DDULL, 0xB70A8137150B8EEBULL,
+    0x051AE3D428C8F8ACULL, 0xBB77A86F0C1AB15BULL, 0x6E3025E316A330EFULL,
+    0x19529A45D6F83456ULL, 0xF180EB34118E98D1ULL, 0xB5F6C6B250717CBEULL,
+    0x09939D54DA7460CDULL, 0xE247150422EA1ED4ULL, 0xB8A762D0521BC98AULL,
+    0xF4D027275AC1348BULL, 0xC17669101999024AULL, 0xBE5E9001A8D66AD7ULL,
+    0xC57DB17C620A8652ULL, 0xAB739D7700C29F52ULL, 0xDD921F01A70C4AFAULL,
+    0xA6824A4E10B9A6F0ULL, 0x74866A08CFE4FFE3ULL, 0x6CDEBE7B89998CAFULL,
+    0x9DF30B5C8FFDAC50ULL, 0xAC4032EF4F2D9AE3ULL
+};
+
+static const BN_ULONG dh2048_224_q[] = {
+    0xBF389A99B36371EBULL, 0x1F80535A4738CEBCULL, 0xC58D93FE99717710ULL,
+    0x00000000801C0D34ULL
+};
+
+static const BN_ULONG dh2048_256_p[] = {
+    0xDB094AE91E1A1597ULL, 0x693877FAD7EF09CAULL, 0x6116D2276E11715FULL,
+    0xA4B54330C198AF12ULL, 0x75F26375D7014103ULL, 0xC3A3960A54E710C3ULL,
+    0xDED4010ABD0BE621ULL, 0xC0B857F689962856ULL, 0xB3CA3F7971506026ULL,
+    0x1CCACB83E6B486F6ULL, 0x67E144E514056425ULL, 0xF6A167B5A41825D9ULL,
+    0x3AD8347796524D8EULL, 0xF13C6D9A51BFA4ABULL, 0x2D52526735488A0EULL,
+    0xB63ACAE1CAA6B790ULL, 0x4FDB70C581B23F76ULL, 0xBC39A0BF12307F5CULL,
+    0xB941F54EB1E59BB8ULL, 0x6C5BFC11D45F9088ULL, 0x22E0B1EF4275BF7BULL,
+    0x91F9E6725B4758C0ULL, 0x5A8A9D306BCF67EDULL, 0x209E0C6497517ABDULL,
+    0x3BF4296D830E9A7CULL, 0x16C3D91134096FAAULL, 0xFAF7DF4561B2AA30ULL,
+    0xE00DF8F1D61957D4ULL, 0x5D2CEED4435E3B00ULL, 0x8CEEF608660DD0F2ULL,
+    0xFFBBD19C65195999ULL, 0x87A8E61DB4B6663CULL
+};
+
+static const BN_ULONG dh2048_256_g[] = {
+    0x664B4C0F6CC41659ULL, 0x5E2327CFEF98C582ULL, 0xD647D148D4795451ULL,
+    0x2F63078490F00EF8ULL, 0x184B523D1DB246C3ULL, 0xC7891428CDC67EB6ULL,
+    0x7FD028370DF92B52ULL, 0xB3353BBB64E0EC37ULL, 0xECD06E1557CD0915ULL,
+    0xB7D2BBD2DF016199ULL, 0xC8484B1E052588B9ULL, 0xDB2A3B7313D3FE14ULL,
+    0xD052B985D182EA0AULL, 0xA4BD1BFFE83B9C80ULL, 0xDFC967C1FB3F2E55ULL,
+    0xB5045AF2767164E1ULL, 0x1D14348F6F2F9193ULL, 0x64E67982428EBC83ULL,
+    0x8AC376D282D6ED38ULL, 0x777DE62AAAB8A862ULL, 0xDDF463E5E9EC144BULL,
+    0x0196F931C77A57F2ULL, 0xA55AE31341000A65ULL, 0x901228F8C28CBB18ULL,
+    0xBC3773BF7E8C6F62ULL, 0xBE3A6C1B0C6B47B1ULL, 0xFF4FED4AAC0BB555ULL,
+    0x10DBC15077BE463FULL, 0x07F4793A1A0BA125ULL, 0x4CA7B18F21EF2054ULL,
+    0x2E77506660EDBD48ULL, 0x3FB32C9B73134D0BULL
+};
+
+static const BN_ULONG dh2048_256_q[] = {
+    0xA308B0FE64F5FBD3ULL, 0x99B1A47D1EB3750BULL, 0xB447997640129DA2ULL,
+    0x8CF83642A709A097ULL
+};
+
+/* Primes from RFC 7919 */
+static const BN_ULONG ffdhe2048_p[] = {
+    0xFFFFFFFFFFFFFFFFULL, 0x886B423861285C97ULL, 0xC6F34A26C1B2EFFAULL,
+    0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL,
+    0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL,
+    0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL,
+    0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL,
+    0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL,
+    0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL,
+    0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL,
+    0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL,
+    0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL,
+    0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
+};
+
+static const BN_ULONG ffdhe3072_p[] = {
+    0xFFFFFFFFFFFFFFFFULL, 0x25E41D2B66C62E37ULL, 0x3C1B20EE3FD59D7CULL,
+    0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL,
+    0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL,
+    0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL, 0xAEFE130985139270ULL,
+    0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL,
+    0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL,
+    0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL,
+    0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL,
+    0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL,
+    0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL,
+    0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL,
+    0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL,
+    0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL,
+    0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL,
+    0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL,
+    0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
+};
+
+static const BN_ULONG ffdhe4096_p[] = {
+    0xFFFFFFFFFFFFFFFFULL, 0xC68A007E5E655F6AULL, 0x4DB5A851F44182E1ULL,
+    0x8EC9B55A7F88A46BULL, 0x0A8291CDCEC97DCFULL, 0x2A4ECEA9F98D0ACCULL,
+    0x1A1DB93D7140003CULL, 0x092999A333CB8B7AULL, 0x6DC778F971AD0038ULL,
+    0xA907600A918130C4ULL, 0xED6A1E012D9E6832ULL, 0x7135C886EFB4318AULL,
+    0x87F55BA57E31CC7AULL, 0x7763CF1D55034004ULL, 0xAC7D5F42D69F6D18ULL,
+    0x7930E9E4E58857B6ULL, 0x6E6F52C3164DF4FBULL, 0x25E41D2B669E1EF1ULL,
+    0x3C1B20EE3FD59D7CULL, 0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL,
+    0xABC521979B0DEADAULL, 0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL,
+    0x64F2E21E71F54BFFULL, 0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL,
+    0xAEFE130985139270ULL, 0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL,
+    0x61B46FC9D6E6C907ULL, 0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL,
+    0x886B4238611FCFDCULL, 0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL,
+    0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL,
+    0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL,
+    0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL,
+    0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL,
+    0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL,
+    0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL,
+    0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL,
+    0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL,
+    0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL,
+    0xFFFFFFFFFFFFFFFFULL
+};
+
+static const BN_ULONG ffdhe6144_p[] = {
+    0xFFFFFFFFFFFFFFFFULL, 0xA40E329CD0E40E65ULL, 0xA41D570D7938DAD4ULL,
+    0x62A69526D43161C1ULL, 0x3FDD4A8E9ADB1E69ULL, 0x5B3B71F9DC6B80D6ULL,
+    0xEC9D1810C6272B04ULL, 0x8CCF2DD5CACEF403ULL, 0xE49F5235C95B9117ULL,
+    0x505DC82DB854338AULL, 0x62292C311562A846ULL, 0xD72B03746AE77F5EULL,
+    0xF9C9091B462D538CULL, 0x0AE8DB5847A67CBEULL, 0xB3A739C122611682ULL,
+    0xEEAAC0232A281BF6ULL, 0x94C6651E77CAF992ULL, 0x763E4E4B94B2BBC1ULL,
+    0x587E38DA0077D9B4ULL, 0x7FB29F8C183023C3ULL, 0x0ABEC1FFF9E3A26EULL,
+    0xA00EF092350511E3ULL, 0xB855322EDB6340D8ULL, 0xA52471F7A9A96910ULL,
+    0x388147FB4CFDB477ULL, 0x9B1F5C3E4E46041FULL, 0xCDAD0657FCCFEC71ULL,
+    0xB38E8C334C701C3AULL, 0x917BDD64B1C0FD4CULL, 0x3BB454329B7624C8ULL,
+    0x23BA4442CAF53EA6ULL, 0x4E677D2C38532A3AULL, 0x0BFD64B645036C7AULL,
+    0xC68A007E5E0DD902ULL, 0x4DB5A851F44182E1ULL, 0x8EC9B55A7F88A46BULL,
+    0x0A8291CDCEC97DCFULL, 0x2A4ECEA9F98D0ACCULL, 0x1A1DB93D7140003CULL,
+    0x092999A333CB8B7AULL, 0x6DC778F971AD0038ULL, 0xA907600A918130C4ULL,
+    0xED6A1E012D9E6832ULL, 0x7135C886EFB4318AULL, 0x87F55BA57E31CC7AULL,
+    0x7763CF1D55034004ULL, 0xAC7D5F42D69F6D18ULL, 0x7930E9E4E58857B6ULL,
+    0x6E6F52C3164DF4FBULL, 0x25E41D2B669E1EF1ULL, 0x3C1B20EE3FD59D7CULL,
+    0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL,
+    0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL,
+    0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL, 0xAEFE130985139270ULL,
+    0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL,
+    0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL,
+    0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL,
+    0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL,
+    0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL,
+    0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL,
+    0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL,
+    0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL,
+    0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL,
+    0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL,
+    0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL,
+    0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
+};
+
+static const BN_ULONG ffdhe8192_p[] = {
+    0xFFFFFFFFFFFFFFFFULL, 0xD68C8BB7C5C6424CULL, 0x011E2A94838FF88CULL,
+    0x0822E506A9F4614EULL, 0x97D11D49F7A8443DULL, 0xA6BBFDE530677F0DULL,
+    0x2F741EF8C1FE86FEULL, 0xFAFABE1C5D71A87EULL, 0xDED2FBABFBE58A30ULL,
+    0xB6855DFE72B0A66EULL, 0x1EFC8CE0BA8A4FE8ULL, 0x83F81D4A3F2FA457ULL,
+    0xA1FE3075A577E231ULL, 0xD5B8019488D9C0A0ULL, 0x624816CDAD9A95F9ULL,
+    0x99E9E31650C1217BULL, 0x51AA691E0E423CFCULL, 0x1C217E6C3826E52CULL,
+    0x51A8A93109703FEEULL, 0xBB7099876A460E74ULL, 0x541FC68C9C86B022ULL,
+    0x59160CC046FD8251ULL, 0x2846C0BA35C35F5CULL, 0x54504AC78B758282ULL,
+    0x29388839D2AF05E4ULL, 0xCB2C0F1CC01BD702ULL, 0x555B2F747C932665ULL,
+    0x86B63142A3AB8829ULL, 0x0B8CC3BDF64B10EFULL, 0x687FEB69EDD1CC5EULL,
+    0xFDB23FCEC9509D43ULL, 0x1E425A31D951AE64ULL, 0x36AD004CF600C838ULL,
+    0xA40E329CCFF46AAAULL, 0xA41D570D7938DAD4ULL, 0x62A69526D43161C1ULL,
+    0x3FDD4A8E9ADB1E69ULL, 0x5B3B71F9DC6B80D6ULL, 0xEC9D1810C6272B04ULL,
+    0x8CCF2DD5CACEF403ULL, 0xE49F5235C95B9117ULL, 0x505DC82DB854338AULL,
+    0x62292C311562A846ULL, 0xD72B03746AE77F5EULL, 0xF9C9091B462D538CULL,
+    0x0AE8DB5847A67CBEULL, 0xB3A739C122611682ULL, 0xEEAAC0232A281BF6ULL,
+    0x94C6651E77CAF992ULL, 0x763E4E4B94B2BBC1ULL, 0x587E38DA0077D9B4ULL,
+    0x7FB29F8C183023C3ULL, 0x0ABEC1FFF9E3A26EULL, 0xA00EF092350511E3ULL,
+    0xB855322EDB6340D8ULL, 0xA52471F7A9A96910ULL, 0x388147FB4CFDB477ULL,
+    0x9B1F5C3E4E46041FULL, 0xCDAD0657FCCFEC71ULL, 0xB38E8C334C701C3AULL,
+    0x917BDD64B1C0FD4CULL, 0x3BB454329B7624C8ULL, 0x23BA4442CAF53EA6ULL,
+    0x4E677D2C38532A3AULL, 0x0BFD64B645036C7AULL, 0xC68A007E5E0DD902ULL,
+    0x4DB5A851F44182E1ULL, 0x8EC9B55A7F88A46BULL, 0x0A8291CDCEC97DCFULL,
+    0x2A4ECEA9F98D0ACCULL, 0x1A1DB93D7140003CULL, 0x092999A333CB8B7AULL,
+    0x6DC778F971AD0038ULL, 0xA907600A918130C4ULL, 0xED6A1E012D9E6832ULL,
+    0x7135C886EFB4318AULL, 0x87F55BA57E31CC7AULL, 0x7763CF1D55034004ULL,
+    0xAC7D5F42D69F6D18ULL, 0x7930E9E4E58857B6ULL, 0x6E6F52C3164DF4FBULL,
+    0x25E41D2B669E1EF1ULL, 0x3C1B20EE3FD59D7CULL, 0x0ABCD06BFA53DDEFULL,
+    0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL, 0xE86D2BC522363A0DULL,
+    0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL, 0xF4FD4452E2D74DD3ULL,
+    0xB4130C93BC437944ULL, 0xAEFE130985139270ULL, 0x598CB0FAC186D91CULL,
+    0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL, 0xBC34F4DEF99C0238ULL,
+    0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL, 0xC6F34A26C1B2EFFAULL,
+    0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL,
+    0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL,
+    0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL,
+    0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL,
+    0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL,
+    0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL,
+    0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL,
+    0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL,
+    0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL,
+    0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
+};
+
+# elif BN_BITS2 == 32
+
+static const BN_ULONG dh1024_160_p[] = {
+    0x2E4A4371, 0xDF1FB2BC, 0x6D4DA708, 0xE68CFDA7, 0x365C1A65, 0x45BF37DF,
+    0x0DC8B4BD, 0xA151AF5F, 0xF55BCCC0, 0xFAA31A4F, 0xE5644738, 0x4EFFD6FA,
+    0x219A7372, 0x98488E9C, 0x90C4BD70, 0xACCBDD7D, 0xD49B83BF, 0x24975C3C,
+    0xA9061123, 0x13ECB4AE, 0x2EE652C0, 0x9838EF1E, 0x75A23D18, 0x6073E286,
+    0x52D23B61, 0x9A6A9DCA, 0xFB06A3C6, 0x52C99FBC, 0xAE5D54EC, 0xDE92DE5E,
+    0xA080E01D, 0xB10B8F96
+};
+
+static const BN_ULONG dh1024_160_g[] = {
+    0x22B3B2E5, 0x855E6EEB, 0xF97C2A24, 0x858F4DCE, 0x18D08BC8, 0x2D779D59,
+    0x8E73AFA3, 0xD662A4D1, 0x69B6A28A, 0x1DBF0A01, 0x7A091F53, 0xA6A24C08,
+    0x63F80A76, 0x909D0D22, 0xB9A92EE1, 0xD7FBD7D3, 0x9E2749F4, 0x5E91547F,
+    0xB01B886A, 0x160217B4, 0x5504F213, 0x777E690F, 0x5C41564B, 0x266FEA1E,
+    0x14266D31, 0xD6406CFF, 0x58AC507F, 0xF8104DD2, 0xEFB99905, 0x6765A442,
+    0xC3FD3412, 0xA4D1CBD5
+};
+
+static const BN_ULONG dh1024_160_q[] = {
+    0x49462353, 0x64B7CB9D, 0x8ABA4E7D, 0x81A8DF27, 0xF518AA87
+};
+
+static const BN_ULONG dh2048_224_p[] = {
+    0x0C10E64F, 0x0AC4DFFE, 0x4E71B81C, 0xCF9DE538, 0xFFA31F71, 0x7EF363E2,
+    0x6B8E75B9, 0xE3FB73C1, 0x4BA80A29, 0xC9B53DCF, 0x16E79763, 0x23F10B0E,
+    0x13042E9B, 0xC52172E4, 0xC928B2B9, 0xBE60E69C, 0xB9E587E8, 0x80CD86A1,
+    0x98C641A4, 0x315D75E1, 0x44328387, 0xCDF93ACC, 0xDC0A486D, 0x15987D9A,
+    0x1FD5A074, 0x7310F712, 0xDE31EFDC, 0x278273C7, 0x415D9330, 0x1602E714,
+    0xBC8985DB, 0x81286130, 0x70918836, 0xB3BF8A31, 0xB9C49708, 0x6A00E0A0,
+    0x8BBC27BE, 0xC6BA0B2C, 0xED34DBF6, 0xC9F98D11, 0xB6C12207, 0x7AD5B7D0,
+    0x55B7394B, 0xD91E8FEF, 0xEFDA4DF8, 0x9037C9ED, 0xAD6AC212, 0x6D3F8152,
+    0x1274A0A6, 0x1DE6B85A, 0x309C180E, 0xEB3D688A, 0x7BA1DF15, 0xAF9A3C40,
+    0xF95A56DB, 0xE6FA141D, 0xB61D0A75, 0xB54B1597, 0x683B9FD1, 0xA20D64E5,
+    0x9559C51F, 0xD660FAA7, 0x9123A9D0, 0xAD107E1E
+};
+
+static const BN_ULONG dh2048_224_g[] = {
+    0x191F2BFA, 0x84B890D3, 0x2A7065B3, 0x81BC087F, 0xF6EC0179, 0x19C418E1,
+    0x71CFFF4C, 0x7B5A0F1C, 0x9B6AA4BD, 0xEDFE72FE, 0x94B30269, 0x81E1BCFE,
+    0x8D6C0191, 0x566AFBB4, 0x409D13CD, 0xB539CCE3, 0x5F2FF381, 0x6AA21E7F,
+    0x770589EF, 0xD9E263E4, 0xD19963DD, 0x10E183ED, 0x150B8EEB, 0xB70A8137,
+    0x28C8F8AC, 0x051AE3D4, 0x0C1AB15B, 0xBB77A86F, 0x16A330EF, 0x6E3025E3,
+    0xD6F83456, 0x19529A45, 0x118E98D1, 0xF180EB34, 0x50717CBE, 0xB5F6C6B2,
+    0xDA7460CD, 0x09939D54, 0x22EA1ED4, 0xE2471504, 0x521BC98A, 0xB8A762D0,
+    0x5AC1348B, 0xF4D02727, 0x1999024A, 0xC1766910, 0xA8D66AD7, 0xBE5E9001,
+    0x620A8652, 0xC57DB17C, 0x00C29F52, 0xAB739D77, 0xA70C4AFA, 0xDD921F01,
+    0x10B9A6F0, 0xA6824A4E, 0xCFE4FFE3, 0x74866A08, 0x89998CAF, 0x6CDEBE7B,
+    0x8FFDAC50, 0x9DF30B5C, 0x4F2D9AE3, 0xAC4032EF
+};
+
+static const BN_ULONG dh2048_224_q[] = {
+    0xB36371EB, 0xBF389A99, 0x4738CEBC, 0x1F80535A, 0x99717710, 0xC58D93FE,
+    0x801C0D34
+};
+
+static const BN_ULONG dh2048_256_p[] = {
+    0x1E1A1597, 0xDB094AE9, 0xD7EF09CA, 0x693877FA, 0x6E11715F, 0x6116D227,
+    0xC198AF12, 0xA4B54330, 0xD7014103, 0x75F26375, 0x54E710C3, 0xC3A3960A,
+    0xBD0BE621, 0xDED4010A, 0x89962856, 0xC0B857F6, 0x71506026, 0xB3CA3F79,
+    0xE6B486F6, 0x1CCACB83, 0x14056425, 0x67E144E5, 0xA41825D9, 0xF6A167B5,
+    0x96524D8E, 0x3AD83477, 0x51BFA4AB, 0xF13C6D9A, 0x35488A0E, 0x2D525267,
+    0xCAA6B790, 0xB63ACAE1, 0x81B23F76, 0x4FDB70C5, 0x12307F5C, 0xBC39A0BF,
+    0xB1E59BB8, 0xB941F54E, 0xD45F9088, 0x6C5BFC11, 0x4275BF7B, 0x22E0B1EF,
+    0x5B4758C0, 0x91F9E672, 0x6BCF67ED, 0x5A8A9D30, 0x97517ABD, 0x209E0C64,
+    0x830E9A7C, 0x3BF4296D, 0x34096FAA, 0x16C3D911, 0x61B2AA30, 0xFAF7DF45,
+    0xD61957D4, 0xE00DF8F1, 0x435E3B00, 0x5D2CEED4, 0x660DD0F2, 0x8CEEF608,
+    0x65195999, 0xFFBBD19C, 0xB4B6663C, 0x87A8E61D
+};
+
+static const BN_ULONG dh2048_256_g[] = {
+    0x6CC41659, 0x664B4C0F, 0xEF98C582, 0x5E2327CF, 0xD4795451, 0xD647D148,
+    0x90F00EF8, 0x2F630784, 0x1DB246C3, 0x184B523D, 0xCDC67EB6, 0xC7891428,
+    0x0DF92B52, 0x7FD02837, 0x64E0EC37, 0xB3353BBB, 0x57CD0915, 0xECD06E15,
+    0xDF016199, 0xB7D2BBD2, 0x052588B9, 0xC8484B1E, 0x13D3FE14, 0xDB2A3B73,
+    0xD182EA0A, 0xD052B985, 0xE83B9C80, 0xA4BD1BFF, 0xFB3F2E55, 0xDFC967C1,
+    0x767164E1, 0xB5045AF2, 0x6F2F9193, 0x1D14348F, 0x428EBC83, 0x64E67982,
+    0x82D6ED38, 0x8AC376D2, 0xAAB8A862, 0x777DE62A, 0xE9EC144B, 0xDDF463E5,
+    0xC77A57F2, 0x0196F931, 0x41000A65, 0xA55AE313, 0xC28CBB18, 0x901228F8,
+    0x7E8C6F62, 0xBC3773BF, 0x0C6B47B1, 0xBE3A6C1B, 0xAC0BB555, 0xFF4FED4A,
+    0x77BE463F, 0x10DBC150, 0x1A0BA125, 0x07F4793A, 0x21EF2054, 0x4CA7B18F,
+    0x60EDBD48, 0x2E775066, 0x73134D0B, 0x3FB32C9B
+};
+
+static const BN_ULONG dh2048_256_q[] = {
+    0x64F5FBD3, 0xA308B0FE, 0x1EB3750B, 0x99B1A47D, 0x40129DA2, 0xB4479976,
+    0xA709A097, 0x8CF83642
+};
+
+/* Primes from RFC 7919 */
+
+static const BN_ULONG ffdhe2048_p[] = {
+    0xFFFFFFFF, 0xFFFFFFFF, 0x61285C97, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26,
+    0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B,
+    0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD,
+    0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7,
+    0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B,
+    0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1,
+    0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E,
+    0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5,
+    0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE,
+    0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620,
+    0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
+};
+
+static const BN_ULONG ffdhe3072_p[] = {
+    0xFFFFFFFF, 0xFFFFFFFF, 0x66C62E37, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE,
+    0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197,
+    0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E,
+    0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309,
+    0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9,
+    0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238,
+    0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC,
+    0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C,
+    0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8,
+    0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7,
+    0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F,
+    0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70,
+    0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F,
+    0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363,
+    0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583,
+    0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
+};
+
+static const BN_ULONG ffdhe4096_p[] = {
+    0xFFFFFFFF, 0xFFFFFFFF, 0x5E655F6A, 0xC68A007E, 0xF44182E1, 0x4DB5A851,
+    0x7F88A46B, 0x8EC9B55A, 0xCEC97DCF, 0x0A8291CD, 0xF98D0ACC, 0x2A4ECEA9,
+    0x7140003C, 0x1A1DB93D, 0x33CB8B7A, 0x092999A3, 0x71AD0038, 0x6DC778F9,
+    0x918130C4, 0xA907600A, 0x2D9E6832, 0xED6A1E01, 0xEFB4318A, 0x7135C886,
+    0x7E31CC7A, 0x87F55BA5, 0x55034004, 0x7763CF1D, 0xD69F6D18, 0xAC7D5F42,
+    0xE58857B6, 0x7930E9E4, 0x164DF4FB, 0x6E6F52C3, 0x669E1EF1, 0x25E41D2B,
+    0x3FD59D7C, 0x3C1B20EE, 0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42,
+    0x9B0DEADA, 0xABC52197, 0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB,
+    0x71F54BFF, 0x64F2E21E, 0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93,
+    0x85139270, 0xAEFE1309, 0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26,
+    0xD6E6C907, 0x61B46FC9, 0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B,
+    0x611FCFDC, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183,
+    0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232,
+    0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1,
+    0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3,
+    0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182,
+    0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA,
+    0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555,
+    0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202,
+    0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641,
+    0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458,
+    0xFFFFFFFF, 0xFFFFFFFF
+};
+
+static const BN_ULONG ffdhe6144_p[] = {
+    0xFFFFFFFF, 0xFFFFFFFF, 0xD0E40E65, 0xA40E329C, 0x7938DAD4, 0xA41D570D,
+    0xD43161C1, 0x62A69526, 0x9ADB1E69, 0x3FDD4A8E, 0xDC6B80D6, 0x5B3B71F9,
+    0xC6272B04, 0xEC9D1810, 0xCACEF403, 0x8CCF2DD5, 0xC95B9117, 0xE49F5235,
+    0xB854338A, 0x505DC82D, 0x1562A846, 0x62292C31, 0x6AE77F5E, 0xD72B0374,
+    0x462D538C, 0xF9C9091B, 0x47A67CBE, 0x0AE8DB58, 0x22611682, 0xB3A739C1,
+    0x2A281BF6, 0xEEAAC023, 0x77CAF992, 0x94C6651E, 0x94B2BBC1, 0x763E4E4B,
+    0x0077D9B4, 0x587E38DA, 0x183023C3, 0x7FB29F8C, 0xF9E3A26E, 0x0ABEC1FF,
+    0x350511E3, 0xA00EF092, 0xDB6340D8, 0xB855322E, 0xA9A96910, 0xA52471F7,
+    0x4CFDB477, 0x388147FB, 0x4E46041F, 0x9B1F5C3E, 0xFCCFEC71, 0xCDAD0657,
+    0x4C701C3A, 0xB38E8C33, 0xB1C0FD4C, 0x917BDD64, 0x9B7624C8, 0x3BB45432,
+    0xCAF53EA6, 0x23BA4442, 0x38532A3A, 0x4E677D2C, 0x45036C7A, 0x0BFD64B6,
+    0x5E0DD902, 0xC68A007E, 0xF44182E1, 0x4DB5A851, 0x7F88A46B, 0x8EC9B55A,
+    0xCEC97DCF, 0x0A8291CD, 0xF98D0ACC, 0x2A4ECEA9, 0x7140003C, 0x1A1DB93D,
+    0x33CB8B7A, 0x092999A3, 0x71AD0038, 0x6DC778F9, 0x918130C4, 0xA907600A,
+    0x2D9E6832, 0xED6A1E01, 0xEFB4318A, 0x7135C886, 0x7E31CC7A, 0x87F55BA5,
+    0x55034004, 0x7763CF1D, 0xD69F6D18, 0xAC7D5F42, 0xE58857B6, 0x7930E9E4,
+    0x164DF4FB, 0x6E6F52C3, 0x669E1EF1, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE,
+    0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197,
+    0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E,
+    0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309,
+    0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9,
+    0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238,
+    0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC,
+    0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C,
+    0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8,
+    0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7,
+    0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F,
+    0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70,
+    0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F,
+    0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363,
+    0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583,
+    0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
+};
+
+static const BN_ULONG ffdhe8192_p[] = {
+    0xFFFFFFFF, 0xFFFFFFFF, 0xC5C6424C, 0xD68C8BB7, 0x838FF88C, 0x011E2A94,
+    0xA9F4614E, 0x0822E506, 0xF7A8443D, 0x97D11D49, 0x30677F0D, 0xA6BBFDE5,
+    0xC1FE86FE, 0x2F741EF8, 0x5D71A87E, 0xFAFABE1C, 0xFBE58A30, 0xDED2FBAB,
+    0x72B0A66E, 0xB6855DFE, 0xBA8A4FE8, 0x1EFC8CE0, 0x3F2FA457, 0x83F81D4A,
+    0xA577E231, 0xA1FE3075, 0x88D9C0A0, 0xD5B80194, 0xAD9A95F9, 0x624816CD,
+    0x50C1217B, 0x99E9E316, 0x0E423CFC, 0x51AA691E, 0x3826E52C, 0x1C217E6C,
+    0x09703FEE, 0x51A8A931, 0x6A460E74, 0xBB709987, 0x9C86B022, 0x541FC68C,
+    0x46FD8251, 0x59160CC0, 0x35C35F5C, 0x2846C0BA, 0x8B758282, 0x54504AC7,
+    0xD2AF05E4, 0x29388839, 0xC01BD702, 0xCB2C0F1C, 0x7C932665, 0x555B2F74,
+    0xA3AB8829, 0x86B63142, 0xF64B10EF, 0x0B8CC3BD, 0xEDD1CC5E, 0x687FEB69,
+    0xC9509D43, 0xFDB23FCE, 0xD951AE64, 0x1E425A31, 0xF600C838, 0x36AD004C,
+    0xCFF46AAA, 0xA40E329C, 0x7938DAD4, 0xA41D570D, 0xD43161C1, 0x62A69526,
+    0x9ADB1E69, 0x3FDD4A8E, 0xDC6B80D6, 0x5B3B71F9, 0xC6272B04, 0xEC9D1810,
+    0xCACEF403, 0x8CCF2DD5, 0xC95B9117, 0xE49F5235, 0xB854338A, 0x505DC82D,
+    0x1562A846, 0x62292C31, 0x6AE77F5E, 0xD72B0374, 0x462D538C, 0xF9C9091B,
+    0x47A67CBE, 0x0AE8DB58, 0x22611682, 0xB3A739C1, 0x2A281BF6, 0xEEAAC023,
+    0x77CAF992, 0x94C6651E, 0x94B2BBC1, 0x763E4E4B, 0x0077D9B4, 0x587E38DA,
+    0x183023C3, 0x7FB29F8C, 0xF9E3A26E, 0x0ABEC1FF, 0x350511E3, 0xA00EF092,
+    0xDB6340D8, 0xB855322E, 0xA9A96910, 0xA52471F7, 0x4CFDB477, 0x388147FB,
+    0x4E46041F, 0x9B1F5C3E, 0xFCCFEC71, 0xCDAD0657, 0x4C701C3A, 0xB38E8C33,
+    0xB1C0FD4C, 0x917BDD64, 0x9B7624C8, 0x3BB45432, 0xCAF53EA6, 0x23BA4442,
+    0x38532A3A, 0x4E677D2C, 0x45036C7A, 0x0BFD64B6, 0x5E0DD902, 0xC68A007E,
+    0xF44182E1, 0x4DB5A851, 0x7F88A46B, 0x8EC9B55A, 0xCEC97DCF, 0x0A8291CD,
+    0xF98D0ACC, 0x2A4ECEA9, 0x7140003C, 0x1A1DB93D, 0x33CB8B7A, 0x092999A3,
+    0x71AD0038, 0x6DC778F9, 0x918130C4, 0xA907600A, 0x2D9E6832, 0xED6A1E01,
+    0xEFB4318A, 0x7135C886, 0x7E31CC7A, 0x87F55BA5, 0x55034004, 0x7763CF1D,
+    0xD69F6D18, 0xAC7D5F42, 0xE58857B6, 0x7930E9E4, 0x164DF4FB, 0x6E6F52C3,
+    0x669E1EF1, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE, 0xFA53DDEF, 0x0ABCD06B,
+    0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197, 0x22363A0D, 0xE86D2BC5,
+    0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E, 0xE2D74DD3, 0xF4FD4452,
+    0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309, 0xC186D91C, 0x598CB0FA,
+    0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9, 0xF99C0238, 0xBC34F4DE,
+    0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26,
+    0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B,
+    0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD,
+    0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7,
+    0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B,
+    0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1,
+    0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E,
+    0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5,
+    0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE,
+    0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620,
+    0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
+};
+
+# else
+#  error "unsupported BN_BITS2"
+# endif
+
+/* Macro to make a BIGNUM from static data */
+
+# define make_dh_bn(x) extern const BIGNUM _bignum_##x; \
+                       const BIGNUM _bignum_##x = { (BN_ULONG *) x, \
+                        OSSL_NELEM(x),\
+                        OSSL_NELEM(x),\
+                        0, BN_FLG_STATIC_DATA };
+
+static const BN_ULONG value_2 = 2;
+
+const BIGNUM _bignum_const_2 =
+    { (BN_ULONG *)&value_2, 1, 1, 0, BN_FLG_STATIC_DATA };
+
+make_dh_bn(dh1024_160_p)
+make_dh_bn(dh1024_160_g)
+make_dh_bn(dh1024_160_q)
+make_dh_bn(dh2048_224_p)
+make_dh_bn(dh2048_224_g)
+make_dh_bn(dh2048_224_q)
+make_dh_bn(dh2048_256_p)
+make_dh_bn(dh2048_256_g)
+make_dh_bn(dh2048_256_q)
+
+make_dh_bn(ffdhe2048_p)
+make_dh_bn(ffdhe3072_p)
+make_dh_bn(ffdhe4096_p)
+make_dh_bn(ffdhe6144_p)
+make_dh_bn(ffdhe8192_p)
+
+
+#endif
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_div.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_div.c
@ -0,0 +1,457 @@
+/*
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <assert.h>
+#include <openssl/bn.h>
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+/* The old slow way */
+#if 0
+int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
+           BN_CTX *ctx)
+{
+    int i, nm, nd;
+    int ret = 0;
+    BIGNUM *D;
+
+    bn_check_top(m);
+    bn_check_top(d);
+    if (BN_is_zero(d)) {
+        BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO);
+        return 0;
+    }
+
+    if (BN_ucmp(m, d) < 0) {
+        if (rem != NULL) {
+            if (BN_copy(rem, m) == NULL)
+                return 0;
+        }
+        if (dv != NULL)
+            BN_zero(dv);
+        return 1;
+    }
+
+    BN_CTX_start(ctx);
+    D = BN_CTX_get(ctx);
+    if (dv == NULL)
+        dv = BN_CTX_get(ctx);
+    if (rem == NULL)
+        rem = BN_CTX_get(ctx);
+    if (D == NULL || dv == NULL || rem == NULL)
+        goto end;
+
+    nd = BN_num_bits(d);
+    nm = BN_num_bits(m);
+    if (BN_copy(D, d) == NULL)
+        goto end;
+    if (BN_copy(rem, m) == NULL)
+        goto end;
+
+    /*
+     * The next 2 are needed so we can do a dv->d[0]|=1 later since
+     * BN_lshift1 will only work once there is a value :-)
+     */
+    BN_zero(dv);
+    if (bn_wexpand(dv, 1) == NULL)
+        goto end;
+    dv->top = 1;
+
+    if (!BN_lshift(D, D, nm - nd))
+        goto end;
+    for (i = nm - nd; i >= 0; i--) {
+        if (!BN_lshift1(dv, dv))
+            goto end;
+        if (BN_ucmp(rem, D) >= 0) {
+            dv->d[0] |= 1;
+            if (!BN_usub(rem, rem, D))
+                goto end;
+        }
+/* CAN IMPROVE (and have now :=) */
+        if (!BN_rshift1(D, D))
+            goto end;
+    }
+    rem->neg = BN_is_zero(rem) ? 0 : m->neg;
+    dv->neg = m->neg ^ d->neg;
+    ret = 1;
+ end:
+    BN_CTX_end(ctx);
+    return ret;
+}
+
+#else
+
+# if defined(BN_DIV3W)
+BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0);
+# elif 0
+/*
+ * This is #if-ed away, because it's a reference for assembly implementations,
+ * where it can and should be made constant-time. But if you want to test it,
+ * just replace 0 with 1.
+ */
+#  if BN_BITS2 == 64 && defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
+#   undef BN_ULLONG
+#   define BN_ULLONG __uint128_t
+#   define BN_LLONG
+#  endif
+
+#  ifdef BN_LLONG
+#   define BN_DIV3W
+/*
+ * Interface is somewhat quirky, |m| is pointer to most significant limb,
+ * and less significant limb is referred at |m[-1]|. This means that caller
+ * is responsible for ensuring that |m[-1]| is valid. Second condition that
+ * has to be met is that |d0|'s most significant bit has to be set. Or in
+ * other words divisor has to be "bit-aligned to the left." bn_div_fixed_top
+ * does all this. The subroutine considers four limbs, two of which are
+ * "overlapping," hence the name...
+ */
+static BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0)
+{
+    BN_ULLONG R = ((BN_ULLONG)m[0] << BN_BITS2) | m[-1];
+    BN_ULLONG D = ((BN_ULLONG)d0 << BN_BITS2) | d1;
+    BN_ULONG Q = 0, mask;
+    int i;
+
+    for (i = 0; i < BN_BITS2; i++) {
+        Q <<= 1;
+        if (R >= D) {
+            Q |= 1;
+            R -= D;
+        }
+        D >>= 1;
+    }
+
+    mask = 0 - (Q >> (BN_BITS2 - 1));   /* does it overflow? */
+
+    Q <<= 1;
+    Q |= (R >= D);
+
+    return (Q | mask) & BN_MASK2;
+}
+#  endif
+# endif
+
+static int bn_left_align(BIGNUM *num)
+{
+    BN_ULONG *d = num->d, n, m, rmask;
+    int top = num->top;
+    int rshift = BN_num_bits_word(d[top - 1]), lshift, i;
+
+    lshift = BN_BITS2 - rshift;
+    rshift %= BN_BITS2;            /* say no to undefined behaviour */
+    rmask = (BN_ULONG)0 - rshift;  /* rmask = 0 - (rshift != 0) */
+    rmask |= rmask >> 8;
+
+    for (i = 0, m = 0; i < top; i++) {
+        n = d[i];
+        d[i] = ((n << lshift) | m) & BN_MASK2;
+        m = (n >> rshift) & rmask;
+    }
+
+    return lshift;
+}
+
+# if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \
+    && !defined(PEDANTIC) && !defined(BN_DIV3W)
+#  if defined(__GNUC__) && __GNUC__>=2
+#   if defined(__i386) || defined (__i386__)
+   /*-
+    * There were two reasons for implementing this template:
+    * - GNU C generates a call to a function (__udivdi3 to be exact)
+    *   in reply to ((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0 (I fail to
+    *   understand why...);
+    * - divl doesn't only calculate quotient, but also leaves
+    *   remainder in %edx which we can definitely use here:-)
+    */
+#    undef bn_div_words
+#    define bn_div_words(n0,n1,d0)                \
+        ({  asm volatile (                      \
+                "divl   %4"                     \
+                : "=a"(q), "=d"(rem)            \
+                : "a"(n1), "d"(n0), "r"(d0)     \
+                : "cc");                        \
+            q;                                  \
+        })
+#    define REMAINDER_IS_ALREADY_CALCULATED
+#   elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
+   /*
+    * Same story here, but it's 128-bit by 64-bit division. Wow!
+    */
+#    undef bn_div_words
+#    define bn_div_words(n0,n1,d0)                \
+        ({  asm volatile (                      \
+                "divq   %4"                     \
+                : "=a"(q), "=d"(rem)            \
+                : "a"(n1), "d"(n0), "r"(d0)     \
+                : "cc");                        \
+            q;                                  \
+        })
+#    define REMAINDER_IS_ALREADY_CALCULATED
+#   endif                       /* __<cpu> */
+#  endif                        /* __GNUC__ */
+# endif                         /* OPENSSL_NO_ASM */
+
+/*-
+ * BN_div computes  dv := num / divisor, rounding towards
+ * zero, and sets up rm  such that  dv*divisor + rm = num  holds.
+ * Thus:
+ *     dv->neg == num->neg ^ divisor->neg  (unless the result is zero)
+ *     rm->neg == num->neg                 (unless the remainder is zero)
+ * If 'dv' or 'rm' is NULL, the respective value is not returned.
+ */
+int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
+           BN_CTX *ctx)
+{
+    int ret;
+
+    if (BN_is_zero(divisor)) {
+        BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO);
+        return 0;
+    }
+
+    /*
+     * Invalid zero-padding would have particularly bad consequences so don't
+     * just rely on bn_check_top() here (bn_check_top() works only for
+     * BN_DEBUG builds)
+     */
+    if (divisor->d[divisor->top - 1] == 0) {
+        BNerr(BN_F_BN_DIV, BN_R_NOT_INITIALIZED);
+        return 0;
+    }
+
+    ret = bn_div_fixed_top(dv, rm, num, divisor, ctx);
+
+    if (ret) {
+        if (dv != NULL)
+            bn_correct_top(dv);
+        if (rm != NULL)
+            bn_correct_top(rm);
+    }
+
+    return ret;
+}
+
+/*
+ * It's argued that *length* of *significant* part of divisor is public.
+ * Even if it's private modulus that is. Again, *length* is assumed
+ * public, but not *value*. Former is likely to be pre-defined by
+ * algorithm with bit granularity, though below subroutine is invariant
+ * of limb length. Thanks to this assumption we can require that |divisor|
+ * may not be zero-padded, yet claim this subroutine "constant-time"(*).
+ * This is because zero-padded dividend, |num|, is tolerated, so that
+ * caller can pass dividend of public length(*), but with smaller amount
+ * of significant limbs. This naturally means that quotient, |dv|, would
+ * contain correspongly less significant limbs as well, and will be zero-
+ * padded accordingly. Returned remainder, |rm|, will have same bit length
+ * as divisor, also zero-padded if needed. These actually leave sign bits
+ * in ambiguous state. In sense that we try to avoid negative zeros, while
+ * zero-padded zeros would retain sign.
+ *
+ * (*) "Constant-time-ness" has two pre-conditions:
+ *
+ *     - availability of constant-time bn_div_3_words;
+ *     - dividend is at least as "wide" as divisor, limb-wise, zero-padded
+ *       if so requied, which shouldn't be a privacy problem, because
+ *       divisor's length is considered public;
+ */
+int bn_div_fixed_top(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
+                     const BIGNUM *divisor, BN_CTX *ctx)
+{
+    int norm_shift, i, j, loop;
+    BIGNUM *tmp, *snum, *sdiv, *res;
+    BN_ULONG *resp, *wnum, *wnumtop;
+    BN_ULONG d0, d1;
+    int num_n, div_n;
+
+    assert(divisor->top > 0 && divisor->d[divisor->top - 1] != 0);
+
+    bn_check_top(num);
+    bn_check_top(divisor);
+    bn_check_top(dv);
+    bn_check_top(rm);
+
+    BN_CTX_start(ctx);
+    res = (dv == NULL) ? BN_CTX_get(ctx) : dv;
+    tmp = BN_CTX_get(ctx);
+    snum = BN_CTX_get(ctx);
+    sdiv = BN_CTX_get(ctx);
+    if (sdiv == NULL)
+        goto err;
+
+    /* First we normalise the numbers */
+    if (!BN_copy(sdiv, divisor))
+        goto err;
+    norm_shift = bn_left_align(sdiv);
+    sdiv->neg = 0;
+    /*
+     * Note that bn_lshift_fixed_top's output is always one limb longer
+     * than input, even when norm_shift is zero. This means that amount of
+     * inner loop iterations is invariant of dividend value, and that one
+     * doesn't need to compare dividend and divisor if they were originally
+     * of the same bit length.
+     */
+    if (!(bn_lshift_fixed_top(snum, num, norm_shift)))
+        goto err;
+
+    div_n = sdiv->top;
+    num_n = snum->top;
+
+    if (num_n <= div_n) {
+        /* caller didn't pad dividend -> no constant-time guarantee... */
+        if (bn_wexpand(snum, div_n + 1) == NULL)
+            goto err;
+        memset(&(snum->d[num_n]), 0, (div_n - num_n + 1) * sizeof(BN_ULONG));
+        snum->top = num_n = div_n + 1;
+    }
+
+    loop = num_n - div_n;
+    /*
+     * Lets setup a 'window' into snum This is the part that corresponds to
+     * the current 'area' being divided
+     */
+    wnum = &(snum->d[loop]);
+    wnumtop = &(snum->d[num_n - 1]);
+
+    /* Get the top 2 words of sdiv */
+    d0 = sdiv->d[div_n - 1];
+    d1 = (div_n == 1) ? 0 : sdiv->d[div_n - 2];
+
+    /* Setup quotient */
+    if (!bn_wexpand(res, loop))
+        goto err;
+    res->neg = (num->neg ^ divisor->neg);
+    res->top = loop;
+    res->flags |= BN_FLG_FIXED_TOP;
+    resp = &(res->d[loop]);
+
+    /* space for temp */
+    if (!bn_wexpand(tmp, (div_n + 1)))
+        goto err;
+
+    for (i = 0; i < loop; i++, wnumtop--) {
+        BN_ULONG q, l0;
+        /*
+         * the first part of the loop uses the top two words of snum and sdiv
+         * to calculate a BN_ULONG q such that | wnum - sdiv * q | < sdiv
+         */
+# if defined(BN_DIV3W)
+        q = bn_div_3_words(wnumtop, d1, d0);
+# else
+        BN_ULONG n0, n1, rem = 0;
+
+        n0 = wnumtop[0];
+        n1 = wnumtop[-1];
+        if (n0 == d0)
+            q = BN_MASK2;
+        else {                  /* n0 < d0 */
+            BN_ULONG n2 = (wnumtop == wnum) ? 0 : wnumtop[-2];
+#  ifdef BN_LLONG
+            BN_ULLONG t2;
+
+#   if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
+            q = (BN_ULONG)(((((BN_ULLONG) n0) << BN_BITS2) | n1) / d0);
+#   else
+            q = bn_div_words(n0, n1, d0);
+#   endif
+
+#   ifndef REMAINDER_IS_ALREADY_CALCULATED
+            /*
+             * rem doesn't have to be BN_ULLONG. The least we
+             * know it's less that d0, isn't it?
+             */
+            rem = (n1 - q * d0) & BN_MASK2;
+#   endif
+            t2 = (BN_ULLONG) d1 *q;
+
+            for (;;) {
+                if (t2 <= ((((BN_ULLONG) rem) << BN_BITS2) | n2))
+                    break;
+                q--;
+                rem += d0;
+                if (rem < d0)
+                    break;      /* don't let rem overflow */
+                t2 -= d1;
+            }
+#  else                         /* !BN_LLONG */
+            BN_ULONG t2l, t2h;
+
+            q = bn_div_words(n0, n1, d0);
+#   ifndef REMAINDER_IS_ALREADY_CALCULATED
+            rem = (n1 - q * d0) & BN_MASK2;
+#   endif
+
+#   if defined(BN_UMULT_LOHI)
+            BN_UMULT_LOHI(t2l, t2h, d1, q);
+#   elif defined(BN_UMULT_HIGH)
+            t2l = d1 * q;
+            t2h = BN_UMULT_HIGH(d1, q);
+#   else
+            {
+                BN_ULONG ql, qh;
+                t2l = LBITS(d1);
+                t2h = HBITS(d1);
+                ql = LBITS(q);
+                qh = HBITS(q);
+                mul64(t2l, t2h, ql, qh); /* t2=(BN_ULLONG)d1*q; */
+            }
+#   endif
+
+            for (;;) {
+                if ((t2h < rem) || ((t2h == rem) && (t2l <= n2)))
+                    break;
+                q--;
+                rem += d0;
+                if (rem < d0)
+                    break;      /* don't let rem overflow */
+                if (t2l < d1)
+                    t2h--;
+                t2l -= d1;
+            }
+#  endif                        /* !BN_LLONG */
+        }
+# endif                         /* !BN_DIV3W */
+
+        l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q);
+        tmp->d[div_n] = l0;
+        wnum--;
+        /*
+         * ignore top values of the bignums just sub the two BN_ULONG arrays
+         * with bn_sub_words
+         */
+        l0 = bn_sub_words(wnum, wnum, tmp->d, div_n + 1);
+        q -= l0;
+        /*
+         * Note: As we have considered only the leading two BN_ULONGs in
+         * the calculation of q, sdiv * q might be greater than wnum (but
+         * then (q-1) * sdiv is less or equal than wnum)
+         */
+        for (l0 = 0 - l0, j = 0; j < div_n; j++)
+            tmp->d[j] = sdiv->d[j] & l0;
+        l0 = bn_add_words(wnum, wnum, tmp->d, div_n);
+        (*wnumtop) += l0;
+        assert((*wnumtop) == 0);
+
+        /* store part of the result */
+        *--resp = q;
+    }
+    /* snum holds remainder, it's as wide as divisor */
+    snum->neg = num->neg;
+    snum->top = div_n;
+    snum->flags |= BN_FLG_FIXED_TOP;
+    if (rm != NULL)
+        bn_rshift_fixed_top(rm, snum, norm_shift);
+    BN_CTX_end(ctx);
+    return 1;
+ err:
+    bn_check_top(rm);
+    BN_CTX_end(ctx);
+    return 0;
+}
+#endif
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_err.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_err.c
@ -0,0 +1,118 @@
+/*
+ * Generated by util/mkerr.pl DO NOT EDIT
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <openssl/err.h>
+#include <openssl/bnerr.h>
+
+#ifndef OPENSSL_NO_ERR
+
+static const ERR_STRING_DATA BN_str_functs[] = {
+    {ERR_PACK(ERR_LIB_BN, BN_F_BNRAND, 0), "bnrand"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BNRAND_RANGE, 0), "bnrand_range"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_CONVERT_EX, 0),
+     "BN_BLINDING_convert_ex"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_CREATE_PARAM, 0),
+     "BN_BLINDING_create_param"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_INVERT_EX, 0),
+     "BN_BLINDING_invert_ex"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_NEW, 0), "BN_BLINDING_new"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_UPDATE, 0), "BN_BLINDING_update"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_BN2DEC, 0), "BN_bn2dec"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_BN2HEX, 0), "BN_bn2hex"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_COMPUTE_WNAF, 0), "bn_compute_wNAF"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_GET, 0), "BN_CTX_get"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_NEW, 0), "BN_CTX_new"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_START, 0), "BN_CTX_start"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_DIV, 0), "BN_div"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_DIV_RECP, 0), "BN_div_recp"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_EXP, 0), "BN_exp"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_EXPAND_INTERNAL, 0), "bn_expand_internal"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GENCB_NEW, 0), "BN_GENCB_new"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GENERATE_DSA_NONCE, 0),
+     "BN_generate_dsa_nonce"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GENERATE_PRIME_EX, 0),
+     "BN_generate_prime_ex"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD, 0), "BN_GF2m_mod"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_EXP, 0), "BN_GF2m_mod_exp"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_MUL, 0), "BN_GF2m_mod_mul"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SOLVE_QUAD, 0),
+     "BN_GF2m_mod_solve_quad"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR, 0),
+     "BN_GF2m_mod_solve_quad_arr"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SQR, 0), "BN_GF2m_mod_sqr"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SQRT, 0), "BN_GF2m_mod_sqrt"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_LSHIFT, 0), "BN_lshift"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP2_MONT, 0), "BN_mod_exp2_mont"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT, 0), "BN_mod_exp_mont"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT_CONSTTIME, 0),
+     "BN_mod_exp_mont_consttime"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT_WORD, 0),
+     "BN_mod_exp_mont_word"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_RECP, 0), "BN_mod_exp_recp"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_SIMPLE, 0), "BN_mod_exp_simple"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_INVERSE, 0), "BN_mod_inverse"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_INVERSE_NO_BRANCH, 0),
+     "BN_mod_inverse_no_branch"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_LSHIFT_QUICK, 0), "BN_mod_lshift_quick"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_SQRT, 0), "BN_mod_sqrt"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MONT_CTX_NEW, 0), "BN_MONT_CTX_new"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MPI2BN, 0), "BN_mpi2bn"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_NEW, 0), "BN_new"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_POOL_GET, 0), "BN_POOL_get"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_RAND, 0), "BN_rand"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_RAND_RANGE, 0), "BN_rand_range"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_RECP_CTX_NEW, 0), "BN_RECP_CTX_new"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_RSHIFT, 0), "BN_rshift"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_SET_WORDS, 0), "bn_set_words"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_STACK_PUSH, 0), "BN_STACK_push"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_USUB, 0), "BN_usub"},
+    {0, NULL}
+};
+
+static const ERR_STRING_DATA BN_str_reasons[] = {
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_ARG2_LT_ARG3), "arg2 lt arg3"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_BAD_RECIPROCAL), "bad reciprocal"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_BIGNUM_TOO_LONG), "bignum too long"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_BITS_TOO_SMALL), "bits too small"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_CALLED_WITH_EVEN_MODULUS),
+    "called with even modulus"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_DIV_BY_ZERO), "div by zero"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_ENCODING_ERROR), "encoding error"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_EXPAND_ON_STATIC_BIGNUM_DATA),
+    "expand on static bignum data"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_INPUT_NOT_REDUCED), "input not reduced"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_LENGTH), "invalid length"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_RANGE), "invalid range"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_SHIFT), "invalid shift"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_NOT_A_SQUARE), "not a square"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_NOT_INITIALIZED), "not initialized"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_NO_INVERSE), "no inverse"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_NO_SOLUTION), "no solution"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_PRIVATE_KEY_TOO_LARGE),
+    "private key too large"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_P_IS_NOT_PRIME), "p is not prime"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_TOO_MANY_ITERATIONS), "too many iterations"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_TOO_MANY_TEMPORARY_VARIABLES),
+    "too many temporary variables"},
+    {0, NULL}
+};
+
+#endif
+
+int ERR_load_BN_strings(void)
+{
+#ifndef OPENSSL_NO_ERR
+    if (ERR_func_error_string(BN_str_functs[0].error) == NULL) {
+        ERR_load_strings_const(BN_str_functs);
+        ERR_load_strings_const(BN_str_reasons);
+    }
+#endif
+    return 1;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_exp.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_exp.c
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_exp2.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_exp2.c
@ -0,0 +1,201 @@
+/*
+ * Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <stdio.h>
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+#define TABLE_SIZE      32
+
+int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
+                     const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
+                     BN_CTX *ctx, BN_MONT_CTX *in_mont)
+{
+    int i, j, bits, b, bits1, bits2, ret =
+        0, wpos1, wpos2, window1, window2, wvalue1, wvalue2;
+    int r_is_one = 1;
+    BIGNUM *d, *r;
+    const BIGNUM *a_mod_m;
+    /* Tables of variables obtained from 'ctx' */
+    BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE];
+    BN_MONT_CTX *mont = NULL;
+
+    bn_check_top(a1);
+    bn_check_top(p1);
+    bn_check_top(a2);
+    bn_check_top(p2);
+    bn_check_top(m);
+
+    if (!(m->d[0] & 1)) {
+        BNerr(BN_F_BN_MOD_EXP2_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
+        return 0;
+    }
+    bits1 = BN_num_bits(p1);
+    bits2 = BN_num_bits(p2);
+    if ((bits1 == 0) && (bits2 == 0)) {
+        ret = BN_one(rr);
+        return ret;
+    }
+
+    bits = (bits1 > bits2) ? bits1 : bits2;
+
+    BN_CTX_start(ctx);
+    d = BN_CTX_get(ctx);
+    r = BN_CTX_get(ctx);
+    val1[0] = BN_CTX_get(ctx);
+    val2[0] = BN_CTX_get(ctx);
+    if (val2[0] == NULL)
+        goto err;
+
+    if (in_mont != NULL)
+        mont = in_mont;
+    else {
+        if ((mont = BN_MONT_CTX_new()) == NULL)
+            goto err;
+        if (!BN_MONT_CTX_set(mont, m, ctx))
+            goto err;
+    }
+
+    window1 = BN_window_bits_for_exponent_size(bits1);
+    window2 = BN_window_bits_for_exponent_size(bits2);
+
+    /*
+     * Build table for a1:   val1[i] := a1^(2*i + 1) mod m  for i = 0 .. 2^(window1-1)
+     */
+    if (a1->neg || BN_ucmp(a1, m) >= 0) {
+        if (!BN_mod(val1[0], a1, m, ctx))
+            goto err;
+        a_mod_m = val1[0];
+    } else
+        a_mod_m = a1;
+    if (BN_is_zero(a_mod_m)) {
+        BN_zero(rr);
+        ret = 1;
+        goto err;
+    }
+
+    if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx))
+        goto err;
+    if (window1 > 1) {
+        if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx))
+            goto err;
+
+        j = 1 << (window1 - 1);
+        for (i = 1; i < j; i++) {
+            if (((val1[i] = BN_CTX_get(ctx)) == NULL) ||
+                !BN_mod_mul_montgomery(val1[i], val1[i - 1], d, mont, ctx))
+                goto err;
+        }
+    }
+
+    /*
+     * Build table for a2:   val2[i] := a2^(2*i + 1) mod m  for i = 0 .. 2^(window2-1)
+     */
+    if (a2->neg || BN_ucmp(a2, m) >= 0) {
+        if (!BN_mod(val2[0], a2, m, ctx))
+            goto err;
+        a_mod_m = val2[0];
+    } else
+        a_mod_m = a2;
+    if (BN_is_zero(a_mod_m)) {
+        BN_zero(rr);
+        ret = 1;
+        goto err;
+    }
+    if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx))
+        goto err;
+    if (window2 > 1) {
+        if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx))
+            goto err;
+
+        j = 1 << (window2 - 1);
+        for (i = 1; i < j; i++) {
+            if (((val2[i] = BN_CTX_get(ctx)) == NULL) ||
+                !BN_mod_mul_montgomery(val2[i], val2[i - 1], d, mont, ctx))
+                goto err;
+        }
+    }
+
+    /* Now compute the power product, using independent windows. */
+    r_is_one = 1;
+    wvalue1 = 0;                /* The 'value' of the first window */
+    wvalue2 = 0;                /* The 'value' of the second window */
+    wpos1 = 0;                  /* If wvalue1 > 0, the bottom bit of the
+                                 * first window */
+    wpos2 = 0;                  /* If wvalue2 > 0, the bottom bit of the
+                                 * second window */
+
+    if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
+        goto err;
+    for (b = bits - 1; b >= 0; b--) {
+        if (!r_is_one) {
+            if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
+                goto err;
+        }
+
+        if (!wvalue1)
+            if (BN_is_bit_set(p1, b)) {
+                /*
+                 * consider bits b-window1+1 .. b for this window
+                 */
+                i = b - window1 + 1;
+                while (!BN_is_bit_set(p1, i)) /* works for i<0 */
+                    i++;
+                wpos1 = i;
+                wvalue1 = 1;
+                for (i = b - 1; i >= wpos1; i--) {
+                    wvalue1 <<= 1;
+                    if (BN_is_bit_set(p1, i))
+                        wvalue1++;
+                }
+            }
+
+        if (!wvalue2)
+            if (BN_is_bit_set(p2, b)) {
+                /*
+                 * consider bits b-window2+1 .. b for this window
+                 */
+                i = b - window2 + 1;
+                while (!BN_is_bit_set(p2, i))
+                    i++;
+                wpos2 = i;
+                wvalue2 = 1;
+                for (i = b - 1; i >= wpos2; i--) {
+                    wvalue2 <<= 1;
+                    if (BN_is_bit_set(p2, i))
+                        wvalue2++;
+                }
+            }
+
+        if (wvalue1 && b == wpos1) {
+            /* wvalue1 is odd and < 2^window1 */
+            if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1], mont, ctx))
+                goto err;
+            wvalue1 = 0;
+            r_is_one = 0;
+        }
+
+        if (wvalue2 && b == wpos2) {
+            /* wvalue2 is odd and < 2^window2 */
+            if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1], mont, ctx))
+                goto err;
+            wvalue2 = 0;
+            r_is_one = 0;
+        }
+    }
+    if (!BN_from_montgomery(rr, r, mont, ctx))
+        goto err;
+    ret = 1;
+ err:
+    if (in_mont == NULL)
+        BN_MONT_CTX_free(mont);
+    BN_CTX_end(ctx);
+    bn_check_top(rr);
+    return ret;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_gcd.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_gcd.c
@ -0,0 +1,623 @@
+/*
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+static BIGNUM *euclid(BIGNUM *a, BIGNUM *b);
+
+int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx)
+{
+    BIGNUM *a, *b, *t;
+    int ret = 0;
+
+    bn_check_top(in_a);
+    bn_check_top(in_b);
+
+    BN_CTX_start(ctx);
+    a = BN_CTX_get(ctx);
+    b = BN_CTX_get(ctx);
+    if (b == NULL)
+        goto err;
+
+    if (BN_copy(a, in_a) == NULL)
+        goto err;
+    if (BN_copy(b, in_b) == NULL)
+        goto err;
+    a->neg = 0;
+    b->neg = 0;
+
+    if (BN_cmp(a, b) < 0) {
+        t = a;
+        a = b;
+        b = t;
+    }
+    t = euclid(a, b);
+    if (t == NULL)
+        goto err;
+
+    if (BN_copy(r, t) == NULL)
+        goto err;
+    ret = 1;
+ err:
+    BN_CTX_end(ctx);
+    bn_check_top(r);
+    return ret;
+}
+
+static BIGNUM *euclid(BIGNUM *a, BIGNUM *b)
+{
+    BIGNUM *t;
+    int shifts = 0;
+
+    bn_check_top(a);
+    bn_check_top(b);
+
+    /* 0 <= b <= a */
+    while (!BN_is_zero(b)) {
+        /* 0 < b <= a */
+
+        if (BN_is_odd(a)) {
+            if (BN_is_odd(b)) {
+                if (!BN_sub(a, a, b))
+                    goto err;
+                if (!BN_rshift1(a, a))
+                    goto err;
+                if (BN_cmp(a, b) < 0) {
+                    t = a;
+                    a = b;
+                    b = t;
+                }
+            } else {            /* a odd - b even */
+
+                if (!BN_rshift1(b, b))
+                    goto err;
+                if (BN_cmp(a, b) < 0) {
+                    t = a;
+                    a = b;
+                    b = t;
+                }
+            }
+        } else {                /* a is even */
+
+            if (BN_is_odd(b)) {
+                if (!BN_rshift1(a, a))
+                    goto err;
+                if (BN_cmp(a, b) < 0) {
+                    t = a;
+                    a = b;
+                    b = t;
+                }
+            } else {            /* a even - b even */
+
+                if (!BN_rshift1(a, a))
+                    goto err;
+                if (!BN_rshift1(b, b))
+                    goto err;
+                shifts++;
+            }
+        }
+        /* 0 <= b <= a */
+    }
+
+    if (shifts) {
+        if (!BN_lshift(a, a, shifts))
+            goto err;
+    }
+    bn_check_top(a);
+    return a;
+ err:
+    return NULL;
+}
+
+/* solves ax == 1 (mod n) */
+static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
+                                        const BIGNUM *a, const BIGNUM *n,
+                                        BN_CTX *ctx);
+
+BIGNUM *BN_mod_inverse(BIGNUM *in,
+                       const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
+{
+    BIGNUM *rv;
+    int noinv;
+    rv = int_bn_mod_inverse(in, a, n, ctx, &noinv);
+    if (noinv)
+        BNerr(BN_F_BN_MOD_INVERSE, BN_R_NO_INVERSE);
+    return rv;
+}
+
+BIGNUM *int_bn_mod_inverse(BIGNUM *in,
+                           const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx,
+                           int *pnoinv)
+{
+    BIGNUM *A, *B, *X, *Y, *M, *D, *T, *R = NULL;
+    BIGNUM *ret = NULL;
+    int sign;
+
+    /* This is invalid input so we don't worry about constant time here */
+    if (BN_abs_is_word(n, 1) || BN_is_zero(n)) {
+        if (pnoinv != NULL)
+            *pnoinv = 1;
+        return NULL;
+    }
+
+    if (pnoinv != NULL)
+        *pnoinv = 0;
+
+    if ((BN_get_flags(a, BN_FLG_CONSTTIME) != 0)
+        || (BN_get_flags(n, BN_FLG_CONSTTIME) != 0)) {
+        return BN_mod_inverse_no_branch(in, a, n, ctx);
+    }
+
+    bn_check_top(a);
+    bn_check_top(n);
+
+    BN_CTX_start(ctx);
+    A = BN_CTX_get(ctx);
+    B = BN_CTX_get(ctx);
+    X = BN_CTX_get(ctx);
+    D = BN_CTX_get(ctx);
+    M = BN_CTX_get(ctx);
+    Y = BN_CTX_get(ctx);
+    T = BN_CTX_get(ctx);
+    if (T == NULL)
+        goto err;
+
+    if (in == NULL)
+        R = BN_new();
+    else
+        R = in;
+    if (R == NULL)
+        goto err;
+
+    BN_one(X);
+    BN_zero(Y);
+    if (BN_copy(B, a) == NULL)
+        goto err;
+    if (BN_copy(A, n) == NULL)
+        goto err;
+    A->neg = 0;
+    if (B->neg || (BN_ucmp(B, A) >= 0)) {
+        if (!BN_nnmod(B, B, A, ctx))
+            goto err;
+    }
+    sign = -1;
+    /*-
+     * From  B = a mod |n|,  A = |n|  it follows that
+     *
+     *      0 <= B < A,
+     *     -sign*X*a  ==  B   (mod |n|),
+     *      sign*Y*a  ==  A   (mod |n|).
+     */
+
+    if (BN_is_odd(n) && (BN_num_bits(n) <= 2048)) {
+        /*
+         * Binary inversion algorithm; requires odd modulus. This is faster
+         * than the general algorithm if the modulus is sufficiently small
+         * (about 400 .. 500 bits on 32-bit systems, but much more on 64-bit
+         * systems)
+         */
+        int shift;
+
+        while (!BN_is_zero(B)) {
+            /*-
+             *      0 < B < |n|,
+             *      0 < A <= |n|,
+             * (1) -sign*X*a  ==  B   (mod |n|),
+             * (2)  sign*Y*a  ==  A   (mod |n|)
+             */
+
+            /*
+             * Now divide B by the maximum possible power of two in the
+             * integers, and divide X by the same value mod |n|. When we're
+             * done, (1) still holds.
+             */
+            shift = 0;
+            while (!BN_is_bit_set(B, shift)) { /* note that 0 < B */
+                shift++;
+
+                if (BN_is_odd(X)) {
+                    if (!BN_uadd(X, X, n))
+                        goto err;
+                }
+                /*
+                 * now X is even, so we can easily divide it by two
+                 */
+                if (!BN_rshift1(X, X))
+                    goto err;
+            }
+            if (shift > 0) {
+                if (!BN_rshift(B, B, shift))
+                    goto err;
+            }
+
+            /*
+             * Same for A and Y.  Afterwards, (2) still holds.
+             */
+            shift = 0;
+            while (!BN_is_bit_set(A, shift)) { /* note that 0 < A */
+                shift++;
+
+                if (BN_is_odd(Y)) {
+                    if (!BN_uadd(Y, Y, n))
+                        goto err;
+                }
+                /* now Y is even */
+                if (!BN_rshift1(Y, Y))
+                    goto err;
+            }
+            if (shift > 0) {
+                if (!BN_rshift(A, A, shift))
+                    goto err;
+            }
+
+            /*-
+             * We still have (1) and (2).
+             * Both  A  and  B  are odd.
+             * The following computations ensure that
+             *
+             *     0 <= B < |n|,
+             *      0 < A < |n|,
+             * (1) -sign*X*a  ==  B   (mod |n|),
+             * (2)  sign*Y*a  ==  A   (mod |n|),
+             *
+             * and that either  A  or  B  is even in the next iteration.
+             */
+            if (BN_ucmp(B, A) >= 0) {
+                /* -sign*(X + Y)*a == B - A  (mod |n|) */
+                if (!BN_uadd(X, X, Y))
+                    goto err;
+                /*
+                 * NB: we could use BN_mod_add_quick(X, X, Y, n), but that
+                 * actually makes the algorithm slower
+                 */
+                if (!BN_usub(B, B, A))
+                    goto err;
+            } else {
+                /*  sign*(X + Y)*a == A - B  (mod |n|) */
+                if (!BN_uadd(Y, Y, X))
+                    goto err;
+                /*
+                 * as above, BN_mod_add_quick(Y, Y, X, n) would slow things down
+                 */
+                if (!BN_usub(A, A, B))
+                    goto err;
+            }
+        }
+    } else {
+        /* general inversion algorithm */
+
+        while (!BN_is_zero(B)) {
+            BIGNUM *tmp;
+
+            /*-
+             *      0 < B < A,
+             * (*) -sign*X*a  ==  B   (mod |n|),
+             *      sign*Y*a  ==  A   (mod |n|)
+             */
+
+            /* (D, M) := (A/B, A%B) ... */
+            if (BN_num_bits(A) == BN_num_bits(B)) {
+                if (!BN_one(D))
+                    goto err;
+                if (!BN_sub(M, A, B))
+                    goto err;
+            } else if (BN_num_bits(A) == BN_num_bits(B) + 1) {
+                /* A/B is 1, 2, or 3 */
+                if (!BN_lshift1(T, B))
+                    goto err;
+                if (BN_ucmp(A, T) < 0) {
+                    /* A < 2*B, so D=1 */
+                    if (!BN_one(D))
+                        goto err;
+                    if (!BN_sub(M, A, B))
+                        goto err;
+                } else {
+                    /* A >= 2*B, so D=2 or D=3 */
+                    if (!BN_sub(M, A, T))
+                        goto err;
+                    if (!BN_add(D, T, B))
+                        goto err; /* use D (:= 3*B) as temp */
+                    if (BN_ucmp(A, D) < 0) {
+                        /* A < 3*B, so D=2 */
+                        if (!BN_set_word(D, 2))
+                            goto err;
+                        /*
+                         * M (= A - 2*B) already has the correct value
+                         */
+                    } else {
+                        /* only D=3 remains */
+                        if (!BN_set_word(D, 3))
+                            goto err;
+                        /*
+                         * currently M = A - 2*B, but we need M = A - 3*B
+                         */
+                        if (!BN_sub(M, M, B))
+                            goto err;
+                    }
+                }
+            } else {
+                if (!BN_div(D, M, A, B, ctx))
+                    goto err;
+            }
+
+            /*-
+             * Now
+             *      A = D*B + M;
+             * thus we have
+             * (**)  sign*Y*a  ==  D*B + M   (mod |n|).
+             */
+
+            tmp = A;    /* keep the BIGNUM object, the value does not matter */
+
+            /* (A, B) := (B, A mod B) ... */
+            A = B;
+            B = M;
+            /* ... so we have  0 <= B < A  again */
+
+            /*-
+             * Since the former  M  is now  B  and the former  B  is now  A,
+             * (**) translates into
+             *       sign*Y*a  ==  D*A + B    (mod |n|),
+             * i.e.
+             *       sign*Y*a - D*A  ==  B    (mod |n|).
+             * Similarly, (*) translates into
+             *      -sign*X*a  ==  A          (mod |n|).
+             *
+             * Thus,
+             *   sign*Y*a + D*sign*X*a  ==  B  (mod |n|),
+             * i.e.
+             *        sign*(Y + D*X)*a  ==  B  (mod |n|).
+             *
+             * So if we set  (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
+             *      -sign*X*a  ==  B   (mod |n|),
+             *       sign*Y*a  ==  A   (mod |n|).
+             * Note that  X  and  Y  stay non-negative all the time.
+             */
+
+            /*
+             * most of the time D is very small, so we can optimize tmp := D*X+Y
+             */
+            if (BN_is_one(D)) {
+                if (!BN_add(tmp, X, Y))
+                    goto err;
+            } else {
+                if (BN_is_word(D, 2)) {
+                    if (!BN_lshift1(tmp, X))
+                        goto err;
+                } else if (BN_is_word(D, 4)) {
+                    if (!BN_lshift(tmp, X, 2))
+                        goto err;
+                } else if (D->top == 1) {
+                    if (!BN_copy(tmp, X))
+                        goto err;
+                    if (!BN_mul_word(tmp, D->d[0]))
+                        goto err;
+                } else {
+                    if (!BN_mul(tmp, D, X, ctx))
+                        goto err;
+                }
+                if (!BN_add(tmp, tmp, Y))
+                    goto err;
+            }
+
+            M = Y;      /* keep the BIGNUM object, the value does not matter */
+            Y = X;
+            X = tmp;
+            sign = -sign;
+        }
+    }
+
+    /*-
+     * The while loop (Euclid's algorithm) ends when
+     *      A == gcd(a,n);
+     * we have
+     *       sign*Y*a  ==  A  (mod |n|),
+     * where  Y  is non-negative.
+     */
+
+    if (sign < 0) {
+        if (!BN_sub(Y, n, Y))
+            goto err;
+    }
+    /* Now  Y*a  ==  A  (mod |n|).  */
+
+    if (BN_is_one(A)) {
+        /* Y*a == 1  (mod |n|) */
+        if (!Y->neg && BN_ucmp(Y, n) < 0) {
+            if (!BN_copy(R, Y))
+                goto err;
+        } else {
+            if (!BN_nnmod(R, Y, n, ctx))
+                goto err;
+        }
+    } else {
+        if (pnoinv)
+            *pnoinv = 1;
+        goto err;
+    }
+    ret = R;
+ err:
+    if ((ret == NULL) && (in == NULL))
+        BN_free(R);
+    BN_CTX_end(ctx);
+    bn_check_top(ret);
+    return ret;
+}
+
+/*
+ * BN_mod_inverse_no_branch is a special version of BN_mod_inverse. It does
+ * not contain branches that may leak sensitive information.
+ */
+static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
+                                        const BIGNUM *a, const BIGNUM *n,
+                                        BN_CTX *ctx)
+{
+    BIGNUM *A, *B, *X, *Y, *M, *D, *T, *R = NULL;
+    BIGNUM *ret = NULL;
+    int sign;
+
+    bn_check_top(a);
+    bn_check_top(n);
+
+    BN_CTX_start(ctx);
+    A = BN_CTX_get(ctx);
+    B = BN_CTX_get(ctx);
+    X = BN_CTX_get(ctx);
+    D = BN_CTX_get(ctx);
+    M = BN_CTX_get(ctx);
+    Y = BN_CTX_get(ctx);
+    T = BN_CTX_get(ctx);
+    if (T == NULL)
+        goto err;
+
+    if (in == NULL)
+        R = BN_new();
+    else
+        R = in;
+    if (R == NULL)
+        goto err;
+
+    BN_one(X);
+    BN_zero(Y);
+    if (BN_copy(B, a) == NULL)
+        goto err;
+    if (BN_copy(A, n) == NULL)
+        goto err;
+    A->neg = 0;
+
+    if (B->neg || (BN_ucmp(B, A) >= 0)) {
+        /*
+         * Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
+         * BN_div_no_branch will be called eventually.
+         */
+         {
+            BIGNUM local_B;
+            bn_init(&local_B);
+            BN_with_flags(&local_B, B, BN_FLG_CONSTTIME);
+            if (!BN_nnmod(B, &local_B, A, ctx))
+                goto err;
+            /* Ensure local_B goes out of scope before any further use of B */
+        }
+    }
+    sign = -1;
+    /*-
+     * From  B = a mod |n|,  A = |n|  it follows that
+     *
+     *      0 <= B < A,
+     *     -sign*X*a  ==  B   (mod |n|),
+     *      sign*Y*a  ==  A   (mod |n|).
+     */
+
+    while (!BN_is_zero(B)) {
+        BIGNUM *tmp;
+
+        /*-
+         *      0 < B < A,
+         * (*) -sign*X*a  ==  B   (mod |n|),
+         *      sign*Y*a  ==  A   (mod |n|)
+         */
+
+        /*
+         * Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
+         * BN_div_no_branch will be called eventually.
+         */
+        {
+            BIGNUM local_A;
+            bn_init(&local_A);
+            BN_with_flags(&local_A, A, BN_FLG_CONSTTIME);
+
+            /* (D, M) := (A/B, A%B) ... */
+            if (!BN_div(D, M, &local_A, B, ctx))
+                goto err;
+            /* Ensure local_A goes out of scope before any further use of A */
+        }
+
+        /*-
+         * Now
+         *      A = D*B + M;
+         * thus we have
+         * (**)  sign*Y*a  ==  D*B + M   (mod |n|).
+         */
+
+        tmp = A;                /* keep the BIGNUM object, the value does not
+                                 * matter */
+
+        /* (A, B) := (B, A mod B) ... */
+        A = B;
+        B = M;
+        /* ... so we have  0 <= B < A  again */
+
+        /*-
+         * Since the former  M  is now  B  and the former  B  is now  A,
+         * (**) translates into
+         *       sign*Y*a  ==  D*A + B    (mod |n|),
+         * i.e.
+         *       sign*Y*a - D*A  ==  B    (mod |n|).
+         * Similarly, (*) translates into
+         *      -sign*X*a  ==  A          (mod |n|).
+         *
+         * Thus,
+         *   sign*Y*a + D*sign*X*a  ==  B  (mod |n|),
+         * i.e.
+         *        sign*(Y + D*X)*a  ==  B  (mod |n|).
+         *
+         * So if we set  (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
+         *      -sign*X*a  ==  B   (mod |n|),
+         *       sign*Y*a  ==  A   (mod |n|).
+         * Note that  X  and  Y  stay non-negative all the time.
+         */
+
+        if (!BN_mul(tmp, D, X, ctx))
+            goto err;
+        if (!BN_add(tmp, tmp, Y))
+            goto err;
+
+        M = Y;                  /* keep the BIGNUM object, the value does not
+                                 * matter */
+        Y = X;
+        X = tmp;
+        sign = -sign;
+    }
+
+    /*-
+     * The while loop (Euclid's algorithm) ends when
+     *      A == gcd(a,n);
+     * we have
+     *       sign*Y*a  ==  A  (mod |n|),
+     * where  Y  is non-negative.
+     */
+
+    if (sign < 0) {
+        if (!BN_sub(Y, n, Y))
+            goto err;
+    }
+    /* Now  Y*a  ==  A  (mod |n|).  */
+
+    if (BN_is_one(A)) {
+        /* Y*a == 1  (mod |n|) */
+        if (!Y->neg && BN_ucmp(Y, n) < 0) {
+            if (!BN_copy(R, Y))
+                goto err;
+        } else {
+            if (!BN_nnmod(R, Y, n, ctx))
+                goto err;
+        }
+    } else {
+        BNerr(BN_F_BN_MOD_INVERSE_NO_BRANCH, BN_R_NO_INVERSE);
+        goto err;
+    }
+    ret = R;
+ err:
+    if ((ret == NULL) && (in == NULL))
+        BN_free(R);
+    BN_CTX_end(ctx);
+    bn_check_top(ret);
+    return ret;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_gf2m.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_gf2m.c
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_intern.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_intern.c
@ -0,0 +1,199 @@
+/*
+ * Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+/*
+ * Determine the modified width-(w+1) Non-Adjacent Form (wNAF) of 'scalar'.
+ * This is an array  r[]  of values that are either zero or odd with an
+ * absolute value less than  2^w  satisfying
+ *     scalar = \sum_j r[j]*2^j
+ * where at most one of any  w+1  consecutive digits is non-zero
+ * with the exception that the most significant digit may be only
+ * w-1 zeros away from that next non-zero digit.
+ */
+signed char *bn_compute_wNAF(const BIGNUM *scalar, int w, size_t *ret_len)
+{
+    int window_val;
+    signed char *r = NULL;
+    int sign = 1;
+    int bit, next_bit, mask;
+    size_t len = 0, j;
+
+    if (BN_is_zero(scalar)) {
+        r = OPENSSL_malloc(1);
+        if (r == NULL) {
+            BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_MALLOC_FAILURE);
+            goto err;
+        }
+        r[0] = 0;
+        *ret_len = 1;
+        return r;
+    }
+
+    if (w <= 0 || w > 7) {      /* 'signed char' can represent integers with
+                                 * absolute values less than 2^7 */
+        BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR);
+        goto err;
+    }
+    bit = 1 << w;               /* at most 128 */
+    next_bit = bit << 1;        /* at most 256 */
+    mask = next_bit - 1;        /* at most 255 */
+
+    if (BN_is_negative(scalar)) {
+        sign = -1;
+    }
+
+    if (scalar->d == NULL || scalar->top == 0) {
+        BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR);
+        goto err;
+    }
+
+    len = BN_num_bits(scalar);
+    r = OPENSSL_malloc(len + 1); /*
+                                  * Modified wNAF may be one digit longer than binary representation
+                                  * (*ret_len will be set to the actual length, i.e. at most
+                                  * BN_num_bits(scalar) + 1)
+                                  */
+    if (r == NULL) {
+        BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_MALLOC_FAILURE);
+        goto err;
+    }
+    window_val = scalar->d[0] & mask;
+    j = 0;
+    while ((window_val != 0) || (j + w + 1 < len)) { /* if j+w+1 >= len,
+                                                      * window_val will not
+                                                      * increase */
+        int digit = 0;
+
+        /* 0 <= window_val <= 2^(w+1) */
+
+        if (window_val & 1) {
+            /* 0 < window_val < 2^(w+1) */
+
+            if (window_val & bit) {
+                digit = window_val - next_bit; /* -2^w < digit < 0 */
+
+#if 1                           /* modified wNAF */
+                if (j + w + 1 >= len) {
+                    /*
+                     * Special case for generating modified wNAFs:
+                     * no new bits will be added into window_val,
+                     * so using a positive digit here will decrease
+                     * the total length of the representation
+                     */
+
+                    digit = window_val & (mask >> 1); /* 0 < digit < 2^w */
+                }
+#endif
+            } else {
+                digit = window_val; /* 0 < digit < 2^w */
+            }
+
+            if (digit <= -bit || digit >= bit || !(digit & 1)) {
+                BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR);
+                goto err;
+            }
+
+            window_val -= digit;
+
+            /*
+             * now window_val is 0 or 2^(w+1) in standard wNAF generation;
+             * for modified window NAFs, it may also be 2^w
+             */
+            if (window_val != 0 && window_val != next_bit
+                && window_val != bit) {
+                BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR);
+                goto err;
+            }
+        }
+
+        r[j++] = sign * digit;
+
+        window_val >>= 1;
+        window_val += bit * BN_is_bit_set(scalar, j + w);
+
+        if (window_val > next_bit) {
+            BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR);
+            goto err;
+        }
+    }
+
+    if (j > len + 1) {
+        BNerr(BN_F_BN_COMPUTE_WNAF, ERR_R_INTERNAL_ERROR);
+        goto err;
+    }
+    *ret_len = j;
+    return r;
+
+ err:
+    OPENSSL_free(r);
+    return NULL;
+}
+
+int bn_get_top(const BIGNUM *a)
+{
+    return a->top;
+}
+
+int bn_get_dmax(const BIGNUM *a)
+{
+    return a->dmax;
+}
+
+void bn_set_all_zero(BIGNUM *a)
+{
+    int i;
+
+    for (i = a->top; i < a->dmax; i++)
+        a->d[i] = 0;
+}
+
+int bn_copy_words(BN_ULONG *out, const BIGNUM *in, int size)
+{
+    if (in->top > size)
+        return 0;
+
+    memset(out, 0, sizeof(*out) * size);
+    if (in->d != NULL)
+        memcpy(out, in->d, sizeof(*out) * in->top);
+    return 1;
+}
+
+BN_ULONG *bn_get_words(const BIGNUM *a)
+{
+    return a->d;
+}
+
+void bn_set_static_words(BIGNUM *a, const BN_ULONG *words, int size)
+{
+    /*
+     * |const| qualifier omission is compensated by BN_FLG_STATIC_DATA
+     * flag, which effectively means "read-only data".
+     */
+    a->d = (BN_ULONG *)words;
+    a->dmax = a->top = size;
+    a->neg = 0;
+    a->flags |= BN_FLG_STATIC_DATA;
+    bn_correct_top(a);
+}
+
+int bn_set_words(BIGNUM *a, const BN_ULONG *words, int num_words)
+{
+    if (bn_wexpand(a, num_words) == NULL) {
+        BNerr(BN_F_BN_SET_WORDS, ERR_R_MALLOC_FAILURE);
+        return 0;
+    }
+
+    memcpy(a->d, words, sizeof(BN_ULONG) * num_words);
+    a->top = num_words;
+    bn_correct_top(a);
+    return 1;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_kron.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_kron.c
@ -0,0 +1,140 @@
+/*
+ * Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+/* least significant word */
+#define BN_lsw(n) (((n)->top == 0) ? (BN_ULONG) 0 : (n)->d[0])
+
+/* Returns -2 for errors because both -1 and 0 are valid results. */
+int BN_kronecker(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
+{
+    int i;
+    int ret = -2;               /* avoid 'uninitialized' warning */
+    int err = 0;
+    BIGNUM *A, *B, *tmp;
+    /*-
+     * In 'tab', only odd-indexed entries are relevant:
+     * For any odd BIGNUM n,
+     *     tab[BN_lsw(n) & 7]
+     * is $(-1)^{(n^2-1)/8}$ (using TeX notation).
+     * Note that the sign of n does not matter.
+     */
+    static const int tab[8] = { 0, 1, 0, -1, 0, -1, 0, 1 };
+
+    bn_check_top(a);
+    bn_check_top(b);
+
+    BN_CTX_start(ctx);
+    A = BN_CTX_get(ctx);
+    B = BN_CTX_get(ctx);
+    if (B == NULL)
+        goto end;
+
+    err = !BN_copy(A, a);
+    if (err)
+        goto end;
+    err = !BN_copy(B, b);
+    if (err)
+        goto end;
+
+    /*
+     * Kronecker symbol, implemented according to Henri Cohen,
+     * "A Course in Computational Algebraic Number Theory"
+     * (algorithm 1.4.10).
+     */
+
+    /* Cohen's step 1: */
+
+    if (BN_is_zero(B)) {
+        ret = BN_abs_is_word(A, 1);
+        goto end;
+    }
+
+    /* Cohen's step 2: */
+
+    if (!BN_is_odd(A) && !BN_is_odd(B)) {
+        ret = 0;
+        goto end;
+    }
+
+    /* now  B  is non-zero */
+    i = 0;
+    while (!BN_is_bit_set(B, i))
+        i++;
+    err = !BN_rshift(B, B, i);
+    if (err)
+        goto end;
+    if (i & 1) {
+        /* i is odd */
+        /* (thus  B  was even, thus  A  must be odd!)  */
+
+        /* set 'ret' to $(-1)^{(A^2-1)/8}$ */
+        ret = tab[BN_lsw(A) & 7];
+    } else {
+        /* i is even */
+        ret = 1;
+    }
+
+    if (B->neg) {
+        B->neg = 0;
+        if (A->neg)
+            ret = -ret;
+    }
+
+    /*
+     * now B is positive and odd, so what remains to be done is to compute
+     * the Jacobi symbol (A/B) and multiply it by 'ret'
+     */
+
+    while (1) {
+        /* Cohen's step 3: */
+
+        /*  B  is positive and odd */
+
+        if (BN_is_zero(A)) {
+            ret = BN_is_one(B) ? ret : 0;
+            goto end;
+        }
+
+        /* now  A  is non-zero */
+        i = 0;
+        while (!BN_is_bit_set(A, i))
+            i++;
+        err = !BN_rshift(A, A, i);
+        if (err)
+            goto end;
+        if (i & 1) {
+            /* i is odd */
+            /* multiply 'ret' by  $(-1)^{(B^2-1)/8}$ */
+            ret = ret * tab[BN_lsw(B) & 7];
+        }
+
+        /* Cohen's step 4: */
+        /* multiply 'ret' by  $(-1)^{(A-1)(B-1)/4}$ */
+        if ((A->neg ? ~BN_lsw(A) : BN_lsw(A)) & BN_lsw(B) & 2)
+            ret = -ret;
+
+        /* (A, B) := (B mod |A|, |A|) */
+        err = !BN_nnmod(B, B, A, ctx);
+        if (err)
+            goto end;
+        tmp = A;
+        A = B;
+        B = tmp;
+        tmp->neg = 0;
+    }
+ end:
+    BN_CTX_end(ctx);
+    if (err)
+        return -2;
+    else
+        return ret;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_lcl.h
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_lcl.h
@ -0,0 +1,671 @@
+/*
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#ifndef HEADER_BN_LCL_H
+# define HEADER_BN_LCL_H
+
+/*
+ * The EDK2 build doesn't use bn_conf.h; it sets THIRTY_TWO_BIT or
+ * SIXTY_FOUR_BIT in its own environment since it doesn't re-run our
+ * Configure script and needs to support both 32-bit and 64-bit.
+ */
+# include <openssl/opensslconf.h>
+
+# if !defined(OPENSSL_SYS_UEFI)
+#  include "internal/bn_conf.h"
+# endif
+
+# include "internal/bn_int.h"
+
+/*
+ * These preprocessor symbols control various aspects of the bignum headers
+ * and library code. They're not defined by any "normal" configuration, as
+ * they are intended for development and testing purposes. NB: defining all
+ * three can be useful for debugging application code as well as openssl
+ * itself. BN_DEBUG - turn on various debugging alterations to the bignum
+ * code BN_DEBUG_RAND - uses random poisoning of unused words to trip up
+ * mismanagement of bignum internals. You must also define BN_DEBUG.
+ */
+/* #define BN_DEBUG */
+/* #define BN_DEBUG_RAND */
+
+# ifndef OPENSSL_SMALL_FOOTPRINT
+#  define BN_MUL_COMBA
+#  define BN_SQR_COMBA
+#  define BN_RECURSION
+# endif
+
+/*
+ * This next option uses the C libraries (2 word)/(1 word) function. If it is
+ * not defined, I use my C version (which is slower). The reason for this
+ * flag is that when the particular C compiler library routine is used, and
+ * the library is linked with a different compiler, the library is missing.
+ * This mostly happens when the library is built with gcc and then linked
+ * using normal cc.  This would be a common occurrence because gcc normally
+ * produces code that is 2 times faster than system compilers for the big
+ * number stuff. For machines with only one compiler (or shared libraries),
+ * this should be on.  Again this in only really a problem on machines using
+ * "long long's", are 32bit, and are not using my assembler code.
+ */
+# if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) || \
+    defined(OPENSSL_SYS_WIN32) || defined(linux)
+#  define BN_DIV2W
+# endif
+
+/*
+ * 64-bit processor with LP64 ABI
+ */
+# ifdef SIXTY_FOUR_BIT_LONG
+#  define BN_ULLONG       unsigned long long
+#  define BN_BITS4        32
+#  define BN_MASK2        (0xffffffffffffffffL)
+#  define BN_MASK2l       (0xffffffffL)
+#  define BN_MASK2h       (0xffffffff00000000L)
+#  define BN_MASK2h1      (0xffffffff80000000L)
+#  define BN_DEC_CONV     (10000000000000000000UL)
+#  define BN_DEC_NUM      19
+#  define BN_DEC_FMT1     "%lu"
+#  define BN_DEC_FMT2     "%019lu"
+# endif
+
+/*
+ * 64-bit processor other than LP64 ABI
+ */
+# ifdef SIXTY_FOUR_BIT
+#  undef BN_LLONG
+#  undef BN_ULLONG
+#  define BN_BITS4        32
+#  define BN_MASK2        (0xffffffffffffffffLL)
+#  define BN_MASK2l       (0xffffffffL)
+#  define BN_MASK2h       (0xffffffff00000000LL)
+#  define BN_MASK2h1      (0xffffffff80000000LL)
+#  define BN_DEC_CONV     (10000000000000000000ULL)
+#  define BN_DEC_NUM      19
+#  define BN_DEC_FMT1     "%llu"
+#  define BN_DEC_FMT2     "%019llu"
+# endif
+
+# ifdef THIRTY_TWO_BIT
+#  ifdef BN_LLONG
+#   if defined(_WIN32) && !defined(__GNUC__)
+#    define BN_ULLONG     unsigned __int64
+#   else
+#    define BN_ULLONG     unsigned long long
+#   endif
+#  endif
+#  define BN_BITS4        16
+#  define BN_MASK2        (0xffffffffL)
+#  define BN_MASK2l       (0xffff)
+#  define BN_MASK2h1      (0xffff8000L)
+#  define BN_MASK2h       (0xffff0000L)
+#  define BN_DEC_CONV     (1000000000L)
+#  define BN_DEC_NUM      9
+#  define BN_DEC_FMT1     "%u"
+#  define BN_DEC_FMT2     "%09u"
+# endif
+
+
+/*-
+ * Bignum consistency macros
+ * There is one "API" macro, bn_fix_top(), for stripping leading zeroes from
+ * bignum data after direct manipulations on the data. There is also an
+ * "internal" macro, bn_check_top(), for verifying that there are no leading
+ * zeroes. Unfortunately, some auditing is required due to the fact that
+ * bn_fix_top() has become an overabused duct-tape because bignum data is
+ * occasionally passed around in an inconsistent state. So the following
+ * changes have been made to sort this out;
+ * - bn_fix_top()s implementation has been moved to bn_correct_top()
+ * - if BN_DEBUG isn't defined, bn_fix_top() maps to bn_correct_top(), and
+ *   bn_check_top() is as before.
+ * - if BN_DEBUG *is* defined;
+ *   - bn_check_top() tries to pollute unused words even if the bignum 'top' is
+ *     consistent. (ed: only if BN_DEBUG_RAND is defined)
+ *   - bn_fix_top() maps to bn_check_top() rather than "fixing" anything.
+ * The idea is to have debug builds flag up inconsistent bignums when they
+ * occur. If that occurs in a bn_fix_top(), we examine the code in question; if
+ * the use of bn_fix_top() was appropriate (ie. it follows directly after code
+ * that manipulates the bignum) it is converted to bn_correct_top(), and if it
+ * was not appropriate, we convert it permanently to bn_check_top() and track
+ * down the cause of the bug. Eventually, no internal code should be using the
+ * bn_fix_top() macro. External applications and libraries should try this with
+ * their own code too, both in terms of building against the openssl headers
+ * with BN_DEBUG defined *and* linking with a version of OpenSSL built with it
+ * defined. This not only improves external code, it provides more test
+ * coverage for openssl's own code.
+ */
+
+# ifdef BN_DEBUG
+/*
+ * The new BN_FLG_FIXED_TOP flag marks vectors that were not treated with
+ * bn_correct_top, in other words such vectors are permitted to have zeros
+ * in most significant limbs. Such vectors are used internally to achieve
+ * execution time invariance for critical operations with private keys.
+ * It's BN_DEBUG-only flag, because user application is not supposed to
+ * observe it anyway. Moreover, optimizing compiler would actually remove
+ * all operations manipulating the bit in question in non-BN_DEBUG build.
+ */
+#  define BN_FLG_FIXED_TOP 0x10000
+#  ifdef BN_DEBUG_RAND
+#   define bn_pollute(a) \
+        do { \
+            const BIGNUM *_bnum1 = (a); \
+            if (_bnum1->top < _bnum1->dmax) { \
+                unsigned char _tmp_char; \
+                /* We cast away const without the compiler knowing, any \
+                 * *genuinely* constant variables that aren't mutable \
+                 * wouldn't be constructed with top!=dmax. */ \
+                BN_ULONG *_not_const; \
+                memcpy(&_not_const, &_bnum1->d, sizeof(_not_const)); \
+                RAND_bytes(&_tmp_char, 1); /* Debug only - safe to ignore error return */\
+                memset(_not_const + _bnum1->top, _tmp_char, \
+                       sizeof(*_not_const) * (_bnum1->dmax - _bnum1->top)); \
+            } \
+        } while(0)
+#  else
+#   define bn_pollute(a)
+#  endif
+#  define bn_check_top(a) \
+        do { \
+                const BIGNUM *_bnum2 = (a); \
+                if (_bnum2 != NULL) { \
+                        int _top = _bnum2->top; \
+                        (void)ossl_assert((_top == 0 && !_bnum2->neg) || \
+                                  (_top && ((_bnum2->flags & BN_FLG_FIXED_TOP) \
+                                            || _bnum2->d[_top - 1] != 0))); \
+                        bn_pollute(_bnum2); \
+                } \
+        } while(0)
+
+#  define bn_fix_top(a)           bn_check_top(a)
+
+#  define bn_check_size(bn, bits) bn_wcheck_size(bn, ((bits+BN_BITS2-1))/BN_BITS2)
+#  define bn_wcheck_size(bn, words) \
+        do { \
+                const BIGNUM *_bnum2 = (bn); \
+                assert((words) <= (_bnum2)->dmax && \
+                       (words) >= (_bnum2)->top); \
+                /* avoid unused variable warning with NDEBUG */ \
+                (void)(_bnum2); \
+        } while(0)
+
+# else                          /* !BN_DEBUG */
+
+#  define BN_FLG_FIXED_TOP 0
+#  define bn_pollute(a)
+#  define bn_check_top(a)
+#  define bn_fix_top(a)           bn_correct_top(a)
+#  define bn_check_size(bn, bits)
+#  define bn_wcheck_size(bn, words)
+
+# endif
+
+BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
+                          BN_ULONG w);
+BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
+void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
+BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d);
+BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                      int num);
+BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                      int num);
+
+struct bignum_st {
+    BN_ULONG *d;                /* Pointer to an array of 'BN_BITS2' bit
+                                 * chunks. */
+    int top;                    /* Index of last used d +1. */
+    /* The next are internal book keeping for bn_expand. */
+    int dmax;                   /* Size of the d array. */
+    int neg;                    /* one if the number is negative */
+    int flags;
+};
+
+/* Used for montgomery multiplication */
+struct bn_mont_ctx_st {
+    int ri;                     /* number of bits in R */
+    BIGNUM RR;                  /* used to convert to montgomery form,
+                                   possibly zero-padded */
+    BIGNUM N;                   /* The modulus */
+    BIGNUM Ni;                  /* R*(1/R mod N) - N*Ni = 1 (Ni is only
+                                 * stored for bignum algorithm) */
+    BN_ULONG n0[2];             /* least significant word(s) of Ni; (type
+                                 * changed with 0.9.9, was "BN_ULONG n0;"
+                                 * before) */
+    int flags;
+};
+
+/*
+ * Used for reciprocal division/mod functions It cannot be shared between
+ * threads
+ */
+struct bn_recp_ctx_st {
+    BIGNUM N;                   /* the divisor */
+    BIGNUM Nr;                  /* the reciprocal */
+    int num_bits;
+    int shift;
+    int flags;
+};
+
+/* Used for slow "generation" functions. */
+struct bn_gencb_st {
+    unsigned int ver;           /* To handle binary (in)compatibility */
+    void *arg;                  /* callback-specific data */
+    union {
+        /* if (ver==1) - handles old style callbacks */
+        void (*cb_1) (int, int, void *);
+        /* if (ver==2) - new callback style */
+        int (*cb_2) (int, int, BN_GENCB *);
+    } cb;
+};
+
+/*-
+ * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
+ *
+ *
+ * For window size 'w' (w >= 2) and a random 'b' bits exponent,
+ * the number of multiplications is a constant plus on average
+ *
+ *    2^(w-1) + (b-w)/(w+1);
+ *
+ * here  2^(w-1)  is for precomputing the table (we actually need
+ * entries only for windows that have the lowest bit set), and
+ * (b-w)/(w+1)  is an approximation for the expected number of
+ * w-bit windows, not counting the first one.
+ *
+ * Thus we should use
+ *
+ *    w >= 6  if        b > 671
+ *     w = 5  if  671 > b > 239
+ *     w = 4  if  239 > b >  79
+ *     w = 3  if   79 > b >  23
+ *    w <= 2  if   23 > b
+ *
+ * (with draws in between).  Very small exponents are often selected
+ * with low Hamming weight, so we use  w = 1  for b <= 23.
+ */
+# define BN_window_bits_for_exponent_size(b) \
+                ((b) > 671 ? 6 : \
+                 (b) > 239 ? 5 : \
+                 (b) >  79 ? 4 : \
+                 (b) >  23 ? 3 : 1)
+
+/*
+ * BN_mod_exp_mont_conttime is based on the assumption that the L1 data cache
+ * line width of the target processor is at least the following value.
+ */
+# define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH      ( 64 )
+# define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK       (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
+
+/*
+ * Window sizes optimized for fixed window size modular exponentiation
+ * algorithm (BN_mod_exp_mont_consttime). To achieve the security goals of
+ * BN_mode_exp_mont_consttime, the maximum size of the window must not exceed
+ * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH). Window size thresholds are
+ * defined for cache line sizes of 32 and 64, cache line sizes where
+ * log_2(32)=5 and log_2(64)=6 respectively. A window size of 7 should only be
+ * used on processors that have a 128 byte or greater cache line size.
+ */
+# if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
+
+#  define BN_window_bits_for_ctime_exponent_size(b) \
+                ((b) > 937 ? 6 : \
+                 (b) > 306 ? 5 : \
+                 (b) >  89 ? 4 : \
+                 (b) >  22 ? 3 : 1)
+#  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE    (6)
+
+# elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32
+
+#  define BN_window_bits_for_ctime_exponent_size(b) \
+                ((b) > 306 ? 5 : \
+                 (b) >  89 ? 4 : \
+                 (b) >  22 ? 3 : 1)
+#  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE    (5)
+
+# endif
+
+/* Pentium pro 16,16,16,32,64 */
+/* Alpha       16,16,16,16.64 */
+# define BN_MULL_SIZE_NORMAL                     (16)/* 32 */
+# define BN_MUL_RECURSIVE_SIZE_NORMAL            (16)/* 32 less than */
+# define BN_SQR_RECURSIVE_SIZE_NORMAL            (16)/* 32 */
+# define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL        (32)/* 32 */
+# define BN_MONT_CTX_SET_SIZE_WORD               (64)/* 32 */
+
+/*
+ * 2011-02-22 SMS. In various places, a size_t variable or a type cast to
+ * size_t was used to perform integer-only operations on pointers.  This
+ * failed on VMS with 64-bit pointers (CC /POINTER_SIZE = 64) because size_t
+ * is still only 32 bits.  What's needed in these cases is an integer type
+ * with the same size as a pointer, which size_t is not certain to be. The
+ * only fix here is VMS-specific.
+ */
+# if defined(OPENSSL_SYS_VMS)
+#  if __INITIAL_POINTER_SIZE == 64
+#   define PTR_SIZE_INT long long
+#  else                         /* __INITIAL_POINTER_SIZE == 64 */
+#   define PTR_SIZE_INT int
+#  endif                        /* __INITIAL_POINTER_SIZE == 64 [else] */
+# elif !defined(PTR_SIZE_INT)   /* defined(OPENSSL_SYS_VMS) */
+#  define PTR_SIZE_INT size_t
+# endif                         /* defined(OPENSSL_SYS_VMS) [else] */
+
+# if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
+/*
+ * BN_UMULT_HIGH section.
+ * If the compiler doesn't support 2*N integer type, then you have to
+ * replace every N*N multiplication with 4 (N/2)*(N/2) accompanied by some
+ * shifts and additions which unavoidably results in severe performance
+ * penalties. Of course provided that the hardware is capable of producing
+ * 2*N result... That's when you normally start considering assembler
+ * implementation. However! It should be pointed out that some CPUs (e.g.,
+ * PowerPC, Alpha, and IA-64) provide *separate* instruction calculating
+ * the upper half of the product placing the result into a general
+ * purpose register. Now *if* the compiler supports inline assembler,
+ * then it's not impossible to implement the "bignum" routines (and have
+ * the compiler optimize 'em) exhibiting "native" performance in C. That's
+ * what BN_UMULT_HIGH macro is about:-) Note that more recent compilers do
+ * support 2*64 integer type, which is also used here.
+ */
+#  if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16 && \
+      (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
+#   define BN_UMULT_HIGH(a,b)          (((__uint128_t)(a)*(b))>>64)
+#   define BN_UMULT_LOHI(low,high,a,b) ({       \
+        __uint128_t ret=(__uint128_t)(a)*(b);   \
+        (high)=ret>>64; (low)=ret;      })
+#  elif defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
+#   if defined(__DECC)
+#    include <c_asm.h>
+#    define BN_UMULT_HIGH(a,b)   (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
+#   elif defined(__GNUC__) && __GNUC__>=2
+#    define BN_UMULT_HIGH(a,b)   ({     \
+        register BN_ULONG ret;          \
+        asm ("umulh     %1,%2,%0"       \
+             : "=r"(ret)                \
+             : "r"(a), "r"(b));         \
+        ret;                      })
+#   endif                       /* compiler */
+#  elif defined(_ARCH_PPC64) && defined(SIXTY_FOUR_BIT_LONG)
+#   if defined(__GNUC__) && __GNUC__>=2
+#    define BN_UMULT_HIGH(a,b)   ({     \
+        register BN_ULONG ret;          \
+        asm ("mulhdu    %0,%1,%2"       \
+             : "=r"(ret)                \
+             : "r"(a), "r"(b));         \
+        ret;                      })
+#   endif                       /* compiler */
+#  elif (defined(__x86_64) || defined(__x86_64__)) && \
+       (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
+#   if defined(__GNUC__) && __GNUC__>=2
+#    define BN_UMULT_HIGH(a,b)   ({     \
+        register BN_ULONG ret,discard;  \
+        asm ("mulq      %3"             \
+             : "=a"(discard),"=d"(ret)  \
+             : "a"(a), "g"(b)           \
+             : "cc");                   \
+        ret;                      })
+#    define BN_UMULT_LOHI(low,high,a,b) \
+        asm ("mulq      %3"             \
+                : "=a"(low),"=d"(high)  \
+                : "a"(a),"g"(b)         \
+                : "cc");
+#   endif
+#  elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT)
+#   if defined(_MSC_VER) && _MSC_VER>=1400
+unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b);
+unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
+                          unsigned __int64 *h);
+#    pragma intrinsic(__umulh,_umul128)
+#    define BN_UMULT_HIGH(a,b)           __umulh((a),(b))
+#    define BN_UMULT_LOHI(low,high,a,b)  ((low)=_umul128((a),(b),&(high)))
+#   endif
+#  elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
+#   if defined(__GNUC__) && __GNUC__>=2
+#    define BN_UMULT_HIGH(a,b) ({       \
+        register BN_ULONG ret;          \
+        asm ("dmultu    %1,%2"          \
+             : "=h"(ret)                \
+             : "r"(a), "r"(b) : "l");   \
+        ret;                    })
+#    define BN_UMULT_LOHI(low,high,a,b) \
+        asm ("dmultu    %2,%3"          \
+             : "=l"(low),"=h"(high)     \
+             : "r"(a), "r"(b));
+#   endif
+#  elif defined(__aarch64__) && defined(SIXTY_FOUR_BIT_LONG)
+#   if defined(__GNUC__) && __GNUC__>=2
+#    define BN_UMULT_HIGH(a,b)   ({     \
+        register BN_ULONG ret;          \
+        asm ("umulh     %0,%1,%2"       \
+             : "=r"(ret)                \
+             : "r"(a), "r"(b));         \
+        ret;                      })
+#   endif
+#  endif                        /* cpu */
+# endif                         /* OPENSSL_NO_ASM */
+
+# ifdef BN_DEBUG_RAND
+#  define bn_clear_top2max(a) \
+        { \
+        int      ind = (a)->dmax - (a)->top; \
+        BN_ULONG *ftl = &(a)->d[(a)->top-1]; \
+        for (; ind != 0; ind--) \
+                *(++ftl) = 0x0; \
+        }
+# else
+#  define bn_clear_top2max(a)
+# endif
+
+# ifdef BN_LLONG
+/*******************************************************************
+ * Using the long long type, has to be twice as wide as BN_ULONG...
+ */
+#  define Lw(t)    (((BN_ULONG)(t))&BN_MASK2)
+#  define Hw(t)    (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
+
+#  define mul_add(r,a,w,c) { \
+        BN_ULLONG t; \
+        t=(BN_ULLONG)w * (a) + (r) + (c); \
+        (r)= Lw(t); \
+        (c)= Hw(t); \
+        }
+
+#  define mul(r,a,w,c) { \
+        BN_ULLONG t; \
+        t=(BN_ULLONG)w * (a) + (c); \
+        (r)= Lw(t); \
+        (c)= Hw(t); \
+        }
+
+#  define sqr(r0,r1,a) { \
+        BN_ULLONG t; \
+        t=(BN_ULLONG)(a)*(a); \
+        (r0)=Lw(t); \
+        (r1)=Hw(t); \
+        }
+
+# elif defined(BN_UMULT_LOHI)
+#  define mul_add(r,a,w,c) {              \
+        BN_ULONG high,low,ret,tmp=(a);  \
+        ret =  (r);                     \
+        BN_UMULT_LOHI(low,high,w,tmp);  \
+        ret += (c);                     \
+        (c) =  (ret<(c))?1:0;           \
+        (c) += high;                    \
+        ret += low;                     \
+        (c) += (ret<low)?1:0;           \
+        (r) =  ret;                     \
+        }
+
+#  define mul(r,a,w,c)    {               \
+        BN_ULONG high,low,ret,ta=(a);   \
+        BN_UMULT_LOHI(low,high,w,ta);   \
+        ret =  low + (c);               \
+        (c) =  high;                    \
+        (c) += (ret<low)?1:0;           \
+        (r) =  ret;                     \
+        }
+
+#  define sqr(r0,r1,a)    {               \
+        BN_ULONG tmp=(a);               \
+        BN_UMULT_LOHI(r0,r1,tmp,tmp);   \
+        }
+
+# elif defined(BN_UMULT_HIGH)
+#  define mul_add(r,a,w,c) {              \
+        BN_ULONG high,low,ret,tmp=(a);  \
+        ret =  (r);                     \
+        high=  BN_UMULT_HIGH(w,tmp);    \
+        ret += (c);                     \
+        low =  (w) * tmp;               \
+        (c) =  (ret<(c))?1:0;           \
+        (c) += high;                    \
+        ret += low;                     \
+        (c) += (ret<low)?1:0;           \
+        (r) =  ret;                     \
+        }
+
+#  define mul(r,a,w,c)    {               \
+        BN_ULONG high,low,ret,ta=(a);   \
+        low =  (w) * ta;                \
+        high=  BN_UMULT_HIGH(w,ta);     \
+        ret =  low + (c);               \
+        (c) =  high;                    \
+        (c) += (ret<low)?1:0;           \
+        (r) =  ret;                     \
+        }
+
+#  define sqr(r0,r1,a)    {               \
+        BN_ULONG tmp=(a);               \
+        (r0) = tmp * tmp;               \
+        (r1) = BN_UMULT_HIGH(tmp,tmp);  \
+        }
+
+# else
+/*************************************************************
+ * No long long type
+ */
+
+#  define LBITS(a)        ((a)&BN_MASK2l)
+#  define HBITS(a)        (((a)>>BN_BITS4)&BN_MASK2l)
+#  define L2HBITS(a)      (((a)<<BN_BITS4)&BN_MASK2)
+
+#  define LLBITS(a)       ((a)&BN_MASKl)
+#  define LHBITS(a)       (((a)>>BN_BITS2)&BN_MASKl)
+#  define LL2HBITS(a)     ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2)
+
+#  define mul64(l,h,bl,bh) \
+        { \
+        BN_ULONG m,m1,lt,ht; \
+ \
+        lt=l; \
+        ht=h; \
+        m =(bh)*(lt); \
+        lt=(bl)*(lt); \
+        m1=(bl)*(ht); \
+        ht =(bh)*(ht); \
+        m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
+        ht+=HBITS(m); \
+        m1=L2HBITS(m); \
+        lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
+        (l)=lt; \
+        (h)=ht; \
+        }
+
+#  define sqr64(lo,ho,in) \
+        { \
+        BN_ULONG l,h,m; \
+ \
+        h=(in); \
+        l=LBITS(h); \
+        h=HBITS(h); \
+        m =(l)*(h); \
+        l*=l; \
+        h*=h; \
+        h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
+        m =(m&BN_MASK2l)<<(BN_BITS4+1); \
+        l=(l+m)&BN_MASK2; if (l < m) h++; \
+        (lo)=l; \
+        (ho)=h; \
+        }
+
+#  define mul_add(r,a,bl,bh,c) { \
+        BN_ULONG l,h; \
+ \
+        h= (a); \
+        l=LBITS(h); \
+        h=HBITS(h); \
+        mul64(l,h,(bl),(bh)); \
+ \
+        /* non-multiply part */ \
+        l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
+        (c)=(r); \
+        l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
+        (c)=h&BN_MASK2; \
+        (r)=l; \
+        }
+
+#  define mul(r,a,bl,bh,c) { \
+        BN_ULONG l,h; \
+ \
+        h= (a); \
+        l=LBITS(h); \
+        h=HBITS(h); \
+        mul64(l,h,(bl),(bh)); \
+ \
+        /* non-multiply part */ \
+        l+=(c); if ((l&BN_MASK2) < (c)) h++; \
+        (c)=h&BN_MASK2; \
+        (r)=l&BN_MASK2; \
+        }
+# endif                         /* !BN_LLONG */
+
+void BN_RECP_CTX_init(BN_RECP_CTX *recp);
+void BN_MONT_CTX_init(BN_MONT_CTX *ctx);
+
+void bn_init(BIGNUM *a);
+void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb);
+void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
+void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
+void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
+void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
+void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a);
+int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n);
+int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl);
+void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
+                      int dna, int dnb, BN_ULONG *t);
+void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
+                           int n, int tna, int tnb, BN_ULONG *t);
+void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t);
+void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n);
+void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
+                          BN_ULONG *t);
+BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
+                           int cl, int dl);
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                const BN_ULONG *np, const BN_ULONG *n0, int num);
+
+BIGNUM *int_bn_mod_inverse(BIGNUM *in,
+                           const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx,
+                           int *noinv);
+
+int bn_probable_prime_dh(BIGNUM *rnd, int bits,
+                         const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
+
+static ossl_inline BIGNUM *bn_expand(BIGNUM *a, int bits)
+{
+    if (bits > (INT_MAX - BN_BITS2 + 1))
+        return NULL;
+
+    if (((bits+BN_BITS2-1)/BN_BITS2) <= (a)->dmax)
+        return a;
+
+    return bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2);
+}
+
+#endif
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_lib.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_lib.c
@ -0,0 +1,964 @@
+/*
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+#include <openssl/opensslconf.h>
+#include "internal/constant_time_locl.h"
+
+/* This stuff appears to be completely unused, so is deprecated */
+#if OPENSSL_API_COMPAT < 0x00908000L
+/*-
+ * For a 32 bit machine
+ * 2 -   4 ==  128
+ * 3 -   8 ==  256
+ * 4 -  16 ==  512
+ * 5 -  32 == 1024
+ * 6 -  64 == 2048
+ * 7 - 128 == 4096
+ * 8 - 256 == 8192
+ */
+static int bn_limit_bits = 0;
+static int bn_limit_num = 8;    /* (1<<bn_limit_bits) */
+static int bn_limit_bits_low = 0;
+static int bn_limit_num_low = 8; /* (1<<bn_limit_bits_low) */
+static int bn_limit_bits_high = 0;
+static int bn_limit_num_high = 8; /* (1<<bn_limit_bits_high) */
+static int bn_limit_bits_mont = 0;
+static int bn_limit_num_mont = 8; /* (1<<bn_limit_bits_mont) */
+
+void BN_set_params(int mult, int high, int low, int mont)
+{
+    if (mult >= 0) {
+        if (mult > (int)(sizeof(int) * 8) - 1)
+            mult = sizeof(int) * 8 - 1;
+        bn_limit_bits = mult;
+        bn_limit_num = 1 << mult;
+    }
+    if (high >= 0) {
+        if (high > (int)(sizeof(int) * 8) - 1)
+            high = sizeof(int) * 8 - 1;
+        bn_limit_bits_high = high;
+        bn_limit_num_high = 1 << high;
+    }
+    if (low >= 0) {
+        if (low > (int)(sizeof(int) * 8) - 1)
+            low = sizeof(int) * 8 - 1;
+        bn_limit_bits_low = low;
+        bn_limit_num_low = 1 << low;
+    }
+    if (mont >= 0) {
+        if (mont > (int)(sizeof(int) * 8) - 1)
+            mont = sizeof(int) * 8 - 1;
+        bn_limit_bits_mont = mont;
+        bn_limit_num_mont = 1 << mont;
+    }
+}
+
+int BN_get_params(int which)
+{
+    if (which == 0)
+        return bn_limit_bits;
+    else if (which == 1)
+        return bn_limit_bits_high;
+    else if (which == 2)
+        return bn_limit_bits_low;
+    else if (which == 3)
+        return bn_limit_bits_mont;
+    else
+        return 0;
+}
+#endif
+
+const BIGNUM *BN_value_one(void)
+{
+    static const BN_ULONG data_one = 1L;
+    static const BIGNUM const_one =
+        { (BN_ULONG *)&data_one, 1, 1, 0, BN_FLG_STATIC_DATA };
+
+    return &const_one;
+}
+
+int BN_num_bits_word(BN_ULONG l)
+{
+    BN_ULONG x, mask;
+    int bits = (l != 0);
+
+#if BN_BITS2 > 32
+    x = l >> 32;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 32 & mask;
+    l ^= (x ^ l) & mask;
+#endif
+
+    x = l >> 16;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 16 & mask;
+    l ^= (x ^ l) & mask;
+
+    x = l >> 8;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 8 & mask;
+    l ^= (x ^ l) & mask;
+
+    x = l >> 4;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 4 & mask;
+    l ^= (x ^ l) & mask;
+
+    x = l >> 2;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 2 & mask;
+    l ^= (x ^ l) & mask;
+
+    x = l >> 1;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 1 & mask;
+
+    return bits;
+}
+
+int BN_num_bits(const BIGNUM *a)
+{
+    int i = a->top - 1;
+    bn_check_top(a);
+
+    if (BN_is_zero(a))
+        return 0;
+    return ((i * BN_BITS2) + BN_num_bits_word(a->d[i]));
+}
+
+static void bn_free_d(BIGNUM *a)
+{
+    if (BN_get_flags(a, BN_FLG_SECURE))
+        OPENSSL_secure_free(a->d);
+    else
+        OPENSSL_free(a->d);
+}
+
+
+void BN_clear_free(BIGNUM *a)
+{
+    if (a == NULL)
+        return;
+    if (a->d != NULL && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
+        OPENSSL_cleanse(a->d, a->dmax * sizeof(a->d[0]));
+        bn_free_d(a);
+    }
+    if (BN_get_flags(a, BN_FLG_MALLOCED)) {
+        OPENSSL_cleanse(a, sizeof(*a));
+        OPENSSL_free(a);
+    }
+}
+
+void BN_free(BIGNUM *a)
+{
+    if (a == NULL)
+        return;
+    if (!BN_get_flags(a, BN_FLG_STATIC_DATA))
+        bn_free_d(a);
+    if (a->flags & BN_FLG_MALLOCED)
+        OPENSSL_free(a);
+}
+
+void bn_init(BIGNUM *a)
+{
+    static BIGNUM nilbn;
+
+    *a = nilbn;
+    bn_check_top(a);
+}
+
+BIGNUM *BN_new(void)
+{
+    BIGNUM *ret;
+
+    if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) {
+        BNerr(BN_F_BN_NEW, ERR_R_MALLOC_FAILURE);
+        return NULL;
+    }
+    ret->flags = BN_FLG_MALLOCED;
+    bn_check_top(ret);
+    return ret;
+}
+
+ BIGNUM *BN_secure_new(void)
+ {
+     BIGNUM *ret = BN_new();
+     if (ret != NULL)
+         ret->flags |= BN_FLG_SECURE;
+     return ret;
+ }
+
+/* This is used by bn_expand2() */
+/* The caller MUST check that words > b->dmax before calling this */
+static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
+{
+    BN_ULONG *a = NULL;
+
+    if (words > (INT_MAX / (4 * BN_BITS2))) {
+        BNerr(BN_F_BN_EXPAND_INTERNAL, BN_R_BIGNUM_TOO_LONG);
+        return NULL;
+    }
+    if (BN_get_flags(b, BN_FLG_STATIC_DATA)) {
+        BNerr(BN_F_BN_EXPAND_INTERNAL, BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
+        return NULL;
+    }
+    if (BN_get_flags(b, BN_FLG_SECURE))
+        a = OPENSSL_secure_zalloc(words * sizeof(*a));
+    else
+        a = OPENSSL_zalloc(words * sizeof(*a));
+    if (a == NULL) {
+        BNerr(BN_F_BN_EXPAND_INTERNAL, ERR_R_MALLOC_FAILURE);
+        return NULL;
+    }
+
+    assert(b->top <= words);
+    if (b->top > 0)
+        memcpy(a, b->d, sizeof(*a) * b->top);
+
+    return a;
+}
+
+/*
+ * This is an internal function that should not be used in applications. It
+ * ensures that 'b' has enough room for a 'words' word number and initialises
+ * any unused part of b->d with leading zeros. It is mostly used by the
+ * various BIGNUM routines. If there is an error, NULL is returned. If not,
+ * 'b' is returned.
+ */
+
+BIGNUM *bn_expand2(BIGNUM *b, int words)
+{
+    if (words > b->dmax) {
+        BN_ULONG *a = bn_expand_internal(b, words);
+        if (!a)
+            return NULL;
+        if (b->d) {
+            OPENSSL_cleanse(b->d, b->dmax * sizeof(b->d[0]));
+            bn_free_d(b);
+        }
+        b->d = a;
+        b->dmax = words;
+    }
+
+    return b;
+}
+
+BIGNUM *BN_dup(const BIGNUM *a)
+{
+    BIGNUM *t;
+
+    if (a == NULL)
+        return NULL;
+    bn_check_top(a);
+
+    t = BN_get_flags(a, BN_FLG_SECURE) ? BN_secure_new() : BN_new();
+    if (t == NULL)
+        return NULL;
+    if (!BN_copy(t, a)) {
+        BN_free(t);
+        return NULL;
+    }
+    bn_check_top(t);
+    return t;
+}
+
+BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
+{
+    bn_check_top(b);
+
+    if (a == b)
+        return a;
+    if (bn_wexpand(a, b->top) == NULL)
+        return NULL;
+
+    if (b->top > 0)
+        memcpy(a->d, b->d, sizeof(b->d[0]) * b->top);
+
+    a->neg = b->neg;
+    a->top = b->top;
+    a->flags |= b->flags & BN_FLG_FIXED_TOP;
+    bn_check_top(a);
+    return a;
+}
+
+#define FLAGS_DATA(flags) ((flags) & (BN_FLG_STATIC_DATA \
+                                    | BN_FLG_CONSTTIME   \
+                                    | BN_FLG_SECURE      \
+                                    | BN_FLG_FIXED_TOP))
+#define FLAGS_STRUCT(flags) ((flags) & (BN_FLG_MALLOCED))
+
+void BN_swap(BIGNUM *a, BIGNUM *b)
+{
+    int flags_old_a, flags_old_b;
+    BN_ULONG *tmp_d;
+    int tmp_top, tmp_dmax, tmp_neg;
+
+    bn_check_top(a);
+    bn_check_top(b);
+
+    flags_old_a = a->flags;
+    flags_old_b = b->flags;
+
+    tmp_d = a->d;
+    tmp_top = a->top;
+    tmp_dmax = a->dmax;
+    tmp_neg = a->neg;
+
+    a->d = b->d;
+    a->top = b->top;
+    a->dmax = b->dmax;
+    a->neg = b->neg;
+
+    b->d = tmp_d;
+    b->top = tmp_top;
+    b->dmax = tmp_dmax;
+    b->neg = tmp_neg;
+
+    a->flags = FLAGS_STRUCT(flags_old_a) | FLAGS_DATA(flags_old_b);
+    b->flags = FLAGS_STRUCT(flags_old_b) | FLAGS_DATA(flags_old_a);
+    bn_check_top(a);
+    bn_check_top(b);
+}
+
+void BN_clear(BIGNUM *a)
+{
+    bn_check_top(a);
+    if (a->d != NULL)
+        OPENSSL_cleanse(a->d, sizeof(*a->d) * a->dmax);
+    a->neg = 0;
+    a->top = 0;
+    a->flags &= ~BN_FLG_FIXED_TOP;
+}
+
+BN_ULONG BN_get_word(const BIGNUM *a)
+{
+    if (a->top > 1)
+        return BN_MASK2;
+    else if (a->top == 1)
+        return a->d[0];
+    /* a->top == 0 */
+    return 0;
+}
+
+int BN_set_word(BIGNUM *a, BN_ULONG w)
+{
+    bn_check_top(a);
+    if (bn_expand(a, (int)sizeof(BN_ULONG) * 8) == NULL)
+        return 0;
+    a->neg = 0;
+    a->d[0] = w;
+    a->top = (w ? 1 : 0);
+    a->flags &= ~BN_FLG_FIXED_TOP;
+    bn_check_top(a);
+    return 1;
+}
+
+BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
+{
+    unsigned int i, m;
+    unsigned int n;
+    BN_ULONG l;
+    BIGNUM *bn = NULL;
+
+    if (ret == NULL)
+        ret = bn = BN_new();
+    if (ret == NULL)
+        return NULL;
+    bn_check_top(ret);
+    /* Skip leading zero's. */
+    for ( ; len > 0 && *s == 0; s++, len--)
+        continue;
+    n = len;
+    if (n == 0) {
+        ret->top = 0;
+        return ret;
+    }
+    i = ((n - 1) / BN_BYTES) + 1;
+    m = ((n - 1) % (BN_BYTES));
+    if (bn_wexpand(ret, (int)i) == NULL) {
+        BN_free(bn);
+        return NULL;
+    }
+    ret->top = i;
+    ret->neg = 0;
+    l = 0;
+    while (n--) {
+        l = (l << 8L) | *(s++);
+        if (m-- == 0) {
+            ret->d[--i] = l;
+            l = 0;
+            m = BN_BYTES - 1;
+        }
+    }
+    /*
+     * need to call this due to clear byte at top if avoiding having the top
+     * bit set (-ve number)
+     */
+    bn_correct_top(ret);
+    return ret;
+}
+
+/* ignore negative */
+static int bn2binpad(const BIGNUM *a, unsigned char *to, int tolen)
+{
+    int n;
+    size_t i, lasti, j, atop, mask;
+    BN_ULONG l;
+
+    /*
+     * In case |a| is fixed-top, BN_num_bytes can return bogus length,
+     * but it's assumed that fixed-top inputs ought to be "nominated"
+     * even for padded output, so it works out...
+     */
+    n = BN_num_bytes(a);
+    if (tolen == -1) {
+        tolen = n;
+    } else if (tolen < n) {     /* uncommon/unlike case */
+        BIGNUM temp = *a;
+
+        bn_correct_top(&temp);
+        n = BN_num_bytes(&temp);
+        if (tolen < n)
+            return -1;
+    }
+
+    /* Swipe through whole available data and don't give away padded zero. */
+    atop = a->dmax * BN_BYTES;
+    if (atop == 0) {
+        OPENSSL_cleanse(to, tolen);
+        return tolen;
+    }
+
+    lasti = atop - 1;
+    atop = a->top * BN_BYTES;
+    for (i = 0, j = 0, to += tolen; j < (size_t)tolen; j++) {
+        l = a->d[i / BN_BYTES];
+        mask = 0 - ((j - atop) >> (8 * sizeof(i) - 1));
+        *--to = (unsigned char)(l >> (8 * (i % BN_BYTES)) & mask);
+        i += (i - lasti) >> (8 * sizeof(i) - 1); /* stay on last limb */
+    }
+
+    return tolen;
+}
+
+int BN_bn2binpad(const BIGNUM *a, unsigned char *to, int tolen)
+{
+    if (tolen < 0)
+        return -1;
+    return bn2binpad(a, to, tolen);
+}
+
+int BN_bn2bin(const BIGNUM *a, unsigned char *to)
+{
+    return bn2binpad(a, to, -1);
+}
+
+BIGNUM *BN_lebin2bn(const unsigned char *s, int len, BIGNUM *ret)
+{
+    unsigned int i, m;
+    unsigned int n;
+    BN_ULONG l;
+    BIGNUM *bn = NULL;
+
+    if (ret == NULL)
+        ret = bn = BN_new();
+    if (ret == NULL)
+        return NULL;
+    bn_check_top(ret);
+    s += len;
+    /* Skip trailing zeroes. */
+    for ( ; len > 0 && s[-1] == 0; s--, len--)
+        continue;
+    n = len;
+    if (n == 0) {
+        ret->top = 0;
+        return ret;
+    }
+    i = ((n - 1) / BN_BYTES) + 1;
+    m = ((n - 1) % (BN_BYTES));
+    if (bn_wexpand(ret, (int)i) == NULL) {
+        BN_free(bn);
+        return NULL;
+    }
+    ret->top = i;
+    ret->neg = 0;
+    l = 0;
+    while (n--) {
+        s--;
+        l = (l << 8L) | *s;
+        if (m-- == 0) {
+            ret->d[--i] = l;
+            l = 0;
+            m = BN_BYTES - 1;
+        }
+    }
+    /*
+     * need to call this due to clear byte at top if avoiding having the top
+     * bit set (-ve number)
+     */
+    bn_correct_top(ret);
+    return ret;
+}
+
+int BN_bn2lebinpad(const BIGNUM *a, unsigned char *to, int tolen)
+{
+    int i;
+    BN_ULONG l;
+    bn_check_top(a);
+    i = BN_num_bytes(a);
+    if (tolen < i)
+        return -1;
+    /* Add trailing zeroes if necessary */
+    if (tolen > i)
+        memset(to + i, 0, tolen - i);
+    to += i;
+    while (i--) {
+        l = a->d[i / BN_BYTES];
+        to--;
+        *to = (unsigned char)(l >> (8 * (i % BN_BYTES))) & 0xff;
+    }
+    return tolen;
+}
+
+int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
+{
+    int i;
+    BN_ULONG t1, t2, *ap, *bp;
+
+    bn_check_top(a);
+    bn_check_top(b);
+
+    i = a->top - b->top;
+    if (i != 0)
+        return i;
+    ap = a->d;
+    bp = b->d;
+    for (i = a->top - 1; i >= 0; i--) {
+        t1 = ap[i];
+        t2 = bp[i];
+        if (t1 != t2)
+            return ((t1 > t2) ? 1 : -1);
+    }
+    return 0;
+}
+
+int BN_cmp(const BIGNUM *a, const BIGNUM *b)
+{
+    int i;
+    int gt, lt;
+    BN_ULONG t1, t2;
+
+    if ((a == NULL) || (b == NULL)) {
+        if (a != NULL)
+            return -1;
+        else if (b != NULL)
+            return 1;
+        else
+            return 0;
+    }
+
+    bn_check_top(a);
+    bn_check_top(b);
+
+    if (a->neg != b->neg) {
+        if (a->neg)
+            return -1;
+        else
+            return 1;
+    }
+    if (a->neg == 0) {
+        gt = 1;
+        lt = -1;
+    } else {
+        gt = -1;
+        lt = 1;
+    }
+
+    if (a->top > b->top)
+        return gt;
+    if (a->top < b->top)
+        return lt;
+    for (i = a->top - 1; i >= 0; i--) {
+        t1 = a->d[i];
+        t2 = b->d[i];
+        if (t1 > t2)
+            return gt;
+        if (t1 < t2)
+            return lt;
+    }
+    return 0;
+}
+
+int BN_set_bit(BIGNUM *a, int n)
+{
+    int i, j, k;
+
+    if (n < 0)
+        return 0;
+
+    i = n / BN_BITS2;
+    j = n % BN_BITS2;
+    if (a->top <= i) {
+        if (bn_wexpand(a, i + 1) == NULL)
+            return 0;
+        for (k = a->top; k < i + 1; k++)
+            a->d[k] = 0;
+        a->top = i + 1;
+        a->flags &= ~BN_FLG_FIXED_TOP;
+    }
+
+    a->d[i] |= (((BN_ULONG)1) << j);
+    bn_check_top(a);
+    return 1;
+}
+
+int BN_clear_bit(BIGNUM *a, int n)
+{
+    int i, j;
+
+    bn_check_top(a);
+    if (n < 0)
+        return 0;
+
+    i = n / BN_BITS2;
+    j = n % BN_BITS2;
+    if (a->top <= i)
+        return 0;
+
+    a->d[i] &= (~(((BN_ULONG)1) << j));
+    bn_correct_top(a);
+    return 1;
+}
+
+int BN_is_bit_set(const BIGNUM *a, int n)
+{
+    int i, j;
+
+    bn_check_top(a);
+    if (n < 0)
+        return 0;
+    i = n / BN_BITS2;
+    j = n % BN_BITS2;
+    if (a->top <= i)
+        return 0;
+    return (int)(((a->d[i]) >> j) & ((BN_ULONG)1));
+}
+
+int BN_mask_bits(BIGNUM *a, int n)
+{
+    int b, w;
+
+    bn_check_top(a);
+    if (n < 0)
+        return 0;
+
+    w = n / BN_BITS2;
+    b = n % BN_BITS2;
+    if (w >= a->top)
+        return 0;
+    if (b == 0)
+        a->top = w;
+    else {
+        a->top = w + 1;
+        a->d[w] &= ~(BN_MASK2 << b);
+    }
+    bn_correct_top(a);
+    return 1;
+}
+
+void BN_set_negative(BIGNUM *a, int b)
+{
+    if (b && !BN_is_zero(a))
+        a->neg = 1;
+    else
+        a->neg = 0;
+}
+
+int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n)
+{
+    int i;
+    BN_ULONG aa, bb;
+
+    if (n == 0)
+        return 0;
+
+    aa = a[n - 1];
+    bb = b[n - 1];
+    if (aa != bb)
+        return ((aa > bb) ? 1 : -1);
+    for (i = n - 2; i >= 0; i--) {
+        aa = a[i];
+        bb = b[i];
+        if (aa != bb)
+            return ((aa > bb) ? 1 : -1);
+    }
+    return 0;
+}
+
+/*
+ * Here follows a specialised variants of bn_cmp_words().  It has the
+ * capability of performing the operation on arrays of different sizes. The
+ * sizes of those arrays is expressed through cl, which is the common length
+ * ( basically, min(len(a),len(b)) ), and dl, which is the delta between the
+ * two lengths, calculated as len(a)-len(b). All lengths are the number of
+ * BN_ULONGs...
+ */
+
+int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl)
+{
+    int n, i;
+    n = cl - 1;
+
+    if (dl < 0) {
+        for (i = dl; i < 0; i++) {
+            if (b[n - i] != 0)
+                return -1;      /* a < b */
+        }
+    }
+    if (dl > 0) {
+        for (i = dl; i > 0; i--) {
+            if (a[n + i] != 0)
+                return 1;       /* a > b */
+        }
+    }
+    return bn_cmp_words(a, b, cl);
+}
+
+/*-
+ * Constant-time conditional swap of a and b.
+ * a and b are swapped if condition is not 0.
+ * nwords is the number of words to swap.
+ * Assumes that at least nwords are allocated in both a and b.
+ * Assumes that no more than nwords are used by either a or b.
+ */
+void BN_consttime_swap(BN_ULONG condition, BIGNUM *a, BIGNUM *b, int nwords)
+{
+    BN_ULONG t;
+    int i;
+
+    if (a == b)
+        return;
+
+    bn_wcheck_size(a, nwords);
+    bn_wcheck_size(b, nwords);
+
+    condition = ((~condition & ((condition - 1))) >> (BN_BITS2 - 1)) - 1;
+
+    t = (a->top ^ b->top) & condition;
+    a->top ^= t;
+    b->top ^= t;
+
+    t = (a->neg ^ b->neg) & condition;
+    a->neg ^= t;
+    b->neg ^= t;
+
+    /*-
+     * BN_FLG_STATIC_DATA: indicates that data may not be written to. Intention
+     * is actually to treat it as it's read-only data, and some (if not most)
+     * of it does reside in read-only segment. In other words observation of
+     * BN_FLG_STATIC_DATA in BN_consttime_swap should be treated as fatal
+     * condition. It would either cause SEGV or effectively cause data
+     * corruption.
+     *
+     * BN_FLG_MALLOCED: refers to BN structure itself, and hence must be
+     * preserved.
+     *
+     * BN_FLG_SECURE: must be preserved, because it determines how x->d was
+     * allocated and hence how to free it.
+     *
+     * BN_FLG_CONSTTIME: sufficient to mask and swap
+     *
+     * BN_FLG_FIXED_TOP: indicates that we haven't called bn_correct_top() on
+     * the data, so the d array may be padded with additional 0 values (i.e.
+     * top could be greater than the minimal value that it could be). We should
+     * be swapping it
+     */
+
+#define BN_CONSTTIME_SWAP_FLAGS (BN_FLG_CONSTTIME | BN_FLG_FIXED_TOP)
+
+    t = ((a->flags ^ b->flags) & BN_CONSTTIME_SWAP_FLAGS) & condition;
+    a->flags ^= t;
+    b->flags ^= t;
+
+    /* conditionally swap the data */
+    for (i = 0; i < nwords; i++) {
+        t = (a->d[i] ^ b->d[i]) & condition;
+        a->d[i] ^= t;
+        b->d[i] ^= t;
+    }
+}
+
+#undef BN_CONSTTIME_SWAP_FLAGS
+
+/* Bits of security, see SP800-57 */
+
+int BN_security_bits(int L, int N)
+{
+    int secbits, bits;
+    if (L >= 15360)
+        secbits = 256;
+    else if (L >= 7680)
+        secbits = 192;
+    else if (L >= 3072)
+        secbits = 128;
+    else if (L >= 2048)
+        secbits = 112;
+    else if (L >= 1024)
+        secbits = 80;
+    else
+        return 0;
+    if (N == -1)
+        return secbits;
+    bits = N / 2;
+    if (bits < 80)
+        return 0;
+    return bits >= secbits ? secbits : bits;
+}
+
+void BN_zero_ex(BIGNUM *a)
+{
+    a->neg = 0;
+    a->top = 0;
+    a->flags &= ~BN_FLG_FIXED_TOP;
+}
+
+int BN_abs_is_word(const BIGNUM *a, const BN_ULONG w)
+{
+    return ((a->top == 1) && (a->d[0] == w)) || ((w == 0) && (a->top == 0));
+}
+
+int BN_is_zero(const BIGNUM *a)
+{
+    return a->top == 0;
+}
+
+int BN_is_one(const BIGNUM *a)
+{
+    return BN_abs_is_word(a, 1) && !a->neg;
+}
+
+int BN_is_word(const BIGNUM *a, const BN_ULONG w)
+{
+    return BN_abs_is_word(a, w) && (!w || !a->neg);
+}
+
+int BN_is_odd(const BIGNUM *a)
+{
+    return (a->top > 0) && (a->d[0] & 1);
+}
+
+int BN_is_negative(const BIGNUM *a)
+{
+    return (a->neg != 0);
+}
+
+int BN_to_montgomery(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont,
+                     BN_CTX *ctx)
+{
+    return BN_mod_mul_montgomery(r, a, &(mont->RR), mont, ctx);
+}
+
+void BN_with_flags(BIGNUM *dest, const BIGNUM *b, int flags)
+{
+    dest->d = b->d;
+    dest->top = b->top;
+    dest->dmax = b->dmax;
+    dest->neg = b->neg;
+    dest->flags = ((dest->flags & BN_FLG_MALLOCED)
+                   | (b->flags & ~BN_FLG_MALLOCED)
+                   | BN_FLG_STATIC_DATA | flags);
+}
+
+BN_GENCB *BN_GENCB_new(void)
+{
+    BN_GENCB *ret;
+
+    if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) {
+        BNerr(BN_F_BN_GENCB_NEW, ERR_R_MALLOC_FAILURE);
+        return NULL;
+    }
+
+    return ret;
+}
+
+void BN_GENCB_free(BN_GENCB *cb)
+{
+    if (cb == NULL)
+        return;
+    OPENSSL_free(cb);
+}
+
+void BN_set_flags(BIGNUM *b, int n)
+{
+    b->flags |= n;
+}
+
+int BN_get_flags(const BIGNUM *b, int n)
+{
+    return b->flags & n;
+}
+
+/* Populate a BN_GENCB structure with an "old"-style callback */
+void BN_GENCB_set_old(BN_GENCB *gencb, void (*callback) (int, int, void *),
+                      void *cb_arg)
+{
+    BN_GENCB *tmp_gencb = gencb;
+    tmp_gencb->ver = 1;
+    tmp_gencb->arg = cb_arg;
+    tmp_gencb->cb.cb_1 = callback;
+}
+
+/* Populate a BN_GENCB structure with a "new"-style callback */
+void BN_GENCB_set(BN_GENCB *gencb, int (*callback) (int, int, BN_GENCB *),
+                  void *cb_arg)
+{
+    BN_GENCB *tmp_gencb = gencb;
+    tmp_gencb->ver = 2;
+    tmp_gencb->arg = cb_arg;
+    tmp_gencb->cb.cb_2 = callback;
+}
+
+void *BN_GENCB_get_arg(BN_GENCB *cb)
+{
+    return cb->arg;
+}
+
+BIGNUM *bn_wexpand(BIGNUM *a, int words)
+{
+    return (words <= a->dmax) ? a : bn_expand2(a, words);
+}
+
+void bn_correct_top(BIGNUM *a)
+{
+    BN_ULONG *ftl;
+    int tmp_top = a->top;
+
+    if (tmp_top > 0) {
+        for (ftl = &(a->d[tmp_top]); tmp_top > 0; tmp_top--) {
+            ftl--;
+            if (*ftl != 0)
+                break;
+        }
+        a->top = tmp_top;
+    }
+    if (a->top == 0)
+        a->neg = 0;
+    a->flags &= ~BN_FLG_FIXED_TOP;
+    bn_pollute(a);
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mod.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mod.c
@ -0,0 +1,321 @@
+/*
+ * Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
+{
+    /*
+     * like BN_mod, but returns non-negative remainder (i.e., 0 <= r < |d|
+     * always holds)
+     */
+
+    if (!(BN_mod(r, m, d, ctx)))
+        return 0;
+    if (!r->neg)
+        return 1;
+    /* now   -|d| < r < 0,  so we have to set  r := r + |d| */
+    return (d->neg ? BN_sub : BN_add) (r, r, d);
+}
+
+int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
+               BN_CTX *ctx)
+{
+    if (!BN_add(r, a, b))
+        return 0;
+    return BN_nnmod(r, r, m, ctx);
+}
+
+/*
+ * BN_mod_add variant that may be used if both a and b are non-negative and
+ * less than m. The original algorithm was
+ *
+ *    if (!BN_uadd(r, a, b))
+ *       return 0;
+ *    if (BN_ucmp(r, m) >= 0)
+ *       return BN_usub(r, r, m);
+ *
+ * which is replaced with addition, subtracting modulus, and conditional
+ * move depending on whether or not subtraction borrowed.
+ */
+int bn_mod_add_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
+                         const BIGNUM *m)
+{
+    size_t i, ai, bi, mtop = m->top;
+    BN_ULONG storage[1024 / BN_BITS2];
+    BN_ULONG carry, temp, mask, *rp, *tp = storage;
+    const BN_ULONG *ap, *bp;
+
+    if (bn_wexpand(r, mtop) == NULL)
+        return 0;
+
+    if (mtop > sizeof(storage) / sizeof(storage[0])
+        && (tp = OPENSSL_malloc(mtop * sizeof(BN_ULONG))) == NULL)
+        return 0;
+
+    ap = a->d != NULL ? a->d : tp;
+    bp = b->d != NULL ? b->d : tp;
+
+    for (i = 0, ai = 0, bi = 0, carry = 0; i < mtop;) {
+        mask = (BN_ULONG)0 - ((i - a->top) >> (8 * sizeof(i) - 1));
+        temp = ((ap[ai] & mask) + carry) & BN_MASK2;
+        carry = (temp < carry);
+
+        mask = (BN_ULONG)0 - ((i - b->top) >> (8 * sizeof(i) - 1));
+        tp[i] = ((bp[bi] & mask) + temp) & BN_MASK2;
+        carry += (tp[i] < temp);
+
+        i++;
+        ai += (i - a->dmax) >> (8 * sizeof(i) - 1);
+        bi += (i - b->dmax) >> (8 * sizeof(i) - 1);
+    }
+    rp = r->d;
+    carry -= bn_sub_words(rp, tp, m->d, mtop);
+    for (i = 0; i < mtop; i++) {
+        rp[i] = (carry & tp[i]) | (~carry & rp[i]);
+        ((volatile BN_ULONG *)tp)[i] = 0;
+    }
+    r->top = mtop;
+    r->flags |= BN_FLG_FIXED_TOP;
+    r->neg = 0;
+
+    if (tp != storage)
+        OPENSSL_free(tp);
+
+    return 1;
+}
+
+int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
+                     const BIGNUM *m)
+{
+    int ret = bn_mod_add_fixed_top(r, a, b, m);
+
+    if (ret)
+        bn_correct_top(r);
+
+    return ret;
+}
+
+int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
+               BN_CTX *ctx)
+{
+    if (!BN_sub(r, a, b))
+        return 0;
+    return BN_nnmod(r, r, m, ctx);
+}
+
+/*
+ * BN_mod_sub variant that may be used if both a and b are non-negative,
+ * a is less than m, while b is of same bit width as m. It's implemented
+ * as subtraction followed by two conditional additions.
+ *
+ * 0 <= a < m
+ * 0 <= b < 2^w < 2*m
+ *
+ * after subtraction
+ *
+ * -2*m < r = a - b < m
+ *
+ * Thus it takes up to two conditional additions to make |r| positive.
+ */
+int bn_mod_sub_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
+                         const BIGNUM *m)
+{
+    size_t i, ai, bi, mtop = m->top;
+    BN_ULONG borrow, carry, ta, tb, mask, *rp;
+    const BN_ULONG *ap, *bp;
+
+    if (bn_wexpand(r, mtop) == NULL)
+        return 0;
+
+    rp = r->d;
+    ap = a->d != NULL ? a->d : rp;
+    bp = b->d != NULL ? b->d : rp;
+
+    for (i = 0, ai = 0, bi = 0, borrow = 0; i < mtop;) {
+        mask = (BN_ULONG)0 - ((i - a->top) >> (8 * sizeof(i) - 1));
+        ta = ap[ai] & mask;
+
+        mask = (BN_ULONG)0 - ((i - b->top) >> (8 * sizeof(i) - 1));
+        tb = bp[bi] & mask;
+        rp[i] = ta - tb - borrow;
+        if (ta != tb)
+            borrow = (ta < tb);
+
+        i++;
+        ai += (i - a->dmax) >> (8 * sizeof(i) - 1);
+        bi += (i - b->dmax) >> (8 * sizeof(i) - 1);
+    }
+    ap = m->d;
+    for (i = 0, mask = 0 - borrow, carry = 0; i < mtop; i++) {
+        ta = ((ap[i] & mask) + carry) & BN_MASK2;
+        carry = (ta < carry);
+        rp[i] = (rp[i] + ta) & BN_MASK2;
+        carry += (rp[i] < ta);
+    }
+    borrow -= carry;
+    for (i = 0, mask = 0 - borrow, carry = 0; i < mtop; i++) {
+        ta = ((ap[i] & mask) + carry) & BN_MASK2;
+        carry = (ta < carry);
+        rp[i] = (rp[i] + ta) & BN_MASK2;
+        carry += (rp[i] < ta);
+    }
+
+    r->top = mtop;
+    r->flags |= BN_FLG_FIXED_TOP;
+    r->neg = 0;
+
+    return 1;
+}
+
+/*
+ * BN_mod_sub variant that may be used if both a and b are non-negative and
+ * less than m
+ */
+int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
+                     const BIGNUM *m)
+{
+    if (!BN_sub(r, a, b))
+        return 0;
+    if (r->neg)
+        return BN_add(r, r, m);
+    return 1;
+}
+
+/* slow but works */
+int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
+               BN_CTX *ctx)
+{
+    BIGNUM *t;
+    int ret = 0;
+
+    bn_check_top(a);
+    bn_check_top(b);
+    bn_check_top(m);
+
+    BN_CTX_start(ctx);
+    if ((t = BN_CTX_get(ctx)) == NULL)
+        goto err;
+    if (a == b) {
+        if (!BN_sqr(t, a, ctx))
+            goto err;
+    } else {
+        if (!BN_mul(t, a, b, ctx))
+            goto err;
+    }
+    if (!BN_nnmod(r, t, m, ctx))
+        goto err;
+    bn_check_top(r);
+    ret = 1;
+ err:
+    BN_CTX_end(ctx);
+    return ret;
+}
+
+int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
+{
+    if (!BN_sqr(r, a, ctx))
+        return 0;
+    /* r->neg == 0,  thus we don't need BN_nnmod */
+    return BN_mod(r, r, m, ctx);
+}
+
+int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
+{
+    if (!BN_lshift1(r, a))
+        return 0;
+    bn_check_top(r);
+    return BN_nnmod(r, r, m, ctx);
+}
+
+/*
+ * BN_mod_lshift1 variant that may be used if a is non-negative and less than
+ * m
+ */
+int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m)
+{
+    if (!BN_lshift1(r, a))
+        return 0;
+    bn_check_top(r);
+    if (BN_cmp(r, m) >= 0)
+        return BN_sub(r, r, m);
+    return 1;
+}
+
+int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m,
+                  BN_CTX *ctx)
+{
+    BIGNUM *abs_m = NULL;
+    int ret;
+
+    if (!BN_nnmod(r, a, m, ctx))
+        return 0;
+
+    if (m->neg) {
+        abs_m = BN_dup(m);
+        if (abs_m == NULL)
+            return 0;
+        abs_m->neg = 0;
+    }
+
+    ret = BN_mod_lshift_quick(r, r, n, (abs_m ? abs_m : m));
+    bn_check_top(r);
+
+    BN_free(abs_m);
+    return ret;
+}
+
+/*
+ * BN_mod_lshift variant that may be used if a is non-negative and less than
+ * m
+ */
+int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m)
+{
+    if (r != a) {
+        if (BN_copy(r, a) == NULL)
+            return 0;
+    }
+
+    while (n > 0) {
+        int max_shift;
+
+        /* 0 < r < m */
+        max_shift = BN_num_bits(m) - BN_num_bits(r);
+        /* max_shift >= 0 */
+
+        if (max_shift < 0) {
+            BNerr(BN_F_BN_MOD_LSHIFT_QUICK, BN_R_INPUT_NOT_REDUCED);
+            return 0;
+        }
+
+        if (max_shift > n)
+            max_shift = n;
+
+        if (max_shift) {
+            if (!BN_lshift(r, r, max_shift))
+                return 0;
+            n -= max_shift;
+        } else {
+            if (!BN_lshift1(r, r))
+                return 0;
+            --n;
+        }
+
+        /* BN_num_bits(r) <= BN_num_bits(m) */
+
+        if (BN_cmp(r, m) >= 0) {
+            if (!BN_sub(r, r, m))
+                return 0;
+        }
+    }
+    bn_check_top(r);
+
+    return 1;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mont.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mont.c
@ -0,0 +1,464 @@
+/*
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+/*
+ * Details about Montgomery multiplication algorithms can be found at
+ * http://security.ece.orst.edu/publications.html, e.g.
+ * http://security.ece.orst.edu/koc/papers/j37acmon.pdf and
+ * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf
+ */
+
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+#define MONT_WORD               /* use the faster word-based algorithm */
+
+#ifdef MONT_WORD
+static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont);
+#endif
+
+int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
+                          BN_MONT_CTX *mont, BN_CTX *ctx)
+{
+    int ret = bn_mul_mont_fixed_top(r, a, b, mont, ctx);
+
+    bn_correct_top(r);
+    bn_check_top(r);
+
+    return ret;
+}
+
+int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
+                          BN_MONT_CTX *mont, BN_CTX *ctx)
+{
+    BIGNUM *tmp;
+    int ret = 0;
+    int num = mont->N.top;
+
+#if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD)
+    if (num > 1 && a->top == num && b->top == num) {
+        if (bn_wexpand(r, num) == NULL)
+            return 0;
+        if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
+            r->neg = a->neg ^ b->neg;
+            r->top = num;
+            r->flags |= BN_FLG_FIXED_TOP;
+            return 1;
+        }
+    }
+#endif
+
+    if ((a->top + b->top) > 2 * num)
+        return 0;
+
+    BN_CTX_start(ctx);
+    tmp = BN_CTX_get(ctx);
+    if (tmp == NULL)
+        goto err;
+
+    bn_check_top(tmp);
+    if (a == b) {
+        if (!bn_sqr_fixed_top(tmp, a, ctx))
+            goto err;
+    } else {
+        if (!bn_mul_fixed_top(tmp, a, b, ctx))
+            goto err;
+    }
+    /* reduce from aRR to aR */
+#ifdef MONT_WORD
+    if (!bn_from_montgomery_word(r, tmp, mont))
+        goto err;
+#else
+    if (!BN_from_montgomery(r, tmp, mont, ctx))
+        goto err;
+#endif
+    ret = 1;
+ err:
+    BN_CTX_end(ctx);
+    return ret;
+}
+
+#ifdef MONT_WORD
+static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
+{
+    BIGNUM *n;
+    BN_ULONG *ap, *np, *rp, n0, v, carry;
+    int nl, max, i;
+    unsigned int rtop;
+
+    n = &(mont->N);
+    nl = n->top;
+    if (nl == 0) {
+        ret->top = 0;
+        return 1;
+    }
+
+    max = (2 * nl);             /* carry is stored separately */
+    if (bn_wexpand(r, max) == NULL)
+        return 0;
+
+    r->neg ^= n->neg;
+    np = n->d;
+    rp = r->d;
+
+    /* clear the top words of T */
+    for (rtop = r->top, i = 0; i < max; i++) {
+        v = (BN_ULONG)0 - ((i - rtop) >> (8 * sizeof(rtop) - 1));
+        rp[i] &= v;
+    }
+
+    r->top = max;
+    r->flags |= BN_FLG_FIXED_TOP;
+    n0 = mont->n0[0];
+
+    /*
+     * Add multiples of |n| to |r| until R = 2^(nl * BN_BITS2) divides it. On
+     * input, we had |r| < |n| * R, so now |r| < 2 * |n| * R. Note that |r|
+     * includes |carry| which is stored separately.
+     */
+    for (carry = 0, i = 0; i < nl; i++, rp++) {
+        v = bn_mul_add_words(rp, np, nl, (rp[0] * n0) & BN_MASK2);
+        v = (v + carry + rp[nl]) & BN_MASK2;
+        carry |= (v != rp[nl]);
+        carry &= (v <= rp[nl]);
+        rp[nl] = v;
+    }
+
+    if (bn_wexpand(ret, nl) == NULL)
+        return 0;
+    ret->top = nl;
+    ret->flags |= BN_FLG_FIXED_TOP;
+    ret->neg = r->neg;
+
+    rp = ret->d;
+
+    /*
+     * Shift |nl| words to divide by R. We have |ap| < 2 * |n|. Note that |ap|
+     * includes |carry| which is stored separately.
+     */
+    ap = &(r->d[nl]);
+
+    carry -= bn_sub_words(rp, ap, np, nl);
+    /*
+     * |carry| is -1 if |ap| - |np| underflowed or zero if it did not. Note
+     * |carry| cannot be 1. That would imply the subtraction did not fit in
+     * |nl| words, and we know at most one subtraction is needed.
+     */
+    for (i = 0; i < nl; i++) {
+        rp[i] = (carry & ap[i]) | (~carry & rp[i]);
+        ap[i] = 0;
+    }
+
+    return 1;
+}
+#endif                          /* MONT_WORD */
+
+int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
+                       BN_CTX *ctx)
+{
+    int retn;
+
+    retn = bn_from_mont_fixed_top(ret, a, mont, ctx);
+    bn_correct_top(ret);
+    bn_check_top(ret);
+
+    return retn;
+}
+
+int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
+                           BN_CTX *ctx)
+{
+    int retn = 0;
+#ifdef MONT_WORD
+    BIGNUM *t;
+
+    BN_CTX_start(ctx);
+    if ((t = BN_CTX_get(ctx)) && BN_copy(t, a)) {
+        retn = bn_from_montgomery_word(ret, t, mont);
+    }
+    BN_CTX_end(ctx);
+#else                           /* !MONT_WORD */
+    BIGNUM *t1, *t2;
+
+    BN_CTX_start(ctx);
+    t1 = BN_CTX_get(ctx);
+    t2 = BN_CTX_get(ctx);
+    if (t2 == NULL)
+        goto err;
+
+    if (!BN_copy(t1, a))
+        goto err;
+    BN_mask_bits(t1, mont->ri);
+
+    if (!BN_mul(t2, t1, &mont->Ni, ctx))
+        goto err;
+    BN_mask_bits(t2, mont->ri);
+
+    if (!BN_mul(t1, t2, &mont->N, ctx))
+        goto err;
+    if (!BN_add(t2, a, t1))
+        goto err;
+    if (!BN_rshift(ret, t2, mont->ri))
+        goto err;
+
+    if (BN_ucmp(ret, &(mont->N)) >= 0) {
+        if (!BN_usub(ret, ret, &(mont->N)))
+            goto err;
+    }
+    retn = 1;
+    bn_check_top(ret);
+ err:
+    BN_CTX_end(ctx);
+#endif                          /* MONT_WORD */
+    return retn;
+}
+
+int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont,
+                         BN_CTX *ctx)
+{
+    return bn_mul_mont_fixed_top(r, a, &(mont->RR), mont, ctx);
+}
+
+BN_MONT_CTX *BN_MONT_CTX_new(void)
+{
+    BN_MONT_CTX *ret;
+
+    if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) {
+        BNerr(BN_F_BN_MONT_CTX_NEW, ERR_R_MALLOC_FAILURE);
+        return NULL;
+    }
+
+    BN_MONT_CTX_init(ret);
+    ret->flags = BN_FLG_MALLOCED;
+    return ret;
+}
+
+void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
+{
+    ctx->ri = 0;
+    bn_init(&ctx->RR);
+    bn_init(&ctx->N);
+    bn_init(&ctx->Ni);
+    ctx->n0[0] = ctx->n0[1] = 0;
+    ctx->flags = 0;
+}
+
+void BN_MONT_CTX_free(BN_MONT_CTX *mont)
+{
+    if (mont == NULL)
+        return;
+    BN_clear_free(&mont->RR);
+    BN_clear_free(&mont->N);
+    BN_clear_free(&mont->Ni);
+    if (mont->flags & BN_FLG_MALLOCED)
+        OPENSSL_free(mont);
+}
+
+int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
+{
+    int i, ret = 0;
+    BIGNUM *Ri, *R;
+
+    if (BN_is_zero(mod))
+        return 0;
+
+    BN_CTX_start(ctx);
+    if ((Ri = BN_CTX_get(ctx)) == NULL)
+        goto err;
+    R = &(mont->RR);            /* grab RR as a temp */
+    if (!BN_copy(&(mont->N), mod))
+        goto err;               /* Set N */
+    if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
+        BN_set_flags(&(mont->N), BN_FLG_CONSTTIME);
+    mont->N.neg = 0;
+
+#ifdef MONT_WORD
+    {
+        BIGNUM tmod;
+        BN_ULONG buf[2];
+
+        bn_init(&tmod);
+        tmod.d = buf;
+        tmod.dmax = 2;
+        tmod.neg = 0;
+
+        if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
+            BN_set_flags(&tmod, BN_FLG_CONSTTIME);
+
+        mont->ri = (BN_num_bits(mod) + (BN_BITS2 - 1)) / BN_BITS2 * BN_BITS2;
+
+# if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
+        /*
+         * Only certain BN_BITS2<=32 platforms actually make use of n0[1],
+         * and we could use the #else case (with a shorter R value) for the
+         * others.  However, currently only the assembler files do know which
+         * is which.
+         */
+
+        BN_zero(R);
+        if (!(BN_set_bit(R, 2 * BN_BITS2)))
+            goto err;
+
+        tmod.top = 0;
+        if ((buf[0] = mod->d[0]))
+            tmod.top = 1;
+        if ((buf[1] = mod->top > 1 ? mod->d[1] : 0))
+            tmod.top = 2;
+
+        if (BN_is_one(&tmod))
+            BN_zero(Ri);
+        else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
+            goto err;
+        if (!BN_lshift(Ri, Ri, 2 * BN_BITS2))
+            goto err;           /* R*Ri */
+        if (!BN_is_zero(Ri)) {
+            if (!BN_sub_word(Ri, 1))
+                goto err;
+        } else {                /* if N mod word size == 1 */
+
+            if (bn_expand(Ri, (int)sizeof(BN_ULONG) * 2) == NULL)
+                goto err;
+            /* Ri-- (mod double word size) */
+            Ri->neg = 0;
+            Ri->d[0] = BN_MASK2;
+            Ri->d[1] = BN_MASK2;
+            Ri->top = 2;
+        }
+        if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
+            goto err;
+        /*
+         * Ni = (R*Ri-1)/N, keep only couple of least significant words:
+         */
+        mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
+        mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
+# else
+        BN_zero(R);
+        if (!(BN_set_bit(R, BN_BITS2)))
+            goto err;           /* R */
+
+        buf[0] = mod->d[0];     /* tmod = N mod word size */
+        buf[1] = 0;
+        tmod.top = buf[0] != 0 ? 1 : 0;
+        /* Ri = R^-1 mod N */
+        if (BN_is_one(&tmod))
+            BN_zero(Ri);
+        else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
+            goto err;
+        if (!BN_lshift(Ri, Ri, BN_BITS2))
+            goto err;           /* R*Ri */
+        if (!BN_is_zero(Ri)) {
+            if (!BN_sub_word(Ri, 1))
+                goto err;
+        } else {                /* if N mod word size == 1 */
+
+            if (!BN_set_word(Ri, BN_MASK2))
+                goto err;       /* Ri-- (mod word size) */
+        }
+        if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
+            goto err;
+        /*
+         * Ni = (R*Ri-1)/N, keep only least significant word:
+         */
+        mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
+        mont->n0[1] = 0;
+# endif
+    }
+#else                           /* !MONT_WORD */
+    {                           /* bignum version */
+        mont->ri = BN_num_bits(&mont->N);
+        BN_zero(R);
+        if (!BN_set_bit(R, mont->ri))
+            goto err;           /* R = 2^ri */
+        /* Ri = R^-1 mod N */
+        if ((BN_mod_inverse(Ri, R, &mont->N, ctx)) == NULL)
+            goto err;
+        if (!BN_lshift(Ri, Ri, mont->ri))
+            goto err;           /* R*Ri */
+        if (!BN_sub_word(Ri, 1))
+            goto err;
+        /*
+         * Ni = (R*Ri-1) / N
+         */
+        if (!BN_div(&(mont->Ni), NULL, Ri, &mont->N, ctx))
+            goto err;
+    }
+#endif
+
+    /* setup RR for conversions */
+    BN_zero(&(mont->RR));
+    if (!BN_set_bit(&(mont->RR), mont->ri * 2))
+        goto err;
+    if (!BN_mod(&(mont->RR), &(mont->RR), &(mont->N), ctx))
+        goto err;
+
+    for (i = mont->RR.top, ret = mont->N.top; i < ret; i++)
+        mont->RR.d[i] = 0;
+    mont->RR.top = ret;
+    mont->RR.flags |= BN_FLG_FIXED_TOP;
+
+    ret = 1;
+ err:
+    BN_CTX_end(ctx);
+    return ret;
+}
+
+BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
+{
+    if (to == from)
+        return to;
+
+    if (!BN_copy(&(to->RR), &(from->RR)))
+        return NULL;
+    if (!BN_copy(&(to->N), &(from->N)))
+        return NULL;
+    if (!BN_copy(&(to->Ni), &(from->Ni)))
+        return NULL;
+    to->ri = from->ri;
+    to->n0[0] = from->n0[0];
+    to->n0[1] = from->n0[1];
+    return to;
+}
+
+BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_RWLOCK *lock,
+                                    const BIGNUM *mod, BN_CTX *ctx)
+{
+    BN_MONT_CTX *ret;
+
+    CRYPTO_THREAD_read_lock(lock);
+    ret = *pmont;
+    CRYPTO_THREAD_unlock(lock);
+    if (ret)
+        return ret;
+
+    /*
+     * We don't want to serialise globally while doing our lazy-init math in
+     * BN_MONT_CTX_set. That punishes threads that are doing independent
+     * things. Instead, punish the case where more than one thread tries to
+     * lazy-init the same 'pmont', by having each do the lazy-init math work
+     * independently and only use the one from the thread that wins the race
+     * (the losers throw away the work they've done).
+     */
+    ret = BN_MONT_CTX_new();
+    if (ret == NULL)
+        return NULL;
+    if (!BN_MONT_CTX_set(ret, mod, ctx)) {
+        BN_MONT_CTX_free(ret);
+        return NULL;
+    }
+
+    /* The locked compare-and-set, after the local work is done. */
+    CRYPTO_THREAD_write_lock(lock);
+    if (*pmont) {
+        BN_MONT_CTX_free(ret);
+        ret = *pmont;
+    } else
+        *pmont = ret;
+    CRYPTO_THREAD_unlock(lock);
+    return ret;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mpi.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mpi.c
@ -0,0 +1,86 @@
+/*
+ * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <stdio.h>
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+int BN_bn2mpi(const BIGNUM *a, unsigned char *d)
+{
+    int bits;
+    int num = 0;
+    int ext = 0;
+    long l;
+
+    bits = BN_num_bits(a);
+    num = (bits + 7) / 8;
+    if (bits > 0) {
+        ext = ((bits & 0x07) == 0);
+    }
+    if (d == NULL)
+        return (num + 4 + ext);
+
+    l = num + ext;
+    d[0] = (unsigned char)(l >> 24) & 0xff;
+    d[1] = (unsigned char)(l >> 16) & 0xff;
+    d[2] = (unsigned char)(l >> 8) & 0xff;
+    d[3] = (unsigned char)(l) & 0xff;
+    if (ext)
+        d[4] = 0;
+    num = BN_bn2bin(a, &(d[4 + ext]));
+    if (a->neg)
+        d[4] |= 0x80;
+    return (num + 4 + ext);
+}
+
+BIGNUM *BN_mpi2bn(const unsigned char *d, int n, BIGNUM *ain)
+{
+    long len;
+    int neg = 0;
+    BIGNUM *a = NULL;
+
+    if (n < 4) {
+        BNerr(BN_F_BN_MPI2BN, BN_R_INVALID_LENGTH);
+        return NULL;
+    }
+    len = ((long)d[0] << 24) | ((long)d[1] << 16) | ((int)d[2] << 8) | (int)
+        d[3];
+    if ((len + 4) != n) {
+        BNerr(BN_F_BN_MPI2BN, BN_R_ENCODING_ERROR);
+        return NULL;
+    }
+
+    if (ain == NULL)
+        a = BN_new();
+    else
+        a = ain;
+
+    if (a == NULL)
+        return NULL;
+
+    if (len == 0) {
+        a->neg = 0;
+        a->top = 0;
+        return a;
+    }
+    d += 4;
+    if ((*d) & 0x80)
+        neg = 1;
+    if (BN_bin2bn(d, (int)len, a) == NULL) {
+        if (ain == NULL)
+            BN_free(a);
+        return NULL;
+    }
+    a->neg = neg;
+    if (neg) {
+        BN_clear_bit(a, BN_num_bits(a) - 1);
+    }
+    bn_check_top(a);
+    return a;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mul.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_mul.c
@ -0,0 +1,684 @@
+/*
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <assert.h>
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
+/*
+ * Here follows specialised variants of bn_add_words() and bn_sub_words().
+ * They have the property performing operations on arrays of different sizes.
+ * The sizes of those arrays is expressed through cl, which is the common
+ * length ( basically, min(len(a),len(b)) ), and dl, which is the delta
+ * between the two lengths, calculated as len(a)-len(b). All lengths are the
+ * number of BN_ULONGs...  For the operations that require a result array as
+ * parameter, it must have the length cl+abs(dl). These functions should
+ * probably end up in bn_asm.c as soon as there are assembler counterparts
+ * for the systems that use assembler files.
+ */
+
+BN_ULONG bn_sub_part_words(BN_ULONG *r,
+                           const BN_ULONG *a, const BN_ULONG *b,
+                           int cl, int dl)
+{
+    BN_ULONG c, t;
+
+    assert(cl >= 0);
+    c = bn_sub_words(r, a, b, cl);
+
+    if (dl == 0)
+        return c;
+
+    r += cl;
+    a += cl;
+    b += cl;
+
+    if (dl < 0) {
+        for (;;) {
+            t = b[0];
+            r[0] = (0 - t - c) & BN_MASK2;
+            if (t != 0)
+                c = 1;
+            if (++dl >= 0)
+                break;
+
+            t = b[1];
+            r[1] = (0 - t - c) & BN_MASK2;
+            if (t != 0)
+                c = 1;
+            if (++dl >= 0)
+                break;
+
+            t = b[2];
+            r[2] = (0 - t - c) & BN_MASK2;
+            if (t != 0)
+                c = 1;
+            if (++dl >= 0)
+                break;
+
+            t = b[3];
+            r[3] = (0 - t - c) & BN_MASK2;
+            if (t != 0)
+                c = 1;
+            if (++dl >= 0)
+                break;
+
+            b += 4;
+            r += 4;
+        }
+    } else {
+        int save_dl = dl;
+        while (c) {
+            t = a[0];
+            r[0] = (t - c) & BN_MASK2;
+            if (t != 0)
+                c = 0;
+            if (--dl <= 0)
+                break;
+
+            t = a[1];
+            r[1] = (t - c) & BN_MASK2;
+            if (t != 0)
+                c = 0;
+            if (--dl <= 0)
+                break;
+
+            t = a[2];
+            r[2] = (t - c) & BN_MASK2;
+            if (t != 0)
+                c = 0;
+            if (--dl <= 0)
+                break;
+
+            t = a[3];
+            r[3] = (t - c) & BN_MASK2;
+            if (t != 0)
+                c = 0;
+            if (--dl <= 0)
+                break;
+
+            save_dl = dl;
+            a += 4;
+            r += 4;
+        }
+        if (dl > 0) {
+            if (save_dl > dl) {
+                switch (save_dl - dl) {
+                case 1:
+                    r[1] = a[1];
+                    if (--dl <= 0)
+                        break;
+                    /* fall thru */
+                case 2:
+                    r[2] = a[2];
+                    if (--dl <= 0)
+                        break;
+                    /* fall thru */
+                case 3:
+                    r[3] = a[3];
+                    if (--dl <= 0)
+                        break;
+                }
+                a += 4;
+                r += 4;
+            }
+        }
+        if (dl > 0) {
+            for (;;) {
+                r[0] = a[0];
+                if (--dl <= 0)
+                    break;
+                r[1] = a[1];
+                if (--dl <= 0)
+                    break;
+                r[2] = a[2];
+                if (--dl <= 0)
+                    break;
+                r[3] = a[3];
+                if (--dl <= 0)
+                    break;
+
+                a += 4;
+                r += 4;
+            }
+        }
+    }
+    return c;
+}
+#endif
+
+#ifdef BN_RECURSION
+/*
+ * Karatsuba recursive multiplication algorithm (cf. Knuth, The Art of
+ * Computer Programming, Vol. 2)
+ */
+
+/*-
+ * r is 2*n2 words in size,
+ * a and b are both n2 words in size.
+ * n2 must be a power of 2.
+ * We multiply and return the result.
+ * t must be 2*n2 words in size
+ * We calculate
+ * a[0]*b[0]
+ * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
+ * a[1]*b[1]
+ */
+/* dnX may not be positive, but n2/2+dnX has to be */
+void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
+                      int dna, int dnb, BN_ULONG *t)
+{
+    int n = n2 / 2, c1, c2;
+    int tna = n + dna, tnb = n + dnb;
+    unsigned int neg, zero;
+    BN_ULONG ln, lo, *p;
+
+# ifdef BN_MUL_COMBA
+#  if 0
+    if (n2 == 4) {
+        bn_mul_comba4(r, a, b);
+        return;
+    }
+#  endif
+    /*
+     * Only call bn_mul_comba 8 if n2 == 8 and the two arrays are complete
+     * [steve]
+     */
+    if (n2 == 8 && dna == 0 && dnb == 0) {
+        bn_mul_comba8(r, a, b);
+        return;
+    }
+# endif                         /* BN_MUL_COMBA */
+    /* Else do normal multiply */
+    if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
+        bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
+        if ((dna + dnb) < 0)
+            memset(&r[2 * n2 + dna + dnb], 0,
+                   sizeof(BN_ULONG) * -(dna + dnb));
+        return;
+    }
+    /* r=(a[0]-a[1])*(b[1]-b[0]) */
+    c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
+    c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
+    zero = neg = 0;
+    switch (c1 * 3 + c2) {
+    case -4:
+        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
+        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+        break;
+    case -3:
+        zero = 1;
+        break;
+    case -2:
+        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
+        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
+        neg = 1;
+        break;
+    case -1:
+    case 0:
+    case 1:
+        zero = 1;
+        break;
+    case 2:
+        bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
+        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+        neg = 1;
+        break;
+    case 3:
+        zero = 1;
+        break;
+    case 4:
+        bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
+        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
+        break;
+    }
+
+# ifdef BN_MUL_COMBA
+    if (n == 4 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba4 could take
+                                           * extra args to do this well */
+        if (!zero)
+            bn_mul_comba4(&(t[n2]), t, &(t[n]));
+        else
+            memset(&t[n2], 0, sizeof(*t) * 8);
+
+        bn_mul_comba4(r, a, b);
+        bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
+    } else if (n == 8 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba8 could
+                                                  * take extra args to do
+                                                  * this well */
+        if (!zero)
+            bn_mul_comba8(&(t[n2]), t, &(t[n]));
+        else
+            memset(&t[n2], 0, sizeof(*t) * 16);
+
+        bn_mul_comba8(r, a, b);
+        bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
+    } else
+# endif                         /* BN_MUL_COMBA */
+    {
+        p = &(t[n2 * 2]);
+        if (!zero)
+            bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
+        else
+            memset(&t[n2], 0, sizeof(*t) * n2);
+        bn_mul_recursive(r, a, b, n, 0, 0, p);
+        bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
+    }
+
+    /*-
+     * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
+     * r[10] holds (a[0]*b[0])
+     * r[32] holds (b[1]*b[1])
+     */
+
+    c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
+
+    if (neg) {                  /* if t[32] is negative */
+        c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
+    } else {
+        /* Might have a carry */
+        c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
+    }
+
+    /*-
+     * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
+     * r[10] holds (a[0]*b[0])
+     * r[32] holds (b[1]*b[1])
+     * c1 holds the carry bits
+     */
+    c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
+    if (c1) {
+        p = &(r[n + n2]);
+        lo = *p;
+        ln = (lo + c1) & BN_MASK2;
+        *p = ln;
+
+        /*
+         * The overflow will stop before we over write words we should not
+         * overwrite
+         */
+        if (ln < (BN_ULONG)c1) {
+            do {
+                p++;
+                lo = *p;
+                ln = (lo + 1) & BN_MASK2;
+                *p = ln;
+            } while (ln == 0);
+        }
+    }
+}
+
+/*
+ * n+tn is the word length t needs to be n*4 is size, as does r
+ */
+/* tnX may not be negative but less than n */
+void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
+                           int tna, int tnb, BN_ULONG *t)
+{
+    int i, j, n2 = n * 2;
+    int c1, c2, neg;
+    BN_ULONG ln, lo, *p;
+
+    if (n < 8) {
+        bn_mul_normal(r, a, n + tna, b, n + tnb);
+        return;
+    }
+
+    /* r=(a[0]-a[1])*(b[1]-b[0]) */
+    c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
+    c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
+    neg = 0;
+    switch (c1 * 3 + c2) {
+    case -4:
+        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
+        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+        break;
+    case -3:
+    case -2:
+        bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
+        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
+        neg = 1;
+        break;
+    case -1:
+    case 0:
+    case 1:
+    case 2:
+        bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
+        bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+        neg = 1;
+        break;
+    case 3:
+    case 4:
+        bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
+        bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
+        break;
+    }
+    /*
+     * The zero case isn't yet implemented here. The speedup would probably
+     * be negligible.
+     */
+# if 0
+    if (n == 4) {
+        bn_mul_comba4(&(t[n2]), t, &(t[n]));
+        bn_mul_comba4(r, a, b);
+        bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
+        memset(&r[n2 + tn * 2], 0, sizeof(*r) * (n2 - tn * 2));
+    } else
+# endif
+    if (n == 8) {
+        bn_mul_comba8(&(t[n2]), t, &(t[n]));
+        bn_mul_comba8(r, a, b);
+        bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
+        memset(&r[n2 + tna + tnb], 0, sizeof(*r) * (n2 - tna - tnb));
+    } else {
+        p = &(t[n2 * 2]);
+        bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
+        bn_mul_recursive(r, a, b, n, 0, 0, p);
+        i = n / 2;
+        /*
+         * If there is only a bottom half to the number, just do it
+         */
+        if (tna > tnb)
+            j = tna - i;
+        else
+            j = tnb - i;
+        if (j == 0) {
+            bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
+                             i, tna - i, tnb - i, p);
+            memset(&r[n2 + i * 2], 0, sizeof(*r) * (n2 - i * 2));
+        } else if (j > 0) {     /* eg, n == 16, i == 8 and tn == 11 */
+            bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
+                                  i, tna - i, tnb - i, p);
+            memset(&(r[n2 + tna + tnb]), 0,
+                   sizeof(BN_ULONG) * (n2 - tna - tnb));
+        } else {                /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
+
+            memset(&r[n2], 0, sizeof(*r) * n2);
+            if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
+                && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
+                bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
+            } else {
+                for (;;) {
+                    i /= 2;
+                    /*
+                     * these simplified conditions work exclusively because
+                     * difference between tna and tnb is 1 or 0
+                     */
+                    if (i < tna || i < tnb) {
+                        bn_mul_part_recursive(&(r[n2]),
+                                              &(a[n]), &(b[n]),
+                                              i, tna - i, tnb - i, p);
+                        break;
+                    } else if (i == tna || i == tnb) {
+                        bn_mul_recursive(&(r[n2]),
+                                         &(a[n]), &(b[n]),
+                                         i, tna - i, tnb - i, p);
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    /*-
+     * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
+     * r[10] holds (a[0]*b[0])
+     * r[32] holds (b[1]*b[1])
+     */
+
+    c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
+
+    if (neg) {                  /* if t[32] is negative */
+        c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
+    } else {
+        /* Might have a carry */
+        c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
+    }
+
+    /*-
+     * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
+     * r[10] holds (a[0]*b[0])
+     * r[32] holds (b[1]*b[1])
+     * c1 holds the carry bits
+     */
+    c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
+    if (c1) {
+        p = &(r[n + n2]);
+        lo = *p;
+        ln = (lo + c1) & BN_MASK2;
+        *p = ln;
+
+        /*
+         * The overflow will stop before we over write words we should not
+         * overwrite
+         */
+        if (ln < (BN_ULONG)c1) {
+            do {
+                p++;
+                lo = *p;
+                ln = (lo + 1) & BN_MASK2;
+                *p = ln;
+            } while (ln == 0);
+        }
+    }
+}
+
+/*-
+ * a and b must be the same size, which is n2.
+ * r needs to be n2 words and t needs to be n2*2
+ */
+void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
+                          BN_ULONG *t)
+{
+    int n = n2 / 2;
+
+    bn_mul_recursive(r, a, b, n, 0, 0, &(t[0]));
+    if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) {
+        bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2]));
+        bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
+        bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2]));
+        bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
+    } else {
+        bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n);
+        bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n);
+        bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
+        bn_add_words(&(r[n]), &(r[n]), &(t[n]), n);
+    }
+}
+#endif                          /* BN_RECURSION */
+
+int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
+{
+    int ret = bn_mul_fixed_top(r, a, b, ctx);
+
+    bn_correct_top(r);
+    bn_check_top(r);
+
+    return ret;
+}
+
+int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
+{
+    int ret = 0;
+    int top, al, bl;
+    BIGNUM *rr;
+#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
+    int i;
+#endif
+#ifdef BN_RECURSION
+    BIGNUM *t = NULL;
+    int j = 0, k;
+#endif
+
+    bn_check_top(a);
+    bn_check_top(b);
+    bn_check_top(r);
+
+    al = a->top;
+    bl = b->top;
+
+    if ((al == 0) || (bl == 0)) {
+        BN_zero(r);
+        return 1;
+    }
+    top = al + bl;
+
+    BN_CTX_start(ctx);
+    if ((r == a) || (r == b)) {
+        if ((rr = BN_CTX_get(ctx)) == NULL)
+            goto err;
+    } else
+        rr = r;
+
+#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
+    i = al - bl;
+#endif
+#ifdef BN_MUL_COMBA
+    if (i == 0) {
+# if 0
+        if (al == 4) {
+            if (bn_wexpand(rr, 8) == NULL)
+                goto err;
+            rr->top = 8;
+            bn_mul_comba4(rr->d, a->d, b->d);
+            goto end;
+        }
+# endif
+        if (al == 8) {
+            if (bn_wexpand(rr, 16) == NULL)
+                goto err;
+            rr->top = 16;
+            bn_mul_comba8(rr->d, a->d, b->d);
+            goto end;
+        }
+    }
+#endif                          /* BN_MUL_COMBA */
+#ifdef BN_RECURSION
+    if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
+        if (i >= -1 && i <= 1) {
+            /*
+             * Find out the power of two lower or equal to the longest of the
+             * two numbers
+             */
+            if (i >= 0) {
+                j = BN_num_bits_word((BN_ULONG)al);
+            }
+            if (i == -1) {
+                j = BN_num_bits_word((BN_ULONG)bl);
+            }
+            j = 1 << (j - 1);
+            assert(j <= al || j <= bl);
+            k = j + j;
+            t = BN_CTX_get(ctx);
+            if (t == NULL)
+                goto err;
+            if (al > j || bl > j) {
+                if (bn_wexpand(t, k * 4) == NULL)
+                    goto err;
+                if (bn_wexpand(rr, k * 4) == NULL)
+                    goto err;
+                bn_mul_part_recursive(rr->d, a->d, b->d,
+                                      j, al - j, bl - j, t->d);
+            } else {            /* al <= j || bl <= j */
+
+                if (bn_wexpand(t, k * 2) == NULL)
+                    goto err;
+                if (bn_wexpand(rr, k * 2) == NULL)
+                    goto err;
+                bn_mul_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
+            }
+            rr->top = top;
+            goto end;
+        }
+    }
+#endif                          /* BN_RECURSION */
+    if (bn_wexpand(rr, top) == NULL)
+        goto err;
+    rr->top = top;
+    bn_mul_normal(rr->d, a->d, al, b->d, bl);
+
+#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
+ end:
+#endif
+    rr->neg = a->neg ^ b->neg;
+    rr->flags |= BN_FLG_FIXED_TOP;
+    if (r != rr && BN_copy(r, rr) == NULL)
+        goto err;
+
+    ret = 1;
+ err:
+    bn_check_top(r);
+    BN_CTX_end(ctx);
+    return ret;
+}
+
+void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
+{
+    BN_ULONG *rr;
+
+    if (na < nb) {
+        int itmp;
+        BN_ULONG *ltmp;
+
+        itmp = na;
+        na = nb;
+        nb = itmp;
+        ltmp = a;
+        a = b;
+        b = ltmp;
+
+    }
+    rr = &(r[na]);
+    if (nb <= 0) {
+        (void)bn_mul_words(r, a, na, 0);
+        return;
+    } else
+        rr[0] = bn_mul_words(r, a, na, b[0]);
+
+    for (;;) {
+        if (--nb <= 0)
+            return;
+        rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
+        if (--nb <= 0)
+            return;
+        rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
+        if (--nb <= 0)
+            return;
+        rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
+        if (--nb <= 0)
+            return;
+        rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
+        rr += 4;
+        r += 4;
+        b += 4;
+    }
+}
+
+void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
+{
+    bn_mul_words(r, a, n, b[0]);
+
+    for (;;) {
+        if (--n <= 0)
+            return;
+        bn_mul_add_words(&(r[1]), a, n, b[1]);
+        if (--n <= 0)
+            return;
+        bn_mul_add_words(&(r[2]), a, n, b[2]);
+        if (--n <= 0)
+            return;
+        bn_mul_add_words(&(r[3]), a, n, b[3]);
+        if (--n <= 0)
+            return;
+        bn_mul_add_words(&(r[4]), a, n, b[4]);
+        r += 4;
+        b += 4;
+    }
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_nist.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_nist.c
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_prime.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_prime.c
@ -0,0 +1,469 @@
+/*
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <stdio.h>
+#include <time.h>
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+/*
+ * The quick sieve algorithm approach to weeding out primes is Philip
+ * Zimmermann's, as implemented in PGP.  I have had a read of his comments
+ * and implemented my own version.
+ */
+#include "bn_prime.h"
+
+static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
+                   const BIGNUM *a1_odd, int k, BN_CTX *ctx,
+                   BN_MONT_CTX *mont);
+static int probable_prime(BIGNUM *rnd, int bits, prime_t *mods);
+static int probable_prime_dh_safe(BIGNUM *rnd, int bits,
+                                  const BIGNUM *add, const BIGNUM *rem,
+                                  BN_CTX *ctx);
+
+int BN_GENCB_call(BN_GENCB *cb, int a, int b)
+{
+    /* No callback means continue */
+    if (!cb)
+        return 1;
+    switch (cb->ver) {
+    case 1:
+        /* Deprecated-style callbacks */
+        if (!cb->cb.cb_1)
+            return 1;
+        cb->cb.cb_1(a, b, cb->arg);
+        return 1;
+    case 2:
+        /* New-style callbacks */
+        return cb->cb.cb_2(a, b, cb);
+    default:
+        break;
+    }
+    /* Unrecognised callback type */
+    return 0;
+}
+
+int BN_generate_prime_ex(BIGNUM *ret, int bits, int safe,
+                         const BIGNUM *add, const BIGNUM *rem, BN_GENCB *cb)
+{
+    BIGNUM *t;
+    int found = 0;
+    int i, j, c1 = 0;
+    BN_CTX *ctx = NULL;
+    prime_t *mods = NULL;
+    int checks = BN_prime_checks_for_size(bits);
+
+    if (bits < 2) {
+        /* There are no prime numbers this small. */
+        BNerr(BN_F_BN_GENERATE_PRIME_EX, BN_R_BITS_TOO_SMALL);
+        return 0;
+    } else if (bits == 2 && safe) {
+        /* The smallest safe prime (7) is three bits. */
+        BNerr(BN_F_BN_GENERATE_PRIME_EX, BN_R_BITS_TOO_SMALL);
+        return 0;
+    }
+
+    mods = OPENSSL_zalloc(sizeof(*mods) * NUMPRIMES);
+    if (mods == NULL)
+        goto err;
+
+    ctx = BN_CTX_new();
+    if (ctx == NULL)
+        goto err;
+    BN_CTX_start(ctx);
+    t = BN_CTX_get(ctx);
+    if (t == NULL)
+        goto err;
+ loop:
+    /* make a random number and set the top and bottom bits */
+    if (add == NULL) {
+        if (!probable_prime(ret, bits, mods))
+            goto err;
+    } else {
+        if (safe) {
+            if (!probable_prime_dh_safe(ret, bits, add, rem, ctx))
+                goto err;
+        } else {
+            if (!bn_probable_prime_dh(ret, bits, add, rem, ctx))
+                goto err;
+        }
+    }
+
+    if (!BN_GENCB_call(cb, 0, c1++))
+        /* aborted */
+        goto err;
+
+    if (!safe) {
+        i = BN_is_prime_fasttest_ex(ret, checks, ctx, 0, cb);
+        if (i == -1)
+            goto err;
+        if (i == 0)
+            goto loop;
+    } else {
+        /*
+         * for "safe prime" generation, check that (p-1)/2 is prime. Since a
+         * prime is odd, We just need to divide by 2
+         */
+        if (!BN_rshift1(t, ret))
+            goto err;
+
+        for (i = 0; i < checks; i++) {
+            j = BN_is_prime_fasttest_ex(ret, 1, ctx, 0, cb);
+            if (j == -1)
+                goto err;
+            if (j == 0)
+                goto loop;
+
+            j = BN_is_prime_fasttest_ex(t, 1, ctx, 0, cb);
+            if (j == -1)
+                goto err;
+            if (j == 0)
+                goto loop;
+
+            if (!BN_GENCB_call(cb, 2, c1 - 1))
+                goto err;
+            /* We have a safe prime test pass */
+        }
+    }
+    /* we have a prime :-) */
+    found = 1;
+ err:
+    OPENSSL_free(mods);
+    if (ctx != NULL)
+        BN_CTX_end(ctx);
+    BN_CTX_free(ctx);
+    bn_check_top(ret);
+    return found;
+}
+
+int BN_is_prime_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
+                   BN_GENCB *cb)
+{
+    return BN_is_prime_fasttest_ex(a, checks, ctx_passed, 0, cb);
+}
+
+int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
+                            int do_trial_division, BN_GENCB *cb)
+{
+    int i, j, ret = -1;
+    int k;
+    BN_CTX *ctx = NULL;
+    BIGNUM *A1, *A1_odd, *A3, *check; /* taken from ctx */
+    BN_MONT_CTX *mont = NULL;
+
+    /* Take care of the really small primes 2 & 3 */
+    if (BN_is_word(a, 2) || BN_is_word(a, 3))
+        return 1;
+
+    /* Check odd and bigger than 1 */
+    if (!BN_is_odd(a) || BN_cmp(a, BN_value_one()) <= 0)
+        return 0;
+
+    if (checks == BN_prime_checks)
+        checks = BN_prime_checks_for_size(BN_num_bits(a));
+
+    /* first look for small factors */
+    if (do_trial_division) {
+        for (i = 1; i < NUMPRIMES; i++) {
+            BN_ULONG mod = BN_mod_word(a, primes[i]);
+            if (mod == (BN_ULONG)-1)
+                goto err;
+            if (mod == 0)
+                return BN_is_word(a, primes[i]);
+        }
+        if (!BN_GENCB_call(cb, 1, -1))
+            goto err;
+    }
+
+    if (ctx_passed != NULL)
+        ctx = ctx_passed;
+    else if ((ctx = BN_CTX_new()) == NULL)
+        goto err;
+    BN_CTX_start(ctx);
+
+    A1 = BN_CTX_get(ctx);
+    A3 = BN_CTX_get(ctx);
+    A1_odd = BN_CTX_get(ctx);
+    check = BN_CTX_get(ctx);
+    if (check == NULL)
+        goto err;
+
+    /* compute A1 := a - 1 */
+    if (!BN_copy(A1, a) || !BN_sub_word(A1, 1))
+        goto err;
+    /* compute A3 := a - 3 */
+    if (!BN_copy(A3, a) || !BN_sub_word(A3, 3))
+        goto err;
+
+    /* write  A1  as  A1_odd * 2^k */
+    k = 1;
+    while (!BN_is_bit_set(A1, k))
+        k++;
+    if (!BN_rshift(A1_odd, A1, k))
+        goto err;
+
+    /* Montgomery setup for computations mod a */
+    mont = BN_MONT_CTX_new();
+    if (mont == NULL)
+        goto err;
+    if (!BN_MONT_CTX_set(mont, a, ctx))
+        goto err;
+
+    for (i = 0; i < checks; i++) {
+        /* 1 < check < a-1 */
+        if (!BN_priv_rand_range(check, A3) || !BN_add_word(check, 2))
+            goto err;
+
+        j = witness(check, a, A1, A1_odd, k, ctx, mont);
+        if (j == -1)
+            goto err;
+        if (j) {
+            ret = 0;
+            goto err;
+        }
+        if (!BN_GENCB_call(cb, 1, i))
+            goto err;
+    }
+    ret = 1;
+ err:
+    if (ctx != NULL) {
+        BN_CTX_end(ctx);
+        if (ctx_passed == NULL)
+            BN_CTX_free(ctx);
+    }
+    BN_MONT_CTX_free(mont);
+
+    return ret;
+}
+
+static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
+                   const BIGNUM *a1_odd, int k, BN_CTX *ctx,
+                   BN_MONT_CTX *mont)
+{
+    if (!BN_mod_exp_mont(w, w, a1_odd, a, ctx, mont)) /* w := w^a1_odd mod a */
+        return -1;
+    if (BN_is_one(w))
+        return 0;               /* probably prime */
+    if (BN_cmp(w, a1) == 0)
+        return 0;               /* w == -1 (mod a), 'a' is probably prime */
+    while (--k) {
+        if (!BN_mod_mul(w, w, w, a, ctx)) /* w := w^2 mod a */
+            return -1;
+        if (BN_is_one(w))
+            return 1;           /* 'a' is composite, otherwise a previous 'w'
+                                 * would have been == -1 (mod 'a') */
+        if (BN_cmp(w, a1) == 0)
+            return 0;           /* w == -1 (mod a), 'a' is probably prime */
+    }
+    /*
+     * If we get here, 'w' is the (a-1)/2-th power of the original 'w', and
+     * it is neither -1 nor +1 -- so 'a' cannot be prime
+     */
+    bn_check_top(w);
+    return 1;
+}
+
+static int probable_prime(BIGNUM *rnd, int bits, prime_t *mods)
+{
+    int i;
+    BN_ULONG delta;
+    BN_ULONG maxdelta = BN_MASK2 - primes[NUMPRIMES - 1];
+    char is_single_word = bits <= BN_BITS2;
+
+ again:
+    /* TODO: Not all primes are private */
+    if (!BN_priv_rand(rnd, bits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ODD))
+        return 0;
+    /* we now have a random number 'rnd' to test. */
+    for (i = 1; i < NUMPRIMES; i++) {
+        BN_ULONG mod = BN_mod_word(rnd, (BN_ULONG)primes[i]);
+        if (mod == (BN_ULONG)-1)
+            return 0;
+        mods[i] = (prime_t) mod;
+    }
+    /*
+     * If bits is so small that it fits into a single word then we
+     * additionally don't want to exceed that many bits.
+     */
+    if (is_single_word) {
+        BN_ULONG size_limit;
+
+        if (bits == BN_BITS2) {
+            /*
+             * Shifting by this much has undefined behaviour so we do it a
+             * different way
+             */
+            size_limit = ~((BN_ULONG)0) - BN_get_word(rnd);
+        } else {
+            size_limit = (((BN_ULONG)1) << bits) - BN_get_word(rnd) - 1;
+        }
+        if (size_limit < maxdelta)
+            maxdelta = size_limit;
+    }
+    delta = 0;
+ loop:
+    if (is_single_word) {
+        BN_ULONG rnd_word = BN_get_word(rnd);
+
+        /*-
+         * In the case that the candidate prime is a single word then
+         * we check that:
+         *   1) It's greater than primes[i] because we shouldn't reject
+         *      3 as being a prime number because it's a multiple of
+         *      three.
+         *   2) That it's not a multiple of a known prime. We don't
+         *      check that rnd-1 is also coprime to all the known
+         *      primes because there aren't many small primes where
+         *      that's true.
+         */
+        for (i = 1; i < NUMPRIMES && primes[i] < rnd_word; i++) {
+            if ((mods[i] + delta) % primes[i] == 0) {
+                delta += 2;
+                if (delta > maxdelta)
+                    goto again;
+                goto loop;
+            }
+        }
+    } else {
+        for (i = 1; i < NUMPRIMES; i++) {
+            /*
+             * check that rnd is not a prime and also that gcd(rnd-1,primes)
+             * == 1 (except for 2)
+             */
+            if (((mods[i] + delta) % primes[i]) <= 1) {
+                delta += 2;
+                if (delta > maxdelta)
+                    goto again;
+                goto loop;
+            }
+        }
+    }
+    if (!BN_add_word(rnd, delta))
+        return 0;
+    if (BN_num_bits(rnd) != bits)
+        goto again;
+    bn_check_top(rnd);
+    return 1;
+}
+
+int bn_probable_prime_dh(BIGNUM *rnd, int bits,
+                         const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx)
+{
+    int i, ret = 0;
+    BIGNUM *t1;
+
+    BN_CTX_start(ctx);
+    if ((t1 = BN_CTX_get(ctx)) == NULL)
+        goto err;
+
+    if (!BN_rand(rnd, bits, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD))
+        goto err;
+
+    /* we need ((rnd-rem) % add) == 0 */
+
+    if (!BN_mod(t1, rnd, add, ctx))
+        goto err;
+    if (!BN_sub(rnd, rnd, t1))
+        goto err;
+    if (rem == NULL) {
+        if (!BN_add_word(rnd, 1))
+            goto err;
+    } else {
+        if (!BN_add(rnd, rnd, rem))
+            goto err;
+    }
+
+    /* we now have a random number 'rand' to test. */
+
+ loop:
+    for (i = 1; i < NUMPRIMES; i++) {
+        /* check that rnd is a prime */
+        BN_ULONG mod = BN_mod_word(rnd, (BN_ULONG)primes[i]);
+        if (mod == (BN_ULONG)-1)
+            goto err;
+        if (mod <= 1) {
+            if (!BN_add(rnd, rnd, add))
+                goto err;
+            goto loop;
+        }
+    }
+    ret = 1;
+
+ err:
+    BN_CTX_end(ctx);
+    bn_check_top(rnd);
+    return ret;
+}
+
+static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd,
+                                  const BIGNUM *rem, BN_CTX *ctx)
+{
+    int i, ret = 0;
+    BIGNUM *t1, *qadd, *q;
+
+    bits--;
+    BN_CTX_start(ctx);
+    t1 = BN_CTX_get(ctx);
+    q = BN_CTX_get(ctx);
+    qadd = BN_CTX_get(ctx);
+    if (qadd == NULL)
+        goto err;
+
+    if (!BN_rshift1(qadd, padd))
+        goto err;
+
+    if (!BN_rand(q, bits, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD))
+        goto err;
+
+    /* we need ((rnd-rem) % add) == 0 */
+    if (!BN_mod(t1, q, qadd, ctx))
+        goto err;
+    if (!BN_sub(q, q, t1))
+        goto err;
+    if (rem == NULL) {
+        if (!BN_add_word(q, 1))
+            goto err;
+    } else {
+        if (!BN_rshift1(t1, rem))
+            goto err;
+        if (!BN_add(q, q, t1))
+            goto err;
+    }
+
+    /* we now have a random number 'rand' to test. */
+    if (!BN_lshift1(p, q))
+        goto err;
+    if (!BN_add_word(p, 1))
+        goto err;
+
+ loop:
+    for (i = 1; i < NUMPRIMES; i++) {
+        /* check that p and q are prime */
+        /*
+         * check that for p and q gcd(p-1,primes) == 1 (except for 2)
+         */
+        BN_ULONG pmod = BN_mod_word(p, (BN_ULONG)primes[i]);
+        BN_ULONG qmod = BN_mod_word(q, (BN_ULONG)primes[i]);
+        if (pmod == (BN_ULONG)-1 || qmod == (BN_ULONG)-1)
+            goto err;
+        if (pmod == 0 || qmod == 0) {
+            if (!BN_add(p, p, padd))
+                goto err;
+            if (!BN_add(q, q, qadd))
+                goto err;
+            goto loop;
+        }
+    }
+    ret = 1;
+
+ err:
+    BN_CTX_end(ctx);
+    bn_check_top(p);
+    return ret;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_prime.h
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_prime.h
@ -0,0 +1,273 @@
+/*
+ * WARNING: do not edit!
+ * Generated by crypto/bn/bn_prime.pl
+ *
+ * Copyright 1998-2019 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+typedef unsigned short prime_t;
+# define NUMPRIMES 2048
+
+static const prime_t primes[2048] = {
+        2,     3,     5,     7,    11,    13,    17,    19,
+       23,    29,    31,    37,    41,    43,    47,    53,
+       59,    61,    67,    71,    73,    79,    83,    89,
+       97,   101,   103,   107,   109,   113,   127,   131,
+      137,   139,   149,   151,   157,   163,   167,   173,
+      179,   181,   191,   193,   197,   199,   211,   223,
+      227,   229,   233,   239,   241,   251,   257,   263,
+      269,   271,   277,   281,   283,   293,   307,   311,
+      313,   317,   331,   337,   347,   349,   353,   359,
+      367,   373,   379,   383,   389,   397,   401,   409,
+      419,   421,   431,   433,   439,   443,   449,   457,
+      461,   463,   467,   479,   487,   491,   499,   503,
+      509,   521,   523,   541,   547,   557,   563,   569,
+      571,   577,   587,   593,   599,   601,   607,   613,
+      617,   619,   631,   641,   643,   647,   653,   659,
+      661,   673,   677,   683,   691,   701,   709,   719,
+      727,   733,   739,   743,   751,   757,   761,   769,
+      773,   787,   797,   809,   811,   821,   823,   827,
+      829,   839,   853,   857,   859,   863,   877,   881,
+      883,   887,   907,   911,   919,   929,   937,   941,
+      947,   953,   967,   971,   977,   983,   991,   997,
+     1009,  1013,  1019,  1021,  1031,  1033,  1039,  1049,
+     1051,  1061,  1063,  1069,  1087,  1091,  1093,  1097,
+     1103,  1109,  1117,  1123,  1129,  1151,  1153,  1163,
+     1171,  1181,  1187,  1193,  1201,  1213,  1217,  1223,
+     1229,  1231,  1237,  1249,  1259,  1277,  1279,  1283,
+     1289,  1291,  1297,  1301,  1303,  1307,  1319,  1321,
+     1327,  1361,  1367,  1373,  1381,  1399,  1409,  1423,
+     1427,  1429,  1433,  1439,  1447,  1451,  1453,  1459,
+     1471,  1481,  1483,  1487,  1489,  1493,  1499,  1511,
+     1523,  1531,  1543,  1549,  1553,  1559,  1567,  1571,
+     1579,  1583,  1597,  1601,  1607,  1609,  1613,  1619,
+     1621,  1627,  1637,  1657,  1663,  1667,  1669,  1693,
+     1697,  1699,  1709,  1721,  1723,  1733,  1741,  1747,
+     1753,  1759,  1777,  1783,  1787,  1789,  1801,  1811,
+     1823,  1831,  1847,  1861,  1867,  1871,  1873,  1877,
+     1879,  1889,  1901,  1907,  1913,  1931,  1933,  1949,
+     1951,  1973,  1979,  1987,  1993,  1997,  1999,  2003,
+     2011,  2017,  2027,  2029,  2039,  2053,  2063,  2069,
+     2081,  2083,  2087,  2089,  2099,  2111,  2113,  2129,
+     2131,  2137,  2141,  2143,  2153,  2161,  2179,  2203,
+     2207,  2213,  2221,  2237,  2239,  2243,  2251,  2267,
+     2269,  2273,  2281,  2287,  2293,  2297,  2309,  2311,
+     2333,  2339,  2341,  2347,  2351,  2357,  2371,  2377,
+     2381,  2383,  2389,  2393,  2399,  2411,  2417,  2423,
+     2437,  2441,  2447,  2459,  2467,  2473,  2477,  2503,
+     2521,  2531,  2539,  2543,  2549,  2551,  2557,  2579,
+     2591,  2593,  2609,  2617,  2621,  2633,  2647,  2657,
+     2659,  2663,  2671,  2677,  2683,  2687,  2689,  2693,
+     2699,  2707,  2711,  2713,  2719,  2729,  2731,  2741,
+     2749,  2753,  2767,  2777,  2789,  2791,  2797,  2801,
+     2803,  2819,  2833,  2837,  2843,  2851,  2857,  2861,
+     2879,  2887,  2897,  2903,  2909,  2917,  2927,  2939,
+     2953,  2957,  2963,  2969,  2971,  2999,  3001,  3011,
+     3019,  3023,  3037,  3041,  3049,  3061,  3067,  3079,
+     3083,  3089,  3109,  3119,  3121,  3137,  3163,  3167,
+     3169,  3181,  3187,  3191,  3203,  3209,  3217,  3221,
+     3229,  3251,  3253,  3257,  3259,  3271,  3299,  3301,
+     3307,  3313,  3319,  3323,  3329,  3331,  3343,  3347,
+     3359,  3361,  3371,  3373,  3389,  3391,  3407,  3413,
+     3433,  3449,  3457,  3461,  3463,  3467,  3469,  3491,
+     3499,  3511,  3517,  3527,  3529,  3533,  3539,  3541,
+     3547,  3557,  3559,  3571,  3581,  3583,  3593,  3607,
+     3613,  3617,  3623,  3631,  3637,  3643,  3659,  3671,
+     3673,  3677,  3691,  3697,  3701,  3709,  3719,  3727,
+     3733,  3739,  3761,  3767,  3769,  3779,  3793,  3797,
+     3803,  3821,  3823,  3833,  3847,  3851,  3853,  3863,
+     3877,  3881,  3889,  3907,  3911,  3917,  3919,  3923,
+     3929,  3931,  3943,  3947,  3967,  3989,  4001,  4003,
+     4007,  4013,  4019,  4021,  4027,  4049,  4051,  4057,
+     4073,  4079,  4091,  4093,  4099,  4111,  4127,  4129,
+     4133,  4139,  4153,  4157,  4159,  4177,  4201,  4211,
+     4217,  4219,  4229,  4231,  4241,  4243,  4253,  4259,
+     4261,  4271,  4273,  4283,  4289,  4297,  4327,  4337,
+     4339,  4349,  4357,  4363,  4373,  4391,  4397,  4409,
+     4421,  4423,  4441,  4447,  4451,  4457,  4463,  4481,
+     4483,  4493,  4507,  4513,  4517,  4519,  4523,  4547,
+     4549,  4561,  4567,  4583,  4591,  4597,  4603,  4621,
+     4637,  4639,  4643,  4649,  4651,  4657,  4663,  4673,
+     4679,  4691,  4703,  4721,  4723,  4729,  4733,  4751,
+     4759,  4783,  4787,  4789,  4793,  4799,  4801,  4813,
+     4817,  4831,  4861,  4871,  4877,  4889,  4903,  4909,
+     4919,  4931,  4933,  4937,  4943,  4951,  4957,  4967,
+     4969,  4973,  4987,  4993,  4999,  5003,  5009,  5011,
+     5021,  5023,  5039,  5051,  5059,  5077,  5081,  5087,
+     5099,  5101,  5107,  5113,  5119,  5147,  5153,  5167,
+     5171,  5179,  5189,  5197,  5209,  5227,  5231,  5233,
+     5237,  5261,  5273,  5279,  5281,  5297,  5303,  5309,
+     5323,  5333,  5347,  5351,  5381,  5387,  5393,  5399,
+     5407,  5413,  5417,  5419,  5431,  5437,  5441,  5443,
+     5449,  5471,  5477,  5479,  5483,  5501,  5503,  5507,
+     5519,  5521,  5527,  5531,  5557,  5563,  5569,  5573,
+     5581,  5591,  5623,  5639,  5641,  5647,  5651,  5653,
+     5657,  5659,  5669,  5683,  5689,  5693,  5701,  5711,
+     5717,  5737,  5741,  5743,  5749,  5779,  5783,  5791,
+     5801,  5807,  5813,  5821,  5827,  5839,  5843,  5849,
+     5851,  5857,  5861,  5867,  5869,  5879,  5881,  5897,
+     5903,  5923,  5927,  5939,  5953,  5981,  5987,  6007,
+     6011,  6029,  6037,  6043,  6047,  6053,  6067,  6073,
+     6079,  6089,  6091,  6101,  6113,  6121,  6131,  6133,
+     6143,  6151,  6163,  6173,  6197,  6199,  6203,  6211,
+     6217,  6221,  6229,  6247,  6257,  6263,  6269,  6271,
+     6277,  6287,  6299,  6301,  6311,  6317,  6323,  6329,
+     6337,  6343,  6353,  6359,  6361,  6367,  6373,  6379,
+     6389,  6397,  6421,  6427,  6449,  6451,  6469,  6473,
+     6481,  6491,  6521,  6529,  6547,  6551,  6553,  6563,
+     6569,  6571,  6577,  6581,  6599,  6607,  6619,  6637,
+     6653,  6659,  6661,  6673,  6679,  6689,  6691,  6701,
+     6703,  6709,  6719,  6733,  6737,  6761,  6763,  6779,
+     6781,  6791,  6793,  6803,  6823,  6827,  6829,  6833,
+     6841,  6857,  6863,  6869,  6871,  6883,  6899,  6907,
+     6911,  6917,  6947,  6949,  6959,  6961,  6967,  6971,
+     6977,  6983,  6991,  6997,  7001,  7013,  7019,  7027,
+     7039,  7043,  7057,  7069,  7079,  7103,  7109,  7121,
+     7127,  7129,  7151,  7159,  7177,  7187,  7193,  7207,
+     7211,  7213,  7219,  7229,  7237,  7243,  7247,  7253,
+     7283,  7297,  7307,  7309,  7321,  7331,  7333,  7349,
+     7351,  7369,  7393,  7411,  7417,  7433,  7451,  7457,
+     7459,  7477,  7481,  7487,  7489,  7499,  7507,  7517,
+     7523,  7529,  7537,  7541,  7547,  7549,  7559,  7561,
+     7573,  7577,  7583,  7589,  7591,  7603,  7607,  7621,
+     7639,  7643,  7649,  7669,  7673,  7681,  7687,  7691,
+     7699,  7703,  7717,  7723,  7727,  7741,  7753,  7757,
+     7759,  7789,  7793,  7817,  7823,  7829,  7841,  7853,
+     7867,  7873,  7877,  7879,  7883,  7901,  7907,  7919,
+     7927,  7933,  7937,  7949,  7951,  7963,  7993,  8009,
+     8011,  8017,  8039,  8053,  8059,  8069,  8081,  8087,
+     8089,  8093,  8101,  8111,  8117,  8123,  8147,  8161,
+     8167,  8171,  8179,  8191,  8209,  8219,  8221,  8231,
+     8233,  8237,  8243,  8263,  8269,  8273,  8287,  8291,
+     8293,  8297,  8311,  8317,  8329,  8353,  8363,  8369,
+     8377,  8387,  8389,  8419,  8423,  8429,  8431,  8443,
+     8447,  8461,  8467,  8501,  8513,  8521,  8527,  8537,
+     8539,  8543,  8563,  8573,  8581,  8597,  8599,  8609,
+     8623,  8627,  8629,  8641,  8647,  8663,  8669,  8677,
+     8681,  8689,  8693,  8699,  8707,  8713,  8719,  8731,
+     8737,  8741,  8747,  8753,  8761,  8779,  8783,  8803,
+     8807,  8819,  8821,  8831,  8837,  8839,  8849,  8861,
+     8863,  8867,  8887,  8893,  8923,  8929,  8933,  8941,
+     8951,  8963,  8969,  8971,  8999,  9001,  9007,  9011,
+     9013,  9029,  9041,  9043,  9049,  9059,  9067,  9091,
+     9103,  9109,  9127,  9133,  9137,  9151,  9157,  9161,
+     9173,  9181,  9187,  9199,  9203,  9209,  9221,  9227,
+     9239,  9241,  9257,  9277,  9281,  9283,  9293,  9311,
+     9319,  9323,  9337,  9341,  9343,  9349,  9371,  9377,
+     9391,  9397,  9403,  9413,  9419,  9421,  9431,  9433,
+     9437,  9439,  9461,  9463,  9467,  9473,  9479,  9491,
+     9497,  9511,  9521,  9533,  9539,  9547,  9551,  9587,
+     9601,  9613,  9619,  9623,  9629,  9631,  9643,  9649,
+     9661,  9677,  9679,  9689,  9697,  9719,  9721,  9733,
+     9739,  9743,  9749,  9767,  9769,  9781,  9787,  9791,
+     9803,  9811,  9817,  9829,  9833,  9839,  9851,  9857,
+     9859,  9871,  9883,  9887,  9901,  9907,  9923,  9929,
+     9931,  9941,  9949,  9967,  9973, 10007, 10009, 10037,
+    10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099,
+    10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163,
+    10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247,
+    10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303,
+    10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369,
+    10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459,
+    10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531,
+    10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627,
+    10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691,
+    10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771,
+    10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859,
+    10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937,
+    10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003,
+    11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087,
+    11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161,
+    11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251,
+    11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317,
+    11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399,
+    11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483,
+    11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551,
+    11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657,
+    11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731,
+    11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813,
+    11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887,
+    11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941,
+    11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011,
+    12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101,
+    12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161,
+    12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251,
+    12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323,
+    12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401,
+    12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473,
+    12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527,
+    12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589,
+    12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653,
+    12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739,
+    12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821,
+    12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907,
+    12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967,
+    12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033,
+    13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109,
+    13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177,
+    13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259,
+    13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337,
+    13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421,
+    13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499,
+    13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597,
+    13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681,
+    13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723,
+    13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799,
+    13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879,
+    13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933,
+    13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033,
+    14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143,
+    14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221,
+    14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323,
+    14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407,
+    14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461,
+    14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549,
+    14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627,
+    14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699,
+    14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753,
+    14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821,
+    14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887,
+    14891, 14897, 14923, 14929, 14939, 14947, 14951, 14957,
+    14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073,
+    15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137,
+    15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217,
+    15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277,
+    15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331,
+    15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401,
+    15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473,
+    15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569,
+    15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643,
+    15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727,
+    15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773,
+    15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859,
+    15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919,
+    15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007,
+    16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087,
+    16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183,
+    16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249,
+    16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349,
+    16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427,
+    16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493,
+    16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603,
+    16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661,
+    16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747,
+    16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843,
+    16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927,
+    16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993,
+    17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053,
+    17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159,
+    17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231,
+    17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327,
+    17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389,
+    17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467,
+    17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519,
+    17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599,
+    17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683,
+    17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783,
+    17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863,
+};
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_prime.pl
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_prime.pl
@ -0,0 +1,48 @@
+#! /usr/bin/env perl
+# Copyright 1998-2019 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+# Output year depends on the year of the script.
+my $YEAR = [localtime([stat($0)]->[9])]->[5] + 1900;
+print <<"EOF";
+/*
+ * WARNING: do not edit!
+ * Generated by crypto/bn/bn_prime.pl
+ *
+ * Copyright 1998-$YEAR The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+EOF
+
+
+my $num = shift || 2048;
+my @primes = ( 2 );
+my $p = 1;
+loop: while ($#primes < $num-1) {
+    $p += 2;
+    my $s = int(sqrt($p));
+
+    for (my $i = 0; defined($primes[$i]) && $primes[$i] <= $s; $i++) {
+        next loop if ($p % $primes[$i]) == 0;
+    }
+    push(@primes, $p);
+}
+
+print "typedef unsigned short prime_t;\n";
+printf "# define NUMPRIMES %d\n\n", $num;
+
+printf "static const prime_t primes[%d] = {", $num;
+for (my $i = 0; $i <= $#primes; $i++) {
+    printf "\n   " if ($i % 8) == 0;
+    printf " %5d,", $primes[$i];
+}
+print "\n};\n";
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_print.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_print.c
@ -0,0 +1,345 @@
+/*
+ * Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <stdio.h>
+#include "internal/ctype.h"
+#include <limits.h>
+#include "internal/cryptlib.h"
+#include <openssl/buffer.h>
+#include "bn_lcl.h"
+
+static const char Hex[] = "0123456789ABCDEF";
+
+/* Must 'OPENSSL_free' the returned data */
+char *BN_bn2hex(const BIGNUM *a)
+{
+    int i, j, v, z = 0;
+    char *buf;
+    char *p;
+
+    if (BN_is_zero(a))
+        return OPENSSL_strdup("0");
+    buf = OPENSSL_malloc(a->top * BN_BYTES * 2 + 2);
+    if (buf == NULL) {
+        BNerr(BN_F_BN_BN2HEX, ERR_R_MALLOC_FAILURE);
+        goto err;
+    }
+    p = buf;
+    if (a->neg)
+        *p++ = '-';
+    for (i = a->top - 1; i >= 0; i--) {
+        for (j = BN_BITS2 - 8; j >= 0; j -= 8) {
+            /* strip leading zeros */
+            v = (int)((a->d[i] >> j) & 0xff);
+            if (z || v != 0) {
+                *p++ = Hex[v >> 4];
+                *p++ = Hex[v & 0x0f];
+                z = 1;
+            }
+        }
+    }
+    *p = '\0';
+ err:
+    return buf;
+}
+
+/* Must 'OPENSSL_free' the returned data */
+char *BN_bn2dec(const BIGNUM *a)
+{
+    int i = 0, num, ok = 0, n, tbytes;
+    char *buf = NULL;
+    char *p;
+    BIGNUM *t = NULL;
+    BN_ULONG *bn_data = NULL, *lp;
+    int bn_data_num;
+
+    /*-
+     * get an upper bound for the length of the decimal integer
+     * num <= (BN_num_bits(a) + 1) * log(2)
+     *     <= 3 * BN_num_bits(a) * 0.101 + log(2) + 1     (rounding error)
+     *     <= 3 * BN_num_bits(a) / 10 + 3 * BN_num_bits / 1000 + 1 + 1
+     */
+    i = BN_num_bits(a) * 3;
+    num = (i / 10 + i / 1000 + 1) + 1;
+    tbytes = num + 3;   /* negative and terminator and one spare? */
+    bn_data_num = num / BN_DEC_NUM + 1;
+    bn_data = OPENSSL_malloc(bn_data_num * sizeof(BN_ULONG));
+    buf = OPENSSL_malloc(tbytes);
+    if (buf == NULL || bn_data == NULL) {
+        BNerr(BN_F_BN_BN2DEC, ERR_R_MALLOC_FAILURE);
+        goto err;
+    }
+    if ((t = BN_dup(a)) == NULL)
+        goto err;
+
+    p = buf;
+    lp = bn_data;
+    if (BN_is_zero(t)) {
+        *p++ = '0';
+        *p++ = '\0';
+    } else {
+        if (BN_is_negative(t))
+            *p++ = '-';
+
+        while (!BN_is_zero(t)) {
+            if (lp - bn_data >= bn_data_num)
+                goto err;
+            *lp = BN_div_word(t, BN_DEC_CONV);
+            if (*lp == (BN_ULONG)-1)
+                goto err;
+            lp++;
+        }
+        lp--;
+        /*
+         * We now have a series of blocks, BN_DEC_NUM chars in length, where
+         * the last one needs truncation. The blocks need to be reversed in
+         * order.
+         */
+        n = BIO_snprintf(p, tbytes - (size_t)(p - buf), BN_DEC_FMT1, *lp);
+        if (n < 0)
+            goto err;
+        p += n;
+        while (lp != bn_data) {
+            lp--;
+            n = BIO_snprintf(p, tbytes - (size_t)(p - buf), BN_DEC_FMT2, *lp);
+            if (n < 0)
+                goto err;
+            p += n;
+        }
+    }
+    ok = 1;
+ err:
+    OPENSSL_free(bn_data);
+    BN_free(t);
+    if (ok)
+        return buf;
+    OPENSSL_free(buf);
+    return NULL;
+}
+
+int BN_hex2bn(BIGNUM **bn, const char *a)
+{
+    BIGNUM *ret = NULL;
+    BN_ULONG l = 0;
+    int neg = 0, h, m, i, j, k, c;
+    int num;
+
+    if (a == NULL || *a == '\0')
+        return 0;
+
+    if (*a == '-') {
+        neg = 1;
+        a++;
+    }
+
+    for (i = 0; i <= INT_MAX / 4 && ossl_isxdigit(a[i]); i++)
+        continue;
+
+    if (i == 0 || i > INT_MAX / 4)
+        goto err;
+
+    num = i + neg;
+    if (bn == NULL)
+        return num;
+
+    /* a is the start of the hex digits, and it is 'i' long */
+    if (*bn == NULL) {
+        if ((ret = BN_new()) == NULL)
+            return 0;
+    } else {
+        ret = *bn;
+        BN_zero(ret);
+    }
+
+    /* i is the number of hex digits */
+    if (bn_expand(ret, i * 4) == NULL)
+        goto err;
+
+    j = i;                      /* least significant 'hex' */
+    m = 0;
+    h = 0;
+    while (j > 0) {
+        m = (BN_BYTES * 2 <= j) ? BN_BYTES * 2 : j;
+        l = 0;
+        for (;;) {
+            c = a[j - m];
+            k = OPENSSL_hexchar2int(c);
+            if (k < 0)
+                k = 0;          /* paranoia */
+            l = (l << 4) | k;
+
+            if (--m <= 0) {
+                ret->d[h++] = l;
+                break;
+            }
+        }
+        j -= BN_BYTES * 2;
+    }
+    ret->top = h;
+    bn_correct_top(ret);
+
+    *bn = ret;
+    bn_check_top(ret);
+    /* Don't set the negative flag if it's zero. */
+    if (ret->top != 0)
+        ret->neg = neg;
+    return num;
+ err:
+    if (*bn == NULL)
+        BN_free(ret);
+    return 0;
+}
+
+int BN_dec2bn(BIGNUM **bn, const char *a)
+{
+    BIGNUM *ret = NULL;
+    BN_ULONG l = 0;
+    int neg = 0, i, j;
+    int num;
+
+    if (a == NULL || *a == '\0')
+        return 0;
+    if (*a == '-') {
+        neg = 1;
+        a++;
+    }
+
+    for (i = 0; i <= INT_MAX / 4 && ossl_isdigit(a[i]); i++)
+        continue;
+
+    if (i == 0 || i > INT_MAX / 4)
+        goto err;
+
+    num = i + neg;
+    if (bn == NULL)
+        return num;
+
+    /*
+     * a is the start of the digits, and it is 'i' long. We chop it into
+     * BN_DEC_NUM digits at a time
+     */
+    if (*bn == NULL) {
+        if ((ret = BN_new()) == NULL)
+            return 0;
+    } else {
+        ret = *bn;
+        BN_zero(ret);
+    }
+
+    /* i is the number of digits, a bit of an over expand */
+    if (bn_expand(ret, i * 4) == NULL)
+        goto err;
+
+    j = BN_DEC_NUM - i % BN_DEC_NUM;
+    if (j == BN_DEC_NUM)
+        j = 0;
+    l = 0;
+    while (--i >= 0) {
+        l *= 10;
+        l += *a - '0';
+        a++;
+        if (++j == BN_DEC_NUM) {
+            if (!BN_mul_word(ret, BN_DEC_CONV)
+                || !BN_add_word(ret, l))
+                goto err;
+            l = 0;
+            j = 0;
+        }
+    }
+
+    bn_correct_top(ret);
+    *bn = ret;
+    bn_check_top(ret);
+    /* Don't set the negative flag if it's zero. */
+    if (ret->top != 0)
+        ret->neg = neg;
+    return num;
+ err:
+    if (*bn == NULL)
+        BN_free(ret);
+    return 0;
+}
+
+int BN_asc2bn(BIGNUM **bn, const char *a)
+{
+    const char *p = a;
+
+    if (*p == '-')
+        p++;
+
+    if (p[0] == '0' && (p[1] == 'X' || p[1] == 'x')) {
+        if (!BN_hex2bn(bn, p + 2))
+            return 0;
+    } else {
+        if (!BN_dec2bn(bn, p))
+            return 0;
+    }
+    /* Don't set the negative flag if it's zero. */
+    if (*a == '-' && (*bn)->top != 0)
+        (*bn)->neg = 1;
+    return 1;
+}
+
+# ifndef OPENSSL_NO_STDIO
+int BN_print_fp(FILE *fp, const BIGNUM *a)
+{
+    BIO *b;
+    int ret;
+
+    if ((b = BIO_new(BIO_s_file())) == NULL)
+        return 0;
+    BIO_set_fp(b, fp, BIO_NOCLOSE);
+    ret = BN_print(b, a);
+    BIO_free(b);
+    return ret;
+}
+# endif
+
+int BN_print(BIO *bp, const BIGNUM *a)
+{
+    int i, j, v, z = 0;
+    int ret = 0;
+
+    if ((a->neg) && BIO_write(bp, "-", 1) != 1)
+        goto end;
+    if (BN_is_zero(a) && BIO_write(bp, "0", 1) != 1)
+        goto end;
+    for (i = a->top - 1; i >= 0; i--) {
+        for (j = BN_BITS2 - 4; j >= 0; j -= 4) {
+            /* strip leading zeros */
+            v = (int)((a->d[i] >> j) & 0x0f);
+            if (z || v != 0) {
+                if (BIO_write(bp, &Hex[v], 1) != 1)
+                    goto end;
+                z = 1;
+            }
+        }
+    }
+    ret = 1;
+ end:
+    return ret;
+}
+
+char *BN_options(void)
+{
+    static int init = 0;
+    static char data[16];
+
+    if (!init) {
+        init++;
+#ifdef BN_LLONG
+        BIO_snprintf(data, sizeof(data), "bn(%zu,%zu)",
+                     sizeof(BN_ULLONG) * 8, sizeof(BN_ULONG) * 8);
+#else
+        BIO_snprintf(data, sizeof(data), "bn(%zu,%zu)",
+                     sizeof(BN_ULONG) * 8, sizeof(BN_ULONG) * 8);
+#endif
+    }
+    return data;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_rand.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_rand.c
@ -0,0 +1,268 @@
+/*
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <stdio.h>
+#include <time.h>
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+#include <openssl/rand.h>
+#include <openssl/sha.h>
+
+typedef enum bnrand_flag_e {
+    NORMAL, TESTING, PRIVATE
+} BNRAND_FLAG;
+
+static int bnrand(BNRAND_FLAG flag, BIGNUM *rnd, int bits, int top, int bottom)
+{
+    unsigned char *buf = NULL;
+    int b, ret = 0, bit, bytes, mask;
+
+    if (bits == 0) {
+        if (top != BN_RAND_TOP_ANY || bottom != BN_RAND_BOTTOM_ANY)
+            goto toosmall;
+        BN_zero(rnd);
+        return 1;
+    }
+    if (bits < 0 || (bits == 1 && top > 0))
+        goto toosmall;
+
+    bytes = (bits + 7) / 8;
+    bit = (bits - 1) % 8;
+    mask = 0xff << (bit + 1);
+
+    buf = OPENSSL_malloc(bytes);
+    if (buf == NULL) {
+        BNerr(BN_F_BNRAND, ERR_R_MALLOC_FAILURE);
+        goto err;
+    }
+
+    /* make a random number and set the top and bottom bits */
+    b = flag == NORMAL ? RAND_bytes(buf, bytes) : RAND_priv_bytes(buf, bytes);
+    if (b <= 0)
+        goto err;
+
+    if (flag == TESTING) {
+        /*
+         * generate patterns that are more likely to trigger BN library bugs
+         */
+        int i;
+        unsigned char c;
+
+        for (i = 0; i < bytes; i++) {
+            if (RAND_bytes(&c, 1) <= 0)
+                goto err;
+            if (c >= 128 && i > 0)
+                buf[i] = buf[i - 1];
+            else if (c < 42)
+                buf[i] = 0;
+            else if (c < 84)
+                buf[i] = 255;
+        }
+    }
+
+    if (top >= 0) {
+        if (top) {
+            if (bit == 0) {
+                buf[0] = 1;
+                buf[1] |= 0x80;
+            } else {
+                buf[0] |= (3 << (bit - 1));
+            }
+        } else {
+            buf[0] |= (1 << bit);
+        }
+    }
+    buf[0] &= ~mask;
+    if (bottom)                 /* set bottom bit if requested */
+        buf[bytes - 1] |= 1;
+    if (!BN_bin2bn(buf, bytes, rnd))
+        goto err;
+    ret = 1;
+ err:
+    OPENSSL_clear_free(buf, bytes);
+    bn_check_top(rnd);
+    return ret;
+
+toosmall:
+    BNerr(BN_F_BNRAND, BN_R_BITS_TOO_SMALL);
+    return 0;
+}
+
+int BN_rand(BIGNUM *rnd, int bits, int top, int bottom)
+{
+    return bnrand(NORMAL, rnd, bits, top, bottom);
+}
+
+int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
+{
+    return bnrand(TESTING, rnd, bits, top, bottom);
+}
+
+int BN_priv_rand(BIGNUM *rnd, int bits, int top, int bottom)
+{
+    return bnrand(PRIVATE, rnd, bits, top, bottom);
+}
+
+/* random number r:  0 <= r < range */
+static int bnrand_range(BNRAND_FLAG flag, BIGNUM *r, const BIGNUM *range)
+{
+    int n;
+    int count = 100;
+
+    if (range->neg || BN_is_zero(range)) {
+        BNerr(BN_F_BNRAND_RANGE, BN_R_INVALID_RANGE);
+        return 0;
+    }
+
+    n = BN_num_bits(range);     /* n > 0 */
+
+    /* BN_is_bit_set(range, n - 1) always holds */
+
+    if (n == 1)
+        BN_zero(r);
+    else if (!BN_is_bit_set(range, n - 2) && !BN_is_bit_set(range, n - 3)) {
+        /*
+         * range = 100..._2, so 3*range (= 11..._2) is exactly one bit longer
+         * than range
+         */
+        do {
+            if (!bnrand(flag, r, n + 1, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY))
+                return 0;
+
+            /*
+             * If r < 3*range, use r := r MOD range (which is either r, r -
+             * range, or r - 2*range). Otherwise, iterate once more. Since
+             * 3*range = 11..._2, each iteration succeeds with probability >=
+             * .75.
+             */
+            if (BN_cmp(r, range) >= 0) {
+                if (!BN_sub(r, r, range))
+                    return 0;
+                if (BN_cmp(r, range) >= 0)
+                    if (!BN_sub(r, r, range))
+                        return 0;
+            }
+
+            if (!--count) {
+                BNerr(BN_F_BNRAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
+                return 0;
+            }
+
+        }
+        while (BN_cmp(r, range) >= 0);
+    } else {
+        do {
+            /* range = 11..._2  or  range = 101..._2 */
+            if (!bnrand(flag, r, n, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY))
+                return 0;
+
+            if (!--count) {
+                BNerr(BN_F_BNRAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
+                return 0;
+            }
+        }
+        while (BN_cmp(r, range) >= 0);
+    }
+
+    bn_check_top(r);
+    return 1;
+}
+
+int BN_rand_range(BIGNUM *r, const BIGNUM *range)
+{
+    return bnrand_range(NORMAL, r, range);
+}
+
+int BN_priv_rand_range(BIGNUM *r, const BIGNUM *range)
+{
+    return bnrand_range(PRIVATE, r, range);
+}
+
+int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom)
+{
+    return BN_rand(rnd, bits, top, bottom);
+}
+
+int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range)
+{
+    return BN_rand_range(r, range);
+}
+
+/*
+ * BN_generate_dsa_nonce generates a random number 0 <= out < range. Unlike
+ * BN_rand_range, it also includes the contents of |priv| and |message| in
+ * the generation so that an RNG failure isn't fatal as long as |priv|
+ * remains secret. This is intended for use in DSA and ECDSA where an RNG
+ * weakness leads directly to private key exposure unless this function is
+ * used.
+ */
+int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range,
+                          const BIGNUM *priv, const unsigned char *message,
+                          size_t message_len, BN_CTX *ctx)
+{
+    SHA512_CTX sha;
+    /*
+     * We use 512 bits of random data per iteration to ensure that we have at
+     * least |range| bits of randomness.
+     */
+    unsigned char random_bytes[64];
+    unsigned char digest[SHA512_DIGEST_LENGTH];
+    unsigned done, todo;
+    /* We generate |range|+8 bytes of random output. */
+    const unsigned num_k_bytes = BN_num_bytes(range) + 8;
+    unsigned char private_bytes[96];
+    unsigned char *k_bytes;
+    int ret = 0;
+
+    k_bytes = OPENSSL_malloc(num_k_bytes);
+    if (k_bytes == NULL)
+        goto err;
+
+    /* We copy |priv| into a local buffer to avoid exposing its length. */
+    todo = sizeof(priv->d[0]) * priv->top;
+    if (todo > sizeof(private_bytes)) {
+        /*
+         * No reasonable DSA or ECDSA key should have a private key this
+         * large and we don't handle this case in order to avoid leaking the
+         * length of the private key.
+         */
+        BNerr(BN_F_BN_GENERATE_DSA_NONCE, BN_R_PRIVATE_KEY_TOO_LARGE);
+        goto err;
+    }
+    memcpy(private_bytes, priv->d, todo);
+    memset(private_bytes + todo, 0, sizeof(private_bytes) - todo);
+
+    for (done = 0; done < num_k_bytes;) {
+        if (RAND_priv_bytes(random_bytes, sizeof(random_bytes)) != 1)
+            goto err;
+        SHA512_Init(&sha);
+        SHA512_Update(&sha, &done, sizeof(done));
+        SHA512_Update(&sha, private_bytes, sizeof(private_bytes));
+        SHA512_Update(&sha, message, message_len);
+        SHA512_Update(&sha, random_bytes, sizeof(random_bytes));
+        SHA512_Final(digest, &sha);
+
+        todo = num_k_bytes - done;
+        if (todo > SHA512_DIGEST_LENGTH)
+            todo = SHA512_DIGEST_LENGTH;
+        memcpy(k_bytes + done, digest, todo);
+        done += todo;
+    }
+
+    if (!BN_bin2bn(k_bytes, num_k_bytes, out))
+        goto err;
+    if (BN_mod(out, out, range, ctx) != 1)
+        goto err;
+    ret = 1;
+
+ err:
+    OPENSSL_free(k_bytes);
+    OPENSSL_cleanse(private_bytes, sizeof(private_bytes));
+    return ret;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_recp.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_recp.c
@ -0,0 +1,194 @@
+/*
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+void BN_RECP_CTX_init(BN_RECP_CTX *recp)
+{
+    memset(recp, 0, sizeof(*recp));
+    bn_init(&(recp->N));
+    bn_init(&(recp->Nr));
+}
+
+BN_RECP_CTX *BN_RECP_CTX_new(void)
+{
+    BN_RECP_CTX *ret;
+
+    if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) {
+        BNerr(BN_F_BN_RECP_CTX_NEW, ERR_R_MALLOC_FAILURE);
+        return NULL;
+    }
+
+    bn_init(&(ret->N));
+    bn_init(&(ret->Nr));
+    ret->flags = BN_FLG_MALLOCED;
+    return ret;
+}
+
+void BN_RECP_CTX_free(BN_RECP_CTX *recp)
+{
+    if (recp == NULL)
+        return;
+    BN_free(&recp->N);
+    BN_free(&recp->Nr);
+    if (recp->flags & BN_FLG_MALLOCED)
+        OPENSSL_free(recp);
+}
+
+int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx)
+{
+    if (!BN_copy(&(recp->N), d))
+        return 0;
+    BN_zero(&(recp->Nr));
+    recp->num_bits = BN_num_bits(d);
+    recp->shift = 0;
+    return 1;
+}
+
+int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
+                          BN_RECP_CTX *recp, BN_CTX *ctx)
+{
+    int ret = 0;
+    BIGNUM *a;
+    const BIGNUM *ca;
+
+    BN_CTX_start(ctx);
+    if ((a = BN_CTX_get(ctx)) == NULL)
+        goto err;
+    if (y != NULL) {
+        if (x == y) {
+            if (!BN_sqr(a, x, ctx))
+                goto err;
+        } else {
+            if (!BN_mul(a, x, y, ctx))
+                goto err;
+        }
+        ca = a;
+    } else
+        ca = x;                 /* Just do the mod */
+
+    ret = BN_div_recp(NULL, r, ca, recp, ctx);
+ err:
+    BN_CTX_end(ctx);
+    bn_check_top(r);
+    return ret;
+}
+
+int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
+                BN_RECP_CTX *recp, BN_CTX *ctx)
+{
+    int i, j, ret = 0;
+    BIGNUM *a, *b, *d, *r;
+
+    BN_CTX_start(ctx);
+    d = (dv != NULL) ? dv : BN_CTX_get(ctx);
+    r = (rem != NULL) ? rem : BN_CTX_get(ctx);
+    a = BN_CTX_get(ctx);
+    b = BN_CTX_get(ctx);
+    if (b == NULL)
+        goto err;
+
+    if (BN_ucmp(m, &(recp->N)) < 0) {
+        BN_zero(d);
+        if (!BN_copy(r, m)) {
+            BN_CTX_end(ctx);
+            return 0;
+        }
+        BN_CTX_end(ctx);
+        return 1;
+    }
+
+    /*
+     * We want the remainder Given input of ABCDEF / ab we need multiply
+     * ABCDEF by 3 digests of the reciprocal of ab
+     */
+
+    /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
+    i = BN_num_bits(m);
+    j = recp->num_bits << 1;
+    if (j > i)
+        i = j;
+
+    /* Nr := round(2^i / N) */
+    if (i != recp->shift)
+        recp->shift = BN_reciprocal(&(recp->Nr), &(recp->N), i, ctx);
+    /* BN_reciprocal could have returned -1 for an error */
+    if (recp->shift == -1)
+        goto err;
+
+    /*-
+     * d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i - BN_num_bits(N)))|
+     *    = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i - BN_num_bits(N)))|
+     *   <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
+     *    = |m/N|
+     */
+    if (!BN_rshift(a, m, recp->num_bits))
+        goto err;
+    if (!BN_mul(b, a, &(recp->Nr), ctx))
+        goto err;
+    if (!BN_rshift(d, b, i - recp->num_bits))
+        goto err;
+    d->neg = 0;
+
+    if (!BN_mul(b, &(recp->N), d, ctx))
+        goto err;
+    if (!BN_usub(r, m, b))
+        goto err;
+    r->neg = 0;
+
+    j = 0;
+    while (BN_ucmp(r, &(recp->N)) >= 0) {
+        if (j++ > 2) {
+            BNerr(BN_F_BN_DIV_RECP, BN_R_BAD_RECIPROCAL);
+            goto err;
+        }
+        if (!BN_usub(r, r, &(recp->N)))
+            goto err;
+        if (!BN_add_word(d, 1))
+            goto err;
+    }
+
+    r->neg = BN_is_zero(r) ? 0 : m->neg;
+    d->neg = m->neg ^ recp->N.neg;
+    ret = 1;
+ err:
+    BN_CTX_end(ctx);
+    bn_check_top(dv);
+    bn_check_top(rem);
+    return ret;
+}
+
+/*
+ * len is the expected size of the result We actually calculate with an extra
+ * word of precision, so we can do faster division if the remainder is not
+ * required.
+ */
+/* r := 2^len / m */
+int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx)
+{
+    int ret = -1;
+    BIGNUM *t;
+
+    BN_CTX_start(ctx);
+    if ((t = BN_CTX_get(ctx)) == NULL)
+        goto err;
+
+    if (!BN_set_bit(t, len))
+        goto err;
+
+    if (!BN_div(r, NULL, t, m, ctx))
+        goto err;
+
+    ret = len;
+ err:
+    bn_check_top(r);
+    BN_CTX_end(ctx);
+    return ret;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_shift.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_shift.c
@ -0,0 +1,257 @@
+/*
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <assert.h>
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+int BN_lshift1(BIGNUM *r, const BIGNUM *a)
+{
+    register BN_ULONG *ap, *rp, t, c;
+    int i;
+
+    bn_check_top(r);
+    bn_check_top(a);
+
+    if (r != a) {
+        r->neg = a->neg;
+        if (bn_wexpand(r, a->top + 1) == NULL)
+            return 0;
+        r->top = a->top;
+    } else {
+        if (bn_wexpand(r, a->top + 1) == NULL)
+            return 0;
+    }
+    ap = a->d;
+    rp = r->d;
+    c = 0;
+    for (i = 0; i < a->top; i++) {
+        t = *(ap++);
+        *(rp++) = ((t << 1) | c) & BN_MASK2;
+        c = (t & BN_TBIT) ? 1 : 0;
+    }
+    if (c) {
+        *rp = 1;
+        r->top++;
+    }
+    bn_check_top(r);
+    return 1;
+}
+
+int BN_rshift1(BIGNUM *r, const BIGNUM *a)
+{
+    BN_ULONG *ap, *rp, t, c;
+    int i, j;
+
+    bn_check_top(r);
+    bn_check_top(a);
+
+    if (BN_is_zero(a)) {
+        BN_zero(r);
+        return 1;
+    }
+    i = a->top;
+    ap = a->d;
+    j = i - (ap[i - 1] == 1);
+    if (a != r) {
+        if (bn_wexpand(r, j) == NULL)
+            return 0;
+        r->neg = a->neg;
+    }
+    rp = r->d;
+    t = ap[--i];
+    c = (t & 1) ? BN_TBIT : 0;
+    if (t >>= 1)
+        rp[i] = t;
+    while (i > 0) {
+        t = ap[--i];
+        rp[i] = ((t >> 1) & BN_MASK2) | c;
+        c = (t & 1) ? BN_TBIT : 0;
+    }
+    r->top = j;
+    if (!r->top)
+        r->neg = 0; /* don't allow negative zero */
+    bn_check_top(r);
+    return 1;
+}
+
+int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
+{
+    int ret;
+
+    if (n < 0) {
+        BNerr(BN_F_BN_LSHIFT, BN_R_INVALID_SHIFT);
+        return 0;
+    }
+
+    ret = bn_lshift_fixed_top(r, a, n);
+
+    bn_correct_top(r);
+    bn_check_top(r);
+
+    return ret;
+}
+
+/*
+ * In respect to shift factor the execution time is invariant of
+ * |n % BN_BITS2|, but not |n / BN_BITS2|. Or in other words pre-condition
+ * for constant-time-ness is |n < BN_BITS2| or |n / BN_BITS2| being
+ * non-secret.
+ */
+int bn_lshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n)
+{
+    int i, nw;
+    unsigned int lb, rb;
+    BN_ULONG *t, *f;
+    BN_ULONG l, m, rmask = 0;
+
+    assert(n >= 0);
+
+    bn_check_top(r);
+    bn_check_top(a);
+
+    nw = n / BN_BITS2;
+    if (bn_wexpand(r, a->top + nw + 1) == NULL)
+        return 0;
+
+    if (a->top != 0) {
+        lb = (unsigned int)n % BN_BITS2;
+        rb = BN_BITS2 - lb;
+        rb %= BN_BITS2;            /* say no to undefined behaviour */
+        rmask = (BN_ULONG)0 - rb;  /* rmask = 0 - (rb != 0) */
+        rmask |= rmask >> 8;
+        f = &(a->d[0]);
+        t = &(r->d[nw]);
+        l = f[a->top - 1];
+        t[a->top] = (l >> rb) & rmask;
+        for (i = a->top - 1; i > 0; i--) {
+            m = l << lb;
+            l = f[i - 1];
+            t[i] = (m | ((l >> rb) & rmask)) & BN_MASK2;
+        }
+        t[0] = (l << lb) & BN_MASK2;
+    } else {
+        /* shouldn't happen, but formally required */
+        r->d[nw] = 0;
+    }
+    if (nw != 0)
+        memset(r->d, 0, sizeof(*t) * nw);
+
+    r->neg = a->neg;
+    r->top = a->top + nw + 1;
+    r->flags |= BN_FLG_FIXED_TOP;
+
+    return 1;
+}
+
+int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
+{
+    int i, j, nw, lb, rb;
+    BN_ULONG *t, *f;
+    BN_ULONG l, tmp;
+
+    bn_check_top(r);
+    bn_check_top(a);
+
+    if (n < 0) {
+        BNerr(BN_F_BN_RSHIFT, BN_R_INVALID_SHIFT);
+        return 0;
+    }
+
+    nw = n / BN_BITS2;
+    rb = n % BN_BITS2;
+    lb = BN_BITS2 - rb;
+    if (nw >= a->top || a->top == 0) {
+        BN_zero(r);
+        return 1;
+    }
+    i = (BN_num_bits(a) - n + (BN_BITS2 - 1)) / BN_BITS2;
+    if (r != a) {
+        if (bn_wexpand(r, i) == NULL)
+            return 0;
+        r->neg = a->neg;
+    } else {
+        if (n == 0)
+            return 1;           /* or the copying loop will go berserk */
+    }
+
+    f = &(a->d[nw]);
+    t = r->d;
+    j = a->top - nw;
+    r->top = i;
+
+    if (rb == 0) {
+        for (i = j; i != 0; i--)
+            *(t++) = *(f++);
+    } else {
+        l = *(f++);
+        for (i = j - 1; i != 0; i--) {
+            tmp = (l >> rb) & BN_MASK2;
+            l = *(f++);
+            *(t++) = (tmp | (l << lb)) & BN_MASK2;
+        }
+        if ((l = (l >> rb) & BN_MASK2))
+            *(t) = l;
+    }
+    if (!r->top)
+        r->neg = 0; /* don't allow negative zero */
+    bn_check_top(r);
+    return 1;
+}
+
+/*
+ * In respect to shift factor the execution time is invariant of
+ * |n % BN_BITS2|, but not |n / BN_BITS2|. Or in other words pre-condition
+ * for constant-time-ness for sufficiently[!] zero-padded inputs is
+ * |n < BN_BITS2| or |n / BN_BITS2| being non-secret.
+ */
+int bn_rshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n)
+{
+    int i, top, nw;
+    unsigned int lb, rb;
+    BN_ULONG *t, *f;
+    BN_ULONG l, m, mask;
+
+    bn_check_top(r);
+    bn_check_top(a);
+
+    assert(n >= 0);
+
+    nw = n / BN_BITS2;
+    if (nw >= a->top) {
+        /* shouldn't happen, but formally required */
+        BN_zero(r);
+        return 1;
+    }
+
+    rb = (unsigned int)n % BN_BITS2;
+    lb = BN_BITS2 - rb;
+    lb %= BN_BITS2;            /* say no to undefined behaviour */
+    mask = (BN_ULONG)0 - lb;   /* mask = 0 - (lb != 0) */
+    mask |= mask >> 8;
+    top = a->top - nw;
+    if (r != a && bn_wexpand(r, top) == NULL)
+        return 0;
+
+    t = &(r->d[0]);
+    f = &(a->d[nw]);
+    l = f[0];
+    for (i = 0; i < top - 1; i++) {
+        m = f[i + 1];
+        t[i] = (l >> rb) | ((m << lb) & mask);
+        l = m;
+    }
+    t[i] = l >> rb;
+
+    r->neg = a->neg;
+    r->top = top;
+    r->flags |= BN_FLG_FIXED_TOP;
+
+    return 1;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_sqr.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_sqr.c
@ -0,0 +1,239 @@
+/*
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+/* r must not be a */
+/*
+ * I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96
+ */
+int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
+{
+    int ret = bn_sqr_fixed_top(r, a, ctx);
+
+    bn_correct_top(r);
+    bn_check_top(r);
+
+    return ret;
+}
+
+int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
+{
+    int max, al;
+    int ret = 0;
+    BIGNUM *tmp, *rr;
+
+    bn_check_top(a);
+
+    al = a->top;
+    if (al <= 0) {
+        r->top = 0;
+        r->neg = 0;
+        return 1;
+    }
+
+    BN_CTX_start(ctx);
+    rr = (a != r) ? r : BN_CTX_get(ctx);
+    tmp = BN_CTX_get(ctx);
+    if (rr == NULL || tmp == NULL)
+        goto err;
+
+    max = 2 * al;               /* Non-zero (from above) */
+    if (bn_wexpand(rr, max) == NULL)
+        goto err;
+
+    if (al == 4) {
+#ifndef BN_SQR_COMBA
+        BN_ULONG t[8];
+        bn_sqr_normal(rr->d, a->d, 4, t);
+#else
+        bn_sqr_comba4(rr->d, a->d);
+#endif
+    } else if (al == 8) {
+#ifndef BN_SQR_COMBA
+        BN_ULONG t[16];
+        bn_sqr_normal(rr->d, a->d, 8, t);
+#else
+        bn_sqr_comba8(rr->d, a->d);
+#endif
+    } else {
+#if defined(BN_RECURSION)
+        if (al < BN_SQR_RECURSIVE_SIZE_NORMAL) {
+            BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL * 2];
+            bn_sqr_normal(rr->d, a->d, al, t);
+        } else {
+            int j, k;
+
+            j = BN_num_bits_word((BN_ULONG)al);
+            j = 1 << (j - 1);
+            k = j + j;
+            if (al == j) {
+                if (bn_wexpand(tmp, k * 2) == NULL)
+                    goto err;
+                bn_sqr_recursive(rr->d, a->d, al, tmp->d);
+            } else {
+                if (bn_wexpand(tmp, max) == NULL)
+                    goto err;
+                bn_sqr_normal(rr->d, a->d, al, tmp->d);
+            }
+        }
+#else
+        if (bn_wexpand(tmp, max) == NULL)
+            goto err;
+        bn_sqr_normal(rr->d, a->d, al, tmp->d);
+#endif
+    }
+
+    rr->neg = 0;
+    rr->top = max;
+    rr->flags |= BN_FLG_FIXED_TOP;
+    if (r != rr && BN_copy(r, rr) == NULL)
+        goto err;
+
+    ret = 1;
+ err:
+    bn_check_top(rr);
+    bn_check_top(tmp);
+    BN_CTX_end(ctx);
+    return ret;
+}
+
+/* tmp must have 2*n words */
+void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp)
+{
+    int i, j, max;
+    const BN_ULONG *ap;
+    BN_ULONG *rp;
+
+    max = n * 2;
+    ap = a;
+    rp = r;
+    rp[0] = rp[max - 1] = 0;
+    rp++;
+    j = n;
+
+    if (--j > 0) {
+        ap++;
+        rp[j] = bn_mul_words(rp, ap, j, ap[-1]);
+        rp += 2;
+    }
+
+    for (i = n - 2; i > 0; i--) {
+        j--;
+        ap++;
+        rp[j] = bn_mul_add_words(rp, ap, j, ap[-1]);
+        rp += 2;
+    }
+
+    bn_add_words(r, r, r, max);
+
+    /* There will not be a carry */
+
+    bn_sqr_words(tmp, a, n);
+
+    bn_add_words(r, r, tmp, max);
+}
+
+#ifdef BN_RECURSION
+/*-
+ * r is 2*n words in size,
+ * a and b are both n words in size.    (There's not actually a 'b' here ...)
+ * n must be a power of 2.
+ * We multiply and return the result.
+ * t must be 2*n words in size
+ * We calculate
+ * a[0]*b[0]
+ * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
+ * a[1]*b[1]
+ */
+void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t)
+{
+    int n = n2 / 2;
+    int zero, c1;
+    BN_ULONG ln, lo, *p;
+
+    if (n2 == 4) {
+# ifndef BN_SQR_COMBA
+        bn_sqr_normal(r, a, 4, t);
+# else
+        bn_sqr_comba4(r, a);
+# endif
+        return;
+    } else if (n2 == 8) {
+# ifndef BN_SQR_COMBA
+        bn_sqr_normal(r, a, 8, t);
+# else
+        bn_sqr_comba8(r, a);
+# endif
+        return;
+    }
+    if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL) {
+        bn_sqr_normal(r, a, n2, t);
+        return;
+    }
+    /* r=(a[0]-a[1])*(a[1]-a[0]) */
+    c1 = bn_cmp_words(a, &(a[n]), n);
+    zero = 0;
+    if (c1 > 0)
+        bn_sub_words(t, a, &(a[n]), n);
+    else if (c1 < 0)
+        bn_sub_words(t, &(a[n]), a, n);
+    else
+        zero = 1;
+
+    /* The result will always be negative unless it is zero */
+    p = &(t[n2 * 2]);
+
+    if (!zero)
+        bn_sqr_recursive(&(t[n2]), t, n, p);
+    else
+        memset(&t[n2], 0, sizeof(*t) * n2);
+    bn_sqr_recursive(r, a, n, p);
+    bn_sqr_recursive(&(r[n2]), &(a[n]), n, p);
+
+    /*-
+     * t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
+     * r[10] holds (a[0]*b[0])
+     * r[32] holds (b[1]*b[1])
+     */
+
+    c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
+
+    /* t[32] is negative */
+    c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
+
+    /*-
+     * t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
+     * r[10] holds (a[0]*a[0])
+     * r[32] holds (a[1]*a[1])
+     * c1 holds the carry bits
+     */
+    c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
+    if (c1) {
+        p = &(r[n + n2]);
+        lo = *p;
+        ln = (lo + c1) & BN_MASK2;
+        *p = ln;
+
+        /*
+         * The overflow will stop before we over write words we should not
+         * overwrite
+         */
+        if (ln < (BN_ULONG)c1) {
+            do {
+                p++;
+                lo = *p;
+                ln = (lo + 1) & BN_MASK2;
+                *p = ln;
+            } while (ln == 0);
+        }
+    }
+}
+#endif
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_sqrt.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_sqrt.c
@ -0,0 +1,358 @@
+/*
+ * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
+/*
+ * Returns 'ret' such that ret^2 == a (mod p), using the Tonelli/Shanks
+ * algorithm (cf. Henri Cohen, "A Course in Algebraic Computational Number
+ * Theory", algorithm 1.5.1). 'p' must be prime!
+ */
+{
+    BIGNUM *ret = in;
+    int err = 1;
+    int r;
+    BIGNUM *A, *b, *q, *t, *x, *y;
+    int e, i, j;
+
+    if (!BN_is_odd(p) || BN_abs_is_word(p, 1)) {
+        if (BN_abs_is_word(p, 2)) {
+            if (ret == NULL)
+                ret = BN_new();
+            if (ret == NULL)
+                goto end;
+            if (!BN_set_word(ret, BN_is_bit_set(a, 0))) {
+                if (ret != in)
+                    BN_free(ret);
+                return NULL;
+            }
+            bn_check_top(ret);
+            return ret;
+        }
+
+        BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
+        return NULL;
+    }
+
+    if (BN_is_zero(a) || BN_is_one(a)) {
+        if (ret == NULL)
+            ret = BN_new();
+        if (ret == NULL)
+            goto end;
+        if (!BN_set_word(ret, BN_is_one(a))) {
+            if (ret != in)
+                BN_free(ret);
+            return NULL;
+        }
+        bn_check_top(ret);
+        return ret;
+    }
+
+    BN_CTX_start(ctx);
+    A = BN_CTX_get(ctx);
+    b = BN_CTX_get(ctx);
+    q = BN_CTX_get(ctx);
+    t = BN_CTX_get(ctx);
+    x = BN_CTX_get(ctx);
+    y = BN_CTX_get(ctx);
+    if (y == NULL)
+        goto end;
+
+    if (ret == NULL)
+        ret = BN_new();
+    if (ret == NULL)
+        goto end;
+
+    /* A = a mod p */
+    if (!BN_nnmod(A, a, p, ctx))
+        goto end;
+
+    /* now write  |p| - 1  as  2^e*q  where  q  is odd */
+    e = 1;
+    while (!BN_is_bit_set(p, e))
+        e++;
+    /* we'll set  q  later (if needed) */
+
+    if (e == 1) {
+        /*-
+         * The easy case:  (|p|-1)/2  is odd, so 2 has an inverse
+         * modulo  (|p|-1)/2,  and square roots can be computed
+         * directly by modular exponentiation.
+         * We have
+         *     2 * (|p|+1)/4 == 1   (mod (|p|-1)/2),
+         * so we can use exponent  (|p|+1)/4,  i.e.  (|p|-3)/4 + 1.
+         */
+        if (!BN_rshift(q, p, 2))
+            goto end;
+        q->neg = 0;
+        if (!BN_add_word(q, 1))
+            goto end;
+        if (!BN_mod_exp(ret, A, q, p, ctx))
+            goto end;
+        err = 0;
+        goto vrfy;
+    }
+
+    if (e == 2) {
+        /*-
+         * |p| == 5  (mod 8)
+         *
+         * In this case  2  is always a non-square since
+         * Legendre(2,p) = (-1)^((p^2-1)/8)  for any odd prime.
+         * So if  a  really is a square, then  2*a  is a non-square.
+         * Thus for
+         *      b := (2*a)^((|p|-5)/8),
+         *      i := (2*a)*b^2
+         * we have
+         *     i^2 = (2*a)^((1 + (|p|-5)/4)*2)
+         *         = (2*a)^((p-1)/2)
+         *         = -1;
+         * so if we set
+         *      x := a*b*(i-1),
+         * then
+         *     x^2 = a^2 * b^2 * (i^2 - 2*i + 1)
+         *         = a^2 * b^2 * (-2*i)
+         *         = a*(-i)*(2*a*b^2)
+         *         = a*(-i)*i
+         *         = a.
+         *
+         * (This is due to A.O.L. Atkin,
+         * <URL: http://listserv.nodak.edu/scripts/wa.exe?A2=ind9211&L=nmbrthry&O=T&P=562>,
+         * November 1992.)
+         */
+
+        /* t := 2*a */
+        if (!BN_mod_lshift1_quick(t, A, p))
+            goto end;
+
+        /* b := (2*a)^((|p|-5)/8) */
+        if (!BN_rshift(q, p, 3))
+            goto end;
+        q->neg = 0;
+        if (!BN_mod_exp(b, t, q, p, ctx))
+            goto end;
+
+        /* y := b^2 */
+        if (!BN_mod_sqr(y, b, p, ctx))
+            goto end;
+
+        /* t := (2*a)*b^2 - 1 */
+        if (!BN_mod_mul(t, t, y, p, ctx))
+            goto end;
+        if (!BN_sub_word(t, 1))
+            goto end;
+
+        /* x = a*b*t */
+        if (!BN_mod_mul(x, A, b, p, ctx))
+            goto end;
+        if (!BN_mod_mul(x, x, t, p, ctx))
+            goto end;
+
+        if (!BN_copy(ret, x))
+            goto end;
+        err = 0;
+        goto vrfy;
+    }
+
+    /*
+     * e > 2, so we really have to use the Tonelli/Shanks algorithm. First,
+     * find some y that is not a square.
+     */
+    if (!BN_copy(q, p))
+        goto end;               /* use 'q' as temp */
+    q->neg = 0;
+    i = 2;
+    do {
+        /*
+         * For efficiency, try small numbers first; if this fails, try random
+         * numbers.
+         */
+        if (i < 22) {
+            if (!BN_set_word(y, i))
+                goto end;
+        } else {
+            if (!BN_priv_rand(y, BN_num_bits(p), 0, 0))
+                goto end;
+            if (BN_ucmp(y, p) >= 0) {
+                if (!(p->neg ? BN_add : BN_sub) (y, y, p))
+                    goto end;
+            }
+            /* now 0 <= y < |p| */
+            if (BN_is_zero(y))
+                if (!BN_set_word(y, i))
+                    goto end;
+        }
+
+        r = BN_kronecker(y, q, ctx); /* here 'q' is |p| */
+        if (r < -1)
+            goto end;
+        if (r == 0) {
+            /* m divides p */
+            BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
+            goto end;
+        }
+    }
+    while (r == 1 && ++i < 82);
+
+    if (r != -1) {
+        /*
+         * Many rounds and still no non-square -- this is more likely a bug
+         * than just bad luck. Even if p is not prime, we should have found
+         * some y such that r == -1.
+         */
+        BNerr(BN_F_BN_MOD_SQRT, BN_R_TOO_MANY_ITERATIONS);
+        goto end;
+    }
+
+    /* Here's our actual 'q': */
+    if (!BN_rshift(q, q, e))
+        goto end;
+
+    /*
+     * Now that we have some non-square, we can find an element of order 2^e
+     * by computing its q'th power.
+     */
+    if (!BN_mod_exp(y, y, q, p, ctx))
+        goto end;
+    if (BN_is_one(y)) {
+        BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
+        goto end;
+    }
+
+    /*-
+     * Now we know that (if  p  is indeed prime) there is an integer
+     * k,  0 <= k < 2^e,  such that
+     *
+     *      a^q * y^k == 1   (mod p).
+     *
+     * As  a^q  is a square and  y  is not,  k  must be even.
+     * q+1  is even, too, so there is an element
+     *
+     *     X := a^((q+1)/2) * y^(k/2),
+     *
+     * and it satisfies
+     *
+     *     X^2 = a^q * a     * y^k
+     *         = a,
+     *
+     * so it is the square root that we are looking for.
+     */
+
+    /* t := (q-1)/2  (note that  q  is odd) */
+    if (!BN_rshift1(t, q))
+        goto end;
+
+    /* x := a^((q-1)/2) */
+    if (BN_is_zero(t)) {        /* special case: p = 2^e + 1 */
+        if (!BN_nnmod(t, A, p, ctx))
+            goto end;
+        if (BN_is_zero(t)) {
+            /* special case: a == 0  (mod p) */
+            BN_zero(ret);
+            err = 0;
+            goto end;
+        } else if (!BN_one(x))
+            goto end;
+    } else {
+        if (!BN_mod_exp(x, A, t, p, ctx))
+            goto end;
+        if (BN_is_zero(x)) {
+            /* special case: a == 0  (mod p) */
+            BN_zero(ret);
+            err = 0;
+            goto end;
+        }
+    }
+
+    /* b := a*x^2  (= a^q) */
+    if (!BN_mod_sqr(b, x, p, ctx))
+        goto end;
+    if (!BN_mod_mul(b, b, A, p, ctx))
+        goto end;
+
+    /* x := a*x    (= a^((q+1)/2)) */
+    if (!BN_mod_mul(x, x, A, p, ctx))
+        goto end;
+
+    while (1) {
+        /*-
+         * Now  b  is  a^q * y^k  for some even  k  (0 <= k < 2^E
+         * where  E  refers to the original value of  e,  which we
+         * don't keep in a variable),  and  x  is  a^((q+1)/2) * y^(k/2).
+         *
+         * We have  a*b = x^2,
+         *    y^2^(e-1) = -1,
+         *    b^2^(e-1) = 1.
+         */
+
+        if (BN_is_one(b)) {
+            if (!BN_copy(ret, x))
+                goto end;
+            err = 0;
+            goto vrfy;
+        }
+
+        /* find smallest  i  such that  b^(2^i) = 1 */
+        i = 1;
+        if (!BN_mod_sqr(t, b, p, ctx))
+            goto end;
+        while (!BN_is_one(t)) {
+            i++;
+            if (i == e) {
+                BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
+                goto end;
+            }
+            if (!BN_mod_mul(t, t, t, p, ctx))
+                goto end;
+        }
+
+        /* t := y^2^(e - i - 1) */
+        if (!BN_copy(t, y))
+            goto end;
+        for (j = e - i - 1; j > 0; j--) {
+            if (!BN_mod_sqr(t, t, p, ctx))
+                goto end;
+        }
+        if (!BN_mod_mul(y, t, t, p, ctx))
+            goto end;
+        if (!BN_mod_mul(x, x, t, p, ctx))
+            goto end;
+        if (!BN_mod_mul(b, b, y, p, ctx))
+            goto end;
+        e = i;
+    }
+
+ vrfy:
+    if (!err) {
+        /*
+         * verify the result -- the input might have been not a square (test
+         * added in 0.9.8)
+         */
+
+        if (!BN_mod_sqr(x, ret, p, ctx))
+            err = 1;
+
+        if (!err && 0 != BN_cmp(x, A)) {
+            BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
+            err = 1;
+        }
+    }
+
+ end:
+    if (err) {
+        if (ret != in)
+            BN_clear_free(ret);
+        ret = NULL;
+    }
+    BN_CTX_end(ctx);
+    bn_check_top(ret);
+    return ret;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_srp.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_srp.c
@ -0,0 +1,545 @@
+/*
+ * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "bn_lcl.h"
+#include "internal/nelem.h"
+
+#ifndef OPENSSL_NO_SRP
+
+#include <openssl/srp.h>
+#include "internal/bn_srp.h"
+
+# if (BN_BYTES == 8)
+#  if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
+#   define bn_pack4(a1,a2,a3,a4) ((a1##UI64<<48)|(a2##UI64<<32)|(a3##UI64<<16)|a4##UI64)
+#  elif defined(__arch64__)
+#   define bn_pack4(a1,a2,a3,a4) ((a1##UL<<48)|(a2##UL<<32)|(a3##UL<<16)|a4##UL)
+#  else
+#   define bn_pack4(a1,a2,a3,a4) ((a1##ULL<<48)|(a2##ULL<<32)|(a3##ULL<<16)|a4##ULL)
+#  endif
+# elif (BN_BYTES == 4)
+#  define bn_pack4(a1,a2,a3,a4)  ((a3##UL<<16)|a4##UL), ((a1##UL<<16)|a2##UL)
+# else
+#  error "unsupported BN_BYTES"
+# endif
+
+static const BN_ULONG bn_group_1024_value[] = {
+    bn_pack4(0x9FC6, 0x1D2F, 0xC0EB, 0x06E3),
+    bn_pack4(0xFD51, 0x38FE, 0x8376, 0x435B),
+    bn_pack4(0x2FD4, 0xCBF4, 0x976E, 0xAA9A),
+    bn_pack4(0x68ED, 0xBC3C, 0x0572, 0x6CC0),
+    bn_pack4(0xC529, 0xF566, 0x660E, 0x57EC),
+    bn_pack4(0x8255, 0x9B29, 0x7BCF, 0x1885),
+    bn_pack4(0xCE8E, 0xF4AD, 0x69B1, 0x5D49),
+    bn_pack4(0x5DC7, 0xD7B4, 0x6154, 0xD6B6),
+    bn_pack4(0x8E49, 0x5C1D, 0x6089, 0xDAD1),
+    bn_pack4(0xE0D5, 0xD8E2, 0x50B9, 0x8BE4),
+    bn_pack4(0x383B, 0x4813, 0xD692, 0xC6E0),
+    bn_pack4(0xD674, 0xDF74, 0x96EA, 0x81D3),
+    bn_pack4(0x9EA2, 0x314C, 0x9C25, 0x6576),
+    bn_pack4(0x6072, 0x6187, 0x75FF, 0x3C0B),
+    bn_pack4(0x9C33, 0xF80A, 0xFA8F, 0xC5E8),
+    bn_pack4(0xEEAF, 0x0AB9, 0xADB3, 0x8DD6)
+};
+
+const BIGNUM bn_group_1024 = {
+    (BN_ULONG *)bn_group_1024_value,
+    OSSL_NELEM(bn_group_1024_value),
+    OSSL_NELEM(bn_group_1024_value),
+    0,
+    BN_FLG_STATIC_DATA
+};
+
+static const BN_ULONG bn_group_1536_value[] = {
+    bn_pack4(0xCF76, 0xE3FE, 0xD135, 0xF9BB),
+    bn_pack4(0x1518, 0x0F93, 0x499A, 0x234D),
+    bn_pack4(0x8CE7, 0xA28C, 0x2442, 0xC6F3),
+    bn_pack4(0x5A02, 0x1FFF, 0x5E91, 0x479E),
+    bn_pack4(0x7F8A, 0x2FE9, 0xB8B5, 0x292E),
+    bn_pack4(0x837C, 0x264A, 0xE3A9, 0xBEB8),
+    bn_pack4(0xE442, 0x734A, 0xF7CC, 0xB7AE),
+    bn_pack4(0x6577, 0x2E43, 0x7D6C, 0x7F8C),
+    bn_pack4(0xDB2F, 0xD53D, 0x24B7, 0xC486),
+    bn_pack4(0x6EDF, 0x0195, 0x3934, 0x9627),
+    bn_pack4(0x158B, 0xFD3E, 0x2B9C, 0x8CF5),
+    bn_pack4(0x764E, 0x3F4B, 0x53DD, 0x9DA1),
+    bn_pack4(0x4754, 0x8381, 0xDBC5, 0xB1FC),
+    bn_pack4(0x9B60, 0x9E0B, 0xE3BA, 0xB63D),
+    bn_pack4(0x8134, 0xB1C8, 0xB979, 0x8914),
+    bn_pack4(0xDF02, 0x8A7C, 0xEC67, 0xF0D0),
+    bn_pack4(0x80B6, 0x55BB, 0x9A22, 0xE8DC),
+    bn_pack4(0x1558, 0x903B, 0xA0D0, 0xF843),
+    bn_pack4(0x51C6, 0xA94B, 0xE460, 0x7A29),
+    bn_pack4(0x5F4F, 0x5F55, 0x6E27, 0xCBDE),
+    bn_pack4(0xBEEE, 0xA961, 0x4B19, 0xCC4D),
+    bn_pack4(0xDBA5, 0x1DF4, 0x99AC, 0x4C80),
+    bn_pack4(0xB1F1, 0x2A86, 0x17A4, 0x7BBB),
+    bn_pack4(0x9DEF, 0x3CAF, 0xB939, 0x277A)
+};
+
+const BIGNUM bn_group_1536 = {
+    (BN_ULONG *)bn_group_1536_value,
+    OSSL_NELEM(bn_group_1536_value),
+    OSSL_NELEM(bn_group_1536_value),
+    0,
+    BN_FLG_STATIC_DATA
+};
+
+static const BN_ULONG bn_group_2048_value[] = {
+    bn_pack4(0x0FA7, 0x111F, 0x9E4A, 0xFF73),
+    bn_pack4(0x9B65, 0xE372, 0xFCD6, 0x8EF2),
+    bn_pack4(0x35DE, 0x236D, 0x525F, 0x5475),
+    bn_pack4(0x94B5, 0xC803, 0xD89F, 0x7AE4),
+    bn_pack4(0x71AE, 0x35F8, 0xE9DB, 0xFBB6),
+    bn_pack4(0x2A56, 0x98F3, 0xA8D0, 0xC382),
+    bn_pack4(0x9CCC, 0x041C, 0x7BC3, 0x08D8),
+    bn_pack4(0xAF87, 0x4E73, 0x03CE, 0x5329),
+    bn_pack4(0x6160, 0x2790, 0x04E5, 0x7AE6),
+    bn_pack4(0x032C, 0xFBDB, 0xF52F, 0xB378),
+    bn_pack4(0x5EA7, 0x7A27, 0x75D2, 0xECFA),
+    bn_pack4(0x5445, 0x23B5, 0x24B0, 0xD57D),
+    bn_pack4(0x5B9D, 0x32E6, 0x88F8, 0x7748),
+    bn_pack4(0xF1D2, 0xB907, 0x8717, 0x461A),
+    bn_pack4(0x76BD, 0x207A, 0x436C, 0x6481),
+    bn_pack4(0xCA97, 0xB43A, 0x23FB, 0x8016),
+    bn_pack4(0x1D28, 0x1E44, 0x6B14, 0x773B),
+    bn_pack4(0x7359, 0xD041, 0xD5C3, 0x3EA7),
+    bn_pack4(0xA80D, 0x740A, 0xDBF4, 0xFF74),
+    bn_pack4(0x55F9, 0x7993, 0xEC97, 0x5EEA),
+    bn_pack4(0x2918, 0xA996, 0x2F0B, 0x93B8),
+    bn_pack4(0x661A, 0x05FB, 0xD5FA, 0xAAE8),
+    bn_pack4(0xCF60, 0x9517, 0x9A16, 0x3AB3),
+    bn_pack4(0xE808, 0x3969, 0xEDB7, 0x67B0),
+    bn_pack4(0xCD7F, 0x48A9, 0xDA04, 0xFD50),
+    bn_pack4(0xD523, 0x12AB, 0x4B03, 0x310D),
+    bn_pack4(0x8193, 0xE075, 0x7767, 0xA13D),
+    bn_pack4(0xA373, 0x29CB, 0xB4A0, 0x99ED),
+    bn_pack4(0xFC31, 0x9294, 0x3DB5, 0x6050),
+    bn_pack4(0xAF72, 0xB665, 0x1987, 0xEE07),
+    bn_pack4(0xF166, 0xDE5E, 0x1389, 0x582F),
+    bn_pack4(0xAC6B, 0xDB41, 0x324A, 0x9A9B)
+};
+
+const BIGNUM bn_group_2048 = {
+    (BN_ULONG *)bn_group_2048_value,
+    OSSL_NELEM(bn_group_2048_value),
+    OSSL_NELEM(bn_group_2048_value),
+    0,
+    BN_FLG_STATIC_DATA
+};
+
+static const BN_ULONG bn_group_3072_value[] = {
+    bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF),
+    bn_pack4(0x4B82, 0xD120, 0xA93A, 0xD2CA),
+    bn_pack4(0x43DB, 0x5BFC, 0xE0FD, 0x108E),
+    bn_pack4(0x08E2, 0x4FA0, 0x74E5, 0xAB31),
+    bn_pack4(0x7709, 0x88C0, 0xBAD9, 0x46E2),
+    bn_pack4(0xBBE1, 0x1757, 0x7A61, 0x5D6C),
+    bn_pack4(0x521F, 0x2B18, 0x177B, 0x200C),
+    bn_pack4(0xD876, 0x0273, 0x3EC8, 0x6A64),
+    bn_pack4(0xF12F, 0xFA06, 0xD98A, 0x0864),
+    bn_pack4(0xCEE3, 0xD226, 0x1AD2, 0xEE6B),
+    bn_pack4(0x1E8C, 0x94E0, 0x4A25, 0x619D),
+    bn_pack4(0xABF5, 0xAE8C, 0xDB09, 0x33D7),
+    bn_pack4(0xB397, 0x0F85, 0xA6E1, 0xE4C7),
+    bn_pack4(0x8AEA, 0x7157, 0x5D06, 0x0C7D),
+    bn_pack4(0xECFB, 0x8504, 0x58DB, 0xEF0A),
+    bn_pack4(0xA855, 0x21AB, 0xDF1C, 0xBA64),
+    bn_pack4(0xAD33, 0x170D, 0x0450, 0x7A33),
+    bn_pack4(0x1572, 0x8E5A, 0x8AAA, 0xC42D),
+    bn_pack4(0x15D2, 0x2618, 0x98FA, 0x0510),
+    bn_pack4(0x3995, 0x497C, 0xEA95, 0x6AE5),
+    bn_pack4(0xDE2B, 0xCBF6, 0x9558, 0x1718),
+    bn_pack4(0xB5C5, 0x5DF0, 0x6F4C, 0x52C9),
+    bn_pack4(0x9B27, 0x83A2, 0xEC07, 0xA28F),
+    bn_pack4(0xE39E, 0x772C, 0x180E, 0x8603),
+    bn_pack4(0x3290, 0x5E46, 0x2E36, 0xCE3B),
+    bn_pack4(0xF174, 0x6C08, 0xCA18, 0x217C),
+    bn_pack4(0x670C, 0x354E, 0x4ABC, 0x9804),
+    bn_pack4(0x9ED5, 0x2907, 0x7096, 0x966D),
+    bn_pack4(0x1C62, 0xF356, 0x2085, 0x52BB),
+    bn_pack4(0x8365, 0x5D23, 0xDCA3, 0xAD96),
+    bn_pack4(0x6916, 0x3FA8, 0xFD24, 0xCF5F),
+    bn_pack4(0x98DA, 0x4836, 0x1C55, 0xD39A),
+    bn_pack4(0xC200, 0x7CB8, 0xA163, 0xBF05),
+    bn_pack4(0x4928, 0x6651, 0xECE4, 0x5B3D),
+    bn_pack4(0xAE9F, 0x2411, 0x7C4B, 0x1FE6),
+    bn_pack4(0xEE38, 0x6BFB, 0x5A89, 0x9FA5),
+    bn_pack4(0x0BFF, 0x5CB6, 0xF406, 0xB7ED),
+    bn_pack4(0xF44C, 0x42E9, 0xA637, 0xED6B),
+    bn_pack4(0xE485, 0xB576, 0x625E, 0x7EC6),
+    bn_pack4(0x4FE1, 0x356D, 0x6D51, 0xC245),
+    bn_pack4(0x302B, 0x0A6D, 0xF25F, 0x1437),
+    bn_pack4(0xEF95, 0x19B3, 0xCD3A, 0x431B),
+    bn_pack4(0x514A, 0x0879, 0x8E34, 0x04DD),
+    bn_pack4(0x020B, 0xBEA6, 0x3B13, 0x9B22),
+    bn_pack4(0x2902, 0x4E08, 0x8A67, 0xCC74),
+    bn_pack4(0xC4C6, 0x628B, 0x80DC, 0x1CD1),
+    bn_pack4(0xC90F, 0xDAA2, 0x2168, 0xC234),
+    bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF)
+};
+
+const BIGNUM bn_group_3072 = {
+    (BN_ULONG *)bn_group_3072_value,
+    OSSL_NELEM(bn_group_3072_value),
+    OSSL_NELEM(bn_group_3072_value),
+    0,
+    BN_FLG_STATIC_DATA
+};
+
+static const BN_ULONG bn_group_4096_value[] = {
+    bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF),
+    bn_pack4(0x4DF4, 0x35C9, 0x3406, 0x3199),
+    bn_pack4(0x86FF, 0xB7DC, 0x90A6, 0xC08F),
+    bn_pack4(0x93B4, 0xEA98, 0x8D8F, 0xDDC1),
+    bn_pack4(0xD006, 0x9127, 0xD5B0, 0x5AA9),
+    bn_pack4(0xB81B, 0xDD76, 0x2170, 0x481C),
+    bn_pack4(0x1F61, 0x2970, 0xCEE2, 0xD7AF),
+    bn_pack4(0x233B, 0xA186, 0x515B, 0xE7ED),
+    bn_pack4(0x99B2, 0x964F, 0xA090, 0xC3A2),
+    bn_pack4(0x287C, 0x5947, 0x4E6B, 0xC05D),
+    bn_pack4(0x2E8E, 0xFC14, 0x1FBE, 0xCAA6),
+    bn_pack4(0xDBBB, 0xC2DB, 0x04DE, 0x8EF9),
+    bn_pack4(0x2583, 0xE9CA, 0x2AD4, 0x4CE8),
+    bn_pack4(0x1A94, 0x6834, 0xB615, 0x0BDA),
+    bn_pack4(0x99C3, 0x2718, 0x6AF4, 0xE23C),
+    bn_pack4(0x8871, 0x9A10, 0xBDBA, 0x5B26),
+    bn_pack4(0x1A72, 0x3C12, 0xA787, 0xE6D7),
+    bn_pack4(0x4B82, 0xD120, 0xA921, 0x0801),
+    bn_pack4(0x43DB, 0x5BFC, 0xE0FD, 0x108E),
+    bn_pack4(0x08E2, 0x4FA0, 0x74E5, 0xAB31),
+    bn_pack4(0x7709, 0x88C0, 0xBAD9, 0x46E2),
+    bn_pack4(0xBBE1, 0x1757, 0x7A61, 0x5D6C),
+    bn_pack4(0x521F, 0x2B18, 0x177B, 0x200C),
+    bn_pack4(0xD876, 0x0273, 0x3EC8, 0x6A64),
+    bn_pack4(0xF12F, 0xFA06, 0xD98A, 0x0864),
+    bn_pack4(0xCEE3, 0xD226, 0x1AD2, 0xEE6B),
+    bn_pack4(0x1E8C, 0x94E0, 0x4A25, 0x619D),
+    bn_pack4(0xABF5, 0xAE8C, 0xDB09, 0x33D7),
+    bn_pack4(0xB397, 0x0F85, 0xA6E1, 0xE4C7),
+    bn_pack4(0x8AEA, 0x7157, 0x5D06, 0x0C7D),
+    bn_pack4(0xECFB, 0x8504, 0x58DB, 0xEF0A),
+    bn_pack4(0xA855, 0x21AB, 0xDF1C, 0xBA64),
+    bn_pack4(0xAD33, 0x170D, 0x0450, 0x7A33),
+    bn_pack4(0x1572, 0x8E5A, 0x8AAA, 0xC42D),
+    bn_pack4(0x15D2, 0x2618, 0x98FA, 0x0510),
+    bn_pack4(0x3995, 0x497C, 0xEA95, 0x6AE5),
+    bn_pack4(0xDE2B, 0xCBF6, 0x9558, 0x1718),
+    bn_pack4(0xB5C5, 0x5DF0, 0x6F4C, 0x52C9),
+    bn_pack4(0x9B27, 0x83A2, 0xEC07, 0xA28F),
+    bn_pack4(0xE39E, 0x772C, 0x180E, 0x8603),
+    bn_pack4(0x3290, 0x5E46, 0x2E36, 0xCE3B),
+    bn_pack4(0xF174, 0x6C08, 0xCA18, 0x217C),
+    bn_pack4(0x670C, 0x354E, 0x4ABC, 0x9804),
+    bn_pack4(0x9ED5, 0x2907, 0x7096, 0x966D),
+    bn_pack4(0x1C62, 0xF356, 0x2085, 0x52BB),
+    bn_pack4(0x8365, 0x5D23, 0xDCA3, 0xAD96),
+    bn_pack4(0x6916, 0x3FA8, 0xFD24, 0xCF5F),
+    bn_pack4(0x98DA, 0x4836, 0x1C55, 0xD39A),
+    bn_pack4(0xC200, 0x7CB8, 0xA163, 0xBF05),
+    bn_pack4(0x4928, 0x6651, 0xECE4, 0x5B3D),
+    bn_pack4(0xAE9F, 0x2411, 0x7C4B, 0x1FE6),
+    bn_pack4(0xEE38, 0x6BFB, 0x5A89, 0x9FA5),
+    bn_pack4(0x0BFF, 0x5CB6, 0xF406, 0xB7ED),
+    bn_pack4(0xF44C, 0x42E9, 0xA637, 0xED6B),
+    bn_pack4(0xE485, 0xB576, 0x625E, 0x7EC6),
+    bn_pack4(0x4FE1, 0x356D, 0x6D51, 0xC245),
+    bn_pack4(0x302B, 0x0A6D, 0xF25F, 0x1437),
+    bn_pack4(0xEF95, 0x19B3, 0xCD3A, 0x431B),
+    bn_pack4(0x514A, 0x0879, 0x8E34, 0x04DD),
+    bn_pack4(0x020B, 0xBEA6, 0x3B13, 0x9B22),
+    bn_pack4(0x2902, 0x4E08, 0x8A67, 0xCC74),
+    bn_pack4(0xC4C6, 0x628B, 0x80DC, 0x1CD1),
+    bn_pack4(0xC90F, 0xDAA2, 0x2168, 0xC234),
+    bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF)
+};
+
+const BIGNUM bn_group_4096 = {
+    (BN_ULONG *)bn_group_4096_value,
+    OSSL_NELEM(bn_group_4096_value),
+    OSSL_NELEM(bn_group_4096_value),
+    0,
+    BN_FLG_STATIC_DATA
+};
+
+static const BN_ULONG bn_group_6144_value[] = {
+    bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF),
+    bn_pack4(0xE694, 0xF91E, 0x6DCC, 0x4024),
+    bn_pack4(0x12BF, 0x2D5B, 0x0B74, 0x74D6),
+    bn_pack4(0x043E, 0x8F66, 0x3F48, 0x60EE),
+    bn_pack4(0x387F, 0xE8D7, 0x6E3C, 0x0468),
+    bn_pack4(0xDA56, 0xC9EC, 0x2EF2, 0x9632),
+    bn_pack4(0xEB19, 0xCCB1, 0xA313, 0xD55C),
+    bn_pack4(0xF550, 0xAA3D, 0x8A1F, 0xBFF0),
+    bn_pack4(0x06A1, 0xD58B, 0xB7C5, 0xDA76),
+    bn_pack4(0xA797, 0x15EE, 0xF29B, 0xE328),
+    bn_pack4(0x14CC, 0x5ED2, 0x0F80, 0x37E0),
+    bn_pack4(0xCC8F, 0x6D7E, 0xBF48, 0xE1D8),
+    bn_pack4(0x4BD4, 0x07B2, 0x2B41, 0x54AA),
+    bn_pack4(0x0F1D, 0x45B7, 0xFF58, 0x5AC5),
+    bn_pack4(0x23A9, 0x7A7E, 0x36CC, 0x88BE),
+    bn_pack4(0x59E7, 0xC97F, 0xBEC7, 0xE8F3),
+    bn_pack4(0xB5A8, 0x4031, 0x900B, 0x1C9E),
+    bn_pack4(0xD55E, 0x702F, 0x4698, 0x0C82),
+    bn_pack4(0xF482, 0xD7CE, 0x6E74, 0xFEF6),
+    bn_pack4(0xF032, 0xEA15, 0xD172, 0x1D03),
+    bn_pack4(0x5983, 0xCA01, 0xC64B, 0x92EC),
+    bn_pack4(0x6FB8, 0xF401, 0x378C, 0xD2BF),
+    bn_pack4(0x3320, 0x5151, 0x2BD7, 0xAF42),
+    bn_pack4(0xDB7F, 0x1447, 0xE6CC, 0x254B),
+    bn_pack4(0x44CE, 0x6CBA, 0xCED4, 0xBB1B),
+    bn_pack4(0xDA3E, 0xDBEB, 0xCF9B, 0x14ED),
+    bn_pack4(0x1797, 0x27B0, 0x865A, 0x8918),
+    bn_pack4(0xB06A, 0x53ED, 0x9027, 0xD831),
+    bn_pack4(0xE5DB, 0x382F, 0x4130, 0x01AE),
+    bn_pack4(0xF8FF, 0x9406, 0xAD9E, 0x530E),
+    bn_pack4(0xC975, 0x1E76, 0x3DBA, 0x37BD),
+    bn_pack4(0xC1D4, 0xDCB2, 0x6026, 0x46DE),
+    bn_pack4(0x36C3, 0xFAB4, 0xD27C, 0x7026),
+    bn_pack4(0x4DF4, 0x35C9, 0x3402, 0x8492),
+    bn_pack4(0x86FF, 0xB7DC, 0x90A6, 0xC08F),
+    bn_pack4(0x93B4, 0xEA98, 0x8D8F, 0xDDC1),
+    bn_pack4(0xD006, 0x9127, 0xD5B0, 0x5AA9),
+    bn_pack4(0xB81B, 0xDD76, 0x2170, 0x481C),
+    bn_pack4(0x1F61, 0x2970, 0xCEE2, 0xD7AF),
+    bn_pack4(0x233B, 0xA186, 0x515B, 0xE7ED),
+    bn_pack4(0x99B2, 0x964F, 0xA090, 0xC3A2),
+    bn_pack4(0x287C, 0x5947, 0x4E6B, 0xC05D),
+    bn_pack4(0x2E8E, 0xFC14, 0x1FBE, 0xCAA6),
+    bn_pack4(0xDBBB, 0xC2DB, 0x04DE, 0x8EF9),
+    bn_pack4(0x2583, 0xE9CA, 0x2AD4, 0x4CE8),
+    bn_pack4(0x1A94, 0x6834, 0xB615, 0x0BDA),
+    bn_pack4(0x99C3, 0x2718, 0x6AF4, 0xE23C),
+    bn_pack4(0x8871, 0x9A10, 0xBDBA, 0x5B26),
+    bn_pack4(0x1A72, 0x3C12, 0xA787, 0xE6D7),
+    bn_pack4(0x4B82, 0xD120, 0xA921, 0x0801),
+    bn_pack4(0x43DB, 0x5BFC, 0xE0FD, 0x108E),
+    bn_pack4(0x08E2, 0x4FA0, 0x74E5, 0xAB31),
+    bn_pack4(0x7709, 0x88C0, 0xBAD9, 0x46E2),
+    bn_pack4(0xBBE1, 0x1757, 0x7A61, 0x5D6C),
+    bn_pack4(0x521F, 0x2B18, 0x177B, 0x200C),
+    bn_pack4(0xD876, 0x0273, 0x3EC8, 0x6A64),
+    bn_pack4(0xF12F, 0xFA06, 0xD98A, 0x0864),
+    bn_pack4(0xCEE3, 0xD226, 0x1AD2, 0xEE6B),
+    bn_pack4(0x1E8C, 0x94E0, 0x4A25, 0x619D),
+    bn_pack4(0xABF5, 0xAE8C, 0xDB09, 0x33D7),
+    bn_pack4(0xB397, 0x0F85, 0xA6E1, 0xE4C7),
+    bn_pack4(0x8AEA, 0x7157, 0x5D06, 0x0C7D),
+    bn_pack4(0xECFB, 0x8504, 0x58DB, 0xEF0A),
+    bn_pack4(0xA855, 0x21AB, 0xDF1C, 0xBA64),
+    bn_pack4(0xAD33, 0x170D, 0x0450, 0x7A33),
+    bn_pack4(0x1572, 0x8E5A, 0x8AAA, 0xC42D),
+    bn_pack4(0x15D2, 0x2618, 0x98FA, 0x0510),
+    bn_pack4(0x3995, 0x497C, 0xEA95, 0x6AE5),
+    bn_pack4(0xDE2B, 0xCBF6, 0x9558, 0x1718),
+    bn_pack4(0xB5C5, 0x5DF0, 0x6F4C, 0x52C9),
+    bn_pack4(0x9B27, 0x83A2, 0xEC07, 0xA28F),
+    bn_pack4(0xE39E, 0x772C, 0x180E, 0x8603),
+    bn_pack4(0x3290, 0x5E46, 0x2E36, 0xCE3B),
+    bn_pack4(0xF174, 0x6C08, 0xCA18, 0x217C),
+    bn_pack4(0x670C, 0x354E, 0x4ABC, 0x9804),
+    bn_pack4(0x9ED5, 0x2907, 0x7096, 0x966D),
+    bn_pack4(0x1C62, 0xF356, 0x2085, 0x52BB),
+    bn_pack4(0x8365, 0x5D23, 0xDCA3, 0xAD96),
+    bn_pack4(0x6916, 0x3FA8, 0xFD24, 0xCF5F),
+    bn_pack4(0x98DA, 0x4836, 0x1C55, 0xD39A),
+    bn_pack4(0xC200, 0x7CB8, 0xA163, 0xBF05),
+    bn_pack4(0x4928, 0x6651, 0xECE4, 0x5B3D),
+    bn_pack4(0xAE9F, 0x2411, 0x7C4B, 0x1FE6),
+    bn_pack4(0xEE38, 0x6BFB, 0x5A89, 0x9FA5),
+    bn_pack4(0x0BFF, 0x5CB6, 0xF406, 0xB7ED),
+    bn_pack4(0xF44C, 0x42E9, 0xA637, 0xED6B),
+    bn_pack4(0xE485, 0xB576, 0x625E, 0x7EC6),
+    bn_pack4(0x4FE1, 0x356D, 0x6D51, 0xC245),
+    bn_pack4(0x302B, 0x0A6D, 0xF25F, 0x1437),
+    bn_pack4(0xEF95, 0x19B3, 0xCD3A, 0x431B),
+    bn_pack4(0x514A, 0x0879, 0x8E34, 0x04DD),
+    bn_pack4(0x020B, 0xBEA6, 0x3B13, 0x9B22),
+    bn_pack4(0x2902, 0x4E08, 0x8A67, 0xCC74),
+    bn_pack4(0xC4C6, 0x628B, 0x80DC, 0x1CD1),
+    bn_pack4(0xC90F, 0xDAA2, 0x2168, 0xC234),
+    bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF)
+};
+
+const BIGNUM bn_group_6144 = {
+    (BN_ULONG *)bn_group_6144_value,
+    OSSL_NELEM(bn_group_6144_value),
+    OSSL_NELEM(bn_group_6144_value),
+    0,
+    BN_FLG_STATIC_DATA
+};
+
+static const BN_ULONG bn_group_8192_value[] = {
+    bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF),
+    bn_pack4(0x60C9, 0x80DD, 0x98ED, 0xD3DF),
+    bn_pack4(0xC81F, 0x56E8, 0x80B9, 0x6E71),
+    bn_pack4(0x9E30, 0x50E2, 0x7656, 0x94DF),
+    bn_pack4(0x9558, 0xE447, 0x5677, 0xE9AA),
+    bn_pack4(0xC919, 0x0DA6, 0xFC02, 0x6E47),
+    bn_pack4(0x889A, 0x002E, 0xD5EE, 0x382B),
+    bn_pack4(0x4009, 0x438B, 0x481C, 0x6CD7),
+    bn_pack4(0x3590, 0x46F4, 0xEB87, 0x9F92),
+    bn_pack4(0xFAF3, 0x6BC3, 0x1ECF, 0xA268),
+    bn_pack4(0xB1D5, 0x10BD, 0x7EE7, 0x4D73),
+    bn_pack4(0xF9AB, 0x4819, 0x5DED, 0x7EA1),
+    bn_pack4(0x64F3, 0x1CC5, 0x0846, 0x851D),
+    bn_pack4(0x4597, 0xE899, 0xA025, 0x5DC1),
+    bn_pack4(0xDF31, 0x0EE0, 0x74AB, 0x6A36),
+    bn_pack4(0x6D2A, 0x13F8, 0x3F44, 0xF82D),
+    bn_pack4(0x062B, 0x3CF5, 0xB3A2, 0x78A6),
+    bn_pack4(0x7968, 0x3303, 0xED5B, 0xDD3A),
+    bn_pack4(0xFA9D, 0x4B7F, 0xA2C0, 0x87E8),
+    bn_pack4(0x4BCB, 0xC886, 0x2F83, 0x85DD),
+    bn_pack4(0x3473, 0xFC64, 0x6CEA, 0x306B),
+    bn_pack4(0x13EB, 0x57A8, 0x1A23, 0xF0C7),
+    bn_pack4(0x2222, 0x2E04, 0xA403, 0x7C07),
+    bn_pack4(0xE3FD, 0xB8BE, 0xFC84, 0x8AD9),
+    bn_pack4(0x238F, 0x16CB, 0xE39D, 0x652D),
+    bn_pack4(0x3423, 0xB474, 0x2BF1, 0xC978),
+    bn_pack4(0x3AAB, 0x639C, 0x5AE4, 0xF568),
+    bn_pack4(0x2576, 0xF693, 0x6BA4, 0x2466),
+    bn_pack4(0x741F, 0xA7BF, 0x8AFC, 0x47ED),
+    bn_pack4(0x3BC8, 0x32B6, 0x8D9D, 0xD300),
+    bn_pack4(0xD8BE, 0xC4D0, 0x73B9, 0x31BA),
+    bn_pack4(0x3877, 0x7CB6, 0xA932, 0xDF8C),
+    bn_pack4(0x74A3, 0x926F, 0x12FE, 0xE5E4),
+    bn_pack4(0xE694, 0xF91E, 0x6DBE, 0x1159),
+    bn_pack4(0x12BF, 0x2D5B, 0x0B74, 0x74D6),
+    bn_pack4(0x043E, 0x8F66, 0x3F48, 0x60EE),
+    bn_pack4(0x387F, 0xE8D7, 0x6E3C, 0x0468),
+    bn_pack4(0xDA56, 0xC9EC, 0x2EF2, 0x9632),
+    bn_pack4(0xEB19, 0xCCB1, 0xA313, 0xD55C),
+    bn_pack4(0xF550, 0xAA3D, 0x8A1F, 0xBFF0),
+    bn_pack4(0x06A1, 0xD58B, 0xB7C5, 0xDA76),
+    bn_pack4(0xA797, 0x15EE, 0xF29B, 0xE328),
+    bn_pack4(0x14CC, 0x5ED2, 0x0F80, 0x37E0),
+    bn_pack4(0xCC8F, 0x6D7E, 0xBF48, 0xE1D8),
+    bn_pack4(0x4BD4, 0x07B2, 0x2B41, 0x54AA),
+    bn_pack4(0x0F1D, 0x45B7, 0xFF58, 0x5AC5),
+    bn_pack4(0x23A9, 0x7A7E, 0x36CC, 0x88BE),
+    bn_pack4(0x59E7, 0xC97F, 0xBEC7, 0xE8F3),
+    bn_pack4(0xB5A8, 0x4031, 0x900B, 0x1C9E),
+    bn_pack4(0xD55E, 0x702F, 0x4698, 0x0C82),
+    bn_pack4(0xF482, 0xD7CE, 0x6E74, 0xFEF6),
+    bn_pack4(0xF032, 0xEA15, 0xD172, 0x1D03),
+    bn_pack4(0x5983, 0xCA01, 0xC64B, 0x92EC),
+    bn_pack4(0x6FB8, 0xF401, 0x378C, 0xD2BF),
+    bn_pack4(0x3320, 0x5151, 0x2BD7, 0xAF42),
+    bn_pack4(0xDB7F, 0x1447, 0xE6CC, 0x254B),
+    bn_pack4(0x44CE, 0x6CBA, 0xCED4, 0xBB1B),
+    bn_pack4(0xDA3E, 0xDBEB, 0xCF9B, 0x14ED),
+    bn_pack4(0x1797, 0x27B0, 0x865A, 0x8918),
+    bn_pack4(0xB06A, 0x53ED, 0x9027, 0xD831),
+    bn_pack4(0xE5DB, 0x382F, 0x4130, 0x01AE),
+    bn_pack4(0xF8FF, 0x9406, 0xAD9E, 0x530E),
+    bn_pack4(0xC975, 0x1E76, 0x3DBA, 0x37BD),
+    bn_pack4(0xC1D4, 0xDCB2, 0x6026, 0x46DE),
+    bn_pack4(0x36C3, 0xFAB4, 0xD27C, 0x7026),
+    bn_pack4(0x4DF4, 0x35C9, 0x3402, 0x8492),
+    bn_pack4(0x86FF, 0xB7DC, 0x90A6, 0xC08F),
+    bn_pack4(0x93B4, 0xEA98, 0x8D8F, 0xDDC1),
+    bn_pack4(0xD006, 0x9127, 0xD5B0, 0x5AA9),
+    bn_pack4(0xB81B, 0xDD76, 0x2170, 0x481C),
+    bn_pack4(0x1F61, 0x2970, 0xCEE2, 0xD7AF),
+    bn_pack4(0x233B, 0xA186, 0x515B, 0xE7ED),
+    bn_pack4(0x99B2, 0x964F, 0xA090, 0xC3A2),
+    bn_pack4(0x287C, 0x5947, 0x4E6B, 0xC05D),
+    bn_pack4(0x2E8E, 0xFC14, 0x1FBE, 0xCAA6),
+    bn_pack4(0xDBBB, 0xC2DB, 0x04DE, 0x8EF9),
+    bn_pack4(0x2583, 0xE9CA, 0x2AD4, 0x4CE8),
+    bn_pack4(0x1A94, 0x6834, 0xB615, 0x0BDA),
+    bn_pack4(0x99C3, 0x2718, 0x6AF4, 0xE23C),
+    bn_pack4(0x8871, 0x9A10, 0xBDBA, 0x5B26),
+    bn_pack4(0x1A72, 0x3C12, 0xA787, 0xE6D7),
+    bn_pack4(0x4B82, 0xD120, 0xA921, 0x0801),
+    bn_pack4(0x43DB, 0x5BFC, 0xE0FD, 0x108E),
+    bn_pack4(0x08E2, 0x4FA0, 0x74E5, 0xAB31),
+    bn_pack4(0x7709, 0x88C0, 0xBAD9, 0x46E2),
+    bn_pack4(0xBBE1, 0x1757, 0x7A61, 0x5D6C),
+    bn_pack4(0x521F, 0x2B18, 0x177B, 0x200C),
+    bn_pack4(0xD876, 0x0273, 0x3EC8, 0x6A64),
+    bn_pack4(0xF12F, 0xFA06, 0xD98A, 0x0864),
+    bn_pack4(0xCEE3, 0xD226, 0x1AD2, 0xEE6B),
+    bn_pack4(0x1E8C, 0x94E0, 0x4A25, 0x619D),
+    bn_pack4(0xABF5, 0xAE8C, 0xDB09, 0x33D7),
+    bn_pack4(0xB397, 0x0F85, 0xA6E1, 0xE4C7),
+    bn_pack4(0x8AEA, 0x7157, 0x5D06, 0x0C7D),
+    bn_pack4(0xECFB, 0x8504, 0x58DB, 0xEF0A),
+    bn_pack4(0xA855, 0x21AB, 0xDF1C, 0xBA64),
+    bn_pack4(0xAD33, 0x170D, 0x0450, 0x7A33),
+    bn_pack4(0x1572, 0x8E5A, 0x8AAA, 0xC42D),
+    bn_pack4(0x15D2, 0x2618, 0x98FA, 0x0510),
+    bn_pack4(0x3995, 0x497C, 0xEA95, 0x6AE5),
+    bn_pack4(0xDE2B, 0xCBF6, 0x9558, 0x1718),
+    bn_pack4(0xB5C5, 0x5DF0, 0x6F4C, 0x52C9),
+    bn_pack4(0x9B27, 0x83A2, 0xEC07, 0xA28F),
+    bn_pack4(0xE39E, 0x772C, 0x180E, 0x8603),
+    bn_pack4(0x3290, 0x5E46, 0x2E36, 0xCE3B),
+    bn_pack4(0xF174, 0x6C08, 0xCA18, 0x217C),
+    bn_pack4(0x670C, 0x354E, 0x4ABC, 0x9804),
+    bn_pack4(0x9ED5, 0x2907, 0x7096, 0x966D),
+    bn_pack4(0x1C62, 0xF356, 0x2085, 0x52BB),
+    bn_pack4(0x8365, 0x5D23, 0xDCA3, 0xAD96),
+    bn_pack4(0x6916, 0x3FA8, 0xFD24, 0xCF5F),
+    bn_pack4(0x98DA, 0x4836, 0x1C55, 0xD39A),
+    bn_pack4(0xC200, 0x7CB8, 0xA163, 0xBF05),
+    bn_pack4(0x4928, 0x6651, 0xECE4, 0x5B3D),
+    bn_pack4(0xAE9F, 0x2411, 0x7C4B, 0x1FE6),
+    bn_pack4(0xEE38, 0x6BFB, 0x5A89, 0x9FA5),
+    bn_pack4(0x0BFF, 0x5CB6, 0xF406, 0xB7ED),
+    bn_pack4(0xF44C, 0x42E9, 0xA637, 0xED6B),
+    bn_pack4(0xE485, 0xB576, 0x625E, 0x7EC6),
+    bn_pack4(0x4FE1, 0x356D, 0x6D51, 0xC245),
+    bn_pack4(0x302B, 0x0A6D, 0xF25F, 0x1437),
+    bn_pack4(0xEF95, 0x19B3, 0xCD3A, 0x431B),
+    bn_pack4(0x514A, 0x0879, 0x8E34, 0x04DD),
+    bn_pack4(0x020B, 0xBEA6, 0x3B13, 0x9B22),
+    bn_pack4(0x2902, 0x4E08, 0x8A67, 0xCC74),
+    bn_pack4(0xC4C6, 0x628B, 0x80DC, 0x1CD1),
+    bn_pack4(0xC90F, 0xDAA2, 0x2168, 0xC234),
+    bn_pack4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF)
+};
+
+const BIGNUM bn_group_8192 = {
+    (BN_ULONG *)bn_group_8192_value,
+    OSSL_NELEM(bn_group_8192_value),
+    OSSL_NELEM(bn_group_8192_value),
+    0,
+    BN_FLG_STATIC_DATA
+};
+
+static const BN_ULONG bn_generator_19_value[] = { 19 };
+
+const BIGNUM bn_generator_19 = {
+    (BN_ULONG *)bn_generator_19_value,
+    1,
+    1,
+    0,
+    BN_FLG_STATIC_DATA
+};
+static const BN_ULONG bn_generator_5_value[] = { 5 };
+
+const BIGNUM bn_generator_5 = {
+    (BN_ULONG *)bn_generator_5_value,
+    1,
+    1,
+    0,
+    BN_FLG_STATIC_DATA
+};
+static const BN_ULONG bn_generator_2_value[] = { 2 };
+
+const BIGNUM bn_generator_2 = {
+    (BN_ULONG *)bn_generator_2_value,
+    1,
+    1,
+    0,
+    BN_FLG_STATIC_DATA
+};
+
+#endif
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_word.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_word.c
@ -0,0 +1,201 @@
+/*
+ * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/cryptlib.h"
+#include "bn_lcl.h"
+
+BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w)
+{
+#ifndef BN_LLONG
+    BN_ULONG ret = 0;
+#else
+    BN_ULLONG ret = 0;
+#endif
+    int i;
+
+    if (w == 0)
+        return (BN_ULONG)-1;
+
+#ifndef BN_LLONG
+    /*
+     * If |w| is too long and we don't have BN_ULLONG then we need to fall
+     * back to using BN_div_word
+     */
+    if (w > ((BN_ULONG)1 << BN_BITS4)) {
+        BIGNUM *tmp = BN_dup(a);
+        if (tmp == NULL)
+            return (BN_ULONG)-1;
+
+        ret = BN_div_word(tmp, w);
+        BN_free(tmp);
+
+        return ret;
+    }
+#endif
+
+    bn_check_top(a);
+    w &= BN_MASK2;
+    for (i = a->top - 1; i >= 0; i--) {
+#ifndef BN_LLONG
+        /*
+         * We can assume here that | w <= ((BN_ULONG)1 << BN_BITS4) | and so
+         * | ret < ((BN_ULONG)1 << BN_BITS4) | and therefore the shifts here are
+         * safe and will not overflow
+         */
+        ret = ((ret << BN_BITS4) | ((a->d[i] >> BN_BITS4) & BN_MASK2l)) % w;
+        ret = ((ret << BN_BITS4) | (a->d[i] & BN_MASK2l)) % w;
+#else
+        ret = (BN_ULLONG) (((ret << (BN_ULLONG) BN_BITS2) | a->d[i]) %
+                           (BN_ULLONG) w);
+#endif
+    }
+    return (BN_ULONG)ret;
+}
+
+BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w)
+{
+    BN_ULONG ret = 0;
+    int i, j;
+
+    bn_check_top(a);
+    w &= BN_MASK2;
+
+    if (!w)
+        /* actually this an error (division by zero) */
+        return (BN_ULONG)-1;
+    if (a->top == 0)
+        return 0;
+
+    /* normalize input (so bn_div_words doesn't complain) */
+    j = BN_BITS2 - BN_num_bits_word(w);
+    w <<= j;
+    if (!BN_lshift(a, a, j))
+        return (BN_ULONG)-1;
+
+    for (i = a->top - 1; i >= 0; i--) {
+        BN_ULONG l, d;
+
+        l = a->d[i];
+        d = bn_div_words(ret, l, w);
+        ret = (l - ((d * w) & BN_MASK2)) & BN_MASK2;
+        a->d[i] = d;
+    }
+    if ((a->top > 0) && (a->d[a->top - 1] == 0))
+        a->top--;
+    ret >>= j;
+    if (!a->top)
+        a->neg = 0; /* don't allow negative zero */
+    bn_check_top(a);
+    return ret;
+}
+
+int BN_add_word(BIGNUM *a, BN_ULONG w)
+{
+    BN_ULONG l;
+    int i;
+
+    bn_check_top(a);
+    w &= BN_MASK2;
+
+    /* degenerate case: w is zero */
+    if (!w)
+        return 1;
+    /* degenerate case: a is zero */
+    if (BN_is_zero(a))
+        return BN_set_word(a, w);
+    /* handle 'a' when negative */
+    if (a->neg) {
+        a->neg = 0;
+        i = BN_sub_word(a, w);
+        if (!BN_is_zero(a))
+            a->neg = !(a->neg);
+        return i;
+    }
+    for (i = 0; w != 0 && i < a->top; i++) {
+        a->d[i] = l = (a->d[i] + w) & BN_MASK2;
+        w = (w > l) ? 1 : 0;
+    }
+    if (w && i == a->top) {
+        if (bn_wexpand(a, a->top + 1) == NULL)
+            return 0;
+        a->top++;
+        a->d[i] = w;
+    }
+    bn_check_top(a);
+    return 1;
+}
+
+int BN_sub_word(BIGNUM *a, BN_ULONG w)
+{
+    int i;
+
+    bn_check_top(a);
+    w &= BN_MASK2;
+
+    /* degenerate case: w is zero */
+    if (!w)
+        return 1;
+    /* degenerate case: a is zero */
+    if (BN_is_zero(a)) {
+        i = BN_set_word(a, w);
+        if (i != 0)
+            BN_set_negative(a, 1);
+        return i;
+    }
+    /* handle 'a' when negative */
+    if (a->neg) {
+        a->neg = 0;
+        i = BN_add_word(a, w);
+        a->neg = 1;
+        return i;
+    }
+
+    if ((a->top == 1) && (a->d[0] < w)) {
+        a->d[0] = w - a->d[0];
+        a->neg = 1;
+        return 1;
+    }
+    i = 0;
+    for (;;) {
+        if (a->d[i] >= w) {
+            a->d[i] -= w;
+            break;
+        } else {
+            a->d[i] = (a->d[i] - w) & BN_MASK2;
+            i++;
+            w = 1;
+        }
+    }
+    if ((a->d[i] == 0) && (i == (a->top - 1)))
+        a->top--;
+    bn_check_top(a);
+    return 1;
+}
+
+int BN_mul_word(BIGNUM *a, BN_ULONG w)
+{
+    BN_ULONG ll;
+
+    bn_check_top(a);
+    w &= BN_MASK2;
+    if (a->top) {
+        if (w == 0)
+            BN_zero(a);
+        else {
+            ll = bn_mul_words(a->d, a->d, a->top, w);
+            if (ll) {
+                if (bn_wexpand(a, a->top + 1) == NULL)
+                    return 0;
+                a->d[a->top++] = ll;
+            }
+        }
+    }
+    bn_check_top(a);
+    return 1;
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_x931p.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/bn_x931p.c
@ -0,0 +1,244 @@
+/*
+ * Copyright 2011-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <stdio.h>
+#include <openssl/bn.h>
+#include "bn_lcl.h"
+
+/* X9.31 routines for prime derivation */
+
+/*
+ * X9.31 prime derivation. This is used to generate the primes pi (p1, p2,
+ * q1, q2) from a parameter Xpi by checking successive odd integers.
+ */
+
+static int bn_x931_derive_pi(BIGNUM *pi, const BIGNUM *Xpi, BN_CTX *ctx,
+                             BN_GENCB *cb)
+{
+    int i = 0, is_prime;
+    if (!BN_copy(pi, Xpi))
+        return 0;
+    if (!BN_is_odd(pi) && !BN_add_word(pi, 1))
+        return 0;
+    for (;;) {
+        i++;
+        BN_GENCB_call(cb, 0, i);
+        /* NB 27 MR is specified in X9.31 */
+        is_prime = BN_is_prime_fasttest_ex(pi, 27, ctx, 1, cb);
+        if (is_prime < 0)
+            return 0;
+        if (is_prime)
+            break;
+        if (!BN_add_word(pi, 2))
+            return 0;
+    }
+    BN_GENCB_call(cb, 2, i);
+    return 1;
+}
+
+/*
+ * This is the main X9.31 prime derivation function. From parameters Xp1, Xp2
+ * and Xp derive the prime p. If the parameters p1 or p2 are not NULL they
+ * will be returned too: this is needed for testing.
+ */
+
+int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
+                            const BIGNUM *Xp, const BIGNUM *Xp1,
+                            const BIGNUM *Xp2, const BIGNUM *e, BN_CTX *ctx,
+                            BN_GENCB *cb)
+{
+    int ret = 0;
+
+    BIGNUM *t, *p1p2, *pm1;
+
+    /* Only even e supported */
+    if (!BN_is_odd(e))
+        return 0;
+
+    BN_CTX_start(ctx);
+    if (p1 == NULL)
+        p1 = BN_CTX_get(ctx);
+
+    if (p2 == NULL)
+        p2 = BN_CTX_get(ctx);
+
+    t = BN_CTX_get(ctx);
+
+    p1p2 = BN_CTX_get(ctx);
+
+    pm1 = BN_CTX_get(ctx);
+
+    if (pm1 == NULL)
+        goto err;
+
+    if (!bn_x931_derive_pi(p1, Xp1, ctx, cb))
+        goto err;
+
+    if (!bn_x931_derive_pi(p2, Xp2, ctx, cb))
+        goto err;
+
+    if (!BN_mul(p1p2, p1, p2, ctx))
+        goto err;
+
+    /* First set p to value of Rp */
+
+    if (!BN_mod_inverse(p, p2, p1, ctx))
+        goto err;
+
+    if (!BN_mul(p, p, p2, ctx))
+        goto err;
+
+    if (!BN_mod_inverse(t, p1, p2, ctx))
+        goto err;
+
+    if (!BN_mul(t, t, p1, ctx))
+        goto err;
+
+    if (!BN_sub(p, p, t))
+        goto err;
+
+    if (p->neg && !BN_add(p, p, p1p2))
+        goto err;
+
+    /* p now equals Rp */
+
+    if (!BN_mod_sub(p, p, Xp, p1p2, ctx))
+        goto err;
+
+    if (!BN_add(p, p, Xp))
+        goto err;
+
+    /* p now equals Yp0 */
+
+    for (;;) {
+        int i = 1;
+        BN_GENCB_call(cb, 0, i++);
+        if (!BN_copy(pm1, p))
+            goto err;
+        if (!BN_sub_word(pm1, 1))
+            goto err;
+        if (!BN_gcd(t, pm1, e, ctx))
+            goto err;
+        if (BN_is_one(t)) {
+            /*
+             * X9.31 specifies 8 MR and 1 Lucas test or any prime test
+             * offering similar or better guarantees 50 MR is considerably
+             * better.
+             */
+            int r = BN_is_prime_fasttest_ex(p, 50, ctx, 1, cb);
+            if (r < 0)
+                goto err;
+            if (r)
+                break;
+        }
+        if (!BN_add(p, p, p1p2))
+            goto err;
+    }
+
+    BN_GENCB_call(cb, 3, 0);
+
+    ret = 1;
+
+ err:
+
+    BN_CTX_end(ctx);
+
+    return ret;
+}
+
+/*
+ * Generate pair of parameters Xp, Xq for X9.31 prime generation. Note: nbits
+ * parameter is sum of number of bits in both.
+ */
+
+int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx)
+{
+    BIGNUM *t;
+    int i;
+    /*
+     * Number of bits for each prime is of the form 512+128s for s = 0, 1,
+     * ...
+     */
+    if ((nbits < 1024) || (nbits & 0xff))
+        return 0;
+    nbits >>= 1;
+    /*
+     * The random value Xp must be between sqrt(2) * 2^(nbits-1) and 2^nbits
+     * - 1. By setting the top two bits we ensure that the lower bound is
+     * exceeded.
+     */
+    if (!BN_priv_rand(Xp, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY))
+        goto err;
+
+    BN_CTX_start(ctx);
+    t = BN_CTX_get(ctx);
+    if (t == NULL)
+        goto err;
+
+    for (i = 0; i < 1000; i++) {
+        if (!BN_priv_rand(Xq, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY))
+            goto err;
+
+        /* Check that |Xp - Xq| > 2^(nbits - 100) */
+        if (!BN_sub(t, Xp, Xq))
+            goto err;
+        if (BN_num_bits(t) > (nbits - 100))
+            break;
+    }
+
+    BN_CTX_end(ctx);
+
+    if (i < 1000)
+        return 1;
+
+    return 0;
+
+ err:
+    BN_CTX_end(ctx);
+    return 0;
+}
+
+/*
+ * Generate primes using X9.31 algorithm. Of the values p, p1, p2, Xp1 and
+ * Xp2 only 'p' needs to be non-NULL. If any of the others are not NULL the
+ * relevant parameter will be stored in it. Due to the fact that |Xp - Xq| >
+ * 2^(nbits - 100) must be satisfied Xp and Xq are generated using the
+ * previous function and supplied as input.
+ */
+
+int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
+                              BIGNUM *Xp1, BIGNUM *Xp2,
+                              const BIGNUM *Xp,
+                              const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb)
+{
+    int ret = 0;
+
+    BN_CTX_start(ctx);
+    if (Xp1 == NULL)
+        Xp1 = BN_CTX_get(ctx);
+    if (Xp2 == NULL)
+        Xp2 = BN_CTX_get(ctx);
+    if (Xp1 == NULL || Xp2 == NULL)
+        goto error;
+
+    if (!BN_priv_rand(Xp1, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
+        goto error;
+    if (!BN_priv_rand(Xp2, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
+        goto error;
+    if (!BN_X931_derive_prime_ex(p, p1, p2, Xp, Xp1, Xp2, e, ctx, cb))
+        goto error;
+
+    ret = 1;
+
+ error:
+    BN_CTX_end(ctx);
+
+    return ret;
+
+}
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/build.info
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/build.info
@ -0,0 +1,67 @@
+LIBS=../../libcrypto
+SOURCE[../../libcrypto]=\
+        bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \
+        bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
+        bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c \
+        {- $target{bn_asm_src} -} \
+        bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \
+        bn_depr.c bn_const.c bn_x931p.c bn_intern.c bn_dh.c bn_srp.c
+INCLUDE[../../libcrypto]=../../crypto/include
+
+INCLUDE[bn_exp.o]=..
+
+GENERATE[bn-586.s]=asm/bn-586.pl \
+	$(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
+DEPEND[bn-586.s]=../perlasm/x86asm.pl
+GENERATE[co-586.s]=asm/co-586.pl \
+	$(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
+DEPEND[co-586.s]=../perlasm/x86asm.pl
+GENERATE[x86-mont.s]=asm/x86-mont.pl \
+	$(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
+DEPEND[x86-mont.s]=../perlasm/x86asm.pl
+GENERATE[x86-gf2m.s]=asm/x86-gf2m.pl \
+	$(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
+DEPEND[x86-gf2m.s]=../perlasm/x86asm.pl
+
+GENERATE[sparcv9a-mont.S]=asm/sparcv9a-mont.pl $(PERLASM_SCHEME)
+INCLUDE[sparcv9a-mont.o]=..
+GENERATE[sparcv9-mont.S]=asm/sparcv9-mont.pl $(PERLASM_SCHEME)
+INCLUDE[sparcv9-mont.o]=..
+GENERATE[vis3-mont.S]=asm/vis3-mont.pl $(PERLASM_SCHEME)
+INCLUDE[vis3-mont.o]=..
+GENERATE[sparct4-mont.S]=asm/sparct4-mont.pl $(PERLASM_SCHEME)
+INCLUDE[sparct4-mont.o]=..
+GENERATE[sparcv9-gf2m.S]=asm/sparcv9-gf2m.pl $(PERLASM_SCHEME)
+INCLUDE[sparcv9-gf2m.o]=..
+
+GENERATE[bn-mips.S]=asm/mips.pl $(PERLASM_SCHEME)
+INCLUDE[bn-mips.o]=..
+GENERATE[mips-mont.S]=asm/mips-mont.pl $(PERLASM_SCHEME)
+INCLUDE[mips-mont.o]=..
+
+GENERATE[s390x-mont.S]=asm/s390x-mont.pl $(PERLASM_SCHEME)
+GENERATE[s390x-gf2m.s]=asm/s390x-gf2m.pl $(PERLASM_SCHEME)
+
+GENERATE[x86_64-mont.s]=asm/x86_64-mont.pl $(PERLASM_SCHEME)
+GENERATE[x86_64-mont5.s]=asm/x86_64-mont5.pl $(PERLASM_SCHEME)
+GENERATE[x86_64-gf2m.s]=asm/x86_64-gf2m.pl $(PERLASM_SCHEME)
+GENERATE[rsaz-x86_64.s]=asm/rsaz-x86_64.pl $(PERLASM_SCHEME)
+GENERATE[rsaz-avx2.s]=asm/rsaz-avx2.pl $(PERLASM_SCHEME)
+
+GENERATE[bn-ia64.s]=asm/ia64.S
+GENERATE[ia64-mont.s]=asm/ia64-mont.pl $(LIB_CFLAGS) $(LIB_CPPFLAGS)
+
+GENERATE[parisc-mont.s]=asm/parisc-mont.pl $(PERLASM_SCHEME)
+
+# ppc - AIX, Linux, MacOS X...
+GENERATE[bn-ppc.s]=asm/ppc.pl $(PERLASM_SCHEME)
+GENERATE[ppc-mont.s]=asm/ppc-mont.pl $(PERLASM_SCHEME)
+GENERATE[ppc64-mont.s]=asm/ppc64-mont.pl $(PERLASM_SCHEME)
+
+GENERATE[alpha-mont.S]=asm/alpha-mont.pl $(PERLASM_SCHEME)
+
+GENERATE[armv4-mont.S]=asm/armv4-mont.pl $(PERLASM_SCHEME)
+INCLUDE[armv4-mont.o]=..
+GENERATE[armv4-gf2m.S]=asm/armv4-gf2m.pl $(PERLASM_SCHEME)
+INCLUDE[armv4-gf2m.o]=..
+GENERATE[armv8-mont.S]=asm/armv8-mont.pl $(PERLASM_SCHEME)
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/rsaz_exp.c
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/rsaz_exp.c
@ -0,0 +1,315 @@
+/*
+ * Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright (c) 2012, Intel Corporation. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ *
+ * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
+ * (1) Intel Corporation, Israel Development Center, Haifa, Israel
+ * (2) University of Haifa, Israel
+ */
+
+#include <openssl/opensslconf.h>
+#include "rsaz_exp.h"
+
+#ifndef RSAZ_ENABLED
+NON_EMPTY_TRANSLATION_UNIT
+#else
+
+/*
+ * See crypto/bn/asm/rsaz-avx2.pl for further details.
+ */
+void rsaz_1024_norm2red_avx2(void *red, const void *norm);
+void rsaz_1024_mul_avx2(void *ret, const void *a, const void *b,
+                        const void *n, BN_ULONG k);
+void rsaz_1024_sqr_avx2(void *ret, const void *a, const void *n, BN_ULONG k,
+                        int cnt);
+void rsaz_1024_scatter5_avx2(void *tbl, const void *val, int i);
+void rsaz_1024_gather5_avx2(void *val, const void *tbl, int i);
+void rsaz_1024_red2norm_avx2(void *norm, const void *red);
+
+#if defined(__GNUC__)
+# define ALIGN64        __attribute__((aligned(64)))
+#elif defined(_MSC_VER)
+# define ALIGN64        __declspec(align(64))
+#elif defined(__SUNPRO_C)
+# define ALIGN64
+# pragma align 64(one,two80)
+#else
+/* not fatal, might hurt performance a little */
+# define ALIGN64
+#endif
+
+ALIGN64 static const BN_ULONG one[40] = {
+    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+ALIGN64 static const BN_ULONG two80[40] = {
+    0, 0, 1 << 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16],
+                            const BN_ULONG base_norm[16],
+                            const BN_ULONG exponent[16],
+                            const BN_ULONG m_norm[16], const BN_ULONG RR[16],
+                            BN_ULONG k0)
+{
+    unsigned char storage[320 * 3 + 32 * 9 * 16 + 64]; /* 5.5KB */
+    unsigned char *p_str = storage + (64 - ((size_t)storage % 64));
+    unsigned char *a_inv, *m, *result;
+    unsigned char *table_s = p_str + 320 * 3;
+    unsigned char *R2 = table_s; /* borrow */
+    int index;
+    int wvalue;
+
+    if ((((size_t)p_str & 4095) + 320) >> 12) {
+        result = p_str;
+        a_inv = p_str + 320;
+        m = p_str + 320 * 2;    /* should not cross page */
+    } else {
+        m = p_str;              /* should not cross page */
+        result = p_str + 320;
+        a_inv = p_str + 320 * 2;
+    }
+
+    rsaz_1024_norm2red_avx2(m, m_norm);
+    rsaz_1024_norm2red_avx2(a_inv, base_norm);
+    rsaz_1024_norm2red_avx2(R2, RR);
+
+    rsaz_1024_mul_avx2(R2, R2, R2, m, k0);
+    rsaz_1024_mul_avx2(R2, R2, two80, m, k0);
+
+    /* table[0] = 1 */
+    rsaz_1024_mul_avx2(result, R2, one, m, k0);
+    /* table[1] = a_inv^1 */
+    rsaz_1024_mul_avx2(a_inv, a_inv, R2, m, k0);
+
+    rsaz_1024_scatter5_avx2(table_s, result, 0);
+    rsaz_1024_scatter5_avx2(table_s, a_inv, 1);
+
+    /* table[2] = a_inv^2 */
+    rsaz_1024_sqr_avx2(result, a_inv, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 2);
+#if 0
+    /* this is almost 2x smaller and less than 1% slower */
+    for (index = 3; index < 32; index++) {
+        rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+        rsaz_1024_scatter5_avx2(table_s, result, index);
+    }
+#else
+    /* table[4] = a_inv^4 */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 4);
+    /* table[8] = a_inv^8 */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 8);
+    /* table[16] = a_inv^16 */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 16);
+    /* table[17] = a_inv^17 */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 17);
+
+    /* table[3] */
+    rsaz_1024_gather5_avx2(result, table_s, 2);
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 3);
+    /* table[6] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 6);
+    /* table[12] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 12);
+    /* table[24] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 24);
+    /* table[25] */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 25);
+
+    /* table[5] */
+    rsaz_1024_gather5_avx2(result, table_s, 4);
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 5);
+    /* table[10] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 10);
+    /* table[20] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 20);
+    /* table[21] */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 21);
+
+    /* table[7] */
+    rsaz_1024_gather5_avx2(result, table_s, 6);
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 7);
+    /* table[14] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 14);
+    /* table[28] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 28);
+    /* table[29] */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 29);
+
+    /* table[9] */
+    rsaz_1024_gather5_avx2(result, table_s, 8);
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 9);
+    /* table[18] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 18);
+    /* table[19] */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 19);
+
+    /* table[11] */
+    rsaz_1024_gather5_avx2(result, table_s, 10);
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 11);
+    /* table[22] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 22);
+    /* table[23] */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 23);
+
+    /* table[13] */
+    rsaz_1024_gather5_avx2(result, table_s, 12);
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 13);
+    /* table[26] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 26);
+    /* table[27] */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 27);
+
+    /* table[15] */
+    rsaz_1024_gather5_avx2(result, table_s, 14);
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 15);
+    /* table[30] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 30);
+    /* table[31] */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 31);
+#endif
+
+    /* load first window */
+    p_str = (unsigned char *)exponent;
+    wvalue = p_str[127] >> 3;
+    rsaz_1024_gather5_avx2(result, table_s, wvalue);
+
+    index = 1014;
+
+    while (index > -1) {        /* loop for the remaining 127 windows */
+
+        rsaz_1024_sqr_avx2(result, result, m, k0, 5);
+
+        wvalue = (p_str[(index / 8) + 1] << 8) | p_str[index / 8];
+        wvalue = (wvalue >> (index % 8)) & 31;
+        index -= 5;
+
+        rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */
+        rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    }
+
+    /* square four times */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 4);
+
+    wvalue = p_str[0] & 15;
+
+    rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+
+    /* from Montgomery */
+    rsaz_1024_mul_avx2(result, result, one, m, k0);
+
+    rsaz_1024_red2norm_avx2(result_norm, result);
+
+    OPENSSL_cleanse(storage, sizeof(storage));
+}
+
+/*
+ * See crypto/bn/rsaz-x86_64.pl for further details.
+ */
+void rsaz_512_mul(void *ret, const void *a, const void *b, const void *n,
+                  BN_ULONG k);
+void rsaz_512_mul_scatter4(void *ret, const void *a, const void *n,
+                           BN_ULONG k, const void *tbl, unsigned int power);
+void rsaz_512_mul_gather4(void *ret, const void *a, const void *tbl,
+                          const void *n, BN_ULONG k, unsigned int power);
+void rsaz_512_mul_by_one(void *ret, const void *a, const void *n, BN_ULONG k);
+void rsaz_512_sqr(void *ret, const void *a, const void *n, BN_ULONG k,
+                  int cnt);
+void rsaz_512_scatter4(void *tbl, const BN_ULONG *val, int power);
+void rsaz_512_gather4(BN_ULONG *val, const void *tbl, int power);
+
+void RSAZ_512_mod_exp(BN_ULONG result[8],
+                      const BN_ULONG base[8], const BN_ULONG exponent[8],
+                      const BN_ULONG m[8], BN_ULONG k0, const BN_ULONG RR[8])
+{
+    unsigned char storage[16 * 8 * 8 + 64 * 2 + 64]; /* 1.2KB */
+    unsigned char *table = storage + (64 - ((size_t)storage % 64));
+    BN_ULONG *a_inv = (BN_ULONG *)(table + 16 * 8 * 8);
+    BN_ULONG *temp = (BN_ULONG *)(table + 16 * 8 * 8 + 8 * 8);
+    unsigned char *p_str = (unsigned char *)exponent;
+    int index;
+    unsigned int wvalue;
+
+    /* table[0] = 1_inv */
+    temp[0] = 0 - m[0];
+    temp[1] = ~m[1];
+    temp[2] = ~m[2];
+    temp[3] = ~m[3];
+    temp[4] = ~m[4];
+    temp[5] = ~m[5];
+    temp[6] = ~m[6];
+    temp[7] = ~m[7];
+    rsaz_512_scatter4(table, temp, 0);
+
+    /* table [1] = a_inv^1 */
+    rsaz_512_mul(a_inv, base, RR, m, k0);
+    rsaz_512_scatter4(table, a_inv, 1);
+
+    /* table [2] = a_inv^2 */
+    rsaz_512_sqr(temp, a_inv, m, k0, 1);
+    rsaz_512_scatter4(table, temp, 2);
+
+    for (index = 3; index < 16; index++)
+        rsaz_512_mul_scatter4(temp, a_inv, m, k0, table, index);
+
+    /* load first window */
+    wvalue = p_str[63];
+
+    rsaz_512_gather4(temp, table, wvalue >> 4);
+    rsaz_512_sqr(temp, temp, m, k0, 4);
+    rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0xf);
+
+    for (index = 62; index >= 0; index--) {
+        wvalue = p_str[index];
+
+        rsaz_512_sqr(temp, temp, m, k0, 4);
+        rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue >> 4);
+
+        rsaz_512_sqr(temp, temp, m, k0, 4);
+        rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0x0f);
+    }
+
+    /* from Montgomery */
+    rsaz_512_mul_by_one(result, temp, m, k0);
+
+    OPENSSL_cleanse(storage, sizeof(storage));
+}
+
+#endif
--- a/trunk/3rdparty/openssl-1.1-fit/crypto/bn/rsaz_exp.h
+++ b/trunk/3rdparty/openssl-1.1-fit/crypto/bn/rsaz_exp.h
@ -0,0 +1,40 @@
+/*
+ * Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright (c) 2012, Intel Corporation. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ *
+ * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
+ * (1) Intel Corporation, Israel Development Center, Haifa, Israel
+ * (2) University of Haifa, Israel
+ */
+
+#ifndef RSAZ_EXP_H
+# define RSAZ_EXP_H
+
+# undef RSAZ_ENABLED
+# if defined(OPENSSL_BN_ASM_MONT) && \
+        (defined(__x86_64) || defined(__x86_64__) || \
+         defined(_M_AMD64) || defined(_M_X64))
+#  define RSAZ_ENABLED
+
+#  include <openssl/bn.h>
+
+void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16],
+                            const BN_ULONG base_norm[16],
+                            const BN_ULONG exponent[16],
+                            const BN_ULONG m_norm[16], const BN_ULONG RR[16],
+                            BN_ULONG k0);
+int rsaz_avx2_eligible(void);
+
+void RSAZ_512_mod_exp(BN_ULONG result[8],
+                      const BN_ULONG base_norm[8], const BN_ULONG exponent[8],
+                      const BN_ULONG m_norm[8], BN_ULONG k0,
+                      const BN_ULONG RR[8]);
+
+# endif
+
+#endif