1
0
Fork 0
mirror of https://github.com/ossrs/srs.git synced 2025-03-09 15:49:59 +00:00

TEST: Upgrade pion to v3.2.9. (#3567)

------

Co-authored-by: chundonglinlin <chundonglinlin@163.com>
This commit is contained in:
Winlin 2023-06-05 11:25:04 +08:00 committed by winlin
parent 900c4cdd97
commit 1545425e06
1383 changed files with 118469 additions and 41421 deletions

View file

@ -1,3 +0,0 @@
# This source code refers to The Go Authors for copyright purposes.
# The master list of authors is in the main Go distribution,
# visible at https://tip.golang.org/AUTHORS.

View file

@ -1,3 +0,0 @@
# This source code was written by the Go contributors.
# The master list of contributors is in the main Go distribution,
# visible at https://tip.golang.org/CONTRIBUTORS.

View file

@ -117,6 +117,19 @@ func (b *Builder) AddASN1GeneralizedTime(t time.Time) {
})
}
// AddASN1UTCTime appends a DER-encoded ASN.1 UTCTime.
func (b *Builder) AddASN1UTCTime(t time.Time) {
b.AddASN1(asn1.UTCTime, func(c *Builder) {
// As utilized by the X.509 profile, UTCTime can only
// represent the years 1950 through 2049.
if t.Year() < 1950 || t.Year() >= 2050 {
b.err = fmt.Errorf("cryptobyte: cannot represent %v as a UTCTime", t)
return
}
c.AddBytes([]byte(t.Format(defaultUTCTimeFormatStr)))
})
}
// AddASN1BitString appends a DER-encoded ASN.1 BIT STRING. This does not
// support BIT STRINGs that are not a whole number of bytes.
func (b *Builder) AddASN1BitString(data []byte) {
@ -251,36 +264,35 @@ func (s *String) ReadASN1Boolean(out *bool) bool {
return true
}
var bigIntType = reflect.TypeOf((*big.Int)(nil)).Elem()
// ReadASN1Integer decodes an ASN.1 INTEGER into out and advances. If out does
// not point to an integer or to a big.Int, it panics. It reports whether the
// read was successful.
// not point to an integer, to a big.Int, or to a []byte it panics. Only
// positive and zero values can be decoded into []byte, and they are returned as
// big-endian binary values that share memory with s. Positive values will have
// no leading zeroes, and zero will be returned as a single zero byte.
// ReadASN1Integer reports whether the read was successful.
func (s *String) ReadASN1Integer(out interface{}) bool {
if reflect.TypeOf(out).Kind() != reflect.Ptr {
panic("out is not a pointer")
}
switch reflect.ValueOf(out).Elem().Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
switch out := out.(type) {
case *int, *int8, *int16, *int32, *int64:
var i int64
if !s.readASN1Int64(&i) || reflect.ValueOf(out).Elem().OverflowInt(i) {
return false
}
reflect.ValueOf(out).Elem().SetInt(i)
return true
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
case *uint, *uint8, *uint16, *uint32, *uint64:
var u uint64
if !s.readASN1Uint64(&u) || reflect.ValueOf(out).Elem().OverflowUint(u) {
return false
}
reflect.ValueOf(out).Elem().SetUint(u)
return true
case reflect.Struct:
if reflect.TypeOf(out).Elem() == bigIntType {
return s.readASN1BigInt(out.(*big.Int))
}
case *big.Int:
return s.readASN1BigInt(out)
case *[]byte:
return s.readASN1Bytes(out)
default:
panic("out does not point to an integer type")
}
panic("out does not point to an integer type")
}
func checkASN1Integer(bytes []byte) bool {
@ -320,6 +332,21 @@ func (s *String) readASN1BigInt(out *big.Int) bool {
return true
}
func (s *String) readASN1Bytes(out *[]byte) bool {
var bytes String
if !s.ReadASN1(&bytes, asn1.INTEGER) || !checkASN1Integer(bytes) {
return false
}
if bytes[0]&0x80 == 0x80 {
return false
}
for len(bytes) > 1 && bytes[0] == 0 {
bytes = bytes[1:]
}
*out = bytes
return true
}
func (s *String) readASN1Int64(out *int64) bool {
var bytes String
if !s.ReadASN1(&bytes, asn1.INTEGER) || !checkASN1Integer(bytes) || !asn1Signed(out, bytes) {
@ -394,11 +421,24 @@ func (s *String) ReadASN1Enum(out *int) bool {
func (s *String) readBase128Int(out *int) bool {
ret := 0
for i := 0; len(*s) > 0; i++ {
if i == 4 {
if i == 5 {
return false
}
// Avoid overflowing int on a 32-bit platform.
// We don't want different behavior based on the architecture.
if ret >= 1<<(31-7) {
return false
}
ret <<= 7
b := s.read(1)[0]
// ITU-T X.690, section 8.19.2:
// The subidentifier shall be encoded in the fewest possible octets,
// that is, the leading octet of the subidentifier shall not have the value 0x80.
if i == 0 && b == 0x80 {
return false
}
ret |= int(b & 0x7f)
if b&0x80 == 0 {
*out = ret
@ -466,6 +506,45 @@ func (s *String) ReadASN1GeneralizedTime(out *time.Time) bool {
return true
}
const defaultUTCTimeFormatStr = "060102150405Z0700"
// ReadASN1UTCTime decodes an ASN.1 UTCTime into out and advances.
// It reports whether the read was successful.
func (s *String) ReadASN1UTCTime(out *time.Time) bool {
var bytes String
if !s.ReadASN1(&bytes, asn1.UTCTime) {
return false
}
t := string(bytes)
formatStr := defaultUTCTimeFormatStr
var err error
res, err := time.Parse(formatStr, t)
if err != nil {
// Fallback to minute precision if we can't parse second
// precision. If we are following X.509 or X.690 we shouldn't
// support this, but we do.
formatStr = "0601021504Z0700"
res, err = time.Parse(formatStr, t)
}
if err != nil {
return false
}
if serialized := res.Format(formatStr); serialized != t {
return false
}
if res.Year() >= 2050 {
// UTCTime interprets the low order digits 50-99 as 1950-99.
// This only applies to its use in the X.509 profile.
// See https://tools.ietf.org/html/rfc5280#section-4.1.2.5.1
res = res.AddDate(-100, 0, 0)
}
*out = res
return true
}
// ReadASN1BitString decodes an ASN.1 BIT STRING into out and advances.
// It reports whether the read was successful.
func (s *String) ReadASN1BitString(out *encoding_asn1.BitString) bool {
@ -475,7 +554,7 @@ func (s *String) ReadASN1BitString(out *encoding_asn1.BitString) bool {
return false
}
paddingBits := uint8(bytes[0])
paddingBits := bytes[0]
bytes = bytes[1:]
if paddingBits > 7 ||
len(bytes) == 0 && paddingBits != 0 ||
@ -488,7 +567,7 @@ func (s *String) ReadASN1BitString(out *encoding_asn1.BitString) bool {
return true
}
// ReadASN1BitString decodes an ASN.1 BIT STRING into out and advances. It is
// ReadASN1BitStringAsBytes decodes an ASN.1 BIT STRING into out and advances. It is
// an error if the BIT STRING is not a whole number of bytes. It reports
// whether the read was successful.
func (s *String) ReadASN1BitStringAsBytes(out *[]byte) bool {
@ -497,7 +576,7 @@ func (s *String) ReadASN1BitStringAsBytes(out *[]byte) bool {
return false
}
paddingBits := uint8(bytes[0])
paddingBits := bytes[0]
if paddingBits != 0 {
return false
}
@ -597,34 +676,27 @@ func (s *String) SkipOptionalASN1(tag asn1.Tag) bool {
return s.ReadASN1(&unused, tag)
}
// ReadOptionalASN1Integer attempts to read an optional ASN.1 INTEGER
// explicitly tagged with tag into out and advances. If no element with a
// matching tag is present, it writes defaultValue into out instead. If out
// does not point to an integer or to a big.Int, it panics. It reports
// whether the read was successful.
// ReadOptionalASN1Integer attempts to read an optional ASN.1 INTEGER explicitly
// tagged with tag into out and advances. If no element with a matching tag is
// present, it writes defaultValue into out instead. Otherwise, it behaves like
// ReadASN1Integer.
func (s *String) ReadOptionalASN1Integer(out interface{}, tag asn1.Tag, defaultValue interface{}) bool {
if reflect.TypeOf(out).Kind() != reflect.Ptr {
panic("out is not a pointer")
}
var present bool
var i String
if !s.ReadOptionalASN1(&i, &present, tag) {
return false
}
if !present {
switch reflect.ValueOf(out).Elem().Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
switch out.(type) {
case *int, *int8, *int16, *int32, *int64,
*uint, *uint8, *uint16, *uint32, *uint64, *[]byte:
reflect.ValueOf(out).Elem().Set(reflect.ValueOf(defaultValue))
case reflect.Struct:
if reflect.TypeOf(out).Elem() != bigIntType {
panic("invalid integer type")
}
if reflect.TypeOf(defaultValue).Kind() != reflect.Ptr ||
reflect.TypeOf(defaultValue).Elem() != bigIntType {
case *big.Int:
if defaultValue, ok := defaultValue.(*big.Int); ok {
out.(*big.Int).Set(defaultValue)
} else {
panic("out points to big.Int, but defaultValue does not")
}
out.(*big.Int).Set(defaultValue.(*big.Int))
default:
panic("invalid integer type")
}

View file

@ -95,6 +95,11 @@ func (b *Builder) AddUint32(v uint32) {
b.add(byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}
// AddUint64 appends a big-endian, 64-bit value to the byte string.
func (b *Builder) AddUint64(v uint64) {
b.add(byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}
// AddBytes appends a sequence of bytes to the byte string.
func (b *Builder) AddBytes(v []byte) {
b.add(v...)
@ -106,13 +111,13 @@ func (b *Builder) AddBytes(v []byte) {
// supplied to them. The child builder passed to the continuation can be used
// to build the content of the length-prefixed sequence. For example:
//
// parent := cryptobyte.NewBuilder()
// parent.AddUint8LengthPrefixed(func (child *Builder) {
// child.AddUint8(42)
// child.AddUint8LengthPrefixed(func (grandchild *Builder) {
// grandchild.AddUint8(5)
// })
// })
// parent := cryptobyte.NewBuilder()
// parent.AddUint8LengthPrefixed(func (child *Builder) {
// child.AddUint8(42)
// child.AddUint8LengthPrefixed(func (grandchild *Builder) {
// grandchild.AddUint8(5)
// })
// })
//
// It is an error to write more bytes to the child than allowed by the reserved
// length prefix. After the continuation returns, the child must be considered
@ -298,9 +303,9 @@ func (b *Builder) add(bytes ...byte) {
b.result = append(b.result, bytes...)
}
// Unwrite rolls back n bytes written directly to the Builder. An attempt by a
// child builder passed to a continuation to unwrite bytes from its parent will
// panic.
// Unwrite rolls back non-negative n bytes written directly to the Builder.
// An attempt by a child builder passed to a continuation to unwrite bytes
// from its parent will panic.
func (b *Builder) Unwrite(n int) {
if b.err != nil {
return
@ -312,6 +317,9 @@ func (b *Builder) Unwrite(n int) {
if length < 0 {
panic("cryptobyte: internal error")
}
if n < 0 {
panic("cryptobyte: attempted to unwrite negative number of bytes")
}
if n > length {
panic("cryptobyte: attempted to unwrite more than was written")
}

View file

@ -81,6 +81,17 @@ func (s *String) ReadUint32(out *uint32) bool {
return true
}
// ReadUint64 decodes a big-endian, 64-bit value into out and advances over it.
// It reports whether the read was successful.
func (s *String) ReadUint64(out *uint64) bool {
v := s.read(8)
if v == nil {
return false
}
*out = uint64(v[0])<<56 | uint64(v[1])<<48 | uint64(v[2])<<40 | uint64(v[3])<<32 | uint64(v[4])<<24 | uint64(v[5])<<16 | uint64(v[6])<<8 | uint64(v[7])
return true
}
func (s *String) readUnsigned(out *uint32, length int) bool {
v := s.read(length)
if v == nil {

View file

@ -5,13 +5,11 @@
// Package curve25519 provides an implementation of the X25519 function, which
// performs scalar multiplication on the elliptic curve known as Curve25519.
// See RFC 7748.
//
// Starting in Go 1.20, this package is a wrapper for the X25519 implementation
// in the crypto/ecdh package.
package curve25519 // import "golang.org/x/crypto/curve25519"
import (
"crypto/subtle"
"fmt"
)
// ScalarMult sets dst to the product scalar * point.
//
// Deprecated: when provided a low-order point, ScalarMult will set dst to all
@ -27,7 +25,7 @@ func ScalarMult(dst, scalar, point *[32]byte) {
// It is recommended to use the X25519 function with Basepoint instead, as
// copying into fixed size arrays can lead to unexpected bugs.
func ScalarBaseMult(dst, scalar *[32]byte) {
ScalarMult(dst, scalar, &basePoint)
scalarBaseMult(dst, scalar)
}
const (
@ -40,21 +38,10 @@ const (
// Basepoint is the canonical Curve25519 generator.
var Basepoint []byte
var basePoint = [32]byte{9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
var basePoint = [32]byte{9}
func init() { Basepoint = basePoint[:] }
func checkBasepoint() {
if subtle.ConstantTimeCompare(Basepoint, []byte{
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
}) != 1 {
panic("curve25519: global Basepoint value was modified")
}
}
// X25519 returns the result of the scalar multiplication (scalar * point),
// according to RFC 7748, Section 5. scalar, point and the return value are
// slices of 32 bytes.
@ -70,26 +57,3 @@ func X25519(scalar, point []byte) ([]byte, error) {
var dst [32]byte
return x25519(&dst, scalar, point)
}
func x25519(dst *[32]byte, scalar, point []byte) ([]byte, error) {
var in [32]byte
if l := len(scalar); l != 32 {
return nil, fmt.Errorf("bad scalar length: %d, expected %d", l, 32)
}
if l := len(point); l != 32 {
return nil, fmt.Errorf("bad point length: %d, expected %d", l, 32)
}
copy(in[:], scalar)
if &point[0] == &Basepoint[0] {
checkBasepoint()
ScalarBaseMult(dst, &in)
} else {
var base, zero [32]byte
copy(base[:], point)
ScalarMult(dst, &in, &base)
if subtle.ConstantTimeCompare(dst[:], zero[:]) == 1 {
return nil, fmt.Errorf("bad input point: low order point")
}
}
return dst[:], nil
}

View file

@ -1,240 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64,gc,!purego
package curve25519
// These functions are implemented in the .s files. The names of the functions
// in the rest of the file are also taken from the SUPERCOP sources to help
// people following along.
//go:noescape
func cswap(inout *[5]uint64, v uint64)
//go:noescape
func ladderstep(inout *[5][5]uint64)
//go:noescape
func freeze(inout *[5]uint64)
//go:noescape
func mul(dest, a, b *[5]uint64)
//go:noescape
func square(out, in *[5]uint64)
// mladder uses a Montgomery ladder to calculate (xr/zr) *= s.
func mladder(xr, zr *[5]uint64, s *[32]byte) {
var work [5][5]uint64
work[0] = *xr
setint(&work[1], 1)
setint(&work[2], 0)
work[3] = *xr
setint(&work[4], 1)
j := uint(6)
var prevbit byte
for i := 31; i >= 0; i-- {
for j < 8 {
bit := ((*s)[i] >> j) & 1
swap := bit ^ prevbit
prevbit = bit
cswap(&work[1], uint64(swap))
ladderstep(&work)
j--
}
j = 7
}
*xr = work[1]
*zr = work[2]
}
func scalarMult(out, in, base *[32]byte) {
var e [32]byte
copy(e[:], (*in)[:])
e[0] &= 248
e[31] &= 127
e[31] |= 64
var t, z [5]uint64
unpack(&t, base)
mladder(&t, &z, &e)
invert(&z, &z)
mul(&t, &t, &z)
pack(out, &t)
}
func setint(r *[5]uint64, v uint64) {
r[0] = v
r[1] = 0
r[2] = 0
r[3] = 0
r[4] = 0
}
// unpack sets r = x where r consists of 5, 51-bit limbs in little-endian
// order.
func unpack(r *[5]uint64, x *[32]byte) {
r[0] = uint64(x[0]) |
uint64(x[1])<<8 |
uint64(x[2])<<16 |
uint64(x[3])<<24 |
uint64(x[4])<<32 |
uint64(x[5])<<40 |
uint64(x[6]&7)<<48
r[1] = uint64(x[6])>>3 |
uint64(x[7])<<5 |
uint64(x[8])<<13 |
uint64(x[9])<<21 |
uint64(x[10])<<29 |
uint64(x[11])<<37 |
uint64(x[12]&63)<<45
r[2] = uint64(x[12])>>6 |
uint64(x[13])<<2 |
uint64(x[14])<<10 |
uint64(x[15])<<18 |
uint64(x[16])<<26 |
uint64(x[17])<<34 |
uint64(x[18])<<42 |
uint64(x[19]&1)<<50
r[3] = uint64(x[19])>>1 |
uint64(x[20])<<7 |
uint64(x[21])<<15 |
uint64(x[22])<<23 |
uint64(x[23])<<31 |
uint64(x[24])<<39 |
uint64(x[25]&15)<<47
r[4] = uint64(x[25])>>4 |
uint64(x[26])<<4 |
uint64(x[27])<<12 |
uint64(x[28])<<20 |
uint64(x[29])<<28 |
uint64(x[30])<<36 |
uint64(x[31]&127)<<44
}
// pack sets out = x where out is the usual, little-endian form of the 5,
// 51-bit limbs in x.
func pack(out *[32]byte, x *[5]uint64) {
t := *x
freeze(&t)
out[0] = byte(t[0])
out[1] = byte(t[0] >> 8)
out[2] = byte(t[0] >> 16)
out[3] = byte(t[0] >> 24)
out[4] = byte(t[0] >> 32)
out[5] = byte(t[0] >> 40)
out[6] = byte(t[0] >> 48)
out[6] ^= byte(t[1]<<3) & 0xf8
out[7] = byte(t[1] >> 5)
out[8] = byte(t[1] >> 13)
out[9] = byte(t[1] >> 21)
out[10] = byte(t[1] >> 29)
out[11] = byte(t[1] >> 37)
out[12] = byte(t[1] >> 45)
out[12] ^= byte(t[2]<<6) & 0xc0
out[13] = byte(t[2] >> 2)
out[14] = byte(t[2] >> 10)
out[15] = byte(t[2] >> 18)
out[16] = byte(t[2] >> 26)
out[17] = byte(t[2] >> 34)
out[18] = byte(t[2] >> 42)
out[19] = byte(t[2] >> 50)
out[19] ^= byte(t[3]<<1) & 0xfe
out[20] = byte(t[3] >> 7)
out[21] = byte(t[3] >> 15)
out[22] = byte(t[3] >> 23)
out[23] = byte(t[3] >> 31)
out[24] = byte(t[3] >> 39)
out[25] = byte(t[3] >> 47)
out[25] ^= byte(t[4]<<4) & 0xf0
out[26] = byte(t[4] >> 4)
out[27] = byte(t[4] >> 12)
out[28] = byte(t[4] >> 20)
out[29] = byte(t[4] >> 28)
out[30] = byte(t[4] >> 36)
out[31] = byte(t[4] >> 44)
}
// invert calculates r = x^-1 mod p using Fermat's little theorem.
func invert(r *[5]uint64, x *[5]uint64) {
var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t [5]uint64
square(&z2, x) /* 2 */
square(&t, &z2) /* 4 */
square(&t, &t) /* 8 */
mul(&z9, &t, x) /* 9 */
mul(&z11, &z9, &z2) /* 11 */
square(&t, &z11) /* 22 */
mul(&z2_5_0, &t, &z9) /* 2^5 - 2^0 = 31 */
square(&t, &z2_5_0) /* 2^6 - 2^1 */
for i := 1; i < 5; i++ { /* 2^20 - 2^10 */
square(&t, &t)
}
mul(&z2_10_0, &t, &z2_5_0) /* 2^10 - 2^0 */
square(&t, &z2_10_0) /* 2^11 - 2^1 */
for i := 1; i < 10; i++ { /* 2^20 - 2^10 */
square(&t, &t)
}
mul(&z2_20_0, &t, &z2_10_0) /* 2^20 - 2^0 */
square(&t, &z2_20_0) /* 2^21 - 2^1 */
for i := 1; i < 20; i++ { /* 2^40 - 2^20 */
square(&t, &t)
}
mul(&t, &t, &z2_20_0) /* 2^40 - 2^0 */
square(&t, &t) /* 2^41 - 2^1 */
for i := 1; i < 10; i++ { /* 2^50 - 2^10 */
square(&t, &t)
}
mul(&z2_50_0, &t, &z2_10_0) /* 2^50 - 2^0 */
square(&t, &z2_50_0) /* 2^51 - 2^1 */
for i := 1; i < 50; i++ { /* 2^100 - 2^50 */
square(&t, &t)
}
mul(&z2_100_0, &t, &z2_50_0) /* 2^100 - 2^0 */
square(&t, &z2_100_0) /* 2^101 - 2^1 */
for i := 1; i < 100; i++ { /* 2^200 - 2^100 */
square(&t, &t)
}
mul(&t, &t, &z2_100_0) /* 2^200 - 2^0 */
square(&t, &t) /* 2^201 - 2^1 */
for i := 1; i < 50; i++ { /* 2^250 - 2^50 */
square(&t, &t)
}
mul(&t, &t, &z2_50_0) /* 2^250 - 2^0 */
square(&t, &t) /* 2^251 - 2^1 */
square(&t, &t) /* 2^252 - 2^2 */
square(&t, &t) /* 2^253 - 2^3 */
square(&t, &t) /* 2^254 - 2^4 */
square(&t, &t) /* 2^255 - 2^5 */
mul(r, &t, &z11) /* 2^255 - 21 */
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,105 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !go1.20
package curve25519
import (
"crypto/subtle"
"errors"
"strconv"
"golang.org/x/crypto/curve25519/internal/field"
)
func scalarMult(dst, scalar, point *[32]byte) {
var e [32]byte
copy(e[:], scalar[:])
e[0] &= 248
e[31] &= 127
e[31] |= 64
var x1, x2, z2, x3, z3, tmp0, tmp1 field.Element
x1.SetBytes(point[:])
x2.One()
x3.Set(&x1)
z3.One()
swap := 0
for pos := 254; pos >= 0; pos-- {
b := e[pos/8] >> uint(pos&7)
b &= 1
swap ^= int(b)
x2.Swap(&x3, swap)
z2.Swap(&z3, swap)
swap = int(b)
tmp0.Subtract(&x3, &z3)
tmp1.Subtract(&x2, &z2)
x2.Add(&x2, &z2)
z2.Add(&x3, &z3)
z3.Multiply(&tmp0, &x2)
z2.Multiply(&z2, &tmp1)
tmp0.Square(&tmp1)
tmp1.Square(&x2)
x3.Add(&z3, &z2)
z2.Subtract(&z3, &z2)
x2.Multiply(&tmp1, &tmp0)
tmp1.Subtract(&tmp1, &tmp0)
z2.Square(&z2)
z3.Mult32(&tmp1, 121666)
x3.Square(&x3)
tmp0.Add(&tmp0, &z3)
z3.Multiply(&x1, &z2)
z2.Multiply(&tmp1, &tmp0)
}
x2.Swap(&x3, swap)
z2.Swap(&z3, swap)
z2.Invert(&z2)
x2.Multiply(&x2, &z2)
copy(dst[:], x2.Bytes())
}
func scalarBaseMult(dst, scalar *[32]byte) {
checkBasepoint()
scalarMult(dst, scalar, &basePoint)
}
func x25519(dst *[32]byte, scalar, point []byte) ([]byte, error) {
var in [32]byte
if l := len(scalar); l != 32 {
return nil, errors.New("bad scalar length: " + strconv.Itoa(l) + ", expected 32")
}
if l := len(point); l != 32 {
return nil, errors.New("bad point length: " + strconv.Itoa(l) + ", expected 32")
}
copy(in[:], scalar)
if &point[0] == &Basepoint[0] {
scalarBaseMult(dst, &in)
} else {
var base, zero [32]byte
copy(base[:], point)
scalarMult(dst, &in, &base)
if subtle.ConstantTimeCompare(dst[:], zero[:]) == 1 {
return nil, errors.New("bad input point: low order point")
}
}
return dst[:], nil
}
func checkBasepoint() {
if subtle.ConstantTimeCompare(Basepoint, []byte{
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
}) != 1 {
panic("curve25519: global Basepoint value was modified")
}
}

View file

@ -1,828 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package curve25519
import "encoding/binary"
// This code is a port of the public domain, "ref10" implementation of
// curve25519 from SUPERCOP 20130419 by D. J. Bernstein.
// fieldElement represents an element of the field GF(2^255 - 19). An element
// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
// context.
type fieldElement [10]int32
func feZero(fe *fieldElement) {
for i := range fe {
fe[i] = 0
}
}
func feOne(fe *fieldElement) {
feZero(fe)
fe[0] = 1
}
func feAdd(dst, a, b *fieldElement) {
for i := range dst {
dst[i] = a[i] + b[i]
}
}
func feSub(dst, a, b *fieldElement) {
for i := range dst {
dst[i] = a[i] - b[i]
}
}
func feCopy(dst, src *fieldElement) {
for i := range dst {
dst[i] = src[i]
}
}
// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0.
//
// Preconditions: b in {0,1}.
func feCSwap(f, g *fieldElement, b int32) {
b = -b
for i := range f {
t := b & (f[i] ^ g[i])
f[i] ^= t
g[i] ^= t
}
}
// load3 reads a 24-bit, little-endian value from in.
func load3(in []byte) int64 {
var r int64
r = int64(in[0])
r |= int64(in[1]) << 8
r |= int64(in[2]) << 16
return r
}
// load4 reads a 32-bit, little-endian value from in.
func load4(in []byte) int64 {
return int64(binary.LittleEndian.Uint32(in))
}
func feFromBytes(dst *fieldElement, src *[32]byte) {
h0 := load4(src[:])
h1 := load3(src[4:]) << 6
h2 := load3(src[7:]) << 5
h3 := load3(src[10:]) << 3
h4 := load3(src[13:]) << 2
h5 := load4(src[16:])
h6 := load3(src[20:]) << 7
h7 := load3(src[23:]) << 5
h8 := load3(src[26:]) << 4
h9 := (load3(src[29:]) & 0x7fffff) << 2
var carry [10]int64
carry[9] = (h9 + 1<<24) >> 25
h0 += carry[9] * 19
h9 -= carry[9] << 25
carry[1] = (h1 + 1<<24) >> 25
h2 += carry[1]
h1 -= carry[1] << 25
carry[3] = (h3 + 1<<24) >> 25
h4 += carry[3]
h3 -= carry[3] << 25
carry[5] = (h5 + 1<<24) >> 25
h6 += carry[5]
h5 -= carry[5] << 25
carry[7] = (h7 + 1<<24) >> 25
h8 += carry[7]
h7 -= carry[7] << 25
carry[0] = (h0 + 1<<25) >> 26
h1 += carry[0]
h0 -= carry[0] << 26
carry[2] = (h2 + 1<<25) >> 26
h3 += carry[2]
h2 -= carry[2] << 26
carry[4] = (h4 + 1<<25) >> 26
h5 += carry[4]
h4 -= carry[4] << 26
carry[6] = (h6 + 1<<25) >> 26
h7 += carry[6]
h6 -= carry[6] << 26
carry[8] = (h8 + 1<<25) >> 26
h9 += carry[8]
h8 -= carry[8] << 26
dst[0] = int32(h0)
dst[1] = int32(h1)
dst[2] = int32(h2)
dst[3] = int32(h3)
dst[4] = int32(h4)
dst[5] = int32(h5)
dst[6] = int32(h6)
dst[7] = int32(h7)
dst[8] = int32(h8)
dst[9] = int32(h9)
}
// feToBytes marshals h to s.
// Preconditions:
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
//
// Write p=2^255-19; q=floor(h/p).
// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
//
// Proof:
// Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
// Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
//
// Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
// Then 0<y<1.
//
// Write r=h-pq.
// Have 0<=r<=p-1=2^255-20.
// Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
//
// Write x=r+19(2^-255)r+y.
// Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
//
// Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
// so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
func feToBytes(s *[32]byte, h *fieldElement) {
var carry [10]int32
q := (19*h[9] + (1 << 24)) >> 25
q = (h[0] + q) >> 26
q = (h[1] + q) >> 25
q = (h[2] + q) >> 26
q = (h[3] + q) >> 25
q = (h[4] + q) >> 26
q = (h[5] + q) >> 25
q = (h[6] + q) >> 26
q = (h[7] + q) >> 25
q = (h[8] + q) >> 26
q = (h[9] + q) >> 25
// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
h[0] += 19 * q
// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
carry[0] = h[0] >> 26
h[1] += carry[0]
h[0] -= carry[0] << 26
carry[1] = h[1] >> 25
h[2] += carry[1]
h[1] -= carry[1] << 25
carry[2] = h[2] >> 26
h[3] += carry[2]
h[2] -= carry[2] << 26
carry[3] = h[3] >> 25
h[4] += carry[3]
h[3] -= carry[3] << 25
carry[4] = h[4] >> 26
h[5] += carry[4]
h[4] -= carry[4] << 26
carry[5] = h[5] >> 25
h[6] += carry[5]
h[5] -= carry[5] << 25
carry[6] = h[6] >> 26
h[7] += carry[6]
h[6] -= carry[6] << 26
carry[7] = h[7] >> 25
h[8] += carry[7]
h[7] -= carry[7] << 25
carry[8] = h[8] >> 26
h[9] += carry[8]
h[8] -= carry[8] << 26
carry[9] = h[9] >> 25
h[9] -= carry[9] << 25
// h10 = carry9
// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
// Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
// evidently 2^255 h10-2^255 q = 0.
// Goal: Output h[0]+...+2^230 h[9].
s[0] = byte(h[0] >> 0)
s[1] = byte(h[0] >> 8)
s[2] = byte(h[0] >> 16)
s[3] = byte((h[0] >> 24) | (h[1] << 2))
s[4] = byte(h[1] >> 6)
s[5] = byte(h[1] >> 14)
s[6] = byte((h[1] >> 22) | (h[2] << 3))
s[7] = byte(h[2] >> 5)
s[8] = byte(h[2] >> 13)
s[9] = byte((h[2] >> 21) | (h[3] << 5))
s[10] = byte(h[3] >> 3)
s[11] = byte(h[3] >> 11)
s[12] = byte((h[3] >> 19) | (h[4] << 6))
s[13] = byte(h[4] >> 2)
s[14] = byte(h[4] >> 10)
s[15] = byte(h[4] >> 18)
s[16] = byte(h[5] >> 0)
s[17] = byte(h[5] >> 8)
s[18] = byte(h[5] >> 16)
s[19] = byte((h[5] >> 24) | (h[6] << 1))
s[20] = byte(h[6] >> 7)
s[21] = byte(h[6] >> 15)
s[22] = byte((h[6] >> 23) | (h[7] << 3))
s[23] = byte(h[7] >> 5)
s[24] = byte(h[7] >> 13)
s[25] = byte((h[7] >> 21) | (h[8] << 4))
s[26] = byte(h[8] >> 4)
s[27] = byte(h[8] >> 12)
s[28] = byte((h[8] >> 20) | (h[9] << 6))
s[29] = byte(h[9] >> 2)
s[30] = byte(h[9] >> 10)
s[31] = byte(h[9] >> 18)
}
// feMul calculates h = f * g
// Can overlap h with f or g.
//
// Preconditions:
// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
// |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
//
// Postconditions:
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
//
// Notes on implementation strategy:
//
// Using schoolbook multiplication.
// Karatsuba would save a little in some cost models.
//
// Most multiplications by 2 and 19 are 32-bit precomputations;
// cheaper than 64-bit postcomputations.
//
// There is one remaining multiplication by 19 in the carry chain;
// one *19 precomputation can be merged into this,
// but the resulting data flow is considerably less clean.
//
// There are 12 carries below.
// 10 of them are 2-way parallelizable and vectorizable.
// Can get away with 11 carries, but then data flow is much deeper.
//
// With tighter constraints on inputs can squeeze carries into int32.
func feMul(h, f, g *fieldElement) {
f0 := f[0]
f1 := f[1]
f2 := f[2]
f3 := f[3]
f4 := f[4]
f5 := f[5]
f6 := f[6]
f7 := f[7]
f8 := f[8]
f9 := f[9]
g0 := g[0]
g1 := g[1]
g2 := g[2]
g3 := g[3]
g4 := g[4]
g5 := g[5]
g6 := g[6]
g7 := g[7]
g8 := g[8]
g9 := g[9]
g1_19 := 19 * g1 // 1.4*2^29
g2_19 := 19 * g2 // 1.4*2^30; still ok
g3_19 := 19 * g3
g4_19 := 19 * g4
g5_19 := 19 * g5
g6_19 := 19 * g6
g7_19 := 19 * g7
g8_19 := 19 * g8
g9_19 := 19 * g9
f1_2 := 2 * f1
f3_2 := 2 * f3
f5_2 := 2 * f5
f7_2 := 2 * f7
f9_2 := 2 * f9
f0g0 := int64(f0) * int64(g0)
f0g1 := int64(f0) * int64(g1)
f0g2 := int64(f0) * int64(g2)
f0g3 := int64(f0) * int64(g3)
f0g4 := int64(f0) * int64(g4)
f0g5 := int64(f0) * int64(g5)
f0g6 := int64(f0) * int64(g6)
f0g7 := int64(f0) * int64(g7)
f0g8 := int64(f0) * int64(g8)
f0g9 := int64(f0) * int64(g9)
f1g0 := int64(f1) * int64(g0)
f1g1_2 := int64(f1_2) * int64(g1)
f1g2 := int64(f1) * int64(g2)
f1g3_2 := int64(f1_2) * int64(g3)
f1g4 := int64(f1) * int64(g4)
f1g5_2 := int64(f1_2) * int64(g5)
f1g6 := int64(f1) * int64(g6)
f1g7_2 := int64(f1_2) * int64(g7)
f1g8 := int64(f1) * int64(g8)
f1g9_38 := int64(f1_2) * int64(g9_19)
f2g0 := int64(f2) * int64(g0)
f2g1 := int64(f2) * int64(g1)
f2g2 := int64(f2) * int64(g2)
f2g3 := int64(f2) * int64(g3)
f2g4 := int64(f2) * int64(g4)
f2g5 := int64(f2) * int64(g5)
f2g6 := int64(f2) * int64(g6)
f2g7 := int64(f2) * int64(g7)
f2g8_19 := int64(f2) * int64(g8_19)
f2g9_19 := int64(f2) * int64(g9_19)
f3g0 := int64(f3) * int64(g0)
f3g1_2 := int64(f3_2) * int64(g1)
f3g2 := int64(f3) * int64(g2)
f3g3_2 := int64(f3_2) * int64(g3)
f3g4 := int64(f3) * int64(g4)
f3g5_2 := int64(f3_2) * int64(g5)
f3g6 := int64(f3) * int64(g6)
f3g7_38 := int64(f3_2) * int64(g7_19)
f3g8_19 := int64(f3) * int64(g8_19)
f3g9_38 := int64(f3_2) * int64(g9_19)
f4g0 := int64(f4) * int64(g0)
f4g1 := int64(f4) * int64(g1)
f4g2 := int64(f4) * int64(g2)
f4g3 := int64(f4) * int64(g3)
f4g4 := int64(f4) * int64(g4)
f4g5 := int64(f4) * int64(g5)
f4g6_19 := int64(f4) * int64(g6_19)
f4g7_19 := int64(f4) * int64(g7_19)
f4g8_19 := int64(f4) * int64(g8_19)
f4g9_19 := int64(f4) * int64(g9_19)
f5g0 := int64(f5) * int64(g0)
f5g1_2 := int64(f5_2) * int64(g1)
f5g2 := int64(f5) * int64(g2)
f5g3_2 := int64(f5_2) * int64(g3)
f5g4 := int64(f5) * int64(g4)
f5g5_38 := int64(f5_2) * int64(g5_19)
f5g6_19 := int64(f5) * int64(g6_19)
f5g7_38 := int64(f5_2) * int64(g7_19)
f5g8_19 := int64(f5) * int64(g8_19)
f5g9_38 := int64(f5_2) * int64(g9_19)
f6g0 := int64(f6) * int64(g0)
f6g1 := int64(f6) * int64(g1)
f6g2 := int64(f6) * int64(g2)
f6g3 := int64(f6) * int64(g3)
f6g4_19 := int64(f6) * int64(g4_19)
f6g5_19 := int64(f6) * int64(g5_19)
f6g6_19 := int64(f6) * int64(g6_19)
f6g7_19 := int64(f6) * int64(g7_19)
f6g8_19 := int64(f6) * int64(g8_19)
f6g9_19 := int64(f6) * int64(g9_19)
f7g0 := int64(f7) * int64(g0)
f7g1_2 := int64(f7_2) * int64(g1)
f7g2 := int64(f7) * int64(g2)
f7g3_38 := int64(f7_2) * int64(g3_19)
f7g4_19 := int64(f7) * int64(g4_19)
f7g5_38 := int64(f7_2) * int64(g5_19)
f7g6_19 := int64(f7) * int64(g6_19)
f7g7_38 := int64(f7_2) * int64(g7_19)
f7g8_19 := int64(f7) * int64(g8_19)
f7g9_38 := int64(f7_2) * int64(g9_19)
f8g0 := int64(f8) * int64(g0)
f8g1 := int64(f8) * int64(g1)
f8g2_19 := int64(f8) * int64(g2_19)
f8g3_19 := int64(f8) * int64(g3_19)
f8g4_19 := int64(f8) * int64(g4_19)
f8g5_19 := int64(f8) * int64(g5_19)
f8g6_19 := int64(f8) * int64(g6_19)
f8g7_19 := int64(f8) * int64(g7_19)
f8g8_19 := int64(f8) * int64(g8_19)
f8g9_19 := int64(f8) * int64(g9_19)
f9g0 := int64(f9) * int64(g0)
f9g1_38 := int64(f9_2) * int64(g1_19)
f9g2_19 := int64(f9) * int64(g2_19)
f9g3_38 := int64(f9_2) * int64(g3_19)
f9g4_19 := int64(f9) * int64(g4_19)
f9g5_38 := int64(f9_2) * int64(g5_19)
f9g6_19 := int64(f9) * int64(g6_19)
f9g7_38 := int64(f9_2) * int64(g7_19)
f9g8_19 := int64(f9) * int64(g8_19)
f9g9_38 := int64(f9_2) * int64(g9_19)
h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38
h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19
h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38
h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19
h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38
h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19
h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38
h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19
h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38
h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0
var carry [10]int64
// |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
// i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
// |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
// i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
carry[0] = (h0 + (1 << 25)) >> 26
h1 += carry[0]
h0 -= carry[0] << 26
carry[4] = (h4 + (1 << 25)) >> 26
h5 += carry[4]
h4 -= carry[4] << 26
// |h0| <= 2^25
// |h4| <= 2^25
// |h1| <= 1.51*2^58
// |h5| <= 1.51*2^58
carry[1] = (h1 + (1 << 24)) >> 25
h2 += carry[1]
h1 -= carry[1] << 25
carry[5] = (h5 + (1 << 24)) >> 25
h6 += carry[5]
h5 -= carry[5] << 25
// |h1| <= 2^24; from now on fits into int32
// |h5| <= 2^24; from now on fits into int32
// |h2| <= 1.21*2^59
// |h6| <= 1.21*2^59
carry[2] = (h2 + (1 << 25)) >> 26
h3 += carry[2]
h2 -= carry[2] << 26
carry[6] = (h6 + (1 << 25)) >> 26
h7 += carry[6]
h6 -= carry[6] << 26
// |h2| <= 2^25; from now on fits into int32 unchanged
// |h6| <= 2^25; from now on fits into int32 unchanged
// |h3| <= 1.51*2^58
// |h7| <= 1.51*2^58
carry[3] = (h3 + (1 << 24)) >> 25
h4 += carry[3]
h3 -= carry[3] << 25
carry[7] = (h7 + (1 << 24)) >> 25
h8 += carry[7]
h7 -= carry[7] << 25
// |h3| <= 2^24; from now on fits into int32 unchanged
// |h7| <= 2^24; from now on fits into int32 unchanged
// |h4| <= 1.52*2^33
// |h8| <= 1.52*2^33
carry[4] = (h4 + (1 << 25)) >> 26
h5 += carry[4]
h4 -= carry[4] << 26
carry[8] = (h8 + (1 << 25)) >> 26
h9 += carry[8]
h8 -= carry[8] << 26
// |h4| <= 2^25; from now on fits into int32 unchanged
// |h8| <= 2^25; from now on fits into int32 unchanged
// |h5| <= 1.01*2^24
// |h9| <= 1.51*2^58
carry[9] = (h9 + (1 << 24)) >> 25
h0 += carry[9] * 19
h9 -= carry[9] << 25
// |h9| <= 2^24; from now on fits into int32 unchanged
// |h0| <= 1.8*2^37
carry[0] = (h0 + (1 << 25)) >> 26
h1 += carry[0]
h0 -= carry[0] << 26
// |h0| <= 2^25; from now on fits into int32 unchanged
// |h1| <= 1.01*2^24
h[0] = int32(h0)
h[1] = int32(h1)
h[2] = int32(h2)
h[3] = int32(h3)
h[4] = int32(h4)
h[5] = int32(h5)
h[6] = int32(h6)
h[7] = int32(h7)
h[8] = int32(h8)
h[9] = int32(h9)
}
// feSquare calculates h = f*f. Can overlap h with f.
//
// Preconditions:
// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
//
// Postconditions:
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
func feSquare(h, f *fieldElement) {
f0 := f[0]
f1 := f[1]
f2 := f[2]
f3 := f[3]
f4 := f[4]
f5 := f[5]
f6 := f[6]
f7 := f[7]
f8 := f[8]
f9 := f[9]
f0_2 := 2 * f0
f1_2 := 2 * f1
f2_2 := 2 * f2
f3_2 := 2 * f3
f4_2 := 2 * f4
f5_2 := 2 * f5
f6_2 := 2 * f6
f7_2 := 2 * f7
f5_38 := 38 * f5 // 1.31*2^30
f6_19 := 19 * f6 // 1.31*2^30
f7_38 := 38 * f7 // 1.31*2^30
f8_19 := 19 * f8 // 1.31*2^30
f9_38 := 38 * f9 // 1.31*2^30
f0f0 := int64(f0) * int64(f0)
f0f1_2 := int64(f0_2) * int64(f1)
f0f2_2 := int64(f0_2) * int64(f2)
f0f3_2 := int64(f0_2) * int64(f3)
f0f4_2 := int64(f0_2) * int64(f4)
f0f5_2 := int64(f0_2) * int64(f5)
f0f6_2 := int64(f0_2) * int64(f6)
f0f7_2 := int64(f0_2) * int64(f7)
f0f8_2 := int64(f0_2) * int64(f8)
f0f9_2 := int64(f0_2) * int64(f9)
f1f1_2 := int64(f1_2) * int64(f1)
f1f2_2 := int64(f1_2) * int64(f2)
f1f3_4 := int64(f1_2) * int64(f3_2)
f1f4_2 := int64(f1_2) * int64(f4)
f1f5_4 := int64(f1_2) * int64(f5_2)
f1f6_2 := int64(f1_2) * int64(f6)
f1f7_4 := int64(f1_2) * int64(f7_2)
f1f8_2 := int64(f1_2) * int64(f8)
f1f9_76 := int64(f1_2) * int64(f9_38)
f2f2 := int64(f2) * int64(f2)
f2f3_2 := int64(f2_2) * int64(f3)
f2f4_2 := int64(f2_2) * int64(f4)
f2f5_2 := int64(f2_2) * int64(f5)
f2f6_2 := int64(f2_2) * int64(f6)
f2f7_2 := int64(f2_2) * int64(f7)
f2f8_38 := int64(f2_2) * int64(f8_19)
f2f9_38 := int64(f2) * int64(f9_38)
f3f3_2 := int64(f3_2) * int64(f3)
f3f4_2 := int64(f3_2) * int64(f4)
f3f5_4 := int64(f3_2) * int64(f5_2)
f3f6_2 := int64(f3_2) * int64(f6)
f3f7_76 := int64(f3_2) * int64(f7_38)
f3f8_38 := int64(f3_2) * int64(f8_19)
f3f9_76 := int64(f3_2) * int64(f9_38)
f4f4 := int64(f4) * int64(f4)
f4f5_2 := int64(f4_2) * int64(f5)
f4f6_38 := int64(f4_2) * int64(f6_19)
f4f7_38 := int64(f4) * int64(f7_38)
f4f8_38 := int64(f4_2) * int64(f8_19)
f4f9_38 := int64(f4) * int64(f9_38)
f5f5_38 := int64(f5) * int64(f5_38)
f5f6_38 := int64(f5_2) * int64(f6_19)
f5f7_76 := int64(f5_2) * int64(f7_38)
f5f8_38 := int64(f5_2) * int64(f8_19)
f5f9_76 := int64(f5_2) * int64(f9_38)
f6f6_19 := int64(f6) * int64(f6_19)
f6f7_38 := int64(f6) * int64(f7_38)
f6f8_38 := int64(f6_2) * int64(f8_19)
f6f9_38 := int64(f6) * int64(f9_38)
f7f7_38 := int64(f7) * int64(f7_38)
f7f8_38 := int64(f7_2) * int64(f8_19)
f7f9_76 := int64(f7_2) * int64(f9_38)
f8f8_19 := int64(f8) * int64(f8_19)
f8f9_38 := int64(f8) * int64(f9_38)
f9f9_38 := int64(f9) * int64(f9_38)
h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
var carry [10]int64
carry[0] = (h0 + (1 << 25)) >> 26
h1 += carry[0]
h0 -= carry[0] << 26
carry[4] = (h4 + (1 << 25)) >> 26
h5 += carry[4]
h4 -= carry[4] << 26
carry[1] = (h1 + (1 << 24)) >> 25
h2 += carry[1]
h1 -= carry[1] << 25
carry[5] = (h5 + (1 << 24)) >> 25
h6 += carry[5]
h5 -= carry[5] << 25
carry[2] = (h2 + (1 << 25)) >> 26
h3 += carry[2]
h2 -= carry[2] << 26
carry[6] = (h6 + (1 << 25)) >> 26
h7 += carry[6]
h6 -= carry[6] << 26
carry[3] = (h3 + (1 << 24)) >> 25
h4 += carry[3]
h3 -= carry[3] << 25
carry[7] = (h7 + (1 << 24)) >> 25
h8 += carry[7]
h7 -= carry[7] << 25
carry[4] = (h4 + (1 << 25)) >> 26
h5 += carry[4]
h4 -= carry[4] << 26
carry[8] = (h8 + (1 << 25)) >> 26
h9 += carry[8]
h8 -= carry[8] << 26
carry[9] = (h9 + (1 << 24)) >> 25
h0 += carry[9] * 19
h9 -= carry[9] << 25
carry[0] = (h0 + (1 << 25)) >> 26
h1 += carry[0]
h0 -= carry[0] << 26
h[0] = int32(h0)
h[1] = int32(h1)
h[2] = int32(h2)
h[3] = int32(h3)
h[4] = int32(h4)
h[5] = int32(h5)
h[6] = int32(h6)
h[7] = int32(h7)
h[8] = int32(h8)
h[9] = int32(h9)
}
// feMul121666 calculates h = f * 121666. Can overlap h with f.
//
// Preconditions:
// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
//
// Postconditions:
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
func feMul121666(h, f *fieldElement) {
h0 := int64(f[0]) * 121666
h1 := int64(f[1]) * 121666
h2 := int64(f[2]) * 121666
h3 := int64(f[3]) * 121666
h4 := int64(f[4]) * 121666
h5 := int64(f[5]) * 121666
h6 := int64(f[6]) * 121666
h7 := int64(f[7]) * 121666
h8 := int64(f[8]) * 121666
h9 := int64(f[9]) * 121666
var carry [10]int64
carry[9] = (h9 + (1 << 24)) >> 25
h0 += carry[9] * 19
h9 -= carry[9] << 25
carry[1] = (h1 + (1 << 24)) >> 25
h2 += carry[1]
h1 -= carry[1] << 25
carry[3] = (h3 + (1 << 24)) >> 25
h4 += carry[3]
h3 -= carry[3] << 25
carry[5] = (h5 + (1 << 24)) >> 25
h6 += carry[5]
h5 -= carry[5] << 25
carry[7] = (h7 + (1 << 24)) >> 25
h8 += carry[7]
h7 -= carry[7] << 25
carry[0] = (h0 + (1 << 25)) >> 26
h1 += carry[0]
h0 -= carry[0] << 26
carry[2] = (h2 + (1 << 25)) >> 26
h3 += carry[2]
h2 -= carry[2] << 26
carry[4] = (h4 + (1 << 25)) >> 26
h5 += carry[4]
h4 -= carry[4] << 26
carry[6] = (h6 + (1 << 25)) >> 26
h7 += carry[6]
h6 -= carry[6] << 26
carry[8] = (h8 + (1 << 25)) >> 26
h9 += carry[8]
h8 -= carry[8] << 26
h[0] = int32(h0)
h[1] = int32(h1)
h[2] = int32(h2)
h[3] = int32(h3)
h[4] = int32(h4)
h[5] = int32(h5)
h[6] = int32(h6)
h[7] = int32(h7)
h[8] = int32(h8)
h[9] = int32(h9)
}
// feInvert sets out = z^-1.
func feInvert(out, z *fieldElement) {
var t0, t1, t2, t3 fieldElement
var i int
feSquare(&t0, z)
for i = 1; i < 1; i++ {
feSquare(&t0, &t0)
}
feSquare(&t1, &t0)
for i = 1; i < 2; i++ {
feSquare(&t1, &t1)
}
feMul(&t1, z, &t1)
feMul(&t0, &t0, &t1)
feSquare(&t2, &t0)
for i = 1; i < 1; i++ {
feSquare(&t2, &t2)
}
feMul(&t1, &t1, &t2)
feSquare(&t2, &t1)
for i = 1; i < 5; i++ {
feSquare(&t2, &t2)
}
feMul(&t1, &t2, &t1)
feSquare(&t2, &t1)
for i = 1; i < 10; i++ {
feSquare(&t2, &t2)
}
feMul(&t2, &t2, &t1)
feSquare(&t3, &t2)
for i = 1; i < 20; i++ {
feSquare(&t3, &t3)
}
feMul(&t2, &t3, &t2)
feSquare(&t2, &t2)
for i = 1; i < 10; i++ {
feSquare(&t2, &t2)
}
feMul(&t1, &t2, &t1)
feSquare(&t2, &t1)
for i = 1; i < 50; i++ {
feSquare(&t2, &t2)
}
feMul(&t2, &t2, &t1)
feSquare(&t3, &t2)
for i = 1; i < 100; i++ {
feSquare(&t3, &t3)
}
feMul(&t2, &t3, &t2)
feSquare(&t2, &t2)
for i = 1; i < 50; i++ {
feSquare(&t2, &t2)
}
feMul(&t1, &t2, &t1)
feSquare(&t1, &t1)
for i = 1; i < 5; i++ {
feSquare(&t1, &t1)
}
feMul(out, &t1, &t0)
}
func scalarMultGeneric(out, in, base *[32]byte) {
var e [32]byte
copy(e[:], in[:])
e[0] &= 248
e[31] &= 127
e[31] |= 64
var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement
feFromBytes(&x1, base)
feOne(&x2)
feCopy(&x3, &x1)
feOne(&z3)
swap := int32(0)
for pos := 254; pos >= 0; pos-- {
b := e[pos/8] >> uint(pos&7)
b &= 1
swap ^= int32(b)
feCSwap(&x2, &x3, swap)
feCSwap(&z2, &z3, swap)
swap = int32(b)
feSub(&tmp0, &x3, &z3)
feSub(&tmp1, &x2, &z2)
feAdd(&x2, &x2, &z2)
feAdd(&z2, &x3, &z3)
feMul(&z3, &tmp0, &x2)
feMul(&z2, &z2, &tmp1)
feSquare(&tmp0, &tmp1)
feSquare(&tmp1, &x2)
feAdd(&x3, &z3, &z2)
feSub(&z2, &z3, &z2)
feMul(&x2, &tmp1, &tmp0)
feSub(&tmp1, &tmp1, &tmp0)
feSquare(&z2, &z2)
feMul121666(&z3, &tmp1)
feSquare(&x3, &x3)
feAdd(&tmp0, &tmp0, &z3)
feMul(&z3, &x1, &z2)
feMul(&z2, &tmp1, &tmp0)
}
feCSwap(&x2, &x3, swap)
feCSwap(&z2, &z3, swap)
feInvert(&z2, &z2)
feMul(&x2, &x2, &z2)
feToBytes(out, &x2)
}

View file

@ -0,0 +1,46 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build go1.20
package curve25519
import "crypto/ecdh"
func x25519(dst *[32]byte, scalar, point []byte) ([]byte, error) {
curve := ecdh.X25519()
pub, err := curve.NewPublicKey(point)
if err != nil {
return nil, err
}
priv, err := curve.NewPrivateKey(scalar)
if err != nil {
return nil, err
}
out, err := priv.ECDH(pub)
if err != nil {
return nil, err
}
copy(dst[:], out)
return dst[:], nil
}
func scalarMult(dst, scalar, point *[32]byte) {
if _, err := x25519(dst, scalar[:], point[:]); err != nil {
// The only error condition for x25519 when the inputs are 32 bytes long
// is if the output would have been the all-zero value.
for i := range dst {
dst[i] = 0
}
}
}
func scalarBaseMult(dst, scalar *[32]byte) {
curve := ecdh.X25519()
priv, err := curve.NewPrivateKey(scalar[:])
if err != nil {
panic("curve25519: internal error: scalarBaseMult was not 32 bytes")
}
copy(dst[:], priv.PublicKey().Bytes())
}

View file

@ -1,11 +0,0 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64 !gc purego
package curve25519
func scalarMult(out, in, base *[32]byte) {
scalarMultGeneric(out, in, base)
}

View file

@ -0,0 +1,7 @@
This package is kept in sync with crypto/ed25519/internal/edwards25519/field in
the standard library.
If there are any changes in the standard library that need to be synced to this
package, run sync.sh. It will not overwrite any local changes made since the
previous sync, so it's ok to land changes in this package first, and then sync
to the standard library later.

View file

@ -0,0 +1,416 @@
// Copyright (c) 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package field implements fast arithmetic modulo 2^255-19.
package field
import (
"crypto/subtle"
"encoding/binary"
"math/bits"
)
// Element represents an element of the field GF(2^255-19). Note that this
// is not a cryptographically secure group, and should only be used to interact
// with edwards25519.Point coordinates.
//
// This type works similarly to math/big.Int, and all arguments and receivers
// are allowed to alias.
//
// The zero value is a valid zero element.
type Element struct {
// An element t represents the integer
// t.l0 + t.l1*2^51 + t.l2*2^102 + t.l3*2^153 + t.l4*2^204
//
// Between operations, all limbs are expected to be lower than 2^52.
l0 uint64
l1 uint64
l2 uint64
l3 uint64
l4 uint64
}
const maskLow51Bits uint64 = (1 << 51) - 1
var feZero = &Element{0, 0, 0, 0, 0}
// Zero sets v = 0, and returns v.
func (v *Element) Zero() *Element {
*v = *feZero
return v
}
var feOne = &Element{1, 0, 0, 0, 0}
// One sets v = 1, and returns v.
func (v *Element) One() *Element {
*v = *feOne
return v
}
// reduce reduces v modulo 2^255 - 19 and returns it.
func (v *Element) reduce() *Element {
v.carryPropagate()
// After the light reduction we now have a field element representation
// v < 2^255 + 2^13 * 19, but need v < 2^255 - 19.
// If v >= 2^255 - 19, then v + 19 >= 2^255, which would overflow 2^255 - 1,
// generating a carry. That is, c will be 0 if v < 2^255 - 19, and 1 otherwise.
c := (v.l0 + 19) >> 51
c = (v.l1 + c) >> 51
c = (v.l2 + c) >> 51
c = (v.l3 + c) >> 51
c = (v.l4 + c) >> 51
// If v < 2^255 - 19 and c = 0, this will be a no-op. Otherwise, it's
// effectively applying the reduction identity to the carry.
v.l0 += 19 * c
v.l1 += v.l0 >> 51
v.l0 = v.l0 & maskLow51Bits
v.l2 += v.l1 >> 51
v.l1 = v.l1 & maskLow51Bits
v.l3 += v.l2 >> 51
v.l2 = v.l2 & maskLow51Bits
v.l4 += v.l3 >> 51
v.l3 = v.l3 & maskLow51Bits
// no additional carry
v.l4 = v.l4 & maskLow51Bits
return v
}
// Add sets v = a + b, and returns v.
func (v *Element) Add(a, b *Element) *Element {
v.l0 = a.l0 + b.l0
v.l1 = a.l1 + b.l1
v.l2 = a.l2 + b.l2
v.l3 = a.l3 + b.l3
v.l4 = a.l4 + b.l4
// Using the generic implementation here is actually faster than the
// assembly. Probably because the body of this function is so simple that
// the compiler can figure out better optimizations by inlining the carry
// propagation. TODO
return v.carryPropagateGeneric()
}
// Subtract sets v = a - b, and returns v.
func (v *Element) Subtract(a, b *Element) *Element {
// We first add 2 * p, to guarantee the subtraction won't underflow, and
// then subtract b (which can be up to 2^255 + 2^13 * 19).
v.l0 = (a.l0 + 0xFFFFFFFFFFFDA) - b.l0
v.l1 = (a.l1 + 0xFFFFFFFFFFFFE) - b.l1
v.l2 = (a.l2 + 0xFFFFFFFFFFFFE) - b.l2
v.l3 = (a.l3 + 0xFFFFFFFFFFFFE) - b.l3
v.l4 = (a.l4 + 0xFFFFFFFFFFFFE) - b.l4
return v.carryPropagate()
}
// Negate sets v = -a, and returns v.
func (v *Element) Negate(a *Element) *Element {
return v.Subtract(feZero, a)
}
// Invert sets v = 1/z mod p, and returns v.
//
// If z == 0, Invert returns v = 0.
func (v *Element) Invert(z *Element) *Element {
// Inversion is implemented as exponentiation with exponent p 2. It uses the
// same sequence of 255 squarings and 11 multiplications as [Curve25519].
var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t Element
z2.Square(z) // 2
t.Square(&z2) // 4
t.Square(&t) // 8
z9.Multiply(&t, z) // 9
z11.Multiply(&z9, &z2) // 11
t.Square(&z11) // 22
z2_5_0.Multiply(&t, &z9) // 31 = 2^5 - 2^0
t.Square(&z2_5_0) // 2^6 - 2^1
for i := 0; i < 4; i++ {
t.Square(&t) // 2^10 - 2^5
}
z2_10_0.Multiply(&t, &z2_5_0) // 2^10 - 2^0
t.Square(&z2_10_0) // 2^11 - 2^1
for i := 0; i < 9; i++ {
t.Square(&t) // 2^20 - 2^10
}
z2_20_0.Multiply(&t, &z2_10_0) // 2^20 - 2^0
t.Square(&z2_20_0) // 2^21 - 2^1
for i := 0; i < 19; i++ {
t.Square(&t) // 2^40 - 2^20
}
t.Multiply(&t, &z2_20_0) // 2^40 - 2^0
t.Square(&t) // 2^41 - 2^1
for i := 0; i < 9; i++ {
t.Square(&t) // 2^50 - 2^10
}
z2_50_0.Multiply(&t, &z2_10_0) // 2^50 - 2^0
t.Square(&z2_50_0) // 2^51 - 2^1
for i := 0; i < 49; i++ {
t.Square(&t) // 2^100 - 2^50
}
z2_100_0.Multiply(&t, &z2_50_0) // 2^100 - 2^0
t.Square(&z2_100_0) // 2^101 - 2^1
for i := 0; i < 99; i++ {
t.Square(&t) // 2^200 - 2^100
}
t.Multiply(&t, &z2_100_0) // 2^200 - 2^0
t.Square(&t) // 2^201 - 2^1
for i := 0; i < 49; i++ {
t.Square(&t) // 2^250 - 2^50
}
t.Multiply(&t, &z2_50_0) // 2^250 - 2^0
t.Square(&t) // 2^251 - 2^1
t.Square(&t) // 2^252 - 2^2
t.Square(&t) // 2^253 - 2^3
t.Square(&t) // 2^254 - 2^4
t.Square(&t) // 2^255 - 2^5
return v.Multiply(&t, &z11) // 2^255 - 21
}
// Set sets v = a, and returns v.
func (v *Element) Set(a *Element) *Element {
*v = *a
return v
}
// SetBytes sets v to x, which must be a 32-byte little-endian encoding.
//
// Consistent with RFC 7748, the most significant bit (the high bit of the
// last byte) is ignored, and non-canonical values (2^255-19 through 2^255-1)
// are accepted. Note that this is laxer than specified by RFC 8032.
func (v *Element) SetBytes(x []byte) *Element {
if len(x) != 32 {
panic("edwards25519: invalid field element input size")
}
// Bits 0:51 (bytes 0:8, bits 0:64, shift 0, mask 51).
v.l0 = binary.LittleEndian.Uint64(x[0:8])
v.l0 &= maskLow51Bits
// Bits 51:102 (bytes 6:14, bits 48:112, shift 3, mask 51).
v.l1 = binary.LittleEndian.Uint64(x[6:14]) >> 3
v.l1 &= maskLow51Bits
// Bits 102:153 (bytes 12:20, bits 96:160, shift 6, mask 51).
v.l2 = binary.LittleEndian.Uint64(x[12:20]) >> 6
v.l2 &= maskLow51Bits
// Bits 153:204 (bytes 19:27, bits 152:216, shift 1, mask 51).
v.l3 = binary.LittleEndian.Uint64(x[19:27]) >> 1
v.l3 &= maskLow51Bits
// Bits 204:251 (bytes 24:32, bits 192:256, shift 12, mask 51).
// Note: not bytes 25:33, shift 4, to avoid overread.
v.l4 = binary.LittleEndian.Uint64(x[24:32]) >> 12
v.l4 &= maskLow51Bits
return v
}
// Bytes returns the canonical 32-byte little-endian encoding of v.
func (v *Element) Bytes() []byte {
// This function is outlined to make the allocations inline in the caller
// rather than happen on the heap.
var out [32]byte
return v.bytes(&out)
}
func (v *Element) bytes(out *[32]byte) []byte {
t := *v
t.reduce()
var buf [8]byte
for i, l := range [5]uint64{t.l0, t.l1, t.l2, t.l3, t.l4} {
bitsOffset := i * 51
binary.LittleEndian.PutUint64(buf[:], l<<uint(bitsOffset%8))
for i, bb := range buf {
off := bitsOffset/8 + i
if off >= len(out) {
break
}
out[off] |= bb
}
}
return out[:]
}
// Equal returns 1 if v and u are equal, and 0 otherwise.
func (v *Element) Equal(u *Element) int {
sa, sv := u.Bytes(), v.Bytes()
return subtle.ConstantTimeCompare(sa, sv)
}
// mask64Bits returns 0xffffffff if cond is 1, and 0 otherwise.
func mask64Bits(cond int) uint64 { return ^(uint64(cond) - 1) }
// Select sets v to a if cond == 1, and to b if cond == 0.
func (v *Element) Select(a, b *Element, cond int) *Element {
m := mask64Bits(cond)
v.l0 = (m & a.l0) | (^m & b.l0)
v.l1 = (m & a.l1) | (^m & b.l1)
v.l2 = (m & a.l2) | (^m & b.l2)
v.l3 = (m & a.l3) | (^m & b.l3)
v.l4 = (m & a.l4) | (^m & b.l4)
return v
}
// Swap swaps v and u if cond == 1 or leaves them unchanged if cond == 0, and returns v.
func (v *Element) Swap(u *Element, cond int) {
m := mask64Bits(cond)
t := m & (v.l0 ^ u.l0)
v.l0 ^= t
u.l0 ^= t
t = m & (v.l1 ^ u.l1)
v.l1 ^= t
u.l1 ^= t
t = m & (v.l2 ^ u.l2)
v.l2 ^= t
u.l2 ^= t
t = m & (v.l3 ^ u.l3)
v.l3 ^= t
u.l3 ^= t
t = m & (v.l4 ^ u.l4)
v.l4 ^= t
u.l4 ^= t
}
// IsNegative returns 1 if v is negative, and 0 otherwise.
func (v *Element) IsNegative() int {
return int(v.Bytes()[0] & 1)
}
// Absolute sets v to |u|, and returns v.
func (v *Element) Absolute(u *Element) *Element {
return v.Select(new(Element).Negate(u), u, u.IsNegative())
}
// Multiply sets v = x * y, and returns v.
func (v *Element) Multiply(x, y *Element) *Element {
feMul(v, x, y)
return v
}
// Square sets v = x * x, and returns v.
func (v *Element) Square(x *Element) *Element {
feSquare(v, x)
return v
}
// Mult32 sets v = x * y, and returns v.
func (v *Element) Mult32(x *Element, y uint32) *Element {
x0lo, x0hi := mul51(x.l0, y)
x1lo, x1hi := mul51(x.l1, y)
x2lo, x2hi := mul51(x.l2, y)
x3lo, x3hi := mul51(x.l3, y)
x4lo, x4hi := mul51(x.l4, y)
v.l0 = x0lo + 19*x4hi // carried over per the reduction identity
v.l1 = x1lo + x0hi
v.l2 = x2lo + x1hi
v.l3 = x3lo + x2hi
v.l4 = x4lo + x3hi
// The hi portions are going to be only 32 bits, plus any previous excess,
// so we can skip the carry propagation.
return v
}
// mul51 returns lo + hi * 2⁵¹ = a * b.
func mul51(a uint64, b uint32) (lo uint64, hi uint64) {
mh, ml := bits.Mul64(a, uint64(b))
lo = ml & maskLow51Bits
hi = (mh << 13) | (ml >> 51)
return
}
// Pow22523 set v = x^((p-5)/8), and returns v. (p-5)/8 is 2^252-3.
func (v *Element) Pow22523(x *Element) *Element {
var t0, t1, t2 Element
t0.Square(x) // x^2
t1.Square(&t0) // x^4
t1.Square(&t1) // x^8
t1.Multiply(x, &t1) // x^9
t0.Multiply(&t0, &t1) // x^11
t0.Square(&t0) // x^22
t0.Multiply(&t1, &t0) // x^31
t1.Square(&t0) // x^62
for i := 1; i < 5; i++ { // x^992
t1.Square(&t1)
}
t0.Multiply(&t1, &t0) // x^1023 -> 1023 = 2^10 - 1
t1.Square(&t0) // 2^11 - 2
for i := 1; i < 10; i++ { // 2^20 - 2^10
t1.Square(&t1)
}
t1.Multiply(&t1, &t0) // 2^20 - 1
t2.Square(&t1) // 2^21 - 2
for i := 1; i < 20; i++ { // 2^40 - 2^20
t2.Square(&t2)
}
t1.Multiply(&t2, &t1) // 2^40 - 1
t1.Square(&t1) // 2^41 - 2
for i := 1; i < 10; i++ { // 2^50 - 2^10
t1.Square(&t1)
}
t0.Multiply(&t1, &t0) // 2^50 - 1
t1.Square(&t0) // 2^51 - 2
for i := 1; i < 50; i++ { // 2^100 - 2^50
t1.Square(&t1)
}
t1.Multiply(&t1, &t0) // 2^100 - 1
t2.Square(&t1) // 2^101 - 2
for i := 1; i < 100; i++ { // 2^200 - 2^100
t2.Square(&t2)
}
t1.Multiply(&t2, &t1) // 2^200 - 1
t1.Square(&t1) // 2^201 - 2
for i := 1; i < 50; i++ { // 2^250 - 2^50
t1.Square(&t1)
}
t0.Multiply(&t1, &t0) // 2^250 - 1
t0.Square(&t0) // 2^251 - 2
t0.Square(&t0) // 2^252 - 4
return v.Multiply(&t0, x) // 2^252 - 3 -> x^(2^252-3)
}
// sqrtM1 is 2^((p-1)/4), which squared is equal to -1 by Euler's Criterion.
var sqrtM1 = &Element{1718705420411056, 234908883556509,
2233514472574048, 2117202627021982, 765476049583133}
// SqrtRatio sets r to the non-negative square root of the ratio of u and v.
//
// If u/v is square, SqrtRatio returns r and 1. If u/v is not square, SqrtRatio
// sets r according to Section 4.3 of draft-irtf-cfrg-ristretto255-decaf448-00,
// and returns r and 0.
func (r *Element) SqrtRatio(u, v *Element) (rr *Element, wasSquare int) {
var a, b Element
// r = (u * v3) * (u * v7)^((p-5)/8)
v2 := a.Square(v)
uv3 := b.Multiply(u, b.Multiply(v2, v))
uv7 := a.Multiply(uv3, a.Square(v2))
r.Multiply(uv3, r.Pow22523(uv7))
check := a.Multiply(v, a.Square(r)) // check = v * r^2
uNeg := b.Negate(u)
correctSignSqrt := check.Equal(u)
flippedSignSqrt := check.Equal(uNeg)
flippedSignSqrtI := check.Equal(uNeg.Multiply(uNeg, sqrtM1))
rPrime := b.Multiply(r, sqrtM1) // r_prime = SQRT_M1 * r
// r = CT_SELECT(r_prime IF flipped_sign_sqrt | flipped_sign_sqrt_i ELSE r)
r.Select(rPrime, r, flippedSignSqrt|flippedSignSqrtI)
r.Absolute(r) // Choose the nonnegative square root.
return r, correctSignSqrt | flippedSignSqrt
}

View file

@ -0,0 +1,16 @@
// Code generated by command: go run fe_amd64_asm.go -out ../fe_amd64.s -stubs ../fe_amd64.go -pkg field. DO NOT EDIT.
//go:build amd64 && gc && !purego
// +build amd64,gc,!purego
package field
// feMul sets out = a * b. It works like feMulGeneric.
//
//go:noescape
func feMul(out *Element, a *Element, b *Element)
// feSquare sets out = a * a. It works like feSquareGeneric.
//
//go:noescape
func feSquare(out *Element, a *Element)

View file

@ -0,0 +1,379 @@
// Code generated by command: go run fe_amd64_asm.go -out ../fe_amd64.s -stubs ../fe_amd64.go -pkg field. DO NOT EDIT.
//go:build amd64 && gc && !purego
// +build amd64,gc,!purego
#include "textflag.h"
// func feMul(out *Element, a *Element, b *Element)
TEXT ·feMul(SB), NOSPLIT, $0-24
MOVQ a+8(FP), CX
MOVQ b+16(FP), BX
// r0 = a0×b0
MOVQ (CX), AX
MULQ (BX)
MOVQ AX, DI
MOVQ DX, SI
// r0 += 19×a1×b4
MOVQ 8(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 32(BX)
ADDQ AX, DI
ADCQ DX, SI
// r0 += 19×a2×b3
MOVQ 16(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 24(BX)
ADDQ AX, DI
ADCQ DX, SI
// r0 += 19×a3×b2
MOVQ 24(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 16(BX)
ADDQ AX, DI
ADCQ DX, SI
// r0 += 19×a4×b1
MOVQ 32(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 8(BX)
ADDQ AX, DI
ADCQ DX, SI
// r1 = a0×b1
MOVQ (CX), AX
MULQ 8(BX)
MOVQ AX, R9
MOVQ DX, R8
// r1 += a1×b0
MOVQ 8(CX), AX
MULQ (BX)
ADDQ AX, R9
ADCQ DX, R8
// r1 += 19×a2×b4
MOVQ 16(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 32(BX)
ADDQ AX, R9
ADCQ DX, R8
// r1 += 19×a3×b3
MOVQ 24(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 24(BX)
ADDQ AX, R9
ADCQ DX, R8
// r1 += 19×a4×b2
MOVQ 32(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 16(BX)
ADDQ AX, R9
ADCQ DX, R8
// r2 = a0×b2
MOVQ (CX), AX
MULQ 16(BX)
MOVQ AX, R11
MOVQ DX, R10
// r2 += a1×b1
MOVQ 8(CX), AX
MULQ 8(BX)
ADDQ AX, R11
ADCQ DX, R10
// r2 += a2×b0
MOVQ 16(CX), AX
MULQ (BX)
ADDQ AX, R11
ADCQ DX, R10
// r2 += 19×a3×b4
MOVQ 24(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 32(BX)
ADDQ AX, R11
ADCQ DX, R10
// r2 += 19×a4×b3
MOVQ 32(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 24(BX)
ADDQ AX, R11
ADCQ DX, R10
// r3 = a0×b3
MOVQ (CX), AX
MULQ 24(BX)
MOVQ AX, R13
MOVQ DX, R12
// r3 += a1×b2
MOVQ 8(CX), AX
MULQ 16(BX)
ADDQ AX, R13
ADCQ DX, R12
// r3 += a2×b1
MOVQ 16(CX), AX
MULQ 8(BX)
ADDQ AX, R13
ADCQ DX, R12
// r3 += a3×b0
MOVQ 24(CX), AX
MULQ (BX)
ADDQ AX, R13
ADCQ DX, R12
// r3 += 19×a4×b4
MOVQ 32(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 32(BX)
ADDQ AX, R13
ADCQ DX, R12
// r4 = a0×b4
MOVQ (CX), AX
MULQ 32(BX)
MOVQ AX, R15
MOVQ DX, R14
// r4 += a1×b3
MOVQ 8(CX), AX
MULQ 24(BX)
ADDQ AX, R15
ADCQ DX, R14
// r4 += a2×b2
MOVQ 16(CX), AX
MULQ 16(BX)
ADDQ AX, R15
ADCQ DX, R14
// r4 += a3×b1
MOVQ 24(CX), AX
MULQ 8(BX)
ADDQ AX, R15
ADCQ DX, R14
// r4 += a4×b0
MOVQ 32(CX), AX
MULQ (BX)
ADDQ AX, R15
ADCQ DX, R14
// First reduction chain
MOVQ $0x0007ffffffffffff, AX
SHLQ $0x0d, DI, SI
SHLQ $0x0d, R9, R8
SHLQ $0x0d, R11, R10
SHLQ $0x0d, R13, R12
SHLQ $0x0d, R15, R14
ANDQ AX, DI
IMUL3Q $0x13, R14, R14
ADDQ R14, DI
ANDQ AX, R9
ADDQ SI, R9
ANDQ AX, R11
ADDQ R8, R11
ANDQ AX, R13
ADDQ R10, R13
ANDQ AX, R15
ADDQ R12, R15
// Second reduction chain (carryPropagate)
MOVQ DI, SI
SHRQ $0x33, SI
MOVQ R9, R8
SHRQ $0x33, R8
MOVQ R11, R10
SHRQ $0x33, R10
MOVQ R13, R12
SHRQ $0x33, R12
MOVQ R15, R14
SHRQ $0x33, R14
ANDQ AX, DI
IMUL3Q $0x13, R14, R14
ADDQ R14, DI
ANDQ AX, R9
ADDQ SI, R9
ANDQ AX, R11
ADDQ R8, R11
ANDQ AX, R13
ADDQ R10, R13
ANDQ AX, R15
ADDQ R12, R15
// Store output
MOVQ out+0(FP), AX
MOVQ DI, (AX)
MOVQ R9, 8(AX)
MOVQ R11, 16(AX)
MOVQ R13, 24(AX)
MOVQ R15, 32(AX)
RET
// func feSquare(out *Element, a *Element)
TEXT ·feSquare(SB), NOSPLIT, $0-16
MOVQ a+8(FP), CX
// r0 = l0×l0
MOVQ (CX), AX
MULQ (CX)
MOVQ AX, SI
MOVQ DX, BX
// r0 += 38×l1×l4
MOVQ 8(CX), AX
IMUL3Q $0x26, AX, AX
MULQ 32(CX)
ADDQ AX, SI
ADCQ DX, BX
// r0 += 38×l2×l3
MOVQ 16(CX), AX
IMUL3Q $0x26, AX, AX
MULQ 24(CX)
ADDQ AX, SI
ADCQ DX, BX
// r1 = 2×l0×l1
MOVQ (CX), AX
SHLQ $0x01, AX
MULQ 8(CX)
MOVQ AX, R8
MOVQ DX, DI
// r1 += 38×l2×l4
MOVQ 16(CX), AX
IMUL3Q $0x26, AX, AX
MULQ 32(CX)
ADDQ AX, R8
ADCQ DX, DI
// r1 += 19×l3×l3
MOVQ 24(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 24(CX)
ADDQ AX, R8
ADCQ DX, DI
// r2 = 2×l0×l2
MOVQ (CX), AX
SHLQ $0x01, AX
MULQ 16(CX)
MOVQ AX, R10
MOVQ DX, R9
// r2 += l1×l1
MOVQ 8(CX), AX
MULQ 8(CX)
ADDQ AX, R10
ADCQ DX, R9
// r2 += 38×l3×l4
MOVQ 24(CX), AX
IMUL3Q $0x26, AX, AX
MULQ 32(CX)
ADDQ AX, R10
ADCQ DX, R9
// r3 = 2×l0×l3
MOVQ (CX), AX
SHLQ $0x01, AX
MULQ 24(CX)
MOVQ AX, R12
MOVQ DX, R11
// r3 += 2×l1×l2
MOVQ 8(CX), AX
IMUL3Q $0x02, AX, AX
MULQ 16(CX)
ADDQ AX, R12
ADCQ DX, R11
// r3 += 19×l4×l4
MOVQ 32(CX), AX
IMUL3Q $0x13, AX, AX
MULQ 32(CX)
ADDQ AX, R12
ADCQ DX, R11
// r4 = 2×l0×l4
MOVQ (CX), AX
SHLQ $0x01, AX
MULQ 32(CX)
MOVQ AX, R14
MOVQ DX, R13
// r4 += 2×l1×l3
MOVQ 8(CX), AX
IMUL3Q $0x02, AX, AX
MULQ 24(CX)
ADDQ AX, R14
ADCQ DX, R13
// r4 += l2×l2
MOVQ 16(CX), AX
MULQ 16(CX)
ADDQ AX, R14
ADCQ DX, R13
// First reduction chain
MOVQ $0x0007ffffffffffff, AX
SHLQ $0x0d, SI, BX
SHLQ $0x0d, R8, DI
SHLQ $0x0d, R10, R9
SHLQ $0x0d, R12, R11
SHLQ $0x0d, R14, R13
ANDQ AX, SI
IMUL3Q $0x13, R13, R13
ADDQ R13, SI
ANDQ AX, R8
ADDQ BX, R8
ANDQ AX, R10
ADDQ DI, R10
ANDQ AX, R12
ADDQ R9, R12
ANDQ AX, R14
ADDQ R11, R14
// Second reduction chain (carryPropagate)
MOVQ SI, BX
SHRQ $0x33, BX
MOVQ R8, DI
SHRQ $0x33, DI
MOVQ R10, R9
SHRQ $0x33, R9
MOVQ R12, R11
SHRQ $0x33, R11
MOVQ R14, R13
SHRQ $0x33, R13
ANDQ AX, SI
IMUL3Q $0x13, R13, R13
ADDQ R13, SI
ANDQ AX, R8
ADDQ BX, R8
ANDQ AX, R10
ADDQ DI, R10
ANDQ AX, R12
ADDQ R9, R12
ANDQ AX, R14
ADDQ R11, R14
// Store output
MOVQ out+0(FP), AX
MOVQ SI, (AX)
MOVQ R8, 8(AX)
MOVQ R10, 16(AX)
MOVQ R12, 24(AX)
MOVQ R14, 32(AX)
RET

View file

@ -0,0 +1,12 @@
// Copyright (c) 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !amd64 || !gc || purego
// +build !amd64 !gc purego
package field
func feMul(v, x, y *Element) { feMulGeneric(v, x, y) }
func feSquare(v, x *Element) { feSquareGeneric(v, x) }

View file

@ -0,0 +1,16 @@
// Copyright (c) 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build arm64 && gc && !purego
// +build arm64,gc,!purego
package field
//go:noescape
func carryPropagate(v *Element)
func (v *Element) carryPropagate() *Element {
carryPropagate(v)
return v
}

View file

@ -0,0 +1,43 @@
// Copyright (c) 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build arm64 && gc && !purego
// +build arm64,gc,!purego
#include "textflag.h"
// carryPropagate works exactly like carryPropagateGeneric and uses the
// same AND, ADD, and LSR+MADD instructions emitted by the compiler, but
// avoids loading R0-R4 twice and uses LDP and STP.
//
// See https://golang.org/issues/43145 for the main compiler issue.
//
// func carryPropagate(v *Element)
TEXT ·carryPropagate(SB),NOFRAME|NOSPLIT,$0-8
MOVD v+0(FP), R20
LDP 0(R20), (R0, R1)
LDP 16(R20), (R2, R3)
MOVD 32(R20), R4
AND $0x7ffffffffffff, R0, R10
AND $0x7ffffffffffff, R1, R11
AND $0x7ffffffffffff, R2, R12
AND $0x7ffffffffffff, R3, R13
AND $0x7ffffffffffff, R4, R14
ADD R0>>51, R11, R11
ADD R1>>51, R12, R12
ADD R2>>51, R13, R13
ADD R3>>51, R14, R14
// R4>>51 * 19 + R10 -> R10
LSR $51, R4, R21
MOVD $19, R22
MADD R22, R10, R21, R10
STP (R10, R11), 0(R20)
STP (R12, R13), 16(R20)
MOVD R14, 32(R20)
RET

View file

@ -0,0 +1,12 @@
// Copyright (c) 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !arm64 || !gc || purego
// +build !arm64 !gc purego
package field
func (v *Element) carryPropagate() *Element {
return v.carryPropagateGeneric()
}

View file

@ -0,0 +1,264 @@
// Copyright (c) 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package field
import "math/bits"
// uint128 holds a 128-bit number as two 64-bit limbs, for use with the
// bits.Mul64 and bits.Add64 intrinsics.
type uint128 struct {
lo, hi uint64
}
// mul64 returns a * b.
func mul64(a, b uint64) uint128 {
hi, lo := bits.Mul64(a, b)
return uint128{lo, hi}
}
// addMul64 returns v + a * b.
func addMul64(v uint128, a, b uint64) uint128 {
hi, lo := bits.Mul64(a, b)
lo, c := bits.Add64(lo, v.lo, 0)
hi, _ = bits.Add64(hi, v.hi, c)
return uint128{lo, hi}
}
// shiftRightBy51 returns a >> 51. a is assumed to be at most 115 bits.
func shiftRightBy51(a uint128) uint64 {
return (a.hi << (64 - 51)) | (a.lo >> 51)
}
func feMulGeneric(v, a, b *Element) {
a0 := a.l0
a1 := a.l1
a2 := a.l2
a3 := a.l3
a4 := a.l4
b0 := b.l0
b1 := b.l1
b2 := b.l2
b3 := b.l3
b4 := b.l4
// Limb multiplication works like pen-and-paper columnar multiplication, but
// with 51-bit limbs instead of digits.
//
// a4 a3 a2 a1 a0 x
// b4 b3 b2 b1 b0 =
// ------------------------
// a4b0 a3b0 a2b0 a1b0 a0b0 +
// a4b1 a3b1 a2b1 a1b1 a0b1 +
// a4b2 a3b2 a2b2 a1b2 a0b2 +
// a4b3 a3b3 a2b3 a1b3 a0b3 +
// a4b4 a3b4 a2b4 a1b4 a0b4 =
// ----------------------------------------------
// r8 r7 r6 r5 r4 r3 r2 r1 r0
//
// We can then use the reduction identity (a * 2²⁵⁵ + b = a * 19 + b) to
// reduce the limbs that would overflow 255 bits. r5 * 2²⁵⁵ becomes 19 * r5,
// r6 * 2³⁰⁶ becomes 19 * r6 * 2⁵¹, etc.
//
// Reduction can be carried out simultaneously to multiplication. For
// example, we do not compute r5: whenever the result of a multiplication
// belongs to r5, like a1b4, we multiply it by 19 and add the result to r0.
//
// a4b0 a3b0 a2b0 a1b0 a0b0 +
// a3b1 a2b1 a1b1 a0b1 19×a4b1 +
// a2b2 a1b2 a0b2 19×a4b2 19×a3b2 +
// a1b3 a0b3 19×a4b3 19×a3b3 19×a2b3 +
// a0b4 19×a4b4 19×a3b4 19×a2b4 19×a1b4 =
// --------------------------------------
// r4 r3 r2 r1 r0
//
// Finally we add up the columns into wide, overlapping limbs.
a1_19 := a1 * 19
a2_19 := a2 * 19
a3_19 := a3 * 19
a4_19 := a4 * 19
// r0 = a0×b0 + 19×(a1×b4 + a2×b3 + a3×b2 + a4×b1)
r0 := mul64(a0, b0)
r0 = addMul64(r0, a1_19, b4)
r0 = addMul64(r0, a2_19, b3)
r0 = addMul64(r0, a3_19, b2)
r0 = addMul64(r0, a4_19, b1)
// r1 = a0×b1 + a1×b0 + 19×(a2×b4 + a3×b3 + a4×b2)
r1 := mul64(a0, b1)
r1 = addMul64(r1, a1, b0)
r1 = addMul64(r1, a2_19, b4)
r1 = addMul64(r1, a3_19, b3)
r1 = addMul64(r1, a4_19, b2)
// r2 = a0×b2 + a1×b1 + a2×b0 + 19×(a3×b4 + a4×b3)
r2 := mul64(a0, b2)
r2 = addMul64(r2, a1, b1)
r2 = addMul64(r2, a2, b0)
r2 = addMul64(r2, a3_19, b4)
r2 = addMul64(r2, a4_19, b3)
// r3 = a0×b3 + a1×b2 + a2×b1 + a3×b0 + 19×a4×b4
r3 := mul64(a0, b3)
r3 = addMul64(r3, a1, b2)
r3 = addMul64(r3, a2, b1)
r3 = addMul64(r3, a3, b0)
r3 = addMul64(r3, a4_19, b4)
// r4 = a0×b4 + a1×b3 + a2×b2 + a3×b1 + a4×b0
r4 := mul64(a0, b4)
r4 = addMul64(r4, a1, b3)
r4 = addMul64(r4, a2, b2)
r4 = addMul64(r4, a3, b1)
r4 = addMul64(r4, a4, b0)
// After the multiplication, we need to reduce (carry) the five coefficients
// to obtain a result with limbs that are at most slightly larger than 2⁵¹,
// to respect the Element invariant.
//
// Overall, the reduction works the same as carryPropagate, except with
// wider inputs: we take the carry for each coefficient by shifting it right
// by 51, and add it to the limb above it. The top carry is multiplied by 19
// according to the reduction identity and added to the lowest limb.
//
// The largest coefficient (r0) will be at most 111 bits, which guarantees
// that all carries are at most 111 - 51 = 60 bits, which fits in a uint64.
//
// r0 = a0×b0 + 19×(a1×b4 + a2×b3 + a3×b2 + a4×b1)
// r0 < 2⁵²×2⁵² + 19×(2⁵²×2⁵² + 2⁵²×2⁵² + 2⁵²×2⁵² + 2⁵²×2⁵²)
// r0 < (1 + 19 × 4) × 2⁵² × 2⁵²
// r0 < 2⁷ × 2⁵² × 2⁵²
// r0 < 2¹¹¹
//
// Moreover, the top coefficient (r4) is at most 107 bits, so c4 is at most
// 56 bits, and c4 * 19 is at most 61 bits, which again fits in a uint64 and
// allows us to easily apply the reduction identity.
//
// r4 = a0×b4 + a1×b3 + a2×b2 + a3×b1 + a4×b0
// r4 < 5 × 2⁵² × 2⁵²
// r4 < 2¹⁰⁷
//
c0 := shiftRightBy51(r0)
c1 := shiftRightBy51(r1)
c2 := shiftRightBy51(r2)
c3 := shiftRightBy51(r3)
c4 := shiftRightBy51(r4)
rr0 := r0.lo&maskLow51Bits + c4*19
rr1 := r1.lo&maskLow51Bits + c0
rr2 := r2.lo&maskLow51Bits + c1
rr3 := r3.lo&maskLow51Bits + c2
rr4 := r4.lo&maskLow51Bits + c3
// Now all coefficients fit into 64-bit registers but are still too large to
// be passed around as a Element. We therefore do one last carry chain,
// where the carries will be small enough to fit in the wiggle room above 2⁵¹.
*v = Element{rr0, rr1, rr2, rr3, rr4}
v.carryPropagate()
}
func feSquareGeneric(v, a *Element) {
l0 := a.l0
l1 := a.l1
l2 := a.l2
l3 := a.l3
l4 := a.l4
// Squaring works precisely like multiplication above, but thanks to its
// symmetry we get to group a few terms together.
//
// l4 l3 l2 l1 l0 x
// l4 l3 l2 l1 l0 =
// ------------------------
// l4l0 l3l0 l2l0 l1l0 l0l0 +
// l4l1 l3l1 l2l1 l1l1 l0l1 +
// l4l2 l3l2 l2l2 l1l2 l0l2 +
// l4l3 l3l3 l2l3 l1l3 l0l3 +
// l4l4 l3l4 l2l4 l1l4 l0l4 =
// ----------------------------------------------
// r8 r7 r6 r5 r4 r3 r2 r1 r0
//
// l4l0 l3l0 l2l0 l1l0 l0l0 +
// l3l1 l2l1 l1l1 l0l1 19×l4l1 +
// l2l2 l1l2 l0l2 19×l4l2 19×l3l2 +
// l1l3 l0l3 19×l4l3 19×l3l3 19×l2l3 +
// l0l4 19×l4l4 19×l3l4 19×l2l4 19×l1l4 =
// --------------------------------------
// r4 r3 r2 r1 r0
//
// With precomputed 2×, 19×, and 2×19× terms, we can compute each limb with
// only three Mul64 and four Add64, instead of five and eight.
l0_2 := l0 * 2
l1_2 := l1 * 2
l1_38 := l1 * 38
l2_38 := l2 * 38
l3_38 := l3 * 38
l3_19 := l3 * 19
l4_19 := l4 * 19
// r0 = l0×l0 + 19×(l1×l4 + l2×l3 + l3×l2 + l4×l1) = l0×l0 + 19×2×(l1×l4 + l2×l3)
r0 := mul64(l0, l0)
r0 = addMul64(r0, l1_38, l4)
r0 = addMul64(r0, l2_38, l3)
// r1 = l0×l1 + l1×l0 + 19×(l2×l4 + l3×l3 + l4×l2) = 2×l0×l1 + 19×2×l2×l4 + 19×l3×l3
r1 := mul64(l0_2, l1)
r1 = addMul64(r1, l2_38, l4)
r1 = addMul64(r1, l3_19, l3)
// r2 = l0×l2 + l1×l1 + l2×l0 + 19×(l3×l4 + l4×l3) = 2×l0×l2 + l1×l1 + 19×2×l3×l4
r2 := mul64(l0_2, l2)
r2 = addMul64(r2, l1, l1)
r2 = addMul64(r2, l3_38, l4)
// r3 = l0×l3 + l1×l2 + l2×l1 + l3×l0 + 19×l4×l4 = 2×l0×l3 + 2×l1×l2 + 19×l4×l4
r3 := mul64(l0_2, l3)
r3 = addMul64(r3, l1_2, l2)
r3 = addMul64(r3, l4_19, l4)
// r4 = l0×l4 + l1×l3 + l2×l2 + l3×l1 + l4×l0 = 2×l0×l4 + 2×l1×l3 + l2×l2
r4 := mul64(l0_2, l4)
r4 = addMul64(r4, l1_2, l3)
r4 = addMul64(r4, l2, l2)
c0 := shiftRightBy51(r0)
c1 := shiftRightBy51(r1)
c2 := shiftRightBy51(r2)
c3 := shiftRightBy51(r3)
c4 := shiftRightBy51(r4)
rr0 := r0.lo&maskLow51Bits + c4*19
rr1 := r1.lo&maskLow51Bits + c0
rr2 := r2.lo&maskLow51Bits + c1
rr3 := r3.lo&maskLow51Bits + c2
rr4 := r4.lo&maskLow51Bits + c3
*v = Element{rr0, rr1, rr2, rr3, rr4}
v.carryPropagate()
}
// carryPropagateGeneric brings the limbs below 52 bits by applying the reduction
// identity (a * 2²⁵⁵ + b = a * 19 + b) to the l4 carry. TODO inline
func (v *Element) carryPropagateGeneric() *Element {
c0 := v.l0 >> 51
c1 := v.l1 >> 51
c2 := v.l2 >> 51
c3 := v.l3 >> 51
c4 := v.l4 >> 51
v.l0 = v.l0&maskLow51Bits + c4*19
v.l1 = v.l1&maskLow51Bits + c0
v.l2 = v.l2&maskLow51Bits + c1
v.l3 = v.l3&maskLow51Bits + c2
v.l4 = v.l4&maskLow51Bits + c3
return v
}

View file

@ -0,0 +1 @@
b0c49ae9f59d233526f8934262c5bbbe14d4358d

View file

@ -0,0 +1,19 @@
#! /bin/bash
set -euo pipefail
cd "$(git rev-parse --show-toplevel)"
STD_PATH=src/crypto/ed25519/internal/edwards25519/field
LOCAL_PATH=curve25519/internal/field
LAST_SYNC_REF=$(cat $LOCAL_PATH/sync.checkpoint)
git fetch https://go.googlesource.com/go master
if git diff --quiet $LAST_SYNC_REF:$STD_PATH FETCH_HEAD:$STD_PATH; then
echo "No changes."
else
NEW_REF=$(git rev-parse FETCH_HEAD | tee $LOCAL_PATH/sync.checkpoint)
echo "Applying changes from $LAST_SYNC_REF to $NEW_REF..."
git diff $LAST_SYNC_REF:$STD_PATH FETCH_HEAD:$STD_PATH | \
git apply -3 --directory=$LOCAL_PATH
fi