[dev.boringcrypto] all: merge master into dev.boringcrypto

Change-Id: If0a6a3d0abf15d9584ce572510b5bb31872d432f
This commit is contained in:
Heschi Kreinick 2021-11-08 14:46:41 -05:00
commit c9858c7bdc
90 changed files with 12935 additions and 3333 deletions

View File

@ -31,19 +31,30 @@ Do not send CLs removing the interior tags from such phrases.
<h2 id="ports">Ports</h2>
<p id="freebsd">
<h3 id="freebsd">FreeBSD</h3>
<p>
Go 1.18 is the last release that is supported on FreeBSD 11.x, which has
already reached end-of-life. Go 1.19 will require FreeBSD 12.2+ or FreeBSD
13.0+.
FreeBSD 13.0+ will require a kernel with the COMPAT_FREEBSD12 option set (this is the default).
</p>
<h2 id="tools">Tools</h2>
<h3 id="ppc64">PPC64</h3>
<p>
TODO: complete this section, or delete if not needed
<p><!-- CL 353969 -->
TODO: <a href="https://golang.org/cl/353969">https://golang.org/cl/353969</a>: internal/buildcfg: enable register ABI for PPC64
</p>
<h3 id="riscv">RISC-V</h3>
<p><!-- golang.org/issue/47100, CL 334872 -->
The 64-bit RISC-V architecture on Linux (the <code>linux/riscv64</code> port)
now supports the <code>c-archive</code> and <code>c-shared</code> build modes.
</p>
<h2 id="tools">Tools</h2>
<h3 id="go-command">Go command</h3>
<p><!-- golang.org/issue/43684 -->
@ -103,8 +114,8 @@ Do not send CLs removing the interior tags from such phrases.
<code>go</code> <code>mod</code> <code>download</code> <code>all</code>.
</p>
<p>
TODO: complete this section, or delete if not needed
<p><!-- CL 349595 -->
TODO: <a href="https://golang.org/cl/349595">https://golang.org/cl/349595</a>: cmd/go: add GOAMD64 environment variable
</p>
<h3 id="gofmt"><code>gofmt</code></h3>
@ -115,7 +126,6 @@ Do not send CLs removing the interior tags from such phrases.
multiple CPUs, <code>gofmt</code> should now be significantly faster.
</p>
<h2 id="runtime">Runtime</h2>
<p>
@ -124,24 +134,30 @@ Do not send CLs removing the interior tags from such phrases.
<h2 id="compiler">Compiler</h2>
<p>
TODO: complete this section, or delete if not needed
<p><!-- CL 298611 -->
TODO: <a href="https://golang.org/cl/298611">https://golang.org/cl/298611</a>: cmd/compile: add -asan option
</p>
<p><!-- CL 352057 -->
TODO: <a href="https://golang.org/cl/352057">https://golang.org/cl/352057</a>: cmd/compile, runtime: track argument stack slot liveness
</p>
<h2 id="linker">Linker</h2>
<p>
TODO: complete this section, or delete if not needed
<p><!-- CL 298610 -->
TODO: <a href="https://golang.org/cl/298610">https://golang.org/cl/298610</a>: cmd/link: add -asan option
</p>
<h2 id="library">Core library</h2>
<h3>TODO</h3>
<p>
TODO: complete this section
<h3 id="constraints">New <code>constraints</code> package</h3>
<p><!-- CL 349709 -->
TODO: <a href="https://golang.org/cl/349709">https://golang.org/cl/349709</a>: constraints: new package
</p>
<h3 id="netip">New <code>net/netip</code> package</h3>
<p>
The new <a href="/pkg/net/netip/"><code>net/netip</code></a>
package defines a new IP address type, <a href="/pkg/net/netip/#Addr"><code>Addr</code></a>.
@ -163,6 +179,12 @@ Do not send CLs removing the interior tags from such phrases.
<code>*net.UDPAddr</code> values.
</p>
<h3>TODO</h3>
<p>
TODO: complete this section
</p>
<h3 id="minor_library_changes">Minor changes to the library</h3>
<p>
@ -175,6 +197,26 @@ Do not send CLs removing the interior tags from such phrases.
TODO: complete this section
</p>
<dl id="bufio"><dt><a href="/pkg/bufio/">bufio</a></dt>
<dd>
<p><!-- CL 345569 -->
TODO: <a href="https://golang.org/cl/345569">https://golang.org/cl/345569</a>: add Writer.AvailableBuffer
</p>
<p><!-- CL 345570 -->
TODO: <a href="https://golang.org/cl/345570">https://golang.org/cl/345570</a>: make Reader.Reset and Writer.Reset work on the zero value
</p>
</dd>
</dl><!-- bufio -->
<dl id="crypto/tls"><dt><a href="/pkg/crypto/tls/">crypto/tls</a></dt>
<dd>
<p><!-- CL 325250 -->
TODO: <a href="https://golang.org/cl/325250">https://golang.org/cl/325250</a>: add Conn.NetConn method
</p>
</dd>
</dl><!-- crypto/tls -->
<dl id="debug/buildinfo"><dt><a href="/pkg/debug/buildinfo">debug/buildinfo</a></dt>
<dd>
<p><!-- golang.org/issue/39301 -->
@ -201,9 +243,33 @@ Do not send CLs removing the interior tags from such phrases.
</dd>
</dl><!-- image/draw -->
<dl id="net"><dt><a href="/pkg/net/">net</a></dt>
<dd>
<p><!-- CL 340261 -->
TODO: <a href="https://golang.org/cl/340261">https://golang.org/cl/340261</a>: deprecate (net.Error).Temporary
</p>
</dd>
</dl><!-- net -->
<dl id="net/http"><dt><a href="/pkg/net/http/">net/http</a></dt>
<dd>
<p><!-- CL 338590 -->
TODO: <a href="https://golang.org/cl/338590">https://golang.org/cl/338590</a>: add Cookie.Valid method
</p>
</dd>
</dl><!-- net/http -->
<dl id="os/user"><dt><a href="/pkg/os/user/">os/user</a></dt>
<dd>
<p><!-- CL 330753 -->
TODO: <a href="https://golang.org/cl/330753">https://golang.org/cl/330753</a>: implement go native GroupIds
</p>
</dd>
</dl><!-- os/user -->
<dl id="reflect"><dt><a href="/pkg/reflect/">reflect</a></dt>
<dd>
<p><!-- CL 356049, 320929 -->
<p><!-- CL 356049, CL 320929 -->
The new
<a href="/pkg/reflect/#Value.SetIterKey"><code>Value.SetIterKey</code></a>
and <a href="/pkg/reflect/#Value.SetIterValue"><code>Value.SetIterValue</code></a>
@ -211,8 +277,7 @@ Do not send CLs removing the interior tags from such phrases.
<code>Value.Set(iter.Key())</code> and <code>Value.Set(iter.Value())</code> but
do fewer allocations.
</p>
</dd>
<dd>
<p><!-- CL 350691 -->
The new
<a href="/pkg/reflect/#Value.UnsafePointer"><code>Value.UnsafePointer</code></a>
@ -221,9 +286,69 @@ Do not send CLs removing the interior tags from such phrases.
and <a href="/pkg/reflect/#Value.Pointer"><code>Value.Pointer</code></a>
to eliminate the need to perform uintptr to unsafe.Pointer conversions at the callsite (as unsafe.Pointer rules require).
</p>
<p><!-- CL 321889 -->
TODO: <a href="https://golang.org/cl/321889">https://golang.org/cl/321889</a>: allocate hiter as part of MapIter
</p>
<p><!-- CL 321891 -->
TODO: <a href="https://golang.org/cl/321891">https://golang.org/cl/321891</a>: add MapIter.Reset
</p>
<p><!-- CL 345486 -->
TODO: <a href="https://golang.org/cl/345486">https://golang.org/cl/345486</a>: optimize for maps with string keys
</p>
<p><!-- CL 352131 -->
TODO: <a href="https://golang.org/cl/352131">https://golang.org/cl/352131</a>: add Value.{CanInt, CanUint, CanFloat, CanComplex}
</p>
<p><!-- CL 357962 -->
TODO: <a href="https://golang.org/cl/357962">https://golang.org/cl/357962</a>: add FieldByIndexErr
</p>
</dd>
</dl><!-- reflect -->
<dl id="regexp"><dt><a href="/pkg/regexp/">regexp</a></dt>
<dd>
<p><!-- CL 354569 -->
TODO: <a href="https://golang.org/cl/354569">https://golang.org/cl/354569</a>: document and implement that invalid UTF-8 bytes are the same as U+FFFD
</p>
</dd>
</dl><!-- regexp -->
<dl id="strconv"><dt><a href="/pkg/strconv/">strconv</a></dt>
<dd>
<p><!-- CL 343877 -->
TODO: <a href="https://golang.org/cl/343877">https://golang.org/cl/343877</a>: reject surrogate halves in Unquote
</p>
</dd>
</dl><!-- strconv -->
<dl id="strings"><dt><a href="/pkg/strings/">strings</a></dt>
<dd>
<p><!-- CL 345849 -->
TODO: <a href="https://golang.org/cl/345849">https://golang.org/cl/345849</a>: add Clone function
</p>
</dd>
</dl><!-- strings -->
<dl id="strings,bytes"><dt><a href="/pkg/strings,bytes/">strings,bytes</a></dt>
<dd>
<p><!-- CL 332771 -->
TODO: <a href="https://golang.org/cl/332771">https://golang.org/cl/332771</a>: avoid allocations in Trim/TrimLeft/TrimRight
</p>
</dd>
</dl><!-- strings,bytes -->
<dl id="sync"><dt><a href="/pkg/sync/">sync</a></dt>
<dd>
<p><!-- CL 319769 -->
TODO: <a href="https://golang.org/cl/319769">https://golang.org/cl/319769</a>: add Mutex.TryLock, RWMutex.TryLock, RWMutex.TryRLock
</p>
</dd>
</dl><!-- sync -->
<dl id="syscall"><dt><a href="/pkg/syscall/">syscall</a></dt>
<dd>
<p><!-- CL 336550 -->
@ -238,5 +363,45 @@ Do not send CLs removing the interior tags from such phrases.
<a href="/pkg/syscall/?GOOS=windows#Syscall18"><code>Syscall18</code></a> are
deprecated in favor of <a href="/pkg/syscall/?GOOS=windows#SyscallN"><code>SyscallN</code></a>.
</p>
<p><!-- CL 355570 -->
TODO: <a href="https://golang.org/cl/355570">https://golang.org/cl/355570</a>: add support for SysProcAttr.Pdeathsig on FreeBSD
</p>
</dd>
</dl><!-- syscall -->
<dl id="syscall/js"><dt><a href="/pkg/syscall/js/">syscall/js</a></dt>
<dd>
<p><!-- CL 356430 -->
TODO: <a href="https://golang.org/cl/356430">https://golang.org/cl/356430</a>: remove Wrapper interface
</p>
</dd>
</dl><!-- syscall/js -->
<dl id="testing"><dt><a href="/pkg/testing/">testing</a></dt>
<dd>
<p><!-- CL 343883 -->
TODO: <a href="https://golang.org/cl/343883">https://golang.org/cl/343883</a>: increase alternation precedence
</p>
<p><!-- CL 356669 -->
TODO: <a href="https://golang.org/cl/356669">https://golang.org/cl/356669</a>: skip extra -count iterations if there are no tests
</p>
</dd>
</dl><!-- testing -->
<dl id="text/template"><dt><a href="/pkg/text/template/">text/template</a></dt>
<dd>
<p><!-- CL 321490 -->
TODO: <a href="https://golang.org/cl/321490">https://golang.org/cl/321490</a>: implement short-circuit and, or
</p>
</dd>
</dl><!-- text/template -->
<dl id="unicode/utf8"><dt><a href="/pkg/unicode/utf8/">unicode/utf8</a></dt>
<dd>
<p><!-- CL 345571 -->
TODO: <a href="https://golang.org/cl/345571">https://golang.org/cl/345571</a>: add AppendRune
</p>
</dd>
</dl><!-- unicode/utf8 -->

View File

@ -1520,7 +1520,7 @@ func TestReaderDiscard(t *testing.T) {
wantBuffered: 0,
},
// Any error from filling shouldn't show up until we
// get past the valid bytes. Here we return we return 5 valid bytes at the same time
// get past the valid bytes. Here we return 5 valid bytes at the same time
// as an error, but test that we don't see the error from Discard.
{
name: "fill error, discard less",

View File

@ -746,7 +746,8 @@ func isSeparator(r rune) bool {
// Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin
// words mapped to their title case.
//
// BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
// Deprecated: The rule Title uses for word boundaries does not handle Unicode
// punctuation properly. Use golang.org/x/text/cases instead.
func Title(s []byte) []byte {
// Use a closure here to remember state.
// Hackish but effective. Depends on Map scanning in order and calling

View File

@ -37,6 +37,16 @@ func ExampleBuffer_Bytes() {
// Output: hello world
}
func ExampleBuffer_Cap() {
	// One buffer is seeded with a 10-byte slice, the other with an empty
	// slice whose capacity is 10; both report the same Cap.
	filled := bytes.NewBuffer(make([]byte, 10))
	empty := bytes.NewBuffer(make([]byte, 0, 10))
	for _, buf := range []*bytes.Buffer{filled, empty} {
		fmt.Println(buf.Cap())
	}
	// Output:
	// 10
	// 10
}
func ExampleBuffer_Grow() {
var b bytes.Buffer
b.Grow(64)
@ -67,6 +77,39 @@ func ExampleBuffer_Next() {
// e
}
// ExampleBuffer_Read reads a single byte from the front of a buffer holding
// "abcde" and prints the byte count, the remaining contents and the byte read.
func ExampleBuffer_Read() {
	var b bytes.Buffer
	b.Grow(64)
	b.Write([]byte("abcde"))
	rdbuf := make([]byte, 1)
	n, err := b.Read(rdbuf)
	if err != nil {
		panic(err)
	}
	fmt.Println(n)
	fmt.Println(b.String())
	fmt.Println(string(rdbuf))
	// The marker below must be spelled "Output:" (with the colon); without
	// it the example is compiled but never executed or verified by go test.

	// Output:
	// 1
	// bcde
	// a
}
// ExampleBuffer_ReadByte reads one byte from the front of a buffer holding
// "abcde" and prints that byte's numeric value followed by the remaining
// contents.
func ExampleBuffer_ReadByte() {
	var b bytes.Buffer
	b.Grow(64)
	b.Write([]byte("abcde"))
	c, err := b.ReadByte()
	if err != nil {
		panic(err)
	}
	fmt.Println(c)
	fmt.Println(b.String())
	// The marker below must be spelled "Output:" (with the colon); without
	// it the example is compiled but never executed or verified by go test.

	// Output:
	// 97
	// bcde
}
func ExampleCompare() {
// Interpret Compare's result by comparing it to zero.
var a, b []byte

View File

@ -309,7 +309,7 @@ func (v *hairyVisitor) doNode(n ir.Node) bool {
break
}
if fn := inlCallee(n.X); fn != nil && fn.Inl != nil {
if fn := inlCallee(n.X); fn != nil && typecheck.HaveInlineBody(fn) {
v.budget -= fn.Inl.Cost
break
}
@ -585,7 +585,7 @@ func inlnode(n ir.Node, maxCost int32, inlMap map[*ir.Func]bool, edit func(ir.No
if ir.IsIntrinsicCall(call) {
break
}
if fn := inlCallee(call.X); fn != nil && fn.Inl != nil {
if fn := inlCallee(call.X); fn != nil && typecheck.HaveInlineBody(fn) {
n = mkinlcall(call, fn, maxCost, inlMap, edit)
}
}

View File

@ -954,11 +954,11 @@ func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value,
elt := t.Elem()
if source.Type != t && t.NumElem() == 1 && elt.Size() == t.Size() && t.Size() == x.regSize {
t = removeTrivialWrapperTypes(t)
source.Type = t
// it could be a leaf type, but the "leaf" could be complex64 (for example)
return x.storeArgOrLoad(pos, b, source, mem, t, storeOffset, loadRegOffset, storeRc)
}
eltRO := x.regWidth(elt)
source.Type = t
for i := int64(0); i < t.NumElem(); i++ {
sel := source.Block.NewValue1I(pos, OpArraySelect, elt, i, source)
mem = x.storeArgOrLoad(pos, b, sel, mem, elt, storeOffset+i*elt.Size(), loadRegOffset, storeRc.at(t, 0))
@ -988,11 +988,11 @@ func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value,
// v139 is later stored as an intVal == struct{val *big.Int} which naively requires the fields of
// a *uint8, which does not succeed.
t = removeTrivialWrapperTypes(t)
source.Type = t
// it could be a leaf type, but the "leaf" could be complex64 (for example)
return x.storeArgOrLoad(pos, b, source, mem, t, storeOffset, loadRegOffset, storeRc)
}
source.Type = t
for i := 0; i < t.NumFields(); i++ {
fld := t.Field(i)
sel := source.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), source)

View File

@ -89,6 +89,9 @@ func TestStmtLines(t *testing.T) {
if pkgname == "runtime" {
continue
}
if pkgname == "crypto/elliptic/internal/fiat" {
continue // golang.org/issue/49372
}
if e.Val(dwarf.AttrStmtList) == nil {
continue
}

View File

@ -207,7 +207,7 @@ func (p *crawler) markInlBody(n *ir.Name) {
if fn == nil {
base.Fatalf("markInlBody: missing Func on %v", n)
}
if fn.Inl == nil {
if !HaveInlineBody(fn) {
return
}

View File

@ -81,6 +81,27 @@ func ImportBody(fn *ir.Func) {
inimport = false
}
// HaveInlineBody reports whether an inline body for fn is available, so
// that calls to fn can be considered for inlining.
func HaveInlineBody(fn *ir.Func) bool {
	switch {
	case fn.Inl == nil:
		// No inlining information was recorded for fn at all.
		return false
	case base.Debug.Unified != 0:
		// Unified IR is much more conservative about pruning unreachable
		// methods (at the cost of increased build artifact size).
		return true
	case fn.Inl.Body != nil:
		// The body is already present on fn.
		return true
	}

	// Otherwise the body counts as available only if inlineImporter has an
	// entry for fn's symbol.
	_, found := inlineImporter[fn.Nname.Sym()]
	return found
}
func importReaderFor(sym *types.Sym, importers map[*types.Sym]iimporterAndOffset) *importReader {
x, ok := importers[sym]
if !ok {

View File

@ -1013,7 +1013,7 @@ func (t *tester) internalLink() bool {
func (t *tester) internalLinkPIE() bool {
switch goos + "-" + goarch {
case "darwin-amd64", "darwin-arm64",
"linux-amd64", "linux-arm64",
"linux-amd64", "linux-arm64", "linux-ppc64le",
"android-arm64",
"windows-amd64", "windows-386", "windows-arm":
return true

View File

@ -968,7 +968,7 @@ func makeMainModules(ms []module.Version, rootDirs []string, modFiles []*modfile
for _, r := range modFiles[i].Replace {
if replacedByWorkFile[r.Old.Path] {
continue
} else if prev, ok := replacements[r.Old]; ok && !curModuleReplaces[r.Old] {
} else if prev, ok := replacements[r.Old]; ok && !curModuleReplaces[r.Old] && prev != r.New {
base.Fatalf("go: conflicting replacements for %v:\n\t%v\n\t%v\nuse \"go mod editwork -replace %v=[override]\" to resolve", r.Old, prev, r.New, r.Old)
}
curModuleReplaces[r.Old] = true

View File

@ -378,7 +378,7 @@ func canonicalizeReplacePath(r module.Version, modRoot string) module.Version {
return r
}
abs := filepath.Join(modRoot, r.Path)
if rel, err := filepath.Rel(workFilePath, abs); err == nil {
if rel, err := filepath.Rel(filepath.Dir(workFilePath), abs); err == nil {
return module.Version{Path: rel, Version: r.Version}
}
// We couldn't make the version's path relative to the workspace's path,

View File

@ -58,8 +58,7 @@ go build -n -ldflags=-X=math.pi=3
stderr 'link.* -X=math.pi=3'
# -ldflags applies to current directory even if GOPATH is funny
[windows] cd $WORK/GoPath/src/my/cmd/prog
[darwin] cd $WORK/GoPath/src/my/cmd/prog
[!case-sensitive] cd $WORK/GoPath/src/my/cmd/prog
go build -n -ldflags=-X=math.pi=3
stderr 'link.* -X=math.pi=3'

View File

@ -158,7 +158,7 @@ func BuildModeSupported(compiler, buildmode, goos, goarch string) bool {
func InternalLinkPIESupported(goos, goarch string) bool {
switch goos + "/" + goarch {
case "darwin/amd64", "darwin/arm64",
"linux/amd64", "linux/arm64",
"linux/amd64", "linux/arm64", "linux/ppc64le",
"android/arm64",
"windows-amd64", "windows-386", "windows-arm":
return true

View File

@ -225,7 +225,8 @@ func mustLinkExternal(ctxt *Link) (res bool, reason string) {
return true, "buildmode=c-shared"
case BuildModePIE:
switch buildcfg.GOOS + "/" + buildcfg.GOARCH {
case "linux/amd64", "linux/arm64", "android/arm64":
case "android/arm64":
case "linux/amd64", "linux/arm64", "linux/ppc64le":
case "windows/386", "windows/amd64", "windows/arm", "windows/arm64":
case "darwin/amd64", "darwin/arm64":
default:

View File

@ -227,6 +227,8 @@ func (st *relocSymState) relocsym(s loader.Sym, P []byte) {
// DWARF info between the compiler and linker.
continue
}
} else if target.IsPPC64() && target.IsPIE() && ldr.SymName(rs) == ".TOC." {
// This is a TOC relative relocation generated from a go object. It is safe to resolve.
} else {
st.err.errorUnresolved(ldr, s, rs)
continue

View File

@ -321,6 +321,11 @@ func addelfdynrel(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, s lo
rela.AddUint64(target.Arch, elf.R_INFO(uint32(ldr.SymDynid(targ)), uint32(elf.R_PPC64_ADDR64)))
rela.AddUint64(target.Arch, uint64(r.Add()))
su.SetRelocType(rIdx, objabi.ElfRelocOffset) // ignore during relocsym
} else if target.IsPIE() && target.IsInternal() {
// For internal linking PIE, this R_ADDR relocation cannot
// be resolved statically. We need to generate a dynamic
// relocation. Let the code below handle it.
break
}
return true
@ -383,12 +388,94 @@ func addelfdynrel(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, s lo
}
// Handle references to ELF symbols from our own object files.
if targType != sym.SDYNIMPORT {
relocs := ldr.Relocs(s)
r = relocs.At(rIdx)
switch r.Type() {
case objabi.R_ADDR:
if ldr.SymType(s) == sym.STEXT {
log.Fatalf("R_ADDR relocation in text symbol %s is unsupported\n", ldr.SymName(s))
}
if target.IsPIE() && target.IsInternal() {
// When internally linking, generate dynamic relocations
// for all typical R_ADDR relocations. The exceptions
// are those R_ADDR relocations that are created as part of generating
// the dynamic relocations and must be resolved statically.
//
// There are three phases relevant to understanding this:
//
// dodata() // we are here
// address() // symbol address assignment
// reloc() // resolution of static R_ADDR relocs
//
// At this point symbol addresses have not been
// assigned yet (as the final size of the .rela section
// will affect the addresses), and so we cannot write
// the Elf64_Rela.r_offset now. Instead we delay it
// until after the 'address' phase of the linker is
// complete. We do this via Addaddrplus, which creates
// a new R_ADDR relocation which will be resolved in
// the 'reloc' phase.
//
// These synthetic static R_ADDR relocs must be skipped
// now, or else we will be caught in an infinite loop
// of generating synthetic relocs for our synthetic
// relocs.
//
// Furthermore, the rela sections contain dynamic
// relocations with R_ADDR relocations on
// Elf64_Rela.r_offset. This field should contain the
// symbol offset as determined by reloc(), not the
// final dynamically linked address as a dynamic
// relocation would provide.
switch ldr.SymName(s) {
case ".dynsym", ".rela", ".rela.plt", ".got.plt", ".dynamic":
return false
}
} else {
// Either internally linking a static executable,
// in which case we can resolve these relocations
// statically in the 'reloc' phase, or externally
// linking, in which case the relocation will be
// prepared in the 'reloc' phase and passed to the
// external linker in the 'asmb' phase.
if ldr.SymType(s) != sym.SDATA && ldr.SymType(s) != sym.SRODATA {
break
}
}
// Generate R_PPC64_RELATIVE relocations for best
// efficiency in the dynamic linker.
//
// As noted above, symbol addresses have not been
// assigned yet, so we can't generate the final reloc
// entry yet. We ultimately want:
//
// r_offset = s + r.Off
// r_info = R_PPC64_RELATIVE
// r_addend = targ + r.Add
//
// The dynamic linker will set *offset = base address +
// addend.
//
// AddAddrPlus is used for r_offset and r_addend to
// generate new R_ADDR relocations that will update
// these fields in the 'reloc' phase.
rela := ldr.MakeSymbolUpdater(syms.Rela)
rela.AddAddrPlus(target.Arch, s, int64(r.Off()))
if r.Siz() == 8 {
rela.AddUint64(target.Arch, elf.R_INFO(0, uint32(elf.R_PPC64_RELATIVE)))
} else {
ldr.Errorf(s, "unexpected relocation for dynamic symbol %s", ldr.SymName(targ))
}
rela.AddAddrPlus(target.Arch, targ, int64(r.Add()))
// Not mark r done here. So we still apply it statically,
// so in the file content we'll also have the right offset
// to the relocation target. So it can be examined statically
// (e.g. go version).
return true
}
// TODO(austin): Translate our relocations to ELF
return false
}
@ -542,35 +629,40 @@ func symtoc(ldr *loader.Loader, syms *ld.ArchSyms, s loader.Sym) int64 {
}
// archreloctoc relocates a TOC relative symbol.
// If the symbol pointed by this TOC relative symbol is in .data or .bss, the
// default load instruction can be changed to an addi instruction and the
// symbol address can be used directly.
// This code is for AIX only.
func archreloctoc(ldr *loader.Loader, target *ld.Target, syms *ld.ArchSyms, r loader.Reloc, s loader.Sym, val int64) int64 {
rs := r.Sym()
if target.IsLinux() {
ldr.Errorf(s, "archrelocaddr called for %s relocation\n", ldr.SymName(rs))
}
var o1, o2 uint32
o1 = uint32(val >> 32)
o2 = uint32(val)
if !strings.HasPrefix(ldr.SymName(rs), "TOC.") {
ldr.Errorf(s, "archreloctoc called for a symbol without TOC anchor")
}
var t int64
useAddi := false
relocs := ldr.Relocs(rs)
tarSym := relocs.At(0).Sym()
if target.IsInternal() && tarSym != 0 && ldr.AttrReachable(tarSym) && ldr.SymSect(tarSym).Seg == &ld.Segdata {
t = ldr.SymValue(tarSym) + r.Add() - ldr.SymValue(syms.TOC)
// change ld to addi in the second instruction
o2 = (o2 & 0x03FF0000) | 0xE<<26
useAddi = true
if target.IsBigEndian() {
o1 = uint32(val >> 32)
o2 = uint32(val)
} else {
t = ldr.SymValue(rs) + r.Add() - ldr.SymValue(syms.TOC)
o1 = uint32(val)
o2 = uint32(val >> 32)
}
// On AIX, TOC data accesses are always made indirectly against R2 (a sequence of addis+ld+load/store). If
// the target of the load is known, the sequence can be rewritten as addis+addi+load/store. On Linux,
// TOC data accesses are always made directly against R2 (e.g. addis+load/store).
if target.IsAIX() {
if !strings.HasPrefix(ldr.SymName(rs), "TOC.") {
ldr.Errorf(s, "archreloctoc called for a symbol without TOC anchor")
}
relocs := ldr.Relocs(rs)
tarSym := relocs.At(0).Sym()
if target.IsInternal() && tarSym != 0 && ldr.AttrReachable(tarSym) && ldr.SymSect(tarSym).Seg == &ld.Segdata {
t = ldr.SymValue(tarSym) + r.Add() - ldr.SymValue(syms.TOC)
// change ld to addi in the second instruction
o2 = (o2 & 0x03FF0000) | 0xE<<26
useAddi = true
} else {
t = ldr.SymValue(rs) + r.Add() - ldr.SymValue(syms.TOC)
}
} else {
t = ldr.SymValue(rs) + r.Add() - symtoc(ldr, syms, s)
}
if t != int64(int32(t)) {
@ -593,15 +685,20 @@ func archreloctoc(ldr *loader.Loader, target *ld.Target, syms *ld.ArchSyms, r lo
}
o2 |= uint32(t) & 0xFFFC
}
case objabi.R_ADDRPOWER_TOCREL:
o2 |= uint32(t) & 0xffff
default:
return -1
}
return int64(o1)<<32 | int64(o2)
if target.IsBigEndian() {
return int64(o1)<<32 | int64(o2)
}
return int64(o2)<<32 | int64(o1)
}
// archrelocaddr relocates a symbol address.
// This code is for AIX only.
// This code is for linux only.
func archrelocaddr(ldr *loader.Loader, target *ld.Target, syms *ld.ArchSyms, r loader.Reloc, s loader.Sym, val int64) int64 {
rs := r.Sym()
if target.IsAIX() {
@ -860,6 +957,18 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
t := ldr.SymValue(rs) + r.Add() - (ldr.SymValue(s) + int64(r.Off()))
tgtName := ldr.SymName(rs)
// If we are linking PIE or shared code, all golang generated object files have an extra 2 instruction prologue
// to regenerate the TOC pointer from R12. The exceptions are two special-case functions tested below. Note,
// local call offsets for externally generated objects are accounted for when converting into golang relocs.
if !ldr.IsExternal(rs) && ldr.AttrShared(rs) && tgtName != "runtime.duffzero" && tgtName != "runtime.duffcopy" {
// Furthermore, only apply the offset if the target looks like the start of a function call.
if r.Add() == 0 && ldr.SymType(rs) == sym.STEXT {
t += 8
}
}
if t&3 != 0 {
ldr.Errorf(s, "relocation for %s+%d is not aligned: %d", ldr.SymName(rs), r.Off(), t)
}
@ -872,6 +981,62 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
case objabi.R_POWER_TOC: // S + A - .TOC.
return ldr.SymValue(rs) + r.Add() - symtoc(ldr, syms, s), nExtReloc, true
case objabi.R_ADDRPOWER_PCREL: // S + A - P
t := ldr.SymValue(rs) + r.Add() - (ldr.SymValue(s) + int64(r.Off()))
ha := uint16(((t + 0x8000) >> 16) & 0xFFFF)
l := uint16(t)
if target.IsBigEndian() {
val |= int64(l)
val |= int64(ha) << 32
} else {
val |= int64(ha)
val |= int64(l) << 32
}
return val, nExtReloc, true
case objabi.R_POWER_TLS:
const OP_ADD = 31<<26 | 266<<1
const MASK_OP_ADD = 0x3F<<26 | 0x1FF<<1
if val&MASK_OP_ADD != OP_ADD {
ldr.Errorf(s, "R_POWER_TLS reloc only supports XO form ADD, not %08X", val)
}
// Verify RB is R13 in ADD RA,RB,RT.
if (val>>11)&0x1F != 13 {
// If external linking is made to support this, it may expect the linker to rewrite RB.
ldr.Errorf(s, "R_POWER_TLS reloc requires R13 in RB (%08X).", uint32(val))
}
return val, nExtReloc, true
case objabi.R_POWER_TLS_IE:
// Convert TLS_IE relocation to TLS_LE if supported.
if !(target.IsPIE() && target.IsElf()) {
log.Fatalf("cannot handle R_POWER_TLS_IE (sym %s) when linking non-PIE, non-ELF binaries internally", ldr.SymName(s))
}
// We are an ELF binary, we can safely convert to TLS_LE from:
// addis to, r2, x@got@tprel@ha
// ld to, to, x@got@tprel@l(to)
//
// to TLS_LE by converting to:
// addis to, r0, x@tprel@ha
// addi to, to, x@tprel@l(to)
const OP_ADDI = 14 << 26
const OP_MASK = 0x3F << 26
const OP_RA_MASK = 0x1F << 16
uval := uint64(val)
// convert r2 to r0, and ld to addi
if target.IsBigEndian() {
uval = uval &^ (OP_RA_MASK << 32)
uval = (uval &^ OP_MASK) | OP_ADDI
} else {
uval = uval &^ (OP_RA_MASK)
uval = (uval &^ (OP_MASK << 32)) | (OP_ADDI << 32)
}
val = int64(uval)
// Treat this like an R_POWER_TLS_LE relocation now.
fallthrough
case objabi.R_POWER_TLS_LE:
// The thread pointer points 0x7000 bytes after the start of the
// thread local storage area as documented in section "3.7.2 TLS

View File

@ -13,8 +13,8 @@
// Original code can be found at the link below:
// https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl
// I changed some function names in order to be more likely to go standards.
// For instance, function aes_p8_set_{en,de}crypt_key become
// Some function names were changed to be consistent with Go function
// names. For instance, function aes_p8_set_{en,de}crypt_key become
// set{En,De}cryptKeyAsm. I also split setEncryptKeyAsm in two parts
// and a new session was created (doEncryptKeyAsm). This was necessary to
// avoid arguments overwriting when setDecryptKeyAsm calls setEncryptKeyAsm.
@ -50,452 +50,451 @@
#define BLK_ROUNDS R6
#define BLK_IDX R7
DATA ·rcon+0x00(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x08(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x10(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x18(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x20(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x28(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x30(SB)/8, $0x0000000000000000
DATA ·rcon+0x38(SB)/8, $0x0000000000000000
DATA ·rcon+0x00(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x08(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x10(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x18(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x20(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x28(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x30(SB)/8, $0x0000000000000000
DATA ·rcon+0x38(SB)/8, $0x0000000000000000
GLOBL ·rcon(SB), RODATA, $64
// func setEncryptKeyAsm(key *byte, keylen int, enc *uint32) int
TEXT ·setEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
TEXT ·setEncryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
// Load the arguments inside the registers
MOVD key+0(FP), INP
MOVD keylen+8(FP), BITS
MOVD enc+16(FP), OUT
JMP ·doEncryptKeyAsm(SB)
MOVD key+0(FP), INP
MOVD keylen+8(FP), BITS
MOVD enc+16(FP), OUT
JMP ·doEncryptKeyAsm(SB)
// This text is used by both setEncryptKeyAsm and setDecryptKeyAsm
TEXT ·doEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
TEXT ·doEncryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
// Do not change R10 since it's storing the LR value in setDecryptKeyAsm
// Check arguments
MOVD $-1, PTR // li 6,-1 exit code to -1 (255)
CMPU INP, $0 // cmpldi r3,0 input key pointer set?
BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
CMPU OUT, $0 // cmpldi r5,0 output key pointer set?
BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
MOVD $-2, PTR // li 6,-2 exit code to -2 (254)
CMPW BITS, $128 // cmpwi 4,128 greater or equal to 128
BC 0x0E, 0, enc_key_abort // blt- .Lenc_key_abort
CMPW BITS, $256 // cmpwi 4,256 lesser or equal to 256
BC 0x0E, 1, enc_key_abort // bgt- .Lenc_key_abort
ANDCC $0x3f, BITS, TEMP // andi. 0,4,0x3f multiple of 64
BC 0x06, 2, enc_key_abort // bne- .Lenc_key_abort
MOVD $-1, PTR // li 6,-1 exit code to -1 (255)
CMPU INP, $0 // cmpldi r3,0 input key pointer set?
BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
CMPU OUT, $0 // cmpldi r5,0 output key pointer set?
BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
MOVD $-2, PTR // li 6,-2 exit code to -2 (254)
CMPW BITS, $128 // cmpwi 4,128 greater or equal to 128
BC 0x0E, 0, enc_key_abort // blt- .Lenc_key_abort
CMPW BITS, $256 // cmpwi 4,256 lesser or equal to 256
BC 0x0E, 1, enc_key_abort // bgt- .Lenc_key_abort
ANDCC $0x3f, BITS, TEMP // andi. 0,4,0x3f multiple of 64
BC 0x06, 2, enc_key_abort // bne- .Lenc_key_abort
MOVD $·rcon(SB), PTR // PTR point to rcon addr
MOVD $·rcon(SB), PTR // PTR point to rcon addr
// Get key from memory and write aligned into VR
NEG INP, R9 // neg 9,3 R9 is ~INP + 1
LVX (INP)(R0), IN0 // lvx 1,0,3 Load key inside IN0
ADD $15, INP, INP // addi 3,3,15 Add 15B to INP addr
LVSR (R9)(R0), KEY // lvsr 3,0,9
MOVD $0x20, R8 // li 8,0x20 R8 = 32
CMPW BITS, $192 // cmpwi 4,192 Key size == 192?
LVX (INP)(R0), IN1 // lvx 2,0,3
VSPLTISB $0x0f, MASK // vspltisb 5,0x0f 0x0f0f0f0f... mask
LVX (PTR)(R0), RCON // lvx 4,0,6 Load first 16 bytes into RCON
VXOR KEY, MASK, KEY // vxor 3,3,5 Adjust for byte swap
LVX (PTR)(R8), MASK // lvx 5,8,6
ADD $0x10, PTR, PTR // addi 6,6,0x10 PTR to next 16 bytes of RCON
VPERM IN0, IN1, KEY, IN0 // vperm 1,1,2,3 Align
MOVD $8, CNT // li 7,8 CNT = 8
VXOR ZERO, ZERO, ZERO // vxor 0,0,0 Zero to be zero :)
MOVD CNT, CTR // mtctr 7 Set the counter to 8 (rounds)
NEG INP, R9 // neg 9,3 R9 is ~INP + 1
LVX (INP)(R0), IN0 // lvx 1,0,3 Load key inside IN0
ADD $15, INP, INP // addi 3,3,15 Add 15B to INP addr
LVSR (R9)(R0), KEY // lvsr 3,0,9
MOVD $0x20, R8 // li 8,0x20 R8 = 32
CMPW BITS, $192 // cmpwi 4,192 Key size == 192?
LVX (INP)(R0), IN1 // lvx 2,0,3
VSPLTISB $0x0f, MASK// vspltisb 5,0x0f 0x0f0f0f0f... mask
LVX (PTR)(R0), RCON // lvx 4,0,6 Load first 16 bytes into RCON
VXOR KEY, MASK, KEY // vxor 3,3,5 Adjust for byte swap
LVX (PTR)(R8), MASK // lvx 5,8,6
ADD $0x10, PTR, PTR // addi 6,6,0x10 PTR to next 16 bytes of RCON
VPERM IN0, IN1, KEY, IN0 // vperm 1,1,2,3 Align
MOVD $8, CNT // li 7,8 CNT = 8
VXOR ZERO, ZERO, ZERO // vxor 0,0,0 Zero to be zero :)
MOVD CNT, CTR // mtctr 7 Set the counter to 8 (rounds)
LVSL (OUT)(R0), OUTPERM // lvsl 8,0,5
VSPLTISB $-1, OUTMASK // vspltisb 9,-1
LVX (OUT)(R0), OUTHEAD // lvx 10,0,5
VPERM OUTMASK, ZERO, OUTPERM, OUTMASK // vperm 9,9,0,8
LVSL (OUT)(R0), OUTPERM // lvsl 8,0,5
VSPLTISB $-1, OUTMASK // vspltisb 9,-1
LVX (OUT)(R0), OUTHEAD // lvx 10,0,5
VPERM OUTMASK, ZERO, OUTPERM, OUTMASK // vperm 9,9,0,8
BLT loop128 // blt .Loop128
ADD $8, INP, INP // addi 3,3,8
BEQ l192 // beq .L192
ADD $8, INP, INP // addi 3,3,8
JMP l256 // b .L256
BLT loop128 // blt .Loop128
ADD $8, INP, INP // addi 3,3,8
BEQ l192 // beq .L192
ADD $8, INP, INP // addi 3,3,8
JMP l256 // b .L256
loop128:
// Key schedule (Round 1 to 8)
VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
STVX STAGE, (OUT+R0) // stvx 7,0,5 Write to output
ADD $16, OUT, OUT // addi 5,5,16 Point to the next round
VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
STVX STAGE, (OUT+R0) // stvx 7,0,5 Write to output
ADD $16, OUT, OUT // addi 5,5,16 Point to the next round
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
VXOR IN0, KEY, IN0 // vxor 1,1,3
BC 0x10, 0, loop128 // bdnz .Loop128
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
VXOR IN0, KEY, IN0 // vxor 1,1,3
BC 0x10, 0, loop128 // bdnz .Loop128
LVX (PTR)(R0), RCON // lvx 4,0,6 Last two round keys
LVX (PTR)(R0), RCON // lvx 4,0,6 Last two round keys
// Key schedule (Round 9)
VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 9
ADD $16, OUT, OUT // addi 5,5,16
VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 9
ADD $16, OUT, OUT // addi 5,5,16
// Key schedule (Round 10)
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
VXOR IN0, KEY, IN0 // vxor 1,1,3
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
VXOR IN0, KEY, IN0 // vxor 1,1,3
VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 10
ADD $16, OUT, OUT // addi 5,5,16
VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 10
ADD $16, OUT, OUT // addi 5,5,16
// Key schedule (Round 11)
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VXOR IN0, KEY, IN0 // vxor 1,1,3
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 11
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VXOR IN0, KEY, IN0 // vxor 1,1,3
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 11
ADD $15, OUT, INP // addi 3,5,15
ADD $0x50, OUT, OUT // addi 5,5,0x50
ADD $15, OUT, INP // addi 3,5,15
ADD $0x50, OUT, OUT // addi 5,5,0x50
MOVD $10, ROUNDS // li 8,10
JMP done // b .Ldone
MOVD $10, ROUNDS // li 8,10
JMP done // b .Ldone
l192:
LVX (INP)(R0), TMP // lvx 6,0,3
MOVD $4, CNT // li 7,4
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $16, OUT, OUT // addi 5,5,16
VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3
VSPLTISB $8, KEY // vspltisb 3,8
MOVD CNT, CTR // mtctr 7
VSUBUBM MASK, KEY, MASK // vsububm 5,5,3
LVX (INP)(R0), TMP // lvx 6,0,3
MOVD $4, CNT // li 7,4
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $16, OUT, OUT // addi 5,5,16
VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3
VSPLTISB $8, KEY // vspltisb 3,8
MOVD CNT, CTR // mtctr 7
VSUBUBM MASK, KEY, MASK // vsububm 5,5,3
loop192:
VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $8, ZERO, IN1, STAGE // vsldoi 7,0,2,8
VSPLTW $3, IN0, TMP // vspltw 6,1,3
VXOR TMP, IN1, TMP // vxor 6,6,2
VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
VXOR IN1, TMP, IN1 // vxor 2,2,6
VXOR IN0, KEY, IN0 // vxor 1,1,3
VXOR IN1, KEY, IN1 // vxor 2,2,3
VSLDOI $8, STAGE, IN0, STAGE // vsldoi 7,7,1,8
VSLDOI $8, ZERO, IN1, STAGE // vsldoi 7,0,2,8
VSPLTW $3, IN0, TMP // vspltw 6,1,3
VXOR TMP, IN1, TMP // vxor 6,6,2
VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
VXOR IN1, TMP, IN1 // vxor 2,2,6
VXOR IN0, KEY, IN0 // vxor 1,1,3
VXOR IN1, KEY, IN1 // vxor 2,2,3
VSLDOI $8, STAGE, IN0, STAGE // vsldoi 7,7,1,8
VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $16, OUT, OUT // addi 5,5,16
VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $16, OUT, OUT // addi 5,5,16
VSLDOI $8, IN0, IN1, STAGE // vsldoi 7,1,2,8
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $16, OUT, OUT // addi 5,5,16
VSLDOI $8, IN0, IN1, STAGE // vsldoi 7,1,2,8
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $16, OUT, OUT // addi 5,5,16
VSPLTW $3, IN0, TMP // vspltw 6,1,3
VXOR TMP, IN1, TMP // vxor 6,6,2
VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
VXOR IN1, TMP, IN1 // vxor 2,2,6
VXOR IN0, KEY, IN0 // vxor 1,1,3
VXOR IN1, KEY, IN1 // vxor 2,2,3
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $15, OUT, INP // addi 3,5,15
ADD $16, OUT, OUT // addi 5,5,16
BC 0x10, 0, loop192 // bdnz .Loop192
VSPLTW $3, IN0, TMP // vspltw 6,1,3
VXOR TMP, IN1, TMP // vxor 6,6,2
VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
VXOR IN1, TMP, IN1 // vxor 2,2,6
VXOR IN0, KEY, IN0 // vxor 1,1,3
VXOR IN1, KEY, IN1 // vxor 2,2,3
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $15, OUT, INP // addi 3,5,15
ADD $16, OUT, OUT // addi 5,5,16
BC 0x10, 0, loop192 // bdnz .Loop192
MOVD $12, ROUNDS // li 8,12
ADD $0x20, OUT, OUT // addi 5,5,0x20
JMP done // b .Ldone
MOVD $12, ROUNDS // li 8,12
ADD $0x20, OUT, OUT // addi 5,5,0x20
BR done // b .Ldone
l256:
LVX (INP)(R0), TMP // lvx 6,0,3
MOVD $7, CNT // li 7,7
MOVD $14, ROUNDS // li 8,14
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $16, OUT, OUT // addi 5,5,16
VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3
MOVD CNT, CTR // mtctr 7
LVX (INP)(R0), TMP // lvx 6,0,3
MOVD $7, CNT // li 7,7
MOVD $14, ROUNDS // li 8,14
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $16, OUT, OUT // addi 5,5,16
VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3
MOVD CNT, CTR // mtctr 7
loop256:
VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VPERM IN1, IN1, OUTPERM, OUTTAIL // vperm 11,2,2,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $16, OUT, OUT // addi 5,5,16
VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
VPERM IN1, IN1, OUTPERM, OUTTAIL // vperm 11,2,2,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $16, OUT, OUT // addi 5,5,16
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
VXOR IN0, KEY, IN0 // vxor 1,1,3
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $15, OUT, INP // addi 3,5,15
ADD $16, OUT, OUT // addi 5,5,16
BC 0x12, 0, done // bdz .Ldone
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN0, TMP, IN0 // vxor 1,1,6
VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
VXOR IN0, KEY, IN0 // vxor 1,1,3
VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
STVX STAGE, (OUT+R0) // stvx 7,0,5
ADD $15, OUT, INP // addi 3,5,15
ADD $16, OUT, OUT // addi 5,5,16
BC 0x12, 0, done // bdz .Ldone
VSPLTW $3, IN0, KEY // vspltw 3,1,3
VSLDOI $12, ZERO, IN1, TMP // vsldoi 6,0,2,12
VSBOX KEY, KEY // vsbox 3,3
VSPLTW $3, IN0, KEY // vspltw 3,1,3
VSLDOI $12, ZERO, IN1, TMP // vsldoi 6,0,2,12
VSBOX KEY, KEY // vsbox 3,3
VXOR IN1, TMP, IN1 // vxor 2,2,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN1, TMP, IN1 // vxor 2,2,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN1, TMP, IN1 // vxor 2,2,6
VXOR IN1, TMP, IN1 // vxor 2,2,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN1, TMP, IN1 // vxor 2,2,6
VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
VXOR IN1, TMP, IN1 // vxor 2,2,6
VXOR IN1, KEY, IN1 // vxor 2,2,3
JMP loop256 // b .Loop256
VXOR IN1, KEY, IN1 // vxor 2,2,3
JMP loop256 // b .Loop256
done:
LVX (INP)(R0), IN1 // lvx 2,0,3
VSEL OUTHEAD, IN1, OUTMASK, IN1 // vsel 2,10,2,9
STVX IN1, (INP+R0) // stvx 2,0,3
MOVD $0, PTR // li 6,0 set PTR to 0 (exit code 0)
MOVW ROUNDS, 0(OUT) // stw 8,0(5)
LVX (INP)(R0), IN1 // lvx 2,0,3
VSEL OUTHEAD, IN1, OUTMASK, IN1 // vsel 2,10,2,9
STVX IN1, (INP+R0) // stvx 2,0,3
MOVD $0, PTR // li 6,0 set PTR to 0 (exit code 0)
MOVW ROUNDS, 0(OUT) // stw 8,0(5)
enc_key_abort:
MOVD PTR, INP // mr 3,6 set exit code with PTR value
MOVD INP, ret+24(FP) // Put return value into the FP
RET // blr
MOVD PTR, INP // mr 3,6 set exit code with PTR value
MOVD INP, ret+24(FP) // Put return value into the FP
RET // blr
// func setDecryptKeyAsm(key *byte, keylen int, dec *uint32) int
TEXT ·setDecryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
TEXT ·setDecryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
// Load the arguments inside the registers
MOVD key+0(FP), INP
MOVD keylen+8(FP), BITS
MOVD dec+16(FP), OUT
MOVD key+0(FP), INP
MOVD keylen+8(FP), BITS
MOVD dec+16(FP), OUT
MOVD LR, R10 // mflr 10
CALL ·doEncryptKeyAsm(SB)
MOVD R10, LR // mtlr 10
MOVD LR, R10 // mflr 10
CALL ·doEncryptKeyAsm(SB)
MOVD R10, LR // mtlr 10
CMPW INP, $0 // cmpwi 3,0 exit 0 = ok
BC 0x06, 2, dec_key_abort // bne- .Ldec_key_abort
CMPW INP, $0 // cmpwi 3,0 exit 0 = ok
BC 0x06, 2, dec_key_abort // bne- .Ldec_key_abort
// doEncryptKeyAsm set ROUNDS (R8) with the proper value for each mode
SLW $4, ROUNDS, CNT // slwi 7,8,4
SUB $240, OUT, INP // subi 3,5,240
SRW $1, ROUNDS, ROUNDS // srwi 8,8,1
ADD R7, INP, OUT // add 5,3,7
MOVD ROUNDS, CTR // mtctr 8
SLW $4, ROUNDS, CNT // slwi 7,8,4
SUB $240, OUT, INP // subi 3,5,240
SRW $1, ROUNDS, ROUNDS // srwi 8,8,1
ADD R7, INP, OUT // add 5,3,7
MOVD ROUNDS, CTR // mtctr 8
// dec_key will invert the key sequence in order to be used for decrypt
// dec_key will invert the key sequence in order to be used for decrypt
dec_key:
MOVWZ 0(INP), TEMP // lwz 0, 0(3)
MOVWZ 4(INP), R6 // lwz 6, 4(3)
MOVWZ 8(INP), R7 // lwz 7, 8(3)
MOVWZ 12(INP), R8 // lwz 8, 12(3)
ADD $16, INP, INP // addi 3,3,16
MOVWZ 0(OUT), R9 // lwz 9, 0(5)
MOVWZ 4(OUT), R10 // lwz 10,4(5)
MOVWZ 8(OUT), R11 // lwz 11,8(5)
MOVWZ 12(OUT), R12 // lwz 12,12(5)
MOVW TEMP, 0(OUT) // stw 0, 0(5)
MOVW R6, 4(OUT) // stw 6, 4(5)
MOVW R7, 8(OUT) // stw 7, 8(5)
MOVW R8, 12(OUT) // stw 8, 12(5)
SUB $16, OUT, OUT // subi 5,5,16
MOVW R9, -16(INP) // stw 9, -16(3)
MOVW R10, -12(INP) // stw 10,-12(3)
MOVW R11, -8(INP) // stw 11,-8(3)
MOVW R12, -4(INP) // stw 12,-4(3)
BC 0x10, 0, dec_key // bdnz .Ldeckey
MOVWZ 0(INP), TEMP // lwz 0, 0(3)
MOVWZ 4(INP), R6 // lwz 6, 4(3)
MOVWZ 8(INP), R7 // lwz 7, 8(3)
MOVWZ 12(INP), R8 // lwz 8, 12(3)
ADD $16, INP, INP // addi 3,3,16
MOVWZ 0(OUT), R9 // lwz 9, 0(5)
MOVWZ 4(OUT), R10 // lwz 10,4(5)
MOVWZ 8(OUT), R11 // lwz 11,8(5)
MOVWZ 12(OUT), R12 // lwz 12,12(5)
MOVW TEMP, 0(OUT) // stw 0, 0(5)
MOVW R6, 4(OUT) // stw 6, 4(5)
MOVW R7, 8(OUT) // stw 7, 8(5)
MOVW R8, 12(OUT) // stw 8, 12(5)
SUB $16, OUT, OUT // subi 5,5,16
MOVW R9, -16(INP) // stw 9, -16(3)
MOVW R10, -12(INP) // stw 10,-12(3)
MOVW R11, -8(INP) // stw 11,-8(3)
MOVW R12, -4(INP) // stw 12,-4(3)
BC 0x10, 0, dec_key // bdnz .Ldeckey
XOR R3, R3, R3 // xor 3,3,3 Clean R3
XOR R3, R3, R3 // xor 3,3,3 Clean R3
dec_key_abort:
MOVD R3, ret+24(FP) // Put return value into the FP
RET // blr
MOVD R3, ret+24(FP) // Put return value into the FP
RET // blr
// func encryptBlockAsm(dst, src *byte, enc *uint32)
TEXT ·encryptBlockAsm(SB),NOSPLIT|NOFRAME,$0
TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
// Load the arguments inside the registers
MOVD dst+0(FP), BLK_OUT
MOVD src+8(FP), BLK_INP
MOVD enc+16(FP), BLK_KEY
MOVD dst+0(FP), BLK_OUT
MOVD src+8(FP), BLK_INP
MOVD enc+16(FP), BLK_KEY
MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)
MOVD $15, BLK_IDX // li 7,15
MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)
MOVD $15, BLK_IDX // li 7,15
LVX (BLK_INP)(R0), ZERO // lvx 0,0,3
NEG BLK_OUT, R11 // neg 11,4
LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3
LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3
VSPLTISB $0x0f, RCON // vspltisb 4,0x0f
LVSR (R11)(R0), KEY // lvsr 3,0,11
VXOR IN1, RCON, IN1 // vxor 2,2,4
MOVD $16, BLK_IDX // li 7,16
VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2
LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5
LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5
SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
LVX (BLK_INP)(R0), ZERO // lvx 0,0,3
NEG BLK_OUT, R11 // neg 11,4
LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3
LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3
VSPLTISB $0x0f, RCON // vspltisb 4,0x0f
LVSR (R11)(R0), KEY // lvsr 3,0,11
VXOR IN1, RCON, IN1 // vxor 2,2,4
MOVD $16, BLK_IDX // li 7,16
VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2
LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5
LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5
SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VXOR ZERO, IN0, ZERO // vxor 0,0,1
LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
MOVD BLK_ROUNDS, CTR // mtctr 6
VXOR ZERO, IN0, ZERO // vxor 0,0,1
LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
MOVD BLK_ROUNDS, CTR // mtctr 6
loop_enc:
VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VCIPHER ZERO, IN0, ZERO // vcipher 0,0,1
LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
BC 0x10, 0, loop_enc // bdnz .Loop_enc
VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VCIPHER ZERO, IN0, ZERO // vcipher 0,0,1
LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
BC 0x10, 0, loop_enc // bdnz .Loop_enc
VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VCIPHERLAST ZERO, IN0, ZERO // vcipherlast 0,0,1
VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VCIPHERLAST ZERO, IN0, ZERO // vcipherlast 0,0,1
VSPLTISB $-1, IN1 // vspltisb 2,-1
VXOR IN0, IN0, IN0 // vxor 1,1,1
MOVD $15, BLK_IDX // li 7,15
VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3
VXOR KEY, RCON, KEY // vxor 3,3,4
LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4
VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3
VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2
LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4
STVX IN0, (BLK_OUT+R0) // stvx 1,0,4
VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2
STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4
RET // blr
VSPLTISB $-1, IN1 // vspltisb 2,-1
VXOR IN0, IN0, IN0 // vxor 1,1,1
MOVD $15, BLK_IDX // li 7,15
VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3
VXOR KEY, RCON, KEY // vxor 3,3,4
LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4
VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3
VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2
LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4
STVX IN0, (BLK_OUT+R0) // stvx 1,0,4
VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2
STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4
RET // blr
// func decryptBlockAsm(dst, src *byte, dec *uint32)
TEXT ·decryptBlockAsm(SB),NOSPLIT|NOFRAME,$0
TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
// Load the arguments inside the registers
MOVD dst+0(FP), BLK_OUT
MOVD src+8(FP), BLK_INP
MOVD dec+16(FP), BLK_KEY
MOVD dst+0(FP), BLK_OUT
MOVD src+8(FP), BLK_INP
MOVD dec+16(FP), BLK_KEY
MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)
MOVD $15, BLK_IDX // li 7,15
MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)
MOVD $15, BLK_IDX // li 7,15
LVX (BLK_INP)(R0), ZERO // lvx 0,0,3
NEG BLK_OUT, R11 // neg 11,4
LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3
LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3
VSPLTISB $0x0f, RCON // vspltisb 4,0x0f
LVSR (R11)(R0), KEY // lvsr 3,0,11
VXOR IN1, RCON, IN1 // vxor 2,2,4
MOVD $16, BLK_IDX // li 7,16
VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2
LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5
LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5
SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
LVX (BLK_INP)(R0), ZERO // lvx 0,0,3
NEG BLK_OUT, R11 // neg 11,4
LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3
LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3
VSPLTISB $0x0f, RCON // vspltisb 4,0x0f
LVSR (R11)(R0), KEY // lvsr 3,0,11
VXOR IN1, RCON, IN1 // vxor 2,2,4
MOVD $16, BLK_IDX // li 7,16
VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2
LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5
LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5
SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VXOR ZERO, IN0, ZERO // vxor 0,0,1
LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
MOVD BLK_ROUNDS, CTR // mtctr 6
VXOR ZERO, IN0, ZERO // vxor 0,0,1
LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
MOVD BLK_ROUNDS, CTR // mtctr 6
loop_dec:
VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VNCIPHER ZERO, IN0, ZERO // vncipher 0,0,1
LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
BC 0x10, 0, loop_dec // bdnz .Loop_dec
VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VNCIPHER ZERO, IN0, ZERO // vncipher 0,0,1
LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
BC 0x10, 0, loop_dec // bdnz .Loop_dec
VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VNCIPHERLAST ZERO, IN0, ZERO // vncipherlast 0,0,1
VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2
LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
VNCIPHERLAST ZERO, IN0, ZERO // vncipherlast 0,0,1
VSPLTISB $-1, IN1 // vspltisb 2,-1
VXOR IN0, IN0, IN0 // vxor 1,1,1
MOVD $15, BLK_IDX // li 7,15
VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3
VXOR KEY, RCON, KEY // vxor 3,3,4
LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4
VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3
VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2
LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4
STVX IN0, (BLK_OUT+R0) // stvx 1,0,4
VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2
STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4
VSPLTISB $-1, IN1 // vspltisb 2,-1
VXOR IN0, IN0, IN0 // vxor 1,1,1
MOVD $15, BLK_IDX // li 7,15
VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3
VXOR KEY, RCON, KEY // vxor 3,3,4
LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4
VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3
VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2
LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4
STVX IN0, (BLK_OUT+R0) // stvx 1,0,4
VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2
STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4
RET // blr
RET // blr

View File

@ -85,7 +85,7 @@ func (curve *CurveParams) polynomial(x *big.Int) *big.Int {
func (curve *CurveParams) IsOnCurve(x, y *big.Int) bool {
// If there is a dedicated constant-time implementation for this curve operation,
// use that instead of the generic one.
if specific, ok := matchesSpecificCurve(curve, p224, p521); ok {
if specific, ok := matchesSpecificCurve(curve, p224, p384, p521); ok {
return specific.IsOnCurve(x, y)
}
@ -128,7 +128,7 @@ func (curve *CurveParams) affineFromJacobian(x, y, z *big.Int) (xOut, yOut *big.
func (curve *CurveParams) Add(x1, y1, x2, y2 *big.Int) (*big.Int, *big.Int) {
// If there is a dedicated constant-time implementation for this curve operation,
// use that instead of the generic one.
if specific, ok := matchesSpecificCurve(curve, p224, p521); ok {
if specific, ok := matchesSpecificCurve(curve, p224, p384, p521); ok {
return specific.Add(x1, y1, x2, y2)
}
@ -218,7 +218,7 @@ func (curve *CurveParams) addJacobian(x1, y1, z1, x2, y2, z2 *big.Int) (*big.Int
func (curve *CurveParams) Double(x1, y1 *big.Int) (*big.Int, *big.Int) {
// If there is a dedicated constant-time implementation for this curve operation,
// use that instead of the generic one.
if specific, ok := matchesSpecificCurve(curve, p224, p521); ok {
if specific, ok := matchesSpecificCurve(curve, p224, p384, p521); ok {
return specific.Double(x1, y1)
}
@ -290,7 +290,7 @@ func (curve *CurveParams) doubleJacobian(x, y, z *big.Int) (*big.Int, *big.Int,
func (curve *CurveParams) ScalarMult(Bx, By *big.Int, k []byte) (*big.Int, *big.Int) {
// If there is a dedicated constant-time implementation for this curve operation,
// use that instead of the generic one.
if specific, ok := matchesSpecificCurve(curve, p224, p256, p521); ok {
if specific, ok := matchesSpecificCurve(curve, p224, p256, p384, p521); ok {
return specific.ScalarMult(Bx, By, k)
}
@ -313,7 +313,7 @@ func (curve *CurveParams) ScalarMult(Bx, By *big.Int, k []byte) (*big.Int, *big.
func (curve *CurveParams) ScalarBaseMult(k []byte) (*big.Int, *big.Int) {
// If there is a dedicated constant-time implementation for this curve operation,
// use that instead of the generic one.
if specific, ok := matchesSpecificCurve(curve, p224, p256, p521); ok {
if specific, ok := matchesSpecificCurve(curve, p224, p256, p384, p521); ok {
return specific.ScalarBaseMult(k)
}
@ -431,7 +431,6 @@ func UnmarshalCompressed(curve Curve, data []byte) (x, y *big.Int) {
}
var initonce sync.Once
var p384 *CurveParams
func initAll() {
initP224()
@ -440,15 +439,16 @@ func initAll() {
initP521()
}
func initP384() {
// See FIPS 186-3, section D.2.4
p384 = &CurveParams{Name: "P-384"}
p384.P, _ = new(big.Int).SetString("39402006196394479212279040100143613805079739270465446667948293404245721771496870329047266088258938001861606973112319", 10)
p384.N, _ = new(big.Int).SetString("39402006196394479212279040100143613805079739270465446667946905279627659399113263569398956308152294913554433653942643", 10)
p384.B, _ = new(big.Int).SetString("b3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875ac656398d8a2ed19d2a85c8edd3ec2aef", 16)
p384.Gx, _ = new(big.Int).SetString("aa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a385502f25dbf55296c3a545e3872760ab7", 16)
p384.Gy, _ = new(big.Int).SetString("3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f", 16)
p384.BitSize = 384
// P224 returns a Curve which implements NIST P-224 (FIPS 186-3, section D.2.2),
// also known as secp224r1. The CurveParams.Name of this Curve is "P-224".
//
// Multiple invocations of this function will return the same value, so it can
// be used for equality checks and switch statements.
//
// The cryptographic operations are implemented using constant-time algorithms.
func P224() Curve {
initonce.Do(initAll)
return p224
}
// P256 returns a Curve which implements NIST P-256 (FIPS 186-3, section D.2.3),
@ -470,7 +470,7 @@ func P256() Curve {
// Multiple invocations of this function will return the same value, so it can
// be used for equality checks and switch statements.
//
// The cryptographic operations do not use constant-time algorithms.
// The cryptographic operations are implemented using constant-time algorithms.
func P384() Curve {
initonce.Do(initAll)
return p384

View File

@ -14,9 +14,8 @@ import (
// genericParamsForCurve returns the dereferenced CurveParams for
// the specified curve. This is used to avoid the logic for
// upgrading a curve to it's specific implementation, forcing
// usage of the generic implementation. This is only relevant
// for the P224, P256, and P521 curves.
// upgrading a curve to its specific implementation, forcing
// usage of the generic implementation.
func genericParamsForCurve(c Curve) *CurveParams {
d := *(c.Params())
return &d

View File

@ -4,9 +4,9 @@
FROM coqorg/coq:8.13.2
RUN git clone https://github.com/mit-plv/fiat-crypto
RUN cd fiat-crypto && git checkout c076f3550bea2bb7f4cb5766a32594b9e67694f2
RUN cd fiat-crypto && git submodule update --init --recursive
RUN git clone https://github.com/mit-plv/fiat-crypto && cd fiat-crypto && \
git checkout 23d2dbc4ab897d14bde4404f70cd6991635f9c01 && \
git submodule update --init --recursive
RUN cd fiat-crypto && eval $(opam env) && make -j4 standalone-ocaml SKIP_BEDROCK2=1
ENTRYPOINT ["fiat-crypto/src/ExtractionOCaml/unsaturated_solinas"]
ENV PATH /home/coq/fiat-crypto/src/ExtractionOCaml:$PATH

View File

@ -1,17 +1,12 @@
The code in this package was autogenerated by the fiat-crypto project
at commit c076f3550 from a formally verified model.
at version v0.0.9 from a formally verified model, and by the addchain
project at a recent tip version.
docker build -t fiat-crypto:c076f3550 .
docker run fiat-crypto:c076f3550 --lang Go --no-wide-int --cmovznz-by-mul \
--internal-static --public-function-case camelCase --public-type-case camelCase \
--private-function-case camelCase --private-type-case camelCase \
--no-prefix-fiat --package-name fiat --doc-text-before-function-name '' \
--doc-prepend-header 'Code generated by Fiat Cryptography. DO NOT EDIT.' \
--doc-newline-before-package-declaration p521 64 9 '2^521 - 1' \
carry_mul carry_square carry add sub to_bytes from_bytes selectznz \
> p521_fiat64.go
docker build -t fiat-crypto:v0.0.9 .
go install github.com/mmcloughlin/addchain/cmd/addchain@v0.3.1-0.20211027081849-6a7d3decbe08
../../../../../bin/go run generate.go
It comes under the following license.
fiat-crypto code comes under the following license.
Copyright (c) 2015-2020 The fiat-crypto Authors. All rights reserved.

View File

@ -0,0 +1,64 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package fiat_test
import (
"crypto/elliptic/internal/fiat"
"testing"
)
// BenchmarkMul times a single field multiplication per iteration for each
// curve's element type. Every sub-benchmark starts from the element one and
// keeps multiplying it by itself in place.
func BenchmarkMul(b *testing.B) {
	b.Run("P224", func(b *testing.B) {
		x := new(fiat.P224Element)
		x.One()
		b.ReportAllocs()
		b.ResetTimer()
		for n := b.N; n > 0; n-- {
			x.Mul(x, x)
		}
	})
	b.Run("P384", func(b *testing.B) {
		x := new(fiat.P384Element)
		x.One()
		b.ReportAllocs()
		b.ResetTimer()
		for n := b.N; n > 0; n-- {
			x.Mul(x, x)
		}
	})
	b.Run("P521", func(b *testing.B) {
		x := new(fiat.P521Element)
		x.One()
		b.ReportAllocs()
		b.ResetTimer()
		for n := b.N; n > 0; n-- {
			x.Mul(x, x)
		}
	})
}
// BenchmarkSquare times a single field squaring per iteration for each
// curve's element type. Every sub-benchmark starts from the element one and
// keeps squaring it in place.
func BenchmarkSquare(b *testing.B) {
	b.Run("P224", func(b *testing.B) {
		x := new(fiat.P224Element)
		x.One()
		b.ReportAllocs()
		b.ResetTimer()
		for n := b.N; n > 0; n-- {
			x.Square(x)
		}
	})
	b.Run("P384", func(b *testing.B) {
		x := new(fiat.P384Element)
		x.One()
		b.ReportAllocs()
		b.ResetTimer()
		for n := b.N; n > 0; n-- {
			x.Square(x)
		}
	})
	b.Run("P521", func(b *testing.B) {
		x := new(fiat.P521Element)
		x.One()
		b.ReportAllocs()
		b.ResetTimer()
		for n := b.N; n > 0; n-- {
			x.Square(x)
		}
	})
}

View File

@ -0,0 +1,330 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ignore
package main
import (
"bytes"
"go/format"
"io"
"log"
"os"
"os/exec"
"text/template"
)
// curveParams holds the per-field inputs of the generator. Its exported
// fields are read by name from the wrapper template.
type curveParams struct {
	Element  string // name of the generated Go element type
	Prime    string // field modulus, written as an arithmetic expression
	Prefix   string // prefix of generated identifiers and output file names
	FiatType string // Go type aliased as the untyped field element
	BytesLen int    // length in bytes of an encoded element
}

// curves lists every field for which main generates code.
var curves = []curveParams{
	{
		Element:  "P224Element",
		Prime:    "2^224 - 2^96 + 1",
		Prefix:   "p224",
		FiatType: "[4]uint64",
		BytesLen: 28,
	},
	// The 32-bit pure Go P-256 in crypto/elliptic is still faster than the
	// autogenerated code here, regrettably.
	// {
	// 	Element:  "P256Element",
	// 	Prime:    "2^256 - 2^224 + 2^192 + 2^96 - 1",
	// 	Prefix:   "p256",
	// 	FiatType: "[4]uint64",
	// 	BytesLen: 32,
	// },
	{
		Element:  "P384Element",
		Prime:    "2^384 - 2^128 - 2^96 + 2^32 - 1",
		Prefix:   "p384",
		FiatType: "[6]uint64",
		BytesLen: 48,
	},
	// Note that unsaturated_solinas would be about 2x faster than
	// word_by_word_montgomery for P-521, but this curve is used rarely enough
	// that it's not worth carrying unsaturated_solinas support for it.
	{
		Element:  "P521Element",
		Prime:    "2^521 - 1",
		Prefix:   "p521",
		FiatType: "[9]uint64",
		BytesLen: 66,
	},
}
// main regenerates the field implementation files for every entry in curves
// and exits with a non-zero status on the first error.
func main() {
	// All work happens in run so that deferred cleanups execute before
	// log.Fatal terminates the process: log.Fatal calls os.Exit, which does
	// not run deferred functions.
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run writes the addchain template to a temporary file and then, for each
// curve, generates <prefix>.go, <prefix>_fiat64.go and <prefix>_invert.go in
// the current directory. It stops at the first error.
func run() error {
	t := template.Must(template.New("montgomery").Parse(tmplWrapper))

	tmplAddchainFile, err := os.CreateTemp("", "addchain-template")
	if err != nil {
		return err
	}
	defer os.Remove(tmplAddchainFile.Name())
	if _, err := io.WriteString(tmplAddchainFile, tmplAddchain); err != nil {
		tmplAddchainFile.Close()
		return err
	}
	if err := tmplAddchainFile.Close(); err != nil {
		return err
	}

	for _, c := range curves {
		log.Printf("Generating %s.go...", c.Prefix)
		if err := generateWrapper(t, c.Prefix+".go", c); err != nil {
			return err
		}

		log.Printf("Generating %s_fiat64.go...", c.Prefix)
		if err := generateFiat(c.Prefix, c.Prime); err != nil {
			return err
		}

		log.Printf("Generating %s_invert.go...", c.Prefix)
		if err := generateInvert(c.Prefix, c.Prime, c.Element, tmplAddchainFile.Name()); err != nil {
			return err
		}
	}
	return nil
}

// generateWrapper executes the wrapper template t with data and writes the
// result to path, creating or truncating the file.
func generateWrapper(t *template.Template, path string, data interface{}) error {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	if err := t.Execute(f, data); err != nil {
		// The template error is the one worth reporting; the close error is
		// deliberately dropped.
		f.Close()
		return err
	}
	return f.Close()
}

// generateFiat runs word_by_word_montgomery from the fiat-crypto:v0.0.9 docker
// image for the given prime, gofmt-formats its output and writes it to
// <prefix>_fiat64.go.
func generateFiat(prefix, prime string) error {
	cmd := exec.Command("docker", "run", "--rm", "--entrypoint", "word_by_word_montgomery",
		"fiat-crypto:v0.0.9", "--lang", "Go", "--no-wide-int", "--cmovznz-by-mul",
		"--relax-primitive-carry-to-bitwidth", "32,64", "--internal-static",
		"--public-function-case", "camelCase", "--public-type-case", "camelCase",
		"--private-function-case", "camelCase", "--private-type-case", "camelCase",
		"--doc-text-before-function-name", "", "--doc-newline-before-package-declaration",
		"--doc-prepend-header", "Code generated by Fiat Cryptography. DO NOT EDIT.",
		"--package-name", "fiat", "--no-prefix-fiat", prefix, "64", prime,
		"mul", "square", "add", "sub", "one", "from_montgomery", "to_montgomery",
		"selectznz", "to_bytes", "from_bytes")
	cmd.Stderr = os.Stderr
	out, err := cmd.Output()
	if err != nil {
		return err
	}
	out, err = format.Source(out)
	if err != nil {
		return err
	}
	return os.WriteFile(prefix+"_fiat64.go", out, 0644)
}

// generateInvert searches for an addition chain for prime - 2 with the
// addchain tool, renders it through the template file at tmplPath, substitutes
// the concrete element type name for the placeholder "Element", gofmt-formats
// the result and writes it to <prefix>_invert.go.
func generateInvert(prefix, prime, element, tmplPath string) error {
	f, err := os.CreateTemp("", "addchain-"+prefix)
	if err != nil {
		return err
	}
	// Runs when this helper returns, so the temporary file is removed once
	// per curve rather than at the end of the whole generation.
	defer os.Remove(f.Name())

	cmd := exec.Command("addchain", "search", prime+" - 2")
	cmd.Stderr = os.Stderr
	cmd.Stdout = f
	if err := cmd.Run(); err != nil {
		f.Close()
		return err
	}
	if err := f.Close(); err != nil {
		return err
	}

	cmd = exec.Command("addchain", "gen", "-tmpl", tmplPath, f.Name())
	cmd.Stderr = os.Stderr
	out, err := cmd.Output()
	if err != nil {
		return err
	}
	out = bytes.Replace(out, []byte("Element"), []byte(element), -1)
	out, err = format.Source(out)
	if err != nil {
		return err
	}
	return os.WriteFile(prefix+"_invert.go", out, 0644)
}
// tmplWrapper is the text/template source for the per-curve wrapper file. main
// executes it once per entry of curves, reading the entry's Element, Prime,
// Prefix, FiatType and BytesLen fields, and writes the output to <Prefix>.go.
// The output is written as-is: it is not passed through go/format.
const tmplWrapper = `// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Code generated by generate.go. DO NOT EDIT.
package fiat
import (
"crypto/subtle"
"errors"
)
// {{ .Element }} is an integer modulo {{ .Prime }}.
//
// The zero value is a valid zero element.
type {{ .Element }} struct {
// Values are represented internally always in the Montgomery domain, and
// converted in Bytes and SetBytes.
x {{ .Prefix }}MontgomeryDomainFieldElement
}
const {{ .Prefix }}ElementLen = {{ .BytesLen }}
type {{ .Prefix }}UntypedFieldElement = {{ .FiatType }}
// One sets e = 1, and returns e.
func (e *{{ .Element }}) One() *{{ .Element }} {
{{ .Prefix }}SetOne(&e.x)
return e
}
// Equal returns 1 if e == t, and zero otherwise.
func (e *{{ .Element }}) Equal(t *{{ .Element }}) int {
eBytes := e.Bytes()
tBytes := t.Bytes()
return subtle.ConstantTimeCompare(eBytes, tBytes)
}
var {{ .Prefix }}ZeroEncoding = new({{ .Element }}).Bytes()
// IsZero returns 1 if e == 0, and zero otherwise.
func (e *{{ .Element }}) IsZero() int {
eBytes := e.Bytes()
return subtle.ConstantTimeCompare(eBytes, {{ .Prefix }}ZeroEncoding)
}
// Set sets e = t, and returns e.
func (e *{{ .Element }}) Set(t *{{ .Element }}) *{{ .Element }} {
e.x = t.x
return e
}
// Bytes returns the {{ .BytesLen }}-byte big-endian encoding of e.
func (e *{{ .Element }}) Bytes() []byte {
// This function is outlined to make the allocations inline in the caller
// rather than happen on the heap.
var out [{{ .Prefix }}ElementLen]byte
return e.bytes(&out)
}
func (e *{{ .Element }}) bytes(out *[{{ .Prefix }}ElementLen]byte) []byte {
var tmp {{ .Prefix }}NonMontgomeryDomainFieldElement
{{ .Prefix }}FromMontgomery(&tmp, &e.x)
{{ .Prefix }}ToBytes(out, (*{{ .Prefix }}UntypedFieldElement)(&tmp))
{{ .Prefix }}InvertEndianness(out[:])
return out[:]
}
// {{ .Prefix }}MinusOneEncoding is the encoding of -1 mod p, so p - 1, the
// highest canonical encoding. It is used by SetBytes to check for non-canonical
// encodings such as p + k, 2p + k, etc.
var {{ .Prefix }}MinusOneEncoding = new({{ .Element }}).Sub(
new({{ .Element }}), new({{ .Element }}).One()).Bytes()
// SetBytes sets e = v, where v is a big-endian {{ .BytesLen }}-byte encoding, and returns e.
// If v is not {{ .BytesLen }} bytes or it encodes a value higher than {{ .Prime }},
// SetBytes returns nil and an error, and e is unchanged.
func (e *{{ .Element }}) SetBytes(v []byte) (*{{ .Element }}, error) {
if len(v) != {{ .Prefix }}ElementLen {
return nil, errors.New("invalid {{ .Element }} encoding")
}
for i := range v {
if v[i] < {{ .Prefix }}MinusOneEncoding[i] {
break
}
if v[i] > {{ .Prefix }}MinusOneEncoding[i] {
return nil, errors.New("invalid {{ .Element }} encoding")
}
}
var in [{{ .Prefix }}ElementLen]byte
copy(in[:], v)
{{ .Prefix }}InvertEndianness(in[:])
var tmp {{ .Prefix }}NonMontgomeryDomainFieldElement
{{ .Prefix }}FromBytes((*{{ .Prefix }}UntypedFieldElement)(&tmp), &in)
{{ .Prefix }}ToMontgomery(&e.x, &tmp)
return e, nil
}
// Add sets e = t1 + t2, and returns e.
func (e *{{ .Element }}) Add(t1, t2 *{{ .Element }}) *{{ .Element }} {
{{ .Prefix }}Add(&e.x, &t1.x, &t2.x)
return e
}
// Sub sets e = t1 - t2, and returns e.
func (e *{{ .Element }}) Sub(t1, t2 *{{ .Element }}) *{{ .Element }} {
{{ .Prefix }}Sub(&e.x, &t1.x, &t2.x)
return e
}
// Mul sets e = t1 * t2, and returns e.
func (e *{{ .Element }}) Mul(t1, t2 *{{ .Element }}) *{{ .Element }} {
{{ .Prefix }}Mul(&e.x, &t1.x, &t2.x)
return e
}
// Square sets e = t * t, and returns e.
func (e *{{ .Element }}) Square(t *{{ .Element }}) *{{ .Element }} {
{{ .Prefix }}Square(&e.x, &t.x)
return e
}
// Select sets v to a if cond == 1, and to b if cond == 0.
func (v *{{ .Element }}) Select(a, b *{{ .Element }}, cond int) *{{ .Element }} {
{{ .Prefix }}Selectznz((*{{ .Prefix }}UntypedFieldElement)(&v.x), {{ .Prefix }}Uint1(cond),
(*{{ .Prefix }}UntypedFieldElement)(&b.x), (*{{ .Prefix }}UntypedFieldElement)(&a.x))
return v
}
func {{ .Prefix }}InvertEndianness(v []byte) {
for i := 0; i < len(v)/2; i++ {
v[i], v[len(v)-1-i] = v[len(v)-1-i], v[i]
}
}
`
// tmplAddchain is the template handed to "addchain gen -tmpl" to render an
// addition chain as an Invert method. main writes it to a temporary file,
// replaces the placeholder type name "Element" in the rendered output with the
// concrete element type, and formats the result with go/format.
//
// NOTE(review): the helpers used below (lines, format, add, double, shift)
// and the .Meta/.Ops/.Script/.Program fields are presumably supplied by
// addchain's template engine — confirm against the addchain documentation.
const tmplAddchain = `// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Code generated by {{ .Meta.Name }}. DO NOT EDIT.
package fiat
// Invert sets e = 1/x, and returns e.
//
// If x == 0, Invert returns e = 0.
func (e *Element) Invert(x *Element) *Element {
// Inversion is implemented as exponentiation with exponent p - 2.
// The sequence of {{ .Ops.Adds }} multiplications and {{ .Ops.Doubles }} squarings is derived from the
// following addition chain generated with {{ .Meta.Module }} {{ .Meta.ReleaseTag }}.
//
{{- range lines (format .Script) }}
// {{ . }}
{{- end }}
//
var z = new(Element).Set(e)
{{- range .Program.Temporaries }}
var {{ . }} = new(Element)
{{- end }}
{{ range $i := .Program.Instructions -}}
{{- with add $i.Op }}
{{ $i.Output }}.Mul({{ .X }}, {{ .Y }})
{{- end -}}
{{- with double $i.Op }}
{{ $i.Output }}.Square({{ .X }})
{{- end -}}
{{- with shift $i.Op -}}
{{- $first := 0 -}}
{{- if ne $i.Output.Identifier .X.Identifier }}
{{ $i.Output }}.Square({{ .X }})
{{- $first = 1 -}}
{{- end }}
for s := {{ $first }}; s < {{ .S }}; s++ {
{{ $i.Output }}.Square({{ $i.Output }})
}
{{- end -}}
{{- end }}
return e.Set(z)
}
`

View File

@ -0,0 +1,135 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Code generated by generate.go. DO NOT EDIT.
package fiat
import (
"crypto/subtle"
"errors"
)
// P224Element is an integer modulo 2^224 - 2^96 + 1.
//
// The zero value is a valid zero element.
type P224Element struct {
	// Values are represented internally always in the Montgomery domain, and
	// converted in Bytes and SetBytes.
	x p224MontgomeryDomainFieldElement
}

// p224ElementLen is the length in bytes of an encoded P224Element.
const p224ElementLen = 28

// p224UntypedFieldElement is the type that field elements are converted to
// when calling the generated routines used by bytes, SetBytes and Select.
type p224UntypedFieldElement = [4]uint64
// One sets e = 1, and returns e.
//
// The value is written by p224SetOne directly into e's internal
// (Montgomery domain) representation.
func (e *P224Element) One() *P224Element {
	p224SetOne(&e.x)
	return e
}
// Equal reports whether e and t are the same field element: it returns 1 when
// their encodings match and 0 otherwise. The encodings are compared with
// subtle.ConstantTimeCompare.
func (e *P224Element) Equal(t *P224Element) int {
	return subtle.ConstantTimeCompare(e.Bytes(), t.Bytes())
}
// p224ZeroEncoding is the encoding of the zero element, computed once at
// package initialization.
var p224ZeroEncoding = new(P224Element).Bytes()

// IsZero returns 1 when e encodes to the same bytes as the zero element, and
// 0 otherwise. The comparison uses subtle.ConstantTimeCompare.
func (e *P224Element) IsZero() int {
	return subtle.ConstantTimeCompare(e.Bytes(), p224ZeroEncoding)
}
// Set sets e = t, and returns e.
//
// The internal representation is copied by value, so e and t do not share
// storage afterwards.
func (e *P224Element) Set(t *P224Element) *P224Element {
	e.x = t.x
	return e
}
// Bytes returns the 28-byte big-endian encoding of e.
//
// The encoding itself is produced by the unexported bytes method; Bytes only
// declares the output buffer.
func (e *P224Element) Bytes() []byte {
	// This function is outlined to make the allocations inline in the caller
	// rather than happen on the heap.
	var out [p224ElementLen]byte
	return e.bytes(&out)
}
// bytes writes the big-endian encoding of e into out and returns out as a
// slice. e is not modified.
func (e *P224Element) bytes(out *[p224ElementLen]byte) []byte {
	// Leave the Montgomery domain before serializing.
	var tmp p224NonMontgomeryDomainFieldElement
	p224FromMontgomery(&tmp, &e.x)
	p224ToBytes(out, (*p224UntypedFieldElement)(&tmp))
	// NOTE(review): p224ToBytes presumably emits little-endian bytes, hence
	// the reversal to get the documented big-endian order — confirm.
	p224InvertEndianness(out[:])
	return out[:]
}
// p224MinusOneEncoding is the big-endian encoding of p - 1 (that is, -1 mod
// p), the largest canonical encoding. SetBytes compares its input against it
// to reject non-canonical encodings such as p + k, 2p + k, etc.
var p224MinusOneEncoding = new(P224Element).Sub(
	new(P224Element), new(P224Element).One()).Bytes()

// SetBytes sets e = v, where v is a big-endian 28-byte encoding, and returns e.
// If v is not 28 bytes long, or if it encodes a value that is not strictly
// below the modulus 2^224 - 2^96 + 1, SetBytes returns nil and an error, and
// e is unchanged.
func (e *P224Element) SetBytes(v []byte) (*P224Element, error) {
	if len(v) != p224ElementLen {
		return nil, errors.New("invalid P224Element encoding")
	}
	// Compare v with p - 1 from the most significant byte down. The first
	// differing byte decides the outcome; an exact match (v == p - 1) falls
	// out of the loop and is accepted.
	for i, b := range v {
		limit := p224MinusOneEncoding[i]
		if b > limit {
			return nil, errors.New("invalid P224Element encoding")
		}
		if b < limit {
			break
		}
	}
	// Work on a private copy so the caller's slice is left untouched.
	var in [p224ElementLen]byte
	copy(in[:], v)
	p224InvertEndianness(in[:])
	var tmp p224NonMontgomeryDomainFieldElement
	p224FromBytes((*p224UntypedFieldElement)(&tmp), &in)
	p224ToMontgomery(&e.x, &tmp)
	return e, nil
}
// Add sets e = t1 + t2, and returns e.
//
// The sum is computed by p224Add on the internal representations.
func (e *P224Element) Add(t1, t2 *P224Element) *P224Element {
	p224Add(&e.x, &t1.x, &t2.x)
	return e
}
// Sub sets e = t1 - t2, and returns e.
//
// The difference is computed by p224Sub on the internal representations.
func (e *P224Element) Sub(t1, t2 *P224Element) *P224Element {
	p224Sub(&e.x, &t1.x, &t2.x)
	return e
}
// Mul sets e = t1 * t2, and returns e.
//
// The product is computed by p224Mul on the internal representations.
func (e *P224Element) Mul(t1, t2 *P224Element) *P224Element {
	p224Mul(&e.x, &t1.x, &t2.x)
	return e
}
// Square sets e = t * t, and returns e.
//
// The square is computed by p224Square on the internal representation.
func (e *P224Element) Square(t *P224Element) *P224Element {
	p224Square(&e.x, &t.x)
	return e
}
// Select sets v to a if cond == 1, and to b if cond == 0, and returns v.
//
// NOTE(review): cond is converted to p224Uint1 without a range check, so
// values other than 0 and 1 are presumably unsupported — confirm against the
// generated p224Selectznz.
func (v *P224Element) Select(a, b *P224Element, cond int) *P224Element {
	// b is passed ahead of a. NOTE(review): presumably p224Selectznz yields
	// its first operand when the condition is zero — confirm.
	p224Selectznz((*p224UntypedFieldElement)(&v.x), p224Uint1(cond),
		(*p224UntypedFieldElement)(&b.x), (*p224UntypedFieldElement)(&a.x))
	return v
}
// p224InvertEndianness reverses the order of the bytes of v in place.
func p224InvertEndianness(v []byte) {
	for lo, hi := 0, len(v)-1; lo < hi; lo, hi = lo+1, hi-1 {
		v[lo], v[hi] = v[hi], v[lo]
	}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,87 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Code generated by addchain. DO NOT EDIT.
package fiat
// Invert sets e = 1/x, and returns e.
//
// If x == 0, Invert returns e = 0.
func (e *P224Element) Invert(x *P224Element) *P224Element {
	// Inversion is implemented as exponentiation with exponent p - 2.
	// The sequence of 11 multiplications and 223 squarings is derived from the
	// following addition chain generated with github.com/mmcloughlin/addchain v0.3.0.
	//
	// _10 = 2*1
	// _11 = 1 + _10
	// _110 = 2*_11
	// _111 = 1 + _110
	// _111000 = _111 << 3
	// _111111 = _111 + _111000
	// x12 = _111111 << 6 + _111111
	// x14 = x12 << 2 + _11
	// x17 = x14 << 3 + _111
	// x31 = x17 << 14 + x14
	// x48 = x31 << 17 + x17
	// x96 = x48 << 48 + x48
	// x127 = x96 << 31 + x31
	// return x127 << 97 + x96
	//
	// All intermediate values live in z and the temporaries; e is written
	// only by the final Set, so x is never clobbered when e and x are the
	// same element.
	var z = new(P224Element).Set(e)
	var t0 = new(P224Element)
	var t1 = new(P224Element)
	var t2 = new(P224Element)
	z.Square(x)
	t0.Mul(x, z)
	z.Square(t0)
	z.Mul(x, z)
	t1.Square(z)
	for s := 1; s < 3; s++ {
		t1.Square(t1)
	}
	t1.Mul(z, t1)
	t2.Square(t1)
	for s := 1; s < 6; s++ {
		t2.Square(t2)
	}
	t1.Mul(t1, t2)
	for s := 0; s < 2; s++ {
		t1.Square(t1)
	}
	t0.Mul(t0, t1)
	t1.Square(t0)
	for s := 1; s < 3; s++ {
		t1.Square(t1)
	}
	z.Mul(z, t1)
	t1.Square(z)
	for s := 1; s < 14; s++ {
		t1.Square(t1)
	}
	t0.Mul(t0, t1)
	t1.Square(t0)
	for s := 1; s < 17; s++ {
		t1.Square(t1)
	}
	z.Mul(z, t1)
	t1.Square(z)
	for s := 1; s < 48; s++ {
		t1.Square(t1)
	}
	z.Mul(z, t1)
	t1.Square(z)
	for s := 1; s < 31; s++ {
		t1.Square(t1)
	}
	t0.Mul(t0, t1)
	for s := 0; s < 97; s++ {
		t0.Square(t0)
	}
	z.Mul(z, t0)
	return e.Set(z)
}

View File

@ -0,0 +1,135 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Code generated by generate.go. DO NOT EDIT.
package fiat
import (
"crypto/subtle"
"errors"
)
// P384Element is an integer modulo 2^384 - 2^128 - 2^96 + 2^32 - 1.
//
// The zero value is a valid zero element.
type P384Element struct {
	// Values are represented internally always in the Montgomery domain, and
	// converted in Bytes and SetBytes.
	x p384MontgomeryDomainFieldElement
}

// p384ElementLen is the length in bytes of an encoded P384Element.
const p384ElementLen = 48

// p384UntypedFieldElement is the type that field elements are converted to
// when calling the generated routines used by bytes, SetBytes and Select.
type p384UntypedFieldElement = [6]uint64
// One sets e = 1, and returns e.
//
// The value is written by p384SetOne directly into e's internal
// (Montgomery domain) representation.
func (e *P384Element) One() *P384Element {
	p384SetOne(&e.x)
	return e
}
// Equal reports whether e and t are the same field element: it returns 1 when
// their encodings match and 0 otherwise. The encodings are compared with
// subtle.ConstantTimeCompare.
func (e *P384Element) Equal(t *P384Element) int {
	return subtle.ConstantTimeCompare(e.Bytes(), t.Bytes())
}
// p384ZeroEncoding is the encoding of the zero element, computed once at
// package initialization.
var p384ZeroEncoding = new(P384Element).Bytes()

// IsZero returns 1 when e encodes to the same bytes as the zero element, and
// 0 otherwise. The comparison uses subtle.ConstantTimeCompare.
func (e *P384Element) IsZero() int {
	return subtle.ConstantTimeCompare(e.Bytes(), p384ZeroEncoding)
}
// Set sets e = t, and returns e.
//
// The internal representation is copied by value, so e and t do not share
// storage afterwards.
func (e *P384Element) Set(t *P384Element) *P384Element {
	e.x = t.x
	return e
}
// Bytes returns the 48-byte big-endian encoding of e.
//
// The encoding itself is produced by the unexported bytes method; Bytes only
// declares the output buffer.
func (e *P384Element) Bytes() []byte {
	// This function is outlined to make the allocations inline in the caller
	// rather than happen on the heap.
	var out [p384ElementLen]byte
	return e.bytes(&out)
}
// bytes writes the big-endian encoding of e into out and returns out as a
// slice. e is not modified.
func (e *P384Element) bytes(out *[p384ElementLen]byte) []byte {
	// Leave the Montgomery domain before serializing.
	var tmp p384NonMontgomeryDomainFieldElement
	p384FromMontgomery(&tmp, &e.x)
	p384ToBytes(out, (*p384UntypedFieldElement)(&tmp))
	// NOTE(review): p384ToBytes presumably emits little-endian bytes, hence
	// the reversal to get the documented big-endian order — confirm.
	p384InvertEndianness(out[:])
	return out[:]
}
// p384MinusOneEncoding is the big-endian encoding of p - 1 (that is, -1 mod
// p), the largest canonical encoding. SetBytes compares its input against it
// to reject non-canonical encodings such as p + k, 2p + k, etc.
var p384MinusOneEncoding = new(P384Element).Sub(
	new(P384Element), new(P384Element).One()).Bytes()

// SetBytes sets e = v, where v is a big-endian 48-byte encoding, and returns e.
// If v is not 48 bytes long, or if it encodes a value that is not strictly
// below the modulus 2^384 - 2^128 - 2^96 + 2^32 - 1, SetBytes returns nil and
// an error, and e is unchanged.
func (e *P384Element) SetBytes(v []byte) (*P384Element, error) {
	if len(v) != p384ElementLen {
		return nil, errors.New("invalid P384Element encoding")
	}
	// Compare v with p - 1 from the most significant byte down. The first
	// differing byte decides the outcome; an exact match (v == p - 1) falls
	// out of the loop and is accepted.
	for i, b := range v {
		limit := p384MinusOneEncoding[i]
		if b > limit {
			return nil, errors.New("invalid P384Element encoding")
		}
		if b < limit {
			break
		}
	}
	// Work on a private copy so the caller's slice is left untouched.
	var in [p384ElementLen]byte
	copy(in[:], v)
	p384InvertEndianness(in[:])
	var tmp p384NonMontgomeryDomainFieldElement
	p384FromBytes((*p384UntypedFieldElement)(&tmp), &in)
	p384ToMontgomery(&e.x, &tmp)
	return e, nil
}
// Add sets e = t1 + t2, and returns e.
//
// The sum is computed by p384Add on the internal representations.
func (e *P384Element) Add(t1, t2 *P384Element) *P384Element {
	p384Add(&e.x, &t1.x, &t2.x)
	return e
}
// Sub sets e = t1 - t2, and returns e.
//
// The difference is computed by p384Sub on the internal representations.
func (e *P384Element) Sub(t1, t2 *P384Element) *P384Element {
	p384Sub(&e.x, &t1.x, &t2.x)
	return e
}
// Mul sets e = t1 * t2, and returns e.
//
// The product is computed by p384Mul on the internal representations.
func (e *P384Element) Mul(t1, t2 *P384Element) *P384Element {
	p384Mul(&e.x, &t1.x, &t2.x)
	return e
}
// Square sets e = t * t, and returns e.
//
// The square is computed by p384Square on the internal representation.
func (e *P384Element) Square(t *P384Element) *P384Element {
	p384Square(&e.x, &t.x)
	return e
}
// Select sets v to a if cond == 1, and to b if cond == 0, and returns v.
//
// NOTE(review): cond is converted to p384Uint1 without a range check, so
// values other than 0 and 1 are presumably unsupported — confirm against the
// generated p384Selectznz.
func (v *P384Element) Select(a, b *P384Element, cond int) *P384Element {
	// b is passed ahead of a. NOTE(review): presumably p384Selectznz yields
	// its first operand when the condition is zero — confirm.
	p384Selectznz((*p384UntypedFieldElement)(&v.x), p384Uint1(cond),
		(*p384UntypedFieldElement)(&b.x), (*p384UntypedFieldElement)(&a.x))
	return v
}
// p384InvertEndianness reverses the order of the bytes of v in place.
func p384InvertEndianness(v []byte) {
	for lo, hi := 0, len(v)-1; lo < hi; lo, hi = lo+1, hi-1 {
		v[lo], v[hi] = v[hi], v[lo]
	}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,102 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Code generated by addchain. DO NOT EDIT.
package fiat
// Invert sets e = 1/x, and returns e.
//
// If x == 0, Invert returns e = 0.
func (e *P384Element) Invert(x *P384Element) *P384Element {
	// Inversion is implemented as exponentiation with exponent p - 2.
	// The sequence of 15 multiplications and 383 squarings is derived from the
	// following addition chain generated with github.com/mmcloughlin/addchain v0.3.0.
	//
	// _10 = 2*1
	// _11 = 1 + _10
	// _110 = 2*_11
	// _111 = 1 + _110
	// _111000 = _111 << 3
	// _111111 = _111 + _111000
	// x12 = _111111 << 6 + _111111
	// x24 = x12 << 12 + x12
	// x30 = x24 << 6 + _111111
	// x31 = 2*x30 + 1
	// x32 = 2*x31 + 1
	// x63 = x32 << 31 + x31
	// x126 = x63 << 63 + x63
	// x252 = x126 << 126 + x126
	// x255 = x252 << 3 + _111
	// i397 = ((x255 << 33 + x32) << 94 + x30) << 2
	// return 1 + i397
	//
	// All intermediate values live in z and the temporaries; e is written
	// only by the final Set, so x is never clobbered when e and x are the
	// same element.
	var z = new(P384Element).Set(e)
	var t0 = new(P384Element)
	var t1 = new(P384Element)
	var t2 = new(P384Element)
	var t3 = new(P384Element)
	z.Square(x)
	z.Mul(x, z)
	z.Square(z)
	t1.Mul(x, z)
	z.Square(t1)
	for s := 1; s < 3; s++ {
		z.Square(z)
	}
	z.Mul(t1, z)
	t0.Square(z)
	for s := 1; s < 6; s++ {
		t0.Square(t0)
	}
	t0.Mul(z, t0)
	t2.Square(t0)
	for s := 1; s < 12; s++ {
		t2.Square(t2)
	}
	t0.Mul(t0, t2)
	for s := 0; s < 6; s++ {
		t0.Square(t0)
	}
	z.Mul(z, t0)
	t0.Square(z)
	t2.Mul(x, t0)
	t0.Square(t2)
	t0.Mul(x, t0)
	t3.Square(t0)
	for s := 1; s < 31; s++ {
		t3.Square(t3)
	}
	t2.Mul(t2, t3)
	t3.Square(t2)
	for s := 1; s < 63; s++ {
		t3.Square(t3)
	}
	t2.Mul(t2, t3)
	t3.Square(t2)
	for s := 1; s < 126; s++ {
		t3.Square(t3)
	}
	t2.Mul(t2, t3)
	for s := 0; s < 3; s++ {
		t2.Square(t2)
	}
	t1.Mul(t1, t2)
	for s := 0; s < 33; s++ {
		t1.Square(t1)
	}
	t0.Mul(t0, t1)
	for s := 0; s < 94; s++ {
		t0.Square(t0)
	}
	z.Mul(z, t0)
	for s := 0; s < 2; s++ {
		z.Square(z)
	}
	z.Mul(x, z)
	return e.Set(z)
}

View File

@ -2,8 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package fiat implements prime order fields using formally verified algorithms
// from the Fiat Cryptography project.
// Code generated by generate.go. DO NOT EDIT.
package fiat
import (
@ -15,20 +15,18 @@ import (
//
// The zero value is a valid zero element.
type P521Element struct {
// This element has the following bounds, which are tighter than
// the output bounds of some operations. Those operations must be
// followed by a carry.
//
// [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000],
// [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000],
// [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]
x [9]uint64
// Values are represented internally always in the Montgomery domain, and
// converted in Bytes and SetBytes.
x p521MontgomeryDomainFieldElement
}
const p521ElementLen = 66
type p521UntypedFieldElement = [9]uint64
// One sets e = 1, and returns e.
func (e *P521Element) One() *P521Element {
*e = P521Element{}
e.x[0] = 1
p521SetOne(&e.x)
return e
}
@ -57,153 +55,81 @@ func (e *P521Element) Set(t *P521Element) *P521Element {
func (e *P521Element) Bytes() []byte {
// This function is outlined to make the allocations inline in the caller
// rather than happen on the heap.
var out [66]byte
var out [p521ElementLen]byte
return e.bytes(&out)
}
func (e *P521Element) bytes(out *[66]byte) []byte {
p521ToBytes(out, &e.x)
invertEndianness(out[:])
func (e *P521Element) bytes(out *[p521ElementLen]byte) []byte {
var tmp p521NonMontgomeryDomainFieldElement
p521FromMontgomery(&tmp, &e.x)
p521ToBytes(out, (*p521UntypedFieldElement)(&tmp))
p521InvertEndianness(out[:])
return out[:]
}
// SetBytes sets e = v, where v is a big-endian 66-byte encoding, and returns
// e. If v is not 66 bytes or it encodes a value higher than 2^521 - 1, SetBytes
// returns nil and an error, and e is unchanged.
func (e *P521Element) SetBytes(v []byte) (*P521Element, error) {
if len(v) != 66 || v[0] > 1 {
return nil, errors.New("invalid P-521 field encoding")
}
var in [66]byte
copy(in[:], v)
invertEndianness(in[:])
p521FromBytes(&e.x, &in)
return e, nil
}
// p521MinusOneEncoding is the encoding of -1 mod p, so p - 1, the
// highest canonical encoding. It is used by SetBytes to check for non-canonical
// encodings such as p + k, 2p + k, etc.
var p521MinusOneEncoding = new(P521Element).Sub(
new(P521Element), new(P521Element).One()).Bytes()
func invertEndianness(v []byte) {
for i := 0; i < len(v)/2; i++ {
v[i], v[len(v)-1-i] = v[len(v)-1-i], v[i]
// SetBytes sets e = v, where v is a big-endian 66-byte encoding, and returns e.
// If v is not 66 bytes or it encodes a value higher than 2^521 - 1,
// SetBytes returns nil and an error, and e is unchanged.
func (e *P521Element) SetBytes(v []byte) (*P521Element, error) {
if len(v) != p521ElementLen {
return nil, errors.New("invalid P521Element encoding")
}
for i := range v {
if v[i] < p521MinusOneEncoding[i] {
break
}
if v[i] > p521MinusOneEncoding[i] {
return nil, errors.New("invalid P521Element encoding")
}
}
var in [p521ElementLen]byte
copy(in[:], v)
p521InvertEndianness(in[:])
var tmp p521NonMontgomeryDomainFieldElement
p521FromBytes((*p521UntypedFieldElement)(&tmp), &in)
p521ToMontgomery(&e.x, &tmp)
return e, nil
}
// Add sets e = t1 + t2, and returns e.
func (e *P521Element) Add(t1, t2 *P521Element) *P521Element {
p521Add(&e.x, &t1.x, &t2.x)
p521Carry(&e.x, &e.x)
return e
}
// Sub sets e = t1 - t2, and returns e.
func (e *P521Element) Sub(t1, t2 *P521Element) *P521Element {
p521Sub(&e.x, &t1.x, &t2.x)
p521Carry(&e.x, &e.x)
return e
}
// Mul sets e = t1 * t2, and returns e.
func (e *P521Element) Mul(t1, t2 *P521Element) *P521Element {
p521CarryMul(&e.x, &t1.x, &t2.x)
p521Mul(&e.x, &t1.x, &t2.x)
return e
}
// Square sets e = t * t, and returns e.
func (e *P521Element) Square(t *P521Element) *P521Element {
p521CarrySquare(&e.x, &t.x)
p521Square(&e.x, &t.x)
return e
}
// Select sets e to a if cond == 1, and to b if cond == 0.
// Select sets v to a if cond == 1, and to b if cond == 0.
func (v *P521Element) Select(a, b *P521Element, cond int) *P521Element {
p521Selectznz(&v.x, p521Uint1(cond), &b.x, &a.x)
p521Selectznz((*p521UntypedFieldElement)(&v.x), p521Uint1(cond),
(*p521UntypedFieldElement)(&b.x), (*p521UntypedFieldElement)(&a.x))
return v
}
// Invert sets e = 1/t, and returns e.
//
// If t == 0, Invert returns e = 0.
func (e *P521Element) Invert(t *P521Element) *P521Element {
// Inversion is implemented as exponentiation with exponent p 2.
// The sequence of multiplications and squarings was generated with
// github.com/mmcloughlin/addchain v0.2.0.
var t1, t2 = new(P521Element), new(P521Element)
// _10 = 2 * 1
t1.Square(t)
// _11 = 1 + _10
t1.Mul(t, t1)
// _1100 = _11 << 2
t2.Square(t1)
t2.Square(t2)
// _1111 = _11 + _1100
t1.Mul(t1, t2)
// _11110000 = _1111 << 4
t2.Square(t1)
for i := 0; i < 3; i++ {
t2.Square(t2)
func p521InvertEndianness(v []byte) {
for i := 0; i < len(v)/2; i++ {
v[i], v[len(v)-1-i] = v[len(v)-1-i], v[i]
}
// _11111111 = _1111 + _11110000
t1.Mul(t1, t2)
// x16 = _11111111<<8 + _11111111
t2.Square(t1)
for i := 0; i < 7; i++ {
t2.Square(t2)
}
t1.Mul(t1, t2)
// x32 = x16<<16 + x16
t2.Square(t1)
for i := 0; i < 15; i++ {
t2.Square(t2)
}
t1.Mul(t1, t2)
// x64 = x32<<32 + x32
t2.Square(t1)
for i := 0; i < 31; i++ {
t2.Square(t2)
}
t1.Mul(t1, t2)
// x65 = 2*x64 + 1
t2.Square(t1)
t2.Mul(t2, t)
// x129 = x65<<64 + x64
for i := 0; i < 64; i++ {
t2.Square(t2)
}
t1.Mul(t1, t2)
// x130 = 2*x129 + 1
t2.Square(t1)
t2.Mul(t2, t)
// x259 = x130<<129 + x129
for i := 0; i < 129; i++ {
t2.Square(t2)
}
t1.Mul(t1, t2)
// x260 = 2*x259 + 1
t2.Square(t1)
t2.Mul(t2, t)
// x519 = x260<<259 + x259
for i := 0; i < 259; i++ {
t2.Square(t2)
}
t1.Mul(t1, t2)
// return x519<<2 + 1
t1.Square(t1)
t1.Square(t1)
return e.Mul(t1, t)
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,89 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Code generated by addchain. DO NOT EDIT.
package fiat
// Invert sets e = 1/x, and returns e.
//
// If x == 0, Invert returns e = 0.
func (e *P521Element) Invert(x *P521Element) *P521Element {
	// Inversion is implemented as exponentiation with exponent p - 2.
	// The sequence of 13 multiplications and 520 squarings is derived from the
	// following addition chain generated with github.com/mmcloughlin/addchain v0.3.0.
	//
	// _10 = 2*1
	// _11 = 1 + _10
	// _1100 = _11 << 2
	// _1111 = _11 + _1100
	// _11110000 = _1111 << 4
	// _11111111 = _1111 + _11110000
	// x16 = _11111111 << 8 + _11111111
	// x32 = x16 << 16 + x16
	// x64 = x32 << 32 + x32
	// x65 = 2*x64 + 1
	// x129 = x65 << 64 + x64
	// x130 = 2*x129 + 1
	// x259 = x130 << 129 + x129
	// x260 = 2*x259 + 1
	// x519 = x260 << 259 + x259
	// return x519 << 2 + 1
	//
	// All intermediate values live in z and t0; e is written only by the
	// final Set, so x is never clobbered when e and x are the same element.
	var z = new(P521Element).Set(e)
	var t0 = new(P521Element)
	z.Square(x)
	z.Mul(x, z)
	t0.Square(z)
	for s := 1; s < 2; s++ {
		t0.Square(t0)
	}
	z.Mul(z, t0)
	t0.Square(z)
	for s := 1; s < 4; s++ {
		t0.Square(t0)
	}
	z.Mul(z, t0)
	t0.Square(z)
	for s := 1; s < 8; s++ {
		t0.Square(t0)
	}
	z.Mul(z, t0)
	t0.Square(z)
	for s := 1; s < 16; s++ {
		t0.Square(t0)
	}
	z.Mul(z, t0)
	t0.Square(z)
	for s := 1; s < 32; s++ {
		t0.Square(t0)
	}
	z.Mul(z, t0)
	t0.Square(z)
	t0.Mul(x, t0)
	for s := 0; s < 64; s++ {
		t0.Square(t0)
	}
	z.Mul(z, t0)
	t0.Square(z)
	t0.Mul(x, t0)
	for s := 0; s < 129; s++ {
		t0.Square(t0)
	}
	z.Mul(z, t0)
	t0.Square(z)
	t0.Mul(x, t0)
	for s := 0; s < 259; s++ {
		t0.Square(t0)
	}
	z.Mul(z, t0)
	for s := 0; s < 2; s++ {
		z.Square(z)
	}
	z.Mul(x, z)
	return e.Set(z)
}

View File

@ -1,37 +0,0 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package fiat_test
import (
"crypto/elliptic/internal/fiat"
"crypto/rand"
"testing"
)
// p521Random returns a field element decoded from 66 random bytes. All bits
// of the first byte except the lowest are cleared before decoding. Any
// failure to read randomness or to decode aborts the test.
func p521Random(t *testing.T) *fiat.P521Element {
	raw := make([]byte, 66)
	_, err := rand.Read(raw)
	if err != nil {
		t.Fatal(err)
	}
	raw[0] = raw[0] & 1
	elem, err := new(fiat.P521Element).SetBytes(raw)
	if err != nil {
		t.Fatal(err)
	}
	return elem
}
// TestP521Invert checks that a random element times its inverse equals one,
// and that inverting zero yields zero.
func TestP521Invert(t *testing.T) {
	a := p521Random(t)
	inv := new(fiat.P521Element).Invert(a)
	one := new(fiat.P521Element).Mul(a, inv)
	wantOne := new(fiat.P521Element).One()
	if wantOne.Equal(one) != 1 {
		t.Errorf("a * 1/a != 1; got %x for %x", one.Bytes(), a.Bytes())
	}

	zero := new(fiat.P521Element)
	inv.Invert(zero)
	if new(fiat.P521Element).Equal(inv) != 1 {
		t.Errorf("1/0 != 0; got %x", inv.Bytes())
	}
}

View File

@ -0,0 +1,94 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package nistec_test
import (
"crypto/elliptic/internal/nistec"
"math/rand"
"os"
"strings"
"testing"
)
// TestAllocations asserts that a scalar multiplication of the generator
// followed by an encode/decode round trip performs no heap allocations, for
// each curve. It is skipped on builders whose name ends in "-noopt".
func TestAllocations(t *testing.T) {
	builder := os.Getenv("GO_BUILDER_NAME")
	if strings.HasSuffix(builder, "-noopt") {
		t.Skip("skipping allocations test without relevant optimizations")
	}
	t.Run("P224", func(t *testing.T) {
		allocs := testing.AllocsPerRun(100, func() {
			pt := nistec.NewP224Generator()
			k := make([]byte, 66)
			rand.Read(k)
			pt.ScalarMult(pt, k)
			enc := pt.Bytes()
			if _, err := pt.SetBytes(enc); err != nil {
				t.Fatal(err)
			}
		})
		if allocs > 0 {
			t.Errorf("expected zero allocations, got %0.1f", allocs)
		}
	})
	t.Run("P384", func(t *testing.T) {
		allocs := testing.AllocsPerRun(100, func() {
			pt := nistec.NewP384Generator()
			k := make([]byte, 66)
			rand.Read(k)
			pt.ScalarMult(pt, k)
			enc := pt.Bytes()
			if _, err := pt.SetBytes(enc); err != nil {
				t.Fatal(err)
			}
		})
		if allocs > 0 {
			t.Errorf("expected zero allocations, got %0.1f", allocs)
		}
	})
	t.Run("P521", func(t *testing.T) {
		allocs := testing.AllocsPerRun(100, func() {
			pt := nistec.NewP521Generator()
			k := make([]byte, 66)
			rand.Read(k)
			pt.ScalarMult(pt, k)
			enc := pt.Bytes()
			if _, err := pt.SetBytes(enc); err != nil {
				t.Fatal(err)
			}
		})
		if allocs > 0 {
			t.Errorf("expected zero allocations, got %0.1f", allocs)
		}
	})
}
// BenchmarkScalarMult times one scalar multiplication per iteration for each
// curve. The point starts at the generator and is multiplied in place by the
// same 66-byte random scalar every iteration.
func BenchmarkScalarMult(b *testing.B) {
	b.Run("P224", func(b *testing.B) {
		k := make([]byte, 66)
		rand.Read(k)
		pt := nistec.NewP224Generator()
		b.ReportAllocs()
		b.ResetTimer()
		for n := b.N; n > 0; n-- {
			pt.ScalarMult(pt, k)
		}
	})
	b.Run("P384", func(b *testing.B) {
		k := make([]byte, 66)
		rand.Read(k)
		pt := nistec.NewP384Generator()
		b.ReportAllocs()
		b.ResetTimer()
		for n := b.N; n > 0; n-- {
			pt.ScalarMult(pt, k)
		}
	})
	b.Run("P521", func(b *testing.B) {
		k := make([]byte, 66)
		rand.Read(k)
		pt := nistec.NewP521Generator()
		b.ReportAllocs()
		b.ResetTimer()
		for n := b.N; n > 0; n-- {
			pt.ScalarMult(pt, k)
		}
	})
}

View File

@ -0,0 +1,293 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package nistec
import (
"crypto/elliptic/internal/fiat"
"crypto/subtle"
"errors"
)
// p224B is the constant b of the curve equation y² = x³ - 3x + b, held as a
// field element. The error from SetBytes is discarded because the input is a
// fixed literal.
var p224B, _ = new(fiat.P224Element).SetBytes([]byte{0xb4, 0x05, 0x0a, 0x85,
	0x0c, 0x04, 0xb3, 0xab, 0xf5, 0x41, 0x32, 0x56, 0x50, 0x44, 0xb0, 0xb7,
	0xd7, 0xbf, 0xd8, 0xba, 0x27, 0x0b, 0x39, 0x43, 0x23, 0x55, 0xff, 0xb4})

// p224G is the point that NewP224Generator copies. It is decoded from an
// uncompressed encoding (0x04 prefix followed by the two coordinates); the
// SetBytes error is discarded for the same reason as above.
var p224G, _ = NewP224Point().SetBytes([]byte{0x04,
	0xb7, 0x0e, 0x0c, 0xbd, 0x6b, 0xb4, 0xbf, 0x7f, 0x32, 0x13, 0x90, 0xb9,
	0x4a, 0x03, 0xc1, 0xd3, 0x56, 0xc2, 0x11, 0x22, 0x34, 0x32, 0x80, 0xd6,
	0x11, 0x5c, 0x1d, 0x21, 0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb,
	0x4c, 0x22, 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
	0x44, 0xd5, 0x81, 0x99, 0x85, 0x0, 0x7e, 0x34})

// p224ElementLength is the number of bytes one coordinate occupies in a point
// encoding.
const p224ElementLength = 28
// P224Point is a P-224 point. The zero value is NOT valid: the coordinate
// pointers are nil until a constructor such as NewP224Point allocates them.
type P224Point struct {
	// The point is represented in projective coordinates (X:Y:Z),
	// where x = X/Z and y = Y/Z. A point with Z equal to zero is encoded by
	// Bytes as the point at infinity.
	x, y, z *fiat.P224Element
}
// NewP224Point returns a new P224Point representing the point at infinity.
func NewP224Point() *P224Point {
	return &P224Point{
		// x and z keep the fiat.P224Element zero value; only y is set to one.
		x: new(fiat.P224Element),
		y: new(fiat.P224Element).One(),
		z: new(fiat.P224Element),
	}
}
// NewP224Generator returns a new P224Point set to the canonical generator.
//
// The coordinates are freshly allocated and then overwritten by Set from the
// package-level p224G, so the result does not share storage with p224G.
func NewP224Generator() *P224Point {
	return (&P224Point{
		x: new(fiat.P224Element),
		y: new(fiat.P224Element),
		z: new(fiat.P224Element),
	}).Set(p224G)
}
// Set copies the three projective coordinates of q into p and returns p.
// The coordinates are independent, so the copy order is immaterial.
func (p *P224Point) Set(q *P224Point) *P224Point {
	p.z.Set(q.z)
	p.y.Set(q.y)
	p.x.Set(q.x)
	return p
}
// SetBytes sets p to the uncompressed or infinity value encoded in b, as
// specified in SEC 1, Version 2.0, Section 2.3.4. If the point is not on
// the curve, it returns nil and an error, and the receiver is unchanged.
// Otherwise, it returns p.
//
// Compressed encodings (prefix byte 2 or 3) are recognized but not yet
// supported: they are rejected with an "unimplemented" error.
func (p *P224Point) SetBytes(b []byte) (*P224Point, error) {
	switch {
	// Point at infinity.
	case len(b) == 1 && b[0] == 0:
		return p.Set(NewP224Point()), nil

	// Uncompressed form.
	case len(b) == 1+2*p224ElementLength && b[0] == 4:
		// Decode into temporaries so p is untouched if anything fails.
		x, err := new(fiat.P224Element).SetBytes(b[1 : 1+p224ElementLength])
		if err != nil {
			return nil, err
		}
		y, err := new(fiat.P224Element).SetBytes(b[1+p224ElementLength:])
		if err != nil {
			return nil, err
		}
		if err := p224CheckOnCurve(x, y); err != nil {
			return nil, err
		}
		p.x.Set(x)
		p.y.Set(y)
		p.z.One()
		return p, nil

	// Compressed form. SEC 1 uses prefix 2 or 3 for these; prefix 0 is only
	// valid for the one-byte infinity encoding handled above.
	case len(b) == 1+p224ElementLength && (b[0] == 2 || b[0] == 3):
		return nil, errors.New("unimplemented") // TODO(filippo)

	default:
		return nil, errors.New("invalid P224 point encoding")
	}
}
// p224CheckOnCurve returns nil when the affine coordinates (x, y) satisfy
// y² = x³ - 3x + b, and an error otherwise.
func p224CheckOnCurve(x, y *fiat.P224Element) error {
	// Left-hand side: y².
	lhs := new(fiat.P224Element).Square(y)

	// Right-hand side: x³ - 3x + b, with 3x built from two additions.
	rhs := new(fiat.P224Element).Square(x)
	rhs.Mul(rhs, x)
	tripleX := new(fiat.P224Element).Add(x, x)
	tripleX.Add(tripleX, x)
	rhs.Sub(rhs, tripleX)
	rhs.Add(rhs, p224B)

	if rhs.Equal(lhs) == 1 {
		return nil
	}
	return errors.New("P224 point not on curve")
}
// Bytes returns the uncompressed or infinity encoding of p, as specified in
// SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the point at
// infinity is shorter than all other encodings.
func (p *P224Point) Bytes() []byte {
	// This function is outlined to make the allocations inline in the caller
	// rather than happen on the heap. The buffer is sized for the longest
	// encoding this curve produces: one prefix byte plus two coordinates.
	var out [1 + 2*p224ElementLength]byte
	return p.bytes(&out)
}

// bytes appends the encoding of p to the start of out and returns the
// resulting slice.
func (p *P224Point) bytes(out *[1 + 2*p224ElementLength]byte) []byte {
	// Z == 0 is the point at infinity, encoded as a single zero byte.
	if p.z.IsZero() == 1 {
		return append(out[:0], 0)
	}

	// Convert from projective (X:Y:Z) to affine x = X/Z, y = Y/Z.
	zinv := new(fiat.P224Element).Invert(p.z)
	xx := new(fiat.P224Element).Mul(p.x, zinv)
	yy := new(fiat.P224Element).Mul(p.y, zinv)

	// Uncompressed form: 0x04 || x || y.
	buf := append(out[:0], 4)
	buf = append(buf, xx.Bytes()...)
	buf = append(buf, yy.Bytes()...)
	return buf
}
// Add sets q = p1 + p2, and returns q. The points may overlap.
//
// p1 and p2 are only read. Every intermediate value lives in a freshly
// allocated element and the result is copied into q by the final three Set
// calls, which is why q may alias either input.
func (q *P224Point) Add(p1, p2 *P224Point) *P224Point {
	// Complete addition formula for a = -3 from "Complete addition formulas for
	// prime order elliptic curves" (https://eprint.iacr.org/2015/1060), §A.2.
	// The trailing comment on each line is the corresponding step of that
	// formula; the statement order must be preserved.
	t0 := new(fiat.P224Element).Mul(p1.x, p2.x) // t0 := X1 * X2
	t1 := new(fiat.P224Element).Mul(p1.y, p2.y) // t1 := Y1 * Y2
	t2 := new(fiat.P224Element).Mul(p1.z, p2.z) // t2 := Z1 * Z2
	t3 := new(fiat.P224Element).Add(p1.x, p1.y) // t3 := X1 + Y1
	t4 := new(fiat.P224Element).Add(p2.x, p2.y) // t4 := X2 + Y2
	t3.Mul(t3, t4) // t3 := t3 * t4
	t4.Add(t0, t1) // t4 := t0 + t1
	t3.Sub(t3, t4) // t3 := t3 - t4
	t4.Add(p1.y, p1.z) // t4 := Y1 + Z1
	x3 := new(fiat.P224Element).Add(p2.y, p2.z) // X3 := Y2 + Z2
	t4.Mul(t4, x3) // t4 := t4 * X3
	x3.Add(t1, t2) // X3 := t1 + t2
	t4.Sub(t4, x3) // t4 := t4 - X3
	x3.Add(p1.x, p1.z) // X3 := X1 + Z1
	y3 := new(fiat.P224Element).Add(p2.x, p2.z) // Y3 := X2 + Z2
	x3.Mul(x3, y3) // X3 := X3 * Y3
	y3.Add(t0, t2) // Y3 := t0 + t2
	y3.Sub(x3, y3) // Y3 := X3 - Y3
	z3 := new(fiat.P224Element).Mul(p224B, t2) // Z3 := b * t2
	x3.Sub(y3, z3) // X3 := Y3 - Z3
	z3.Add(x3, x3) // Z3 := X3 + X3
	x3.Add(x3, z3) // X3 := X3 + Z3
	z3.Sub(t1, x3) // Z3 := t1 - X3
	x3.Add(t1, x3) // X3 := t1 + X3
	y3.Mul(p224B, y3) // Y3 := b * Y3
	t1.Add(t2, t2) // t1 := t2 + t2
	t2.Add(t1, t2) // t2 := t1 + t2
	y3.Sub(y3, t2) // Y3 := Y3 - t2
	y3.Sub(y3, t0) // Y3 := Y3 - t0
	t1.Add(y3, y3) // t1 := Y3 + Y3
	y3.Add(t1, y3) // Y3 := t1 + Y3
	t1.Add(t0, t0) // t1 := t0 + t0
	t0.Add(t1, t0) // t0 := t1 + t0
	t0.Sub(t0, t2) // t0 := t0 - t2
	t1.Mul(t4, y3) // t1 := t4 * Y3
	t2.Mul(t0, y3) // t2 := t0 * Y3
	y3.Mul(x3, z3) // Y3 := X3 * Z3
	y3.Add(y3, t2) // Y3 := Y3 + t2
	x3.Mul(t3, x3) // X3 := t3 * X3
	x3.Sub(x3, t1) // X3 := X3 - t1
	z3.Mul(t4, z3) // Z3 := t4 * Z3
	t1.Mul(t3, t0) // t1 := t3 * t0
	z3.Add(z3, t1) // Z3 := Z3 + t1
	// Only now is the receiver written.
	q.x.Set(x3)
	q.y.Set(y3)
	q.z.Set(z3)
	return q
}
// Double sets q = p + p, and returns q. The points may overlap.
//
// p is only read. The result is accumulated in freshly allocated elements and
// copied into q by the final three Set calls, which is why q may alias p.
func (q *P224Point) Double(p *P224Point) *P224Point {
	// Complete addition formula for a = -3 from "Complete addition formulas for
	// prime order elliptic curves" (https://eprint.iacr.org/2015/1060), §A.2.
	// The trailing comment on each line is the corresponding step of that
	// formula; the statement order must be preserved.
	t0 := new(fiat.P224Element).Square(p.x) // t0 := X ^ 2
	t1 := new(fiat.P224Element).Square(p.y) // t1 := Y ^ 2
	t2 := new(fiat.P224Element).Square(p.z) // t2 := Z ^ 2
	t3 := new(fiat.P224Element).Mul(p.x, p.y) // t3 := X * Y
	t3.Add(t3, t3) // t3 := t3 + t3
	z3 := new(fiat.P224Element).Mul(p.x, p.z) // Z3 := X * Z
	z3.Add(z3, z3) // Z3 := Z3 + Z3
	y3 := new(fiat.P224Element).Mul(p224B, t2) // Y3 := b * t2
	y3.Sub(y3, z3) // Y3 := Y3 - Z3
	x3 := new(fiat.P224Element).Add(y3, y3) // X3 := Y3 + Y3
	y3.Add(x3, y3) // Y3 := X3 + Y3
	x3.Sub(t1, y3) // X3 := t1 - Y3
	y3.Add(t1, y3) // Y3 := t1 + Y3
	y3.Mul(x3, y3) // Y3 := X3 * Y3
	x3.Mul(x3, t3) // X3 := X3 * t3
	t3.Add(t2, t2) // t3 := t2 + t2
	t2.Add(t2, t3) // t2 := t2 + t3
	z3.Mul(p224B, z3) // Z3 := b * Z3
	z3.Sub(z3, t2) // Z3 := Z3 - t2
	z3.Sub(z3, t0) // Z3 := Z3 - t0
	t3.Add(z3, z3) // t3 := Z3 + Z3
	z3.Add(z3, t3) // Z3 := Z3 + t3
	t3.Add(t0, t0) // t3 := t0 + t0
	t0.Add(t3, t0) // t0 := t3 + t0
	t0.Sub(t0, t2) // t0 := t0 - t2
	t0.Mul(t0, z3) // t0 := t0 * Z3
	y3.Add(y3, t0) // Y3 := Y3 + t0
	t0.Mul(p.y, p.z) // t0 := Y * Z
	t0.Add(t0, t0) // t0 := t0 + t0
	z3.Mul(t0, z3) // Z3 := t0 * Z3
	x3.Sub(x3, z3) // X3 := X3 - Z3
	z3.Mul(t0, t1) // Z3 := t0 * t1
	z3.Add(z3, z3) // Z3 := Z3 + Z3
	z3.Add(z3, z3) // Z3 := Z3 + Z3
	// Only now is the receiver written.
	q.x.Set(x3)
	q.y.Set(y3)
	q.z.Set(z3)
	return q
}
// Select sets q to p1 when cond is 1 and to p2 when cond is 0, and returns q.
// The choice is delegated, coordinate by coordinate, to the field element's
// own Select.
func (q *P224Point) Select(p1, p2 *P224Point, cond int) *P224Point {
	q.z.Select(p1.z, p2.z, cond)
	q.y.Select(p1.y, p2.y, cond)
	q.x.Select(p1.x, p2.x, cond)
	return q
}
// ScalarMult sets p = scalar * q, and returns p.
//
// scalar is consumed from its first byte to its last, high nibble before low
// nibble, so it is interpreted as a big-endian integer. p may alias q: the
// table of multiples is built from q before p is reset.
func (p *P224Point) ScalarMult(q *P224Point, scalar []byte) *P224Point {
	// table holds the first 16 multiples of q. The explicit NewP224Point calls
	// get inlined, letting the allocations live on the stack.
	var table = [16]*P224Point{
		NewP224Point(), NewP224Point(), NewP224Point(), NewP224Point(),
		NewP224Point(), NewP224Point(), NewP224Point(), NewP224Point(),
		NewP224Point(), NewP224Point(), NewP224Point(), NewP224Point(),
		NewP224Point(), NewP224Point(), NewP224Point(), NewP224Point(),
	}
	// table[0] stays at infinity; table[i] = table[i-1] + q = i*q.
	for i := 1; i < 16; i++ {
		table[i].Add(table[i-1], q)
	}

	// Instead of doing the classic double-and-add chain, we do it with a
	// four-bit window: we double four times, and then add [0-15]P.
	t := NewP224Point()
	p.Set(NewP224Point())
	for _, b := range scalar {
		p.Double(p)
		p.Double(p)
		p.Double(p)
		p.Double(p)

		// Visit every table entry so the access pattern does not depend on
		// the nibble; only the matching entry is copied into t.
		for i := uint8(0); i < 16; i++ {
			cond := subtle.ConstantTimeByteEq(b>>4, i)
			t.Select(table[i], t, cond)
		}
		p.Add(p, t)

		p.Double(p)
		p.Double(p)
		p.Double(p)
		p.Double(p)

		for i := uint8(0); i < 16; i++ {
			cond := subtle.ConstantTimeByteEq(b&0b1111, i)
			t.Select(table[i], t, cond)
		}
		p.Add(p, t)
	}

	return p
}

View File

@ -0,0 +1,298 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package nistec
import (
"crypto/elliptic/internal/fiat"
"crypto/subtle"
"errors"
)
// p384B is the constant b of the curve equation y² = x³ - 3x + b, held as a
// field element. The error from SetBytes is discarded because the input is a
// fixed literal.
var p384B, _ = new(fiat.P384Element).SetBytes([]byte{
	0xb3, 0x31, 0x2f, 0xa7, 0xe2, 0x3e, 0xe7, 0xe4, 0x98, 0x8e, 0x05, 0x6b,
	0xe3, 0xf8, 0x2d, 0x19, 0x18, 0x1d, 0x9c, 0x6e, 0xfe, 0x81, 0x41, 0x12,
	0x03, 0x14, 0x08, 0x8f, 0x50, 0x13, 0x87, 0x5a, 0xc6, 0x56, 0x39, 0x8d,
	0x8a, 0x2e, 0xd1, 0x9d, 0x2a, 0x85, 0xc8, 0xed, 0xd3, 0xec, 0x2a, 0xef})

// p384G is the point that NewP384Generator copies. It is decoded from an
// uncompressed encoding (0x04 prefix followed by the two coordinates); the
// SetBytes error is discarded for the same reason as above.
var p384G, _ = NewP384Point().SetBytes([]byte{0x4,
	0xaa, 0x87, 0xca, 0x22, 0xbe, 0x8b, 0x05, 0x37, 0x8e, 0xb1, 0xc7, 0x1e,
	0xf3, 0x20, 0xad, 0x74, 0x6e, 0x1d, 0x3b, 0x62, 0x8b, 0xa7, 0x9b, 0x98,
	0x59, 0xf7, 0x41, 0xe0, 0x82, 0x54, 0x2a, 0x38, 0x55, 0x02, 0xf2, 0x5d,
	0xbf, 0x55, 0x29, 0x6c, 0x3a, 0x54, 0x5e, 0x38, 0x72, 0x76, 0x0a, 0xb7,
	0x36, 0x17, 0xde, 0x4a, 0x96, 0x26, 0x2c, 0x6f, 0x5d, 0x9e, 0x98, 0xbf,
	0x92, 0x92, 0xdc, 0x29, 0xf8, 0xf4, 0x1d, 0xbd, 0x28, 0x9a, 0x14, 0x7c,
	0xe9, 0xda, 0x31, 0x13, 0xb5, 0xf0, 0xb8, 0xc0, 0x0a, 0x60, 0xb1, 0xce,
	0x1d, 0x7e, 0x81, 0x9d, 0x7a, 0x43, 0x1d, 0x7c, 0x90, 0xea, 0x0e, 0x5f})

// p384ElementLength is the number of bytes one coordinate occupies in a point
// encoding.
const p384ElementLength = 48
// P384Point is a P-384 point. The zero value is NOT valid: the coordinate
// pointers are nil until a constructor such as NewP384Point allocates them.
type P384Point struct {
	// The point is represented in projective coordinates (X:Y:Z),
	// where x = X/Z and y = Y/Z. A point with Z equal to zero is encoded by
	// Bytes as the point at infinity.
	x, y, z *fiat.P384Element
}
// NewP384Point returns a new P384Point representing the point at infinity.
func NewP384Point() *P384Point {
	return &P384Point{
		// x and z keep the fiat.P384Element zero value; only y is set to one.
		x: new(fiat.P384Element),
		y: new(fiat.P384Element).One(),
		z: new(fiat.P384Element),
	}
}
// NewP384Generator returns a new P384Point set to the canonical generator.
//
// The coordinates are freshly allocated and then overwritten by Set from the
// package-level p384G, so the result does not share storage with p384G.
func NewP384Generator() *P384Point {
	return (&P384Point{
		x: new(fiat.P384Element),
		y: new(fiat.P384Element),
		z: new(fiat.P384Element),
	}).Set(p384G)
}
// Set copies the three projective coordinates of q into p and returns p.
// The coordinates are independent, so the copy order is immaterial.
func (p *P384Point) Set(q *P384Point) *P384Point {
	p.z.Set(q.z)
	p.y.Set(q.y)
	p.x.Set(q.x)
	return p
}
// SetBytes sets p to the uncompressed or infinity value encoded in b, as
// specified in SEC 1, Version 2.0, Section 2.3.4. If the point is not on
// the curve, it returns nil and an error, and the receiver is unchanged.
// Otherwise, it returns p.
//
// Compressed encodings (prefix byte 2 or 3) are recognized but not yet
// supported: they are rejected with an "unimplemented" error.
func (p *P384Point) SetBytes(b []byte) (*P384Point, error) {
	switch {
	// Point at infinity.
	case len(b) == 1 && b[0] == 0:
		return p.Set(NewP384Point()), nil

	// Uncompressed form.
	case len(b) == 1+2*p384ElementLength && b[0] == 4:
		// Decode into temporaries so p is untouched if anything fails.
		x, err := new(fiat.P384Element).SetBytes(b[1 : 1+p384ElementLength])
		if err != nil {
			return nil, err
		}
		y, err := new(fiat.P384Element).SetBytes(b[1+p384ElementLength:])
		if err != nil {
			return nil, err
		}
		if err := p384CheckOnCurve(x, y); err != nil {
			return nil, err
		}
		p.x.Set(x)
		p.y.Set(y)
		p.z.One()
		return p, nil

	// Compressed form. SEC 1 uses prefix 2 or 3 for these; prefix 0 is only
	// valid for the one-byte infinity encoding handled above.
	case len(b) == 1+p384ElementLength && (b[0] == 2 || b[0] == 3):
		return nil, errors.New("unimplemented") // TODO(filippo)

	default:
		return nil, errors.New("invalid P384 point encoding")
	}
}
// p384CheckOnCurve returns nil when the affine coordinates (x, y) satisfy
// y² = x³ - 3x + b, and an error otherwise.
func p384CheckOnCurve(x, y *fiat.P384Element) error {
	// Left-hand side: y².
	lhs := new(fiat.P384Element).Square(y)

	// Right-hand side: x³ - 3x + b, with 3x built from two additions.
	rhs := new(fiat.P384Element).Square(x)
	rhs.Mul(rhs, x)
	tripleX := new(fiat.P384Element).Add(x, x)
	tripleX.Add(tripleX, x)
	rhs.Sub(rhs, tripleX)
	rhs.Add(rhs, p384B)

	if rhs.Equal(lhs) == 1 {
		return nil
	}
	return errors.New("P384 point not on curve")
}
// Bytes returns the uncompressed or infinity encoding of p, as specified in
// SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the point at
// infinity is shorter than all other encodings.
func (p *P384Point) Bytes() []byte {
	// This function is outlined to make the allocations inline in the caller
	// rather than happen on the heap. The buffer is sized for the longest
	// encoding this curve produces: one prefix byte plus two coordinates.
	var out [1 + 2*p384ElementLength]byte
	return p.bytes(&out)
}

// bytes appends the encoding of p to the start of out and returns the
// resulting slice.
func (p *P384Point) bytes(out *[1 + 2*p384ElementLength]byte) []byte {
	// Z == 0 is the point at infinity, encoded as a single zero byte.
	if p.z.IsZero() == 1 {
		return append(out[:0], 0)
	}

	// Convert from projective (X:Y:Z) to affine x = X/Z, y = Y/Z.
	zinv := new(fiat.P384Element).Invert(p.z)
	xx := new(fiat.P384Element).Mul(p.x, zinv)
	yy := new(fiat.P384Element).Mul(p.y, zinv)

	// Uncompressed form: 0x04 || x || y.
	buf := append(out[:0], 4)
	buf = append(buf, xx.Bytes()...)
	buf = append(buf, yy.Bytes()...)
	return buf
}
// Add sets q = p1 + p2, and returns q. The points may overlap.
//
// p1 and p2 are only read. Every intermediate value lives in a freshly
// allocated element and the result is copied into q by the final three Set
// calls, which is why q may alias either input.
func (q *P384Point) Add(p1, p2 *P384Point) *P384Point {
	// Complete addition formula for a = -3 from "Complete addition formulas for
	// prime order elliptic curves" (https://eprint.iacr.org/2015/1060), §A.2.
	// The trailing comment on each line is the corresponding step of that
	// formula; the statement order must be preserved.
	t0 := new(fiat.P384Element).Mul(p1.x, p2.x) // t0 := X1 * X2
	t1 := new(fiat.P384Element).Mul(p1.y, p2.y) // t1 := Y1 * Y2
	t2 := new(fiat.P384Element).Mul(p1.z, p2.z) // t2 := Z1 * Z2
	t3 := new(fiat.P384Element).Add(p1.x, p1.y) // t3 := X1 + Y1
	t4 := new(fiat.P384Element).Add(p2.x, p2.y) // t4 := X2 + Y2
	t3.Mul(t3, t4) // t3 := t3 * t4
	t4.Add(t0, t1) // t4 := t0 + t1
	t3.Sub(t3, t4) // t3 := t3 - t4
	t4.Add(p1.y, p1.z) // t4 := Y1 + Z1
	x3 := new(fiat.P384Element).Add(p2.y, p2.z) // X3 := Y2 + Z2
	t4.Mul(t4, x3) // t4 := t4 * X3
	x3.Add(t1, t2) // X3 := t1 + t2
	t4.Sub(t4, x3) // t4 := t4 - X3
	x3.Add(p1.x, p1.z) // X3 := X1 + Z1
	y3 := new(fiat.P384Element).Add(p2.x, p2.z) // Y3 := X2 + Z2
	x3.Mul(x3, y3) // X3 := X3 * Y3
	y3.Add(t0, t2) // Y3 := t0 + t2
	y3.Sub(x3, y3) // Y3 := X3 - Y3
	z3 := new(fiat.P384Element).Mul(p384B, t2) // Z3 := b * t2
	x3.Sub(y3, z3) // X3 := Y3 - Z3
	z3.Add(x3, x3) // Z3 := X3 + X3
	x3.Add(x3, z3) // X3 := X3 + Z3
	z3.Sub(t1, x3) // Z3 := t1 - X3
	x3.Add(t1, x3) // X3 := t1 + X3
	y3.Mul(p384B, y3) // Y3 := b * Y3
	t1.Add(t2, t2) // t1 := t2 + t2
	t2.Add(t1, t2) // t2 := t1 + t2
	y3.Sub(y3, t2) // Y3 := Y3 - t2
	y3.Sub(y3, t0) // Y3 := Y3 - t0
	t1.Add(y3, y3) // t1 := Y3 + Y3
	y3.Add(t1, y3) // Y3 := t1 + Y3
	t1.Add(t0, t0) // t1 := t0 + t0
	t0.Add(t1, t0) // t0 := t1 + t0
	t0.Sub(t0, t2) // t0 := t0 - t2
	t1.Mul(t4, y3) // t1 := t4 * Y3
	t2.Mul(t0, y3) // t2 := t0 * Y3
	y3.Mul(x3, z3) // Y3 := X3 * Z3
	y3.Add(y3, t2) // Y3 := Y3 + t2
	x3.Mul(t3, x3) // X3 := t3 * X3
	x3.Sub(x3, t1) // X3 := X3 - t1
	z3.Mul(t4, z3) // Z3 := t4 * Z3
	t1.Mul(t3, t0) // t1 := t3 * t0
	z3.Add(z3, t1) // Z3 := Z3 + t1
	// Only now is the receiver written.
	q.x.Set(x3)
	q.y.Set(y3)
	q.z.Set(z3)
	return q
}
// Double sets q = p + p, and returns q. The points may overlap.
//
// p is only read. The result is accumulated in freshly allocated elements and
// copied into q by the final three Set calls, which is why q may alias p.
func (q *P384Point) Double(p *P384Point) *P384Point {
	// Complete addition formula for a = -3 from "Complete addition formulas for
	// prime order elliptic curves" (https://eprint.iacr.org/2015/1060), §A.2.
	// The trailing comment on each line is the corresponding step of that
	// formula; the statement order must be preserved.
	t0 := new(fiat.P384Element).Square(p.x) // t0 := X ^ 2
	t1 := new(fiat.P384Element).Square(p.y) // t1 := Y ^ 2
	t2 := new(fiat.P384Element).Square(p.z) // t2 := Z ^ 2
	t3 := new(fiat.P384Element).Mul(p.x, p.y) // t3 := X * Y
	t3.Add(t3, t3) // t3 := t3 + t3
	z3 := new(fiat.P384Element).Mul(p.x, p.z) // Z3 := X * Z
	z3.Add(z3, z3) // Z3 := Z3 + Z3
	y3 := new(fiat.P384Element).Mul(p384B, t2) // Y3 := b * t2
	y3.Sub(y3, z3) // Y3 := Y3 - Z3
	x3 := new(fiat.P384Element).Add(y3, y3) // X3 := Y3 + Y3
	y3.Add(x3, y3) // Y3 := X3 + Y3
	x3.Sub(t1, y3) // X3 := t1 - Y3
	y3.Add(t1, y3) // Y3 := t1 + Y3
	y3.Mul(x3, y3) // Y3 := X3 * Y3
	x3.Mul(x3, t3) // X3 := X3 * t3
	t3.Add(t2, t2) // t3 := t2 + t2
	t2.Add(t2, t3) // t2 := t2 + t3
	z3.Mul(p384B, z3) // Z3 := b * Z3
	z3.Sub(z3, t2) // Z3 := Z3 - t2
	z3.Sub(z3, t0) // Z3 := Z3 - t0
	t3.Add(z3, z3) // t3 := Z3 + Z3
	z3.Add(z3, t3) // Z3 := Z3 + t3
	t3.Add(t0, t0) // t3 := t0 + t0
	t0.Add(t3, t0) // t0 := t3 + t0
	t0.Sub(t0, t2) // t0 := t0 - t2
	t0.Mul(t0, z3) // t0 := t0 * Z3
	y3.Add(y3, t0) // Y3 := Y3 + t0
	t0.Mul(p.y, p.z) // t0 := Y * Z
	t0.Add(t0, t0) // t0 := t0 + t0
	z3.Mul(t0, z3) // Z3 := t0 * Z3
	x3.Sub(x3, z3) // X3 := X3 - Z3
	z3.Mul(t0, t1) // Z3 := t0 * t1
	z3.Add(z3, z3) // Z3 := Z3 + Z3
	z3.Add(z3, z3) // Z3 := Z3 + Z3
	// Only now is the receiver written.
	q.x.Set(x3)
	q.y.Set(y3)
	q.z.Set(z3)
	return q
}
// Select sets q to p1 when cond is 1 and to p2 when cond is 0, and returns q.
// The choice is delegated, coordinate by coordinate, to the field element's
// own Select.
func (q *P384Point) Select(p1, p2 *P384Point, cond int) *P384Point {
	q.z.Select(p1.z, p2.z, cond)
	q.y.Select(p1.y, p2.y, cond)
	q.x.Select(p1.x, p2.x, cond)
	return q
}
// ScalarMult sets p = scalar * q, and returns p.
//
// scalar is consumed from its first byte to its last, high nibble before low
// nibble, so it is interpreted as a big-endian integer. p may alias q: the
// table of multiples is built from q before p is reset.
func (p *P384Point) ScalarMult(q *P384Point, scalar []byte) *P384Point {
	// table holds the first 16 multiples of q. The explicit NewP384Point calls
	// get inlined, letting the allocations live on the stack.
	var table = [16]*P384Point{
		NewP384Point(), NewP384Point(), NewP384Point(), NewP384Point(),
		NewP384Point(), NewP384Point(), NewP384Point(), NewP384Point(),
		NewP384Point(), NewP384Point(), NewP384Point(), NewP384Point(),
		NewP384Point(), NewP384Point(), NewP384Point(), NewP384Point(),
	}
	// table[0] stays at infinity; table[i] = table[i-1] + q = i*q.
	for i := 1; i < 16; i++ {
		table[i].Add(table[i-1], q)
	}

	// Instead of doing the classic double-and-add chain, we do it with a
	// four-bit window: we double four times, and then add [0-15]P.
	t := NewP384Point()
	p.Set(NewP384Point())
	for _, b := range scalar {
		p.Double(p)
		p.Double(p)
		p.Double(p)
		p.Double(p)

		// Visit every table entry so the access pattern does not depend on
		// the nibble; only the matching entry is copied into t.
		for i := uint8(0); i < 16; i++ {
			cond := subtle.ConstantTimeByteEq(b>>4, i)
			t.Select(table[i], t, cond)
		}
		p.Add(p, t)

		p.Double(p)
		p.Double(p)
		p.Double(p)
		p.Double(p)

		for i := uint8(0); i < 16; i++ {
			cond := subtle.ConstantTimeByteEq(b&0b1111, i)
			t.Select(table[i], t, cond)
		}
		p.Add(p, t)
	}

	return p
}

View File

@ -58,7 +58,11 @@ func NewP521Point() *P521Point {
// NewP521Generator returns a new P521Point set to the canonical generator.
func NewP521Generator() *P521Point {
return NewP521Point().Set(p521G)
return (&P521Point{
x: new(fiat.P521Element),
y: new(fiat.P521Element),
z: new(fiat.P521Element),
}).Set(p521G)
}
// Set sets p = q and returns p.

View File

@ -1,44 +0,0 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package nistec_test
import (
"crypto/elliptic/internal/nistec"
"math/rand"
"os"
"strings"
"testing"
)
// TestP521Allocations checks that multiplying the P-521 generator by a random
// 66-byte scalar and round-tripping the result through Bytes and SetBytes
// reports zero allocations from testing.AllocsPerRun.
//
// The test is skipped when GO_BUILDER_NAME ends in "-noopt", because the
// result depends on compiler optimizations.
func TestP521Allocations(t *testing.T) {
	if strings.HasSuffix(os.Getenv("GO_BUILDER_NAME"), "-noopt") {
		t.Skip("skipping allocations test without relevant optimizations")
	}
	if allocs := testing.AllocsPerRun(100, func() {
		p := nistec.NewP521Generator()
		scalar := make([]byte, 66)
		rand.Read(scalar)
		p.ScalarMult(p, scalar)
		// Encode and decode again so both directions are measured.
		out := p.Bytes()
		if _, err := p.SetBytes(out); err != nil {
			t.Fatal(err)
		}
	}); allocs > 0 {
		t.Errorf("expected zero allocations, got %0.1f", allocs)
	}
}
// BenchmarkScalarMult times P-521 ScalarMult, starting from the generator and
// a random 66-byte scalar. Every iteration multiplies the previous result in
// place, so the point changes between iterations while the scalar stays fixed.
func BenchmarkScalarMult(b *testing.B) {
	b.Run("P521", func(b *testing.B) {
		point := nistec.NewP521Generator()
		k := make([]byte, 66)
		rand.Read(k)
		b.ReportAllocs()
		b.ResetTimer()
		for n := b.N; n > 0; n-- {
			point.ScalarMult(point, k)
		}
	})
}

View File

@ -1,739 +1,136 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package elliptic
// This is a constant-time, 32-bit implementation of P224. See FIPS 186-3,
// section D.2.2.
//
// See https://www.imperialviolet.org/2010/12/04/ecc.html ([1]) for background.
import (
"encoding/binary"
"crypto/elliptic/internal/nistec"
"crypto/rand"
"math/big"
"math/bits"
)
var p224 p224Curve
// p224Curve is a Curve implementation based on nistec.P224Point.
//
// It's a wrapper that exposes the big.Int-based Curve interface and encodes the
// legacy idiosyncrasies it requires, such as invalid and infinity point
// handling.
//
// To interact with the nistec package, points are encoded into and decoded from
// properly formatted byte slices. All big.Int use is limited to this package.
// Encoding and decoding is 1/1000th of the runtime of a scalar multiplication,
// so the overhead is acceptable.
type p224Curve struct {
*CurveParams
gx, gy, b p224FieldElement
params *CurveParams
}
var p224 p224Curve
var _ Curve = p224
func initP224() {
// See FIPS 186-3, section D.2.2
p224.CurveParams = &CurveParams{Name: "P-224"}
p224.P, _ = new(big.Int).SetString("26959946667150639794667015087019630673557916260026308143510066298881", 10)
p224.N, _ = new(big.Int).SetString("26959946667150639794667015087019625940457807714424391721682722368061", 10)
p224.B, _ = new(big.Int).SetString("b4050a850c04b3abf54132565044b0b7d7bfd8ba270b39432355ffb4", 16)
p224.Gx, _ = new(big.Int).SetString("b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", 16)
p224.Gy, _ = new(big.Int).SetString("bd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34", 16)
p224.BitSize = 224
p224FromBig(&p224.gx, p224.Gx)
p224FromBig(&p224.gy, p224.Gy)
p224FromBig(&p224.b, p224.B)
}
// P224 returns a Curve which implements P-224 (see FIPS 186-3, section D.2.2).
//
// The cryptographic operations are implemented using constant-time algorithms.
func P224() Curve {
initonce.Do(initAll)
return p224
p224.params = &CurveParams{
Name: "P-224",
BitSize: 224,
// FIPS 186-4, section D.1.2.2
P: bigFromDecimal("26959946667150639794667015087019630673557916260026308143510066298881"),
N: bigFromDecimal("26959946667150639794667015087019625940457807714424391721682722368061"),
B: bigFromHex("b4050a850c04b3abf54132565044b0b7d7bfd8ba270b39432355ffb4"),
Gx: bigFromHex("b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21"),
Gy: bigFromHex("bd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34"),
}
}
func (curve p224Curve) Params() *CurveParams {
return curve.CurveParams
return curve.params
}
func (curve p224Curve) IsOnCurve(bigX, bigY *big.Int) bool {
if bigX.BitLen() > 224 || bigY.BitLen() > 224 {
func (curve p224Curve) IsOnCurve(x, y *big.Int) bool {
// IsOnCurve is documented to reject (0, 0), the conventional point at
// infinity, which however is accepted by p224PointFromAffine.
if x.Sign() == 0 && y.Sign() == 0 {
return false
}
var x, y p224FieldElement
p224FromBig(&x, bigX)
p224FromBig(&y, bigY)
// y² = x³ - 3x + b
var tmp p224LargeFieldElement
var x3 p224FieldElement
p224Square(&x3, &x, &tmp)
p224Mul(&x3, &x3, &x, &tmp)
for i := 0; i < 8; i++ {
x[i] *= 3
}
p224Sub(&x3, &x3, &x)
p224Reduce(&x3)
p224Add(&x3, &x3, &curve.b)
p224Contract(&x3, &x3)
p224Square(&y, &y, &tmp)
p224Contract(&y, &y)
for i := 0; i < 8; i++ {
if y[i] != x3[i] {
return false
}
}
return true
_, ok := p224PointFromAffine(x, y)
return ok
}
func (p224Curve) Add(bigX1, bigY1, bigX2, bigY2 *big.Int) (x, y *big.Int) {
var x1, y1, z1, x2, y2, z2, x3, y3, z3 p224FieldElement
p224FromBig(&x1, bigX1)
p224FromBig(&y1, bigY1)
if bigX1.Sign() != 0 || bigY1.Sign() != 0 {
z1[0] = 1
func p224PointFromAffine(x, y *big.Int) (p *nistec.P224Point, ok bool) {
// (0, 0) is by convention the point at infinity, which can't be represented
// in affine coordinates. Marshal incorrectly encodes it as an uncompressed
// point, which SetBytes would correctly reject. See Issue 37294.
if x.Sign() == 0 && y.Sign() == 0 {
return nistec.NewP224Point(), true
}
p224FromBig(&x2, bigX2)
p224FromBig(&y2, bigY2)
if bigX2.Sign() != 0 || bigY2.Sign() != 0 {
z2[0] = 1
if x.BitLen() > 224 || y.BitLen() > 224 {
return nil, false
}
p224AddJacobian(&x3, &y3, &z3, &x1, &y1, &z1, &x2, &y2, &z2)
return p224ToAffine(&x3, &y3, &z3)
p, err := nistec.NewP224Point().SetBytes(Marshal(P224(), x, y))
if err != nil {
return nil, false
}
return p, true
}
// Double returns the affine coordinates of the double of (bigX1, bigY1). The
// input is converted to field elements with z = 1, doubled by
// p224DoubleJacobian, and converted back by p224ToAffine.
func (p224Curve) Double(bigX1, bigY1 *big.Int) (x, y *big.Int) {
	var (
		inX, inY, inZ    p224FieldElement
		outX, outY, outZ p224FieldElement
	)
	inZ[0] = 1
	p224FromBig(&inX, bigX1)
	p224FromBig(&inY, bigY1)

	p224DoubleJacobian(&outX, &outY, &outZ, &inX, &inY, &inZ)
	return p224ToAffine(&outX, &outY, &outZ)
}
// ScalarMult returns the affine coordinates of scalar * (bigX1, bigY1). The
// input is converted to field elements with z = 1, multiplied by
// p224ScalarMult, and converted back by p224ToAffine.
func (p224Curve) ScalarMult(bigX1, bigY1 *big.Int, scalar []byte) (x, y *big.Int) {
	var (
		inX, inY, inZ    p224FieldElement
		outX, outY, outZ p224FieldElement
	)
	inZ[0] = 1
	p224FromBig(&inX, bigX1)
	p224FromBig(&inY, bigY1)

	p224ScalarMult(&outX, &outY, &outZ, &inX, &inY, &inZ, scalar)
	return p224ToAffine(&outX, &outY, &outZ)
}
// ScalarBaseMult returns the affine coordinates of scalar * G, where G is the
// point (curve.gx, curve.gy) taken with z = 1.
func (curve p224Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
	var (
		baseZ            p224FieldElement
		outX, outY, outZ p224FieldElement
	)
	baseZ[0] = 1

	p224ScalarMult(&outX, &outY, &outZ, &curve.gx, &curve.gy, &baseZ, scalar)
	return p224ToAffine(&outX, &outY, &outZ)
}
// Field element functions.
//
// The field that we're dealing with is ℤ/pℤ where p = 2**224 - 2**96 + 1.
//
// Field elements are represented by a FieldElement, which is a typedef to an
// array of 8 uint32's. The value of a FieldElement, a, is:
//   a[0] + 2**28·a[1] + 2**56·a[2] + ... + 2**196·a[7]
//
// Using 28-bit limbs means that there's only 4 bits of headroom, which is less
// than we would really like. But it has the useful feature that we hit 2**224
// exactly, making the reflections during a reduce much nicer.
type p224FieldElement [8]uint32

// p224P is the order of the field, represented as a p224FieldElement.
var p224P = p224FieldElement{1, 0, 0, 0xffff000, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff}
// p224IsZero returns 1 if a == 0 mod p and 0 otherwise.
//
// a is first brought to its minimal form, after which it is zero exactly when
// every limb is zero.
//
//   a[i] < 2**29
func p224IsZero(a *p224FieldElement) uint32 {
	var reduced p224FieldElement
	p224Contract(&reduced, a)

	// OR all limbs together: the result is zero only if each limb is.
	var anyBits uint32
	for i := range reduced {
		anyBits |= reduced[i]
	}
	return 1 & ^maskIsNotZero(anyBits)
}
// p224Add computes *out = a+b, limb by limb and without any reduction.
//
//   a[i] + b[i] < 2**32
func p224Add(out, a, b *p224FieldElement) {
	for limb := range out {
		out[limb] = a[limb] + b[limb]
	}
}
// Limb values used to build p224ZeroModP31 below.
const two31p3 = 1<<31 + 1<<3
const two31m3 = 1<<31 - 1<<3
const two31m15m3 = 1<<31 - 1<<15 - 1<<3

// p224ZeroModP31 is 0 mod p where bit 31 is set in all limbs so that we can
// subtract smaller amounts without underflow. See the section "Subtraction" in
// [1] for reasoning.
//
// To calculate this value, start by adding 2³¹ to the lowest limb and
// subtracting 2³ from the next one to compensate. Repeat for each next limb,
// ending up with 2³¹ - 2³ in each of them, and a carry of -2³. Apply the
// reduction identity, and we need to subtract 2³ * 2⁹⁶ - 2³ = 2¹⁵ * 2⁸⁴ - 2³ so
// we subtract 2¹⁵ from the 4th limb and add 2³ to the first limb.
//
// p224Sub adds this value limb by limb before subtracting.
var p224ZeroModP31 = []uint32{two31p3, two31m3, two31m3, two31m15m3, two31m3, two31m3, two31m3, two31m3}
// p224Sub computes *out = a-b. Each limb of a is first offset by the matching
// limb of p224ZeroModP31, a multiple of p, so the limb-wise subtraction does
// not underflow within the bounds below.
//
//   a[i], b[i] < 2**30
//   out[i] < 2**32
func p224Sub(out, a, b *p224FieldElement) {
	for limb := range out {
		offset := p224ZeroModP31[limb]
		out[limb] = a[limb] + offset - b[limb]
	}
}
// p224LargeFieldElement also represents an element of the field. The limbs are
// still spaced 28-bits apart and in little-endian order. So the limbs are at
// 0, 28, 56, ..., 392 bits, each 64-bits wide.
type p224LargeFieldElement [15]uint64

// Limb values used to build p224ZeroModP63 below.
const two63p35 = 1<<63 + 1<<35
const two63m35 = 1<<63 - 1<<35
const two63m35m19 = 1<<63 - 1<<35 - 1<<19

// p224ZeroModP63 is 0 mod p where bit 63 is set in all limbs. See the section
// "Subtraction" in [1] for why.
var p224ZeroModP63 = [8]uint64{two63p35, two63m35, two63m35, two63m35, two63m35m19, two63m35, two63m35, two63m35}

// bottom12Bits and bottom28Bits are masks for the low 12 and low 28 bits;
// 28 bits is the width of one limb.
const bottom12Bits = 0xfff
const bottom28Bits = 0xfffffff
// p224Mul computes *out = a*b. The limb products are accumulated in tmp,
// which is cleared first and serves purely as scratch space, and the sum is
// then reduced into out.
//
//   a[i] < 2**29, b[i] < 2**30 (or vice versa)
//   out[i] < 2**29
func p224Mul(out, a, b *p224FieldElement, tmp *p224LargeFieldElement) {
	*tmp = p224LargeFieldElement{}

	// Schoolbook multiplication: limb i times limb j lands in limb i+j.
	for i, ai := range a {
		for j, bj := range b {
			tmp[i+j] += uint64(ai) * uint64(bj)
		}
	}

	p224ReduceLarge(out, tmp)
}
// p224Square computes *out = a*a. The limb products are accumulated in tmp,
// which is cleared first and serves purely as scratch space, and the sum is
// then reduced into out.
//
//   a[i] < 2**29
//   out[i] < 2**29
func p224Square(out, a *p224FieldElement, tmp *p224LargeFieldElement) {
	*tmp = p224LargeFieldElement{}

	for i := range a {
		// Each off-diagonal product a[i]*a[j] with j < i occurs twice in
		// the square, so it is added doubled.
		for j := 0; j < i; j++ {
			cross := uint64(a[i]) * uint64(a[j])
			tmp[i+j] += cross * 2
		}
		// The diagonal product occurs once.
		tmp[2*i] += uint64(a[i]) * uint64(a[i])
	}

	p224ReduceLarge(out, tmp)
}
// p224ReduceLarge converts a p224LargeFieldElement to a p224FieldElement.
// The input is used as working storage and is overwritten in the process.
//
//   in[i] < 2**62
//   out[i] < 2**29
func p224ReduceLarge(out *p224FieldElement, in *p224LargeFieldElement) {
	// Add a multiple of p with the top bit of every limb set, so that the
	// subtractions below cannot underflow.
	for i := 0; i < 8; i++ {
		in[i] += p224ZeroModP63[i]
	}

	// Eliminate the coefficients at 2**224 and greater by applying the
	// reduction identity.
	//
	//   a + top * 2²²⁴ = a + top * 2⁹⁶ - top
	//
	// Since top here is in[8..14], both the subtraction at offset 0 and the
	// addition at offset 96 (3 * 28 + 16) span multiple limbs. The subtraction
	// can't underflow because of the p224ZeroModP63 addition above, while the
	// addition can't overflow because of the 62 bit input bounds.
	for i := 14; i >= 8; i-- {
		in[i-8] -= in[i]
		in[i-5] += (in[i] & 0xffff) << 12
		in[i-4] += in[i] >> 16
	}
	in[8] = 0
	// in[0..7] < 2**64
	// in[9..14] discarded

	// Run a carry chain and light reduction. Keep [0] large so we can do the
	// subtraction safely. As the values become small enough, we start to store
	// them in out and use 32-bit operations.
	for i := 1; i < 8; i++ {
		in[i+1] += in[i] >> 28
		out[i] = uint32(in[i] & bottom28Bits)
	}
	// in[8] now holds the carry out of limb 7; fold it back in with the same
	// reduction identity.
	in[0] -= in[8]
	out[3] += uint32(in[8]&0xffff) << 12
	out[4] += uint32(in[8] >> 16)
	// in[0] < 2**64
	// out[3] < 2**29
	// out[4] < 2**29
	// out[1,2,5..7] < 2**28

	// Carry the overflow of [0] into the short 28 bit limbs.
	out[0] = uint32(in[0] & bottom28Bits)
	out[1] += uint32((in[0] >> 28) & bottom28Bits)
	out[2] += uint32(in[0] >> 56)
	// out[0] < 2**28
	// out[1..4] < 2**29
	// out[5..7] < 2**28
}
// p224Reduce reduces the coefficients of a, in place, to smaller bounds.
//
// On entry: a[i] < 2**31 + 2**30
// On exit: a[i] < 2**29
func p224Reduce(a *p224FieldElement) {
	// Carry the bits above 28 of each limb into the next limb.
	for i := 0; i < 7; i++ {
		a[i+1] += a[i] >> 28
		a[i] &= bottom28Bits
	}
	// Fold the overflow of the top limb back in: top * 2²²⁴ = top * 2⁹⁶ - top.
	top := a[7] >> 28
	a[7] &= bottom28Bits

	a[0] -= top
	a[3] += top << 12

	// We may have just made a[0] negative but if we did top must have been not
	// zero, so a[3] is not zero, so we can carry down to a[0]. (Note that we
	// don't actually check if a[0] went negative, like in p224Contract, nor we
	// try to stop the carry at a[1] or a[2], because here we can afford to go
	// above 28 bits, so instead we carry all the way down from a[3].)
	mask := maskIsNotZero(top)
	a[3] -= 1 & mask
	a[2] += mask & (1<<28 - 1)
	a[1] += mask & (1<<28 - 1)
	a[0] += mask & (1 << 28)
}
// p224Invert calculates *out = in**-1 by using Fermat's little theorem and
// computing in**(p-2) = in**(2**224 - 2**96 - 1).
//
// The trailing comment on each step gives the exponent of in held by the
// destination after that step. out is written only by the final
// multiplication, after the last read of in, so out may alias in.
func p224Invert(out, in *p224FieldElement) {
	var f1, f2, f3, f4 p224FieldElement
	// c is scratch space handed to every square and multiply.
	var c p224LargeFieldElement

	p224Square(&f1, in, &c) // 2
	p224Mul(&f1, &f1, in, &c) // 2**2 - 1
	p224Square(&f1, &f1, &c) // 2**3 - 2
	p224Mul(&f1, &f1, in, &c) // 2**3 - 1
	p224Square(&f2, &f1, &c) // 2**4 - 2
	p224Square(&f2, &f2, &c) // 2**5 - 4
	p224Square(&f2, &f2, &c) // 2**6 - 8
	p224Mul(&f1, &f1, &f2, &c) // 2**6 - 1
	p224Square(&f2, &f1, &c) // 2**7 - 2
	for i := 0; i < 5; i++ { // 2**12 - 2**6
		p224Square(&f2, &f2, &c)
	}
	p224Mul(&f2, &f2, &f1, &c) // 2**12 - 1
	p224Square(&f3, &f2, &c) // 2**13 - 2
	for i := 0; i < 11; i++ { // 2**24 - 2**12
		p224Square(&f3, &f3, &c)
	}
	p224Mul(&f2, &f3, &f2, &c) // 2**24 - 1
	p224Square(&f3, &f2, &c) // 2**25 - 2
	for i := 0; i < 23; i++ { // 2**48 - 2**24
		p224Square(&f3, &f3, &c)
	}
	p224Mul(&f3, &f3, &f2, &c) // 2**48 - 1
	p224Square(&f4, &f3, &c) // 2**49 - 2
	for i := 0; i < 47; i++ { // 2**96 - 2**48
		p224Square(&f4, &f4, &c)
	}
	p224Mul(&f3, &f3, &f4, &c) // 2**96 - 1
	p224Square(&f4, &f3, &c) // 2**97 - 2
	for i := 0; i < 23; i++ { // 2**120 - 2**24
		p224Square(&f4, &f4, &c)
	}
	p224Mul(&f2, &f4, &f2, &c) // 2**120 - 1
	for i := 0; i < 6; i++ { // 2**126 - 2**6
		p224Square(&f2, &f2, &c)
	}
	p224Mul(&f1, &f1, &f2, &c) // 2**126 - 1
	p224Square(&f1, &f1, &c) // 2**127 - 2
	p224Mul(&f1, &f1, in, &c) // 2**127 - 1
	for i := 0; i < 97; i++ { // 2**224 - 2**97
		p224Square(&f1, &f1, &c)
	}
	p224Mul(out, &f1, &f3, &c) // 2**224 - 2**96 - 1
}
// p224Contract converts a FieldElement to its unique, minimal form.
//
// On entry, in[i] < 2**29
// On exit, out[i] < 2**28 and out < p
//
// Every conditional step is applied through an all-ones/all-zeros mask
// (maskIsNegative, maskIsNotZero) rather than a branch, so the instruction
// sequence does not depend on the limb values.
func p224Contract(out, in *p224FieldElement) {
copy(out[:], in[:])
// First, carry the bits above 28 to the higher limb.
for i := 0; i < 7; i++ {
out[i+1] += out[i] >> 28
out[i] &= bottom28Bits
}
top := out[7] >> 28
out[7] &= bottom28Bits
// Use the reduction identity to carry the overflow.
//
// a + top * 2²²⁴ = a + top * 2⁹⁶ - top
//
// 2⁹⁶ is bit 12 of limb 3 (3*28 = 84), hence the shift by 12.
out[0] -= top
out[3] += top << 12
// We may just have made out[0] negative. So we carry down. If we made
// out[0] negative then we know that out[3] is sufficiently positive
// because we just added to it.
for i := 0; i < 3; i++ {
// "Negative" means the uint32 wrapped around, i.e. its top bit is set.
mask := maskIsNegative(out[i])
out[i] += (1 << 28) & mask
out[i+1] -= 1 & mask
}
// We might have pushed out[3] over 2**28 so we perform another, partial,
// carry chain; carry the overflow according to the reduction identity; and
// carry down in case we made out[0] negative.
for i := 3; i < 7; i++ {
out[i+1] += out[i] >> 28
out[i] &= bottom28Bits
}
top = out[7] >> 28
out[7] &= bottom28Bits
out[0] -= top
out[3] += top << 12
for i := 0; i < 3; i++ {
mask := maskIsNegative(out[i])
out[i] += (1 << 28) & mask
out[i+1] -= 1 & mask
}
// There are two cases to consider for out[3]:
// 1) The first time that we eliminated top, we didn't push out[3] over
// 2**28. In this case, the partial carry chain didn't change any values
// and top is now zero.
// 2) We did push out[3] over 2**28 the first time that we eliminated top.
// The first value of top was in [0..2], therefore, after overflowing
// and being reduced by the second carry chain, out[3] <= 2<<12 - 1.
// In both cases, out[3] cannot have overflowed when we eliminated top for
// the second time.
// Now we need to subtract p if the value is >= p. To check, we subtract p
// with a borrow chain and look at the final borrow bit.
var b uint32
for i := 0; i < len(out); i++ {
// Only the borrow is kept; the difference itself is discarded.
_, b = bits.Sub32(out[i], p224P[i], b)
}
// b == 0 means no final borrow, i.e. out >= p, so mask is all ones exactly
// when p has to be subtracted. The constants below are the limbs
// subtracted in that case (limbs 1 and 2 are left untouched).
mask := ^maskIsNotZero(b)
out[0] -= 1 & mask
out[3] -= 0xffff000 & mask
out[4] -= 0xfffffff & mask
out[5] -= 0xfffffff & mask
out[6] -= 0xfffffff & mask
out[7] -= 0xfffffff & mask
// Do one final carry down, in case we made out[0] negative. One of
// out[0..3] needs to be positive and able to absorb the -1 or the value
// would have been < p, and the subtraction wouldn't have happened.
for i := 0; i < 3; i++ {
mask := maskIsNegative(out[i])
out[i] += (1 << 28) & mask
out[i+1] -= 1 & mask
}
}
// maskIsNegative returns 0xffffffff if the most significant bit of v is set
// (v is negative when read as an int32), and 0 otherwise. It is branch-free.
func maskIsNegative(v uint32) uint32 {
	signBit := v >> 31 // 0 or 1
	// Unsigned negation wraps: -0 == 0 and -1 == 0xffffffff.
	return -signBit
}
// maskIsNotZero returns 0xffffffff if v is not zero, and 0 otherwise.
//
// It ORs every bit of v down into bit 0 with a fixed sequence of shifts
// (16, 8, 4, 2, 1) and then smears bit 0 across the whole word, so no branch
// depends on the value of v.
func maskIsNotZero(v uint32) uint32 {
	for shift := uint(16); shift != 0; shift >>= 1 {
		v |= v >> shift
	}
	// Move bit 0 to the sign position; the arithmetic shift replicates it.
	return uint32(int32(v<<31) >> 31)
}
// Group element functions.
//
// These functions deal with group elements. The group is an elliptic curve
// group with a = -3 defined in FIPS 186-3, section D.2.2.
// p224AddJacobian computes (x3, y3, z3) = (x1, y1, z1) + (x2, y2, z2) in
// Jacobian coordinates.
//
// Special cases handled here:
//   - if both inputs are the same point (and neither has z == 0), the general
//     formulas do not apply and the work is delegated to p224DoubleJacobian;
//   - if one input has z == 0 (the point at infinity), the other input is
//     copied to the output by the p224CopyConditional calls at the end.
func p224AddJacobian(x3, y3, z3, x1, y1, z1, x2, y2, z2 *p224FieldElement) {
// See https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl
var z1z1, z2z2, u1, u2, s1, s2, h, i, j, r, v p224FieldElement
var c p224LargeFieldElement
z1IsZero := p224IsZero(z1)
z2IsZero := p224IsZero(z2)
// Z1Z1 = Z1²
p224Square(&z1z1, z1, &c)
// Z2Z2 = Z2²
p224Square(&z2z2, z2, &c)
// U1 = X1*Z2Z2
p224Mul(&u1, x1, &z2z2, &c)
// U2 = X2*Z1Z1
p224Mul(&u2, x2, &z1z1, &c)
// S1 = Y1*Z2*Z2Z2
p224Mul(&s1, z2, &z2z2, &c)
p224Mul(&s1, y1, &s1, &c)
// S2 = Y2*Z1*Z1Z1
p224Mul(&s2, z1, &z1z1, &c)
p224Mul(&s2, y2, &s2, &c)
// H = U2-U1
p224Sub(&h, &u2, &u1)
p224Reduce(&h)
xEqual := p224IsZero(&h)
// I = (2*H)²
// Note: the loop index j shadows the field element j inside this loop only.
for j := 0; j < 8; j++ {
i[j] = h[j] << 1
}
p224Reduce(&i)
p224Square(&i, &i, &c)
// J = H*I
p224Mul(&j, &h, &i, &c)
// r = 2*(S2-S1)
// (r holds S2-S1 here; it is doubled after the equality check below.)
p224Sub(&r, &s2, &s1)
p224Reduce(&r)
yEqual := p224IsZero(&r)
// Same x and same y with both points finite: the inputs are equal, so
// double instead of add.
if xEqual == 1 && yEqual == 1 && z1IsZero == 0 && z2IsZero == 0 {
p224DoubleJacobian(x3, y3, z3, x1, y1, z1)
return
}
// From here on the loop index i shadows the field element i inside each
// limb loop.
for i := 0; i < 8; i++ {
r[i] <<= 1
}
p224Reduce(&r)
// V = U1*I
p224Mul(&v, &u1, &i, &c)
// Z3 = ((Z1+Z2)²-Z1Z1-Z2Z2)*H
// z1z1 is reused for Z1Z1+Z2Z2 and z2z2 for (Z1+Z2)².
p224Add(&z1z1, &z1z1, &z2z2)
p224Add(&z2z2, z1, z2)
p224Reduce(&z2z2)
p224Square(&z2z2, &z2z2, &c)
p224Sub(z3, &z2z2, &z1z1)
p224Reduce(z3)
p224Mul(z3, z3, &h, &c)
// X3 = r²-J-2*V
// z1z1 is reused again, now for J+2*V.
for i := 0; i < 8; i++ {
z1z1[i] = v[i] << 1
}
p224Add(&z1z1, &j, &z1z1)
p224Reduce(&z1z1)
p224Square(x3, &r, &c)
p224Sub(x3, x3, &z1z1)
p224Reduce(x3)
// Y3 = r*(V-X3)-2*S1*J
for i := 0; i < 8; i++ {
s1[i] <<= 1
}
p224Mul(&s1, &s1, &j, &c)
p224Sub(&z1z1, &v, x3)
p224Reduce(&z1z1)
p224Mul(&z1z1, &z1z1, &r, &c)
p224Sub(y3, &z1z1, &s1)
p224Reduce(y3)
// If the first input was the point at infinity the result is the second
// input, and vice versa. The selection is done with masks, not branches.
p224CopyConditional(x3, x2, z1IsZero)
p224CopyConditional(x3, x1, z2IsZero)
p224CopyConditional(y3, y2, z1IsZero)
p224CopyConditional(y3, y1, z2IsZero)
p224CopyConditional(z3, z2, z1IsZero)
p224CopyConditional(z3, z1, z2IsZero)
}
// p224DoubleJacobian computes (x3, y3, z3) = 2*(x1, y1, z1) in Jacobian
// coordinates.
//
// The intermediate names (delta, gamma, beta, alpha) and the
// alpha = 3*(X1-delta)*(X1+delta) shortcut are those of the usual a = -3
// doubling formulas (NOTE(review): looks like EFD "dbl-2001-b" — confirm).
// delta and beta are overwritten with scaled values part-way through, so the
// statement order matters.
func p224DoubleJacobian(x3, y3, z3, x1, y1, z1 *p224FieldElement) {
var delta, gamma, beta, alpha, t p224FieldElement
var c p224LargeFieldElement
// delta = Z1², gamma = Y1², beta = X1*gamma
p224Square(&delta, z1, &c)
p224Square(&gamma, y1, &c)
p224Mul(&beta, x1, &gamma, &c)
// alpha = 3*(X1-delta)*(X1+delta)
p224Add(&t, x1, &delta)
// t += 2*t, i.e. t = 3*(X1+delta), limb by limb.
for i := 0; i < 8; i++ {
t[i] += t[i] << 1
}
p224Reduce(&t)
p224Sub(&alpha, x1, &delta)
p224Reduce(&alpha)
p224Mul(&alpha, &alpha, &t, &c)
// Z3 = (Y1+Z1)²-gamma-delta
p224Add(z3, y1, z1)
p224Reduce(z3)
p224Square(z3, z3, &c)
p224Sub(z3, z3, &gamma)
p224Reduce(z3)
p224Sub(z3, z3, &delta)
p224Reduce(z3)
// X3 = alpha²-8*beta
// delta is no longer needed and is reused to hold 8*beta.
for i := 0; i < 8; i++ {
delta[i] = beta[i] << 3
}
p224Reduce(&delta)
p224Square(x3, &alpha, &c)
p224Sub(x3, x3, &delta)
p224Reduce(x3)
// Y3 = alpha*(4*beta-X3)-8*gamma²
for i := 0; i < 8; i++ {
beta[i] <<= 2
}
p224Sub(&beta, &beta, x3)
p224Reduce(&beta)
p224Square(&gamma, &gamma, &c)
for i := 0; i < 8; i++ {
gamma[i] <<= 3
}
p224Reduce(&gamma)
p224Mul(y3, &alpha, &beta, &c)
p224Sub(y3, y3, &gamma)
p224Reduce(y3)
}
// p224CopyConditional overwrites *out with *in when control is non-zero and
// leaves *out untouched when control is zero. The choice is applied with a
// bit mask on every limb, so there is no branch on control.
func p224CopyConditional(out, in *p224FieldElement, control uint32) {
	sel := maskIsNotZero(control)
	for limb := range out {
		// diff is out^in when selected and 0 otherwise; XOR-ing it into out
		// therefore yields either in or the unchanged out.
		diff := (out[limb] ^ in[limb]) & sel
		out[limb] ^= diff
	}
}
// p224ScalarMult sets (outX, outY, outZ) to scalar*(inX, inY, inZ) in Jacobian
// coordinates, reading scalar as a big-endian byte string.
//
// It is a most-significant-bit-first double-and-add: the accumulator starts
// as all zeros, is doubled for every bit, and the sum accumulator+input is
// always computed and then selected with p224CopyConditional depending on the
// bit, rather than being computed only when the bit is set.
func p224ScalarMult(outX, outY, outZ, inX, inY, inZ *p224FieldElement, scalar []byte) {
	var sumX, sumY, sumZ p224FieldElement

	*outX, *outY, *outZ = p224FieldElement{}, p224FieldElement{}, p224FieldElement{}

	for _, b := range scalar {
		for shift := 7; shift >= 0; shift-- {
			p224DoubleJacobian(outX, outY, outZ, outX, outY, outZ)
			selected := uint32(b>>uint(shift)) & 1
			p224AddJacobian(&sumX, &sumY, &sumZ, inX, inY, inZ, outX, outY, outZ)
			p224CopyConditional(outX, &sumX, selected)
			p224CopyConditional(outY, &sumY, selected)
			p224CopyConditional(outZ, &sumZ, selected)
		}
	}
}
// p224ToAffine converts from Jacobian to affine form.
func p224ToAffine(x, y, z *p224FieldElement) (*big.Int, *big.Int) {
var zinv, zinvsq, outx, outy p224FieldElement
var tmp p224LargeFieldElement
if isPointAtInfinity := p224IsZero(z); isPointAtInfinity == 1 {
func p224PointToAffine(p *nistec.P224Point) (x, y *big.Int) {
out := p.Bytes()
if len(out) == 1 && out[0] == 0 {
// This is the correct encoding of the point at infinity, which
// Unmarshal does not support. See Issue 37294.
return new(big.Int), new(big.Int)
}
p224Invert(&zinv, z)
p224Square(&zinvsq, &zinv, &tmp)
p224Mul(x, x, &zinvsq, &tmp)
p224Mul(&zinvsq, &zinvsq, &zinv, &tmp)
p224Mul(y, y, &zinvsq, &tmp)
p224Contract(&outx, x)
p224Contract(&outy, y)
return p224ToBig(&outx), p224ToBig(&outy)
}
// get28BitsFromEnd returns the least-significant 28 bits from buf>>shift,
// where buf is interpreted as a big-endian number. shift must be at most
// 4 bits higher than a multiple of 8.
func get28BitsFromEnd(buf []byte, shift int) uint32 {
buf = buf[:len(buf)-shift/8]
shift = shift % 8
if shift > 4 {
panic("misuse of get28BitsFromEnd")
x, y = Unmarshal(P224(), out)
if x == nil {
panic("crypto/elliptic: internal error: Unmarshal rejected a valid point encoding")
}
ret := binary.BigEndian.Uint32(buf[len(buf)-4:])
ret >>= shift
ret &= bottom28Bits
return ret
return x, y
}
// p224FromBig sets *out = *in.
func p224FromBig(out *p224FieldElement, in *big.Int) {
bytes := in.FillBytes(make([]byte, 224/8))
for i := range out {
out[i] = get28BitsFromEnd(bytes, 28*i)
// p224RandomPoint returns a random point on the curve. It's used when Add,
// Double, or ScalarMult are fed a point not on the curve, which is undefined
// behavior. Originally, we used to do the math on it anyway (which allows
// invalid curve attacks) and relied on the caller and Unmarshal to avoid this
// happening in the first place. Now, we just can't construct a nistec.P224Point
// for an invalid pair of coordinates, because that API is safer. If we panic,
// we risk introducing a DoS. If we return nil, we risk a panic. If we return
// the input, ecdsa.Verify might fail open. The safest course seems to be to
// return a valid, random point, which hopefully won't help the attacker.
func p224RandomPoint() (x, y *big.Int) {
_, x, y, err := GenerateKey(P224(), rand.Reader)
if err != nil {
panic("crypto/elliptic: failed to generate random point")
}
return x, y
}
// p224ToBig returns in as a big.Int.
func p224ToBig(in *p224FieldElement) *big.Int {
var buf [28]byte
buf[27] = byte(in[0])
buf[26] = byte(in[0] >> 8)
buf[25] = byte(in[0] >> 16)
buf[24] = byte(((in[0] >> 24) & 0x0f) | (in[1]<<4)&0xf0)
buf[23] = byte(in[1] >> 4)
buf[22] = byte(in[1] >> 12)
buf[21] = byte(in[1] >> 20)
buf[20] = byte(in[2])
buf[19] = byte(in[2] >> 8)
buf[18] = byte(in[2] >> 16)
buf[17] = byte(((in[2] >> 24) & 0x0f) | (in[3]<<4)&0xf0)
buf[16] = byte(in[3] >> 4)
buf[15] = byte(in[3] >> 12)
buf[14] = byte(in[3] >> 20)
buf[13] = byte(in[4])
buf[12] = byte(in[4] >> 8)
buf[11] = byte(in[4] >> 16)
buf[10] = byte(((in[4] >> 24) & 0x0f) | (in[5]<<4)&0xf0)
buf[9] = byte(in[5] >> 4)
buf[8] = byte(in[5] >> 12)
buf[7] = byte(in[5] >> 20)
buf[6] = byte(in[6])
buf[5] = byte(in[6] >> 8)
buf[4] = byte(in[6] >> 16)
buf[3] = byte(((in[6] >> 24) & 0x0f) | (in[7]<<4)&0xf0)
buf[2] = byte(in[7] >> 4)
buf[1] = byte(in[7] >> 12)
buf[0] = byte(in[7] >> 20)
return new(big.Int).SetBytes(buf[:])
func (p224Curve) Add(x1, y1, x2, y2 *big.Int) (*big.Int, *big.Int) {
p1, ok := p224PointFromAffine(x1, y1)
if !ok {
return p224RandomPoint()
}
p2, ok := p224PointFromAffine(x2, y2)
if !ok {
return p224RandomPoint()
}
return p224PointToAffine(p1.Add(p1, p2))
}
// Double returns 2*(x1, y1). If the input cannot be converted to a curve
// point, a random valid point is returned instead.
func (p224Curve) Double(x1, y1 *big.Int) (*big.Int, *big.Int) {
	if pt, valid := p224PointFromAffine(x1, y1); valid {
		return p224PointToAffine(pt.Double(pt))
	}
	return p224RandomPoint()
}
// ScalarMult returns scalar*(Bx, By). If the input cannot be converted to a
// curve point, a random valid point is returned instead.
func (p224Curve) ScalarMult(Bx, By *big.Int, scalar []byte) (*big.Int, *big.Int) {
	if base, valid := p224PointFromAffine(Bx, By); valid {
		return p224PointToAffine(base.ScalarMult(base, scalar))
	}
	return p224RandomPoint()
}
// ScalarBaseMult returns scalar*G, where G is the generator obtained from
// nistec.NewP224Generator.
func (p224Curve) ScalarBaseMult(scalar []byte) (*big.Int, *big.Int) {
	generator := nistec.NewP224Generator()
	product := generator.ScalarMult(generator, scalar)
	return p224PointToAffine(product)
}

View File

@ -8,313 +8,9 @@ import (
"encoding/hex"
"fmt"
"math/big"
"math/bits"
"math/rand"
"reflect"
"testing"
"testing/quick"
)
// toFromBigTests lists hexadecimal values that TestP224ToFromBig converts to a
// p224FieldElement with p224FromBig and back with p224ToBig, expecting to get
// the same number again.
var toFromBigTests = []string{
"0",
"1",
"23",
"b70e0cb46bb4bf7f321390b94a03c1d356c01122343280d6105c1d21",
"706a46d476dcb76798e6046d89474788d164c18032d268fd10704fa6",
}
// p224AlternativeToBig converts in to a big.Int by treating the limbs as
// base-2**28 digits (most significant limb last in the array) and reducing
// the result modulo the P-224 prime. It does not require the limbs to be
// below 2**28, which makes it usable as a reference for unreduced values.
func p224AlternativeToBig(in *p224FieldElement) *big.Int {
	acc := new(big.Int)
	limb := new(big.Int)
	for idx := len(in); idx > 0; idx-- {
		limb.SetInt64(int64(in[idx-1]))
		acc.Lsh(acc, 28).Add(acc, limb)
	}
	return acc.Mod(acc, P224().Params().P)
}
// TestP224ToFromBig checks that every value in toFromBigTests survives a
// p224FromBig/p224ToBig round trip, and that p224AlternativeToBig agrees.
func TestP224ToFromBig(t *testing.T) {
	for idx, hexValue := range toFromBigTests {
		want, _ := new(big.Int).SetString(hexValue, 16)

		var fe p224FieldElement
		p224FromBig(&fe, want)

		if got := p224ToBig(&fe); want.Cmp(got) != 0 {
			t.Errorf("#%d: %x != %x", idx, want, got)
		}
		if got := p224AlternativeToBig(&fe); want.Cmp(got) != 0 {
			t.Errorf("#%d: %x != %x (alternative)", idx, want, got)
		}
	}
}
// quickCheckConfig32 will make each quickcheck test run (32 * -quickchecks)
// times. The default value of -quickchecks is 100.
//
// It is the configuration passed to quick.Check by the field-arithmetic tests
// in this file.
var quickCheckConfig32 = &quick.Config{MaxCountScale: 32}
// weirdLimbs can be combined to generate a range of edge-case field elements.
var weirdLimbs = [...]uint32{
0, 1, (1 << 29) - 1,
(1 << 12), (1 << 12) - 1,
(1 << 28), (1 << 28) - 1,
}
func generateLimb(rand *rand.Rand) uint32 {
const bottom29Bits = 0x1fffffff
n := rand.Intn(len(weirdLimbs) + 3)
switch n {
case len(weirdLimbs):
// Random value.
return uint32(rand.Int31n(1 << 29))
case len(weirdLimbs) + 1:
// Sum of two values.
k := generateLimb(rand) + generateLimb(rand)
return k & bottom29Bits
case len(weirdLimbs) + 2:
// Difference of two values.
k := generateLimb(rand) - generateLimb(rand)
return k & bottom29Bits
default:
return weirdLimbs[n]
}
}
// Generate implements quick.Generator: it returns a p224FieldElement whose
// limbs are each produced by generateLimb, in order from limb 0 upwards.
func (p224FieldElement) Generate(rand *rand.Rand, size int) reflect.Value {
	var fe p224FieldElement
	for idx := range fe {
		fe[idx] = generateLimb(rand)
	}
	return reflect.ValueOf(fe)
}
// isInBounds reports whether every limb of x fits in 29 bits.
func isInBounds(x *p224FieldElement) bool {
	for idx := range x {
		if bits.Len32(x[idx]) > 29 {
			return false
		}
	}
	return true
}
// TestP224Mul checks p224Mul against math/big multiplication modulo p, and
// that its output limbs stay within 29 bits, first on one fixed input and then
// on quick-generated inputs.
func TestP224Mul(t *testing.T) {
	mulMatchesBigInt := func(a, b, out p224FieldElement) bool {
		var scratch p224LargeFieldElement
		p224Mul(&out, &a, &b, &scratch)

		want := new(big.Int).Mul(p224AlternativeToBig(&a), p224AlternativeToBig(&b))
		want.Mod(want, P224().Params().P)
		have := p224AlternativeToBig(&out)

		if want.Cmp(have) == 0 && isInBounds(&out) {
			return true
		}
		t.Logf("a = %x", a)
		t.Logf("b = %x", b)
		t.Logf("p224Mul(a, b) = %x = %v", out, have)
		t.Logf("a * b = %v", want)
		return false
	}

	// Fixed case: multiply a specific element by one.
	fixedA := p224FieldElement{0xfffffff, 0xfffffff, 0xf00ffff, 0x20f, 0x0, 0x0, 0x0, 0x0}
	one := p224FieldElement{1, 0, 0, 0, 0, 0, 0, 0}
	if ok := mulMatchesBigInt(fixedA, one, p224FieldElement{}); !ok {
		t.Fail()
	}

	err := quick.Check(mulMatchesBigInt, quickCheckConfig32)
	if err != nil {
		t.Error(err)
	}
}
// TestP224Square checks p224Square against math/big squaring modulo p on
// quick-generated inputs, and that its output limbs stay within 29 bits.
func TestP224Square(t *testing.T) {
	squareMatchesBigInt := func(a, out p224FieldElement) bool {
		var scratch p224LargeFieldElement
		p224Square(&out, &a, &scratch)

		want := p224AlternativeToBig(&a)
		want.Mul(want, want)
		want.Mod(want, P224().Params().P)
		have := p224AlternativeToBig(&out)

		if want.Cmp(have) == 0 && isInBounds(&out) {
			return true
		}
		t.Logf("a = %x", a)
		t.Logf("p224Square(a, b) = %x = %v", out, have)
		t.Logf("a * a = %v", want)
		return false
	}

	err := quick.Check(squareMatchesBigInt, quickCheckConfig32)
	if err != nil {
		t.Error(err)
	}
}
// TestP224Add checks p224Add against math/big addition modulo p on
// quick-generated inputs.
func TestP224Add(t *testing.T) {
	addMatchesBigInt := func(a, b, out p224FieldElement) bool {
		p224Add(&out, &a, &b)

		want := new(big.Int).Add(p224AlternativeToBig(&a), p224AlternativeToBig(&b))
		want.Mod(want, P224().Params().P)
		have := p224AlternativeToBig(&out)

		if want.Cmp(have) == 0 {
			return true
		}
		t.Logf("a = %x", a)
		t.Logf("b = %x", b)
		t.Logf("p224Add(a, b) = %x = %v", out, have)
		t.Logf("a + b = %v", want)
		return false
	}

	err := quick.Check(addMatchesBigInt, quickCheckConfig32)
	if err != nil {
		t.Error(err)
	}
}
// TestP224Reduce checks on quick-generated inputs that p224Reduce keeps the
// value unchanged modulo p and leaves every limb within 29 bits.
func TestP224Reduce(t *testing.T) {
	reduceMatchesBigInt := func(a p224FieldElement) bool {
		reduced := a
		// TODO: generate higher values for functions like p224Reduce that are
		// expected to work with higher input bounds.
		p224Reduce(&reduced)

		want := p224AlternativeToBig(&a)
		have := p224AlternativeToBig(&reduced)

		if want.Cmp(have) == 0 && isInBounds(&reduced) {
			return true
		}
		t.Logf("a = %x = %v", a, want)
		t.Logf("p224Reduce(a) = %x = %v", reduced, have)
		return false
	}

	err := quick.Check(reduceMatchesBigInt, quickCheckConfig32)
	if err != nil {
		t.Error(err)
	}
}
// TestP224Contract checks that p224Contract preserves the value modulo p and
// produces a result strictly below p, on a few hand-picked inputs around p and
// on quick-generated inputs.
func TestP224Contract(t *testing.T) {
	contractMatchesBigInt := func(a, out p224FieldElement) bool {
		p224Contract(&out, &a)

		want := p224AlternativeToBig(&a)
		have := p224AlternativeToBig(&out)
		if want.Cmp(have) != 0 {
			t.Logf("a = %x = %v", a, want)
			t.Logf("p224Contract(a) = %x = %v", out, have)
			return false
		}

		// Check that out < P: compare limbs from the most significant down;
		// the first limb that differs decides.
		for k := len(p224P) - 1; k >= 0; k-- {
			switch {
			case out[k] > p224P[k]:
				t.Logf("p224Contract(a) = %x", out)
				return false
			case out[k] < p224P[k]:
				return true
			}
		}
		// Every limb matched, so out == P, which is not minimal.
		t.Logf("p224Contract(a) = %x", out)
		return false
	}

	edgeCases := []struct {
		in      p224FieldElement
		failure string
	}{
		{p224P, "p224Contract(p) is broken"},
		{
			p224FieldElement{0, 0, 0, 0xffff000, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff},
			"p224Contract(p - 1) is broken",
		},
		{
			// Input above p, but lowest limb zero.
			p224FieldElement{0, 1, 0, 0xffff000, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff},
			"p224Contract(p + 2²⁸) is broken",
		},
		{
			// Input above p, but lowest three limbs zero.
			p224FieldElement{0, 0, 0, 0xffff001, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff},
			"p224Contract(p + 2⁸⁴) is broken",
		},
	}
	for _, tc := range edgeCases {
		if !contractMatchesBigInt(tc.in, p224FieldElement{}) {
			t.Error(tc.failure)
		}
	}

	err := quick.Check(contractMatchesBigInt, quickCheckConfig32)
	if err != nil {
		t.Error(err)
	}
}
func TestP224IsZero(t *testing.T) {
if got := p224IsZero(&p224FieldElement{}); got != 1 {
t.Errorf("p224IsZero(0) = %d, expected 1", got)
}
if got := p224IsZero(&p224P); got != 1 {
t.Errorf("p224IsZero(p) = %d, expected 1", got)
}
if got := p224IsZero(&p224FieldElement{1}); got != 0 {
t.Errorf("p224IsZero(1) = %d, expected 0", got)
}
isZeroMatchesBigInt := func(a p224FieldElement) bool {
isZero := p224IsZero(&a)
big := p224AlternativeToBig(&a)
if big.Sign() == 0 && isZero != 1 {
return false
}
if big.Sign() != 0 && isZero != 0 {
return false
}
return true
}
if err := quick.Check(isZeroMatchesBigInt, quickCheckConfig32); err != nil {
t.Error(err)
}
}
// TestP224Invert checks that p224Invert maps 0 and p to zero, maps 1 to 1, and
// that a fixed element multiplied by its inverse contracts to 1.
func TestP224Invert(t *testing.T) {
	var inv p224FieldElement
	one := p224FieldElement{1}

	zero := p224FieldElement{}
	p224Invert(&inv, &zero)
	if p224IsZero(&inv) != 1 {
		t.Errorf("p224Invert(0) = %x, expected 0", inv)
	}

	p224Invert(&inv, &p224P)
	if p224IsZero(&inv) != 1 {
		t.Errorf("p224Invert(p) = %x, expected 0", inv)
	}

	unit := p224FieldElement{1}
	p224Invert(&inv, &unit)
	p224Contract(&inv, &inv)
	if inv != one {
		t.Errorf("p224Invert(1) = %x, expected 1", inv)
	}

	var scratch p224LargeFieldElement
	a := p224FieldElement{1, 2, 3, 4, 5, 6, 7, 8}
	p224Invert(&inv, &a)
	p224Mul(&inv, &inv, &a, &scratch)
	p224Contract(&inv, &inv)
	if inv != one {
		t.Errorf("p224Invert(a) * a = %x, expected 1", inv)
	}
}
type baseMultTest struct {
k string
x, y string
@ -602,7 +298,7 @@ func TestP224BaseMult(t *testing.T) {
func TestP224GenericBaseMult(t *testing.T) {
// We use the P224 CurveParams directly in order to test the generic implementation.
p224 := P224().Params()
p224 := genericParamsForCurve(P224())
for i, e := range p224BaseMultTests {
k, ok := new(big.Int).SetString(e.k, 10)
if !ok {

View File

@ -209,6 +209,8 @@ var p256Precomputed = [p256Limbs * 2 * 15 * 2]uint32{
// Field element operations:
const bottom28Bits = 0xfffffff
// nonZeroToAllOnes returns:
// 0xffffffff for 0 < x <= 2**31
// 0 for x == 0 or x > 2**31.
@ -269,6 +271,7 @@ const (
two30m2 = 1<<30 - 1<<2
two30p13m2 = 1<<30 + 1<<13 - 1<<2
two31m2 = 1<<31 - 1<<2
two31m3 = 1<<31 - 1<<3
two31p24m2 = 1<<31 + 1<<24 - 1<<2
two30m27m2 = 1<<30 - 1<<27 - 1<<2
)

View File

@ -34,7 +34,7 @@ var p256MultTests = []scalarMultTest{
func TestP256BaseMult(t *testing.T) {
p256 := P256()
p256Generic := p256.Params()
p256Generic := genericParamsForCurve(p256)
scalars := make([]*big.Int, 0, len(p224BaseMultTests)+1)
for _, e := range p224BaseMultTests {
@ -60,23 +60,6 @@ func TestP256BaseMult(t *testing.T) {
func TestP256Mult(t *testing.T) {
p256 := P256()
p256Generic := p256.Params()
for i, e := range p224BaseMultTests {
x, _ := new(big.Int).SetString(e.x, 16)
y, _ := new(big.Int).SetString(e.y, 16)
k, _ := new(big.Int).SetString(e.k, 10)
xx, yy := p256.ScalarMult(x, y, k.Bytes())
xx2, yy2 := p256Generic.ScalarMult(x, y, k.Bytes())
if xx.Cmp(xx2) != 0 || yy.Cmp(yy2) != 0 {
t.Errorf("#%d: got (%x, %x), want (%x, %x)", i, xx, yy, xx2, yy2)
}
if testing.Short() && i > 5 {
break
}
}
for i, e := range p256MultTests {
x, _ := new(big.Int).SetString(e.xIn, 16)
y, _ := new(big.Int).SetString(e.yIn, 16)

141
src/crypto/elliptic/p384.go Normal file
View File

@ -0,0 +1,141 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package elliptic
import (
"crypto/elliptic/internal/nistec"
"crypto/rand"
"math/big"
)
// p384Curve is a Curve implementation based on nistec.P384Point.
//
// It's a wrapper that exposes the big.Int-based Curve interface and encodes the
// legacy idiosyncrasies it requires, such as invalid and infinity point
// handling.
//
// To interact with the nistec package, points are encoded into and decoded from
// properly formatted byte slices. All big.Int use is limited to this package.
// Encoding and decoding is 1/1000th of the runtime of a scalar multiplication,
// so the overhead is acceptable.
type p384Curve struct {
// params holds the curve's domain parameters; it is set by initP384.
params *CurveParams
}
// p384 is the package-level P-384 curve value whose params initP384 fills in.
var p384 p384Curve
// Compile-time check that p384Curve implements Curve.
var _ Curve = p384
// initP384 sets p384.params to the P-384 domain parameters. P and N are
// written in decimal, B, Gx and Gy in hexadecimal; each constant is split
// across two string literals only to keep the lines short.
func initP384() {
p384.params = &CurveParams{
Name: "P-384",
BitSize: 384,
// FIPS 186-4, section D.1.2.4
P: bigFromDecimal("394020061963944792122790401001436138050797392704654" +
"46667948293404245721771496870329047266088258938001861606973112319"),
N: bigFromDecimal("394020061963944792122790401001436138050797392704654" +
"46667946905279627659399113263569398956308152294913554433653942643"),
B: bigFromHex("b3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088" +
"f5013875ac656398d8a2ed19d2a85c8edd3ec2aef"),
Gx: bigFromHex("aa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741" +
"e082542a385502f25dbf55296c3a545e3872760ab7"),
Gy: bigFromHex("3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da31" +
"13b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f"),
}
}
// Params returns the CurveParams stored in the receiver (the pointer itself,
// not a copy).
func (curve p384Curve) Params() *CurveParams {
return curve.params
}
// IsOnCurve reports whether (x, y) can be converted to a P-384 point by
// p384PointFromAffine, with one exception: (0, 0) is always rejected.
func (curve p384Curve) IsOnCurve(x, y *big.Int) bool {
	// IsOnCurve is documented to reject (0, 0), the conventional point at
	// infinity, which however is accepted by p384PointFromAffine.
	isInfinity := x.Sign() == 0 && y.Sign() == 0
	if isInfinity {
		return false
	}
	if _, valid := p384PointFromAffine(x, y); !valid {
		return false
	}
	return true
}
// p384PointFromAffine converts the affine coordinates (x, y) to a
// nistec.P384Point. ok is false, and p is nil, if the coordinates are
// negative, too large to be field elements, or rejected by SetBytes.
func p384PointFromAffine(x, y *big.Int) (p *nistec.P384Point, ok bool) {
	// (0, 0) is by convention the point at infinity, which can't be represented
	// in affine coordinates. Marshal incorrectly encodes it as an uncompressed
	// point, which SetBytes would correctly reject. See Issue 37294.
	if x.Sign() == 0 && y.Sign() == 0 {
		return nistec.NewP384Point(), true
	}
	// Reject negative coordinates explicitly. BitLen measures the absolute
	// value, so the size check below would let them through, and the byte
	// encoding (presumably sign-less, like big.Int.FillBytes) would then make
	// (-x, y) indistinguishable from (|x|, y).
	if x.Sign() < 0 || y.Sign() < 0 {
		return nil, false
	}
	if x.BitLen() > 384 || y.BitLen() > 384 {
		return nil, false
	}
	p, err := nistec.NewP384Point().SetBytes(Marshal(P384(), x, y))
	if err != nil {
		return nil, false
	}
	return p, true
}
// p384PointToAffine converts p to affine big.Int coordinates by decoding its
// byte encoding. The point at infinity is returned as (0, 0). It panics if
// Unmarshal rejects the encoding produced by p.Bytes.
func p384PointToAffine(p *nistec.P384Point) (x, y *big.Int) {
	encoded := p.Bytes()
	// This is the correct encoding of the point at infinity, which
	// Unmarshal does not support. See Issue 37294.
	isInfinity := len(encoded) == 1 && encoded[0] == 0
	if isInfinity {
		return new(big.Int), new(big.Int)
	}
	if x, y = Unmarshal(P384(), encoded); x == nil {
		panic("crypto/elliptic: internal error: Unmarshal rejected a valid point encoding")
	}
	return x, y
}
// p384RandomPoint returns the public coordinates of a freshly generated P-384
// key, i.e. a random point on the curve. It panics if key generation fails.
//
// It's used when Add, Double, or ScalarMult are fed a point not on the curve,
// which is undefined behavior. Originally, we used to do the math on it anyway
// (which allows invalid curve attacks) and relied on the caller and Unmarshal
// to avoid this happening in the first place. Now, we just can't construct a
// nistec.P384Point for an invalid pair of coordinates, because that API is
// safer. If we panic, we risk introducing a DoS. If we return nil, we risk a
// panic. If we return the input, ecdsa.Verify might fail open. The safest
// course seems to be to return a valid, random point, which hopefully won't
// help the attacker.
func p384RandomPoint() (x, y *big.Int) {
	var err error
	if _, x, y, err = GenerateKey(P384(), rand.Reader); err != nil {
		panic("crypto/elliptic: failed to generate random point")
	}
	return x, y
}
// Add returns (x1, y1) + (x2, y2). If either input cannot be converted to a
// curve point, a random valid point is returned instead. The second input is
// only converted once the first has been accepted.
func (p384Curve) Add(x1, y1, x2, y2 *big.Int) (*big.Int, *big.Int) {
	if lhs, lhsValid := p384PointFromAffine(x1, y1); lhsValid {
		if rhs, rhsValid := p384PointFromAffine(x2, y2); rhsValid {
			return p384PointToAffine(lhs.Add(lhs, rhs))
		}
	}
	return p384RandomPoint()
}
// Double returns 2*(x1, y1). If the input cannot be converted to a curve
// point, a random valid point is returned instead.
func (p384Curve) Double(x1, y1 *big.Int) (*big.Int, *big.Int) {
	if pt, valid := p384PointFromAffine(x1, y1); valid {
		return p384PointToAffine(pt.Double(pt))
	}
	return p384RandomPoint()
}
// ScalarMult returns scalar*(Bx, By). If the input cannot be converted to a
// curve point, a random valid point is returned instead.
func (p384Curve) ScalarMult(Bx, By *big.Int, scalar []byte) (*big.Int, *big.Int) {
	if base, valid := p384PointFromAffine(Bx, By); valid {
		return p384PointToAffine(base.ScalarMult(base, scalar))
	}
	return p384RandomPoint()
}
// ScalarBaseMult returns scalar*G, where G is the generator obtained from
// nistec.NewP384Generator.
func (p384Curve) ScalarBaseMult(scalar []byte) (*big.Int, *big.Int) {
	generator := nistec.NewP384Generator()
	product := generator.ScalarMult(generator, scalar)
	return p384PointToAffine(product)
}

View File

@ -112,7 +112,7 @@ func p521RandomPoint() (x, y *big.Int) {
return x, y
}
func (curve p521Curve) Add(x1, y1, x2, y2 *big.Int) (*big.Int, *big.Int) {
func (p521Curve) Add(x1, y1, x2, y2 *big.Int) (*big.Int, *big.Int) {
p1, ok := p521PointFromAffine(x1, y1)
if !ok {
return p521RandomPoint()
@ -124,7 +124,7 @@ func (curve p521Curve) Add(x1, y1, x2, y2 *big.Int) (*big.Int, *big.Int) {
return p521PointToAffine(p1.Add(p1, p2))
}
func (curve p521Curve) Double(x1, y1 *big.Int) (*big.Int, *big.Int) {
func (p521Curve) Double(x1, y1 *big.Int) (*big.Int, *big.Int) {
p, ok := p521PointFromAffine(x1, y1)
if !ok {
return p521RandomPoint()
@ -132,7 +132,7 @@ func (curve p521Curve) Double(x1, y1 *big.Int) (*big.Int, *big.Int) {
return p521PointToAffine(p.Double(p))
}
func (curve p521Curve) ScalarMult(Bx, By *big.Int, scalar []byte) (*big.Int, *big.Int) {
func (p521Curve) ScalarMult(Bx, By *big.Int, scalar []byte) (*big.Int, *big.Int) {
p, ok := p521PointFromAffine(Bx, By)
if !ok {
return p521RandomPoint()
@ -140,7 +140,7 @@ func (curve p521Curve) ScalarMult(Bx, By *big.Int, scalar []byte) (*big.Int, *bi
return p521PointToAffine(p.ScalarMult(p, scalar))
}
func (curve p521Curve) ScalarBaseMult(scalar []byte) (*big.Int, *big.Int) {
func (p521Curve) ScalarBaseMult(scalar []byte) (*big.Int, *big.Int) {
p := nistec.NewP521Generator()
return p521PointToAffine(p.ScalarMult(p, scalar))
}

View File

@ -18,6 +18,7 @@ import (
"crypto/x509"
"errors"
"fmt"
"internal/godebug"
"io"
"net"
"strings"
@ -682,11 +683,20 @@ type Config struct {
ClientSessionCache ClientSessionCache
// MinVersion contains the minimum TLS version that is acceptable.
// If zero, TLS 1.0 is currently taken as the minimum.
//
// By default, TLS 1.2 is currently used as the minimum when acting as a
// client, and TLS 1.0 when acting as a server. TLS 1.0 is the minimum
// supported by this package, both as a client and as a server.
//
// The client-side default can temporarily be reverted to TLS 1.0 by
// including the value "tls10default=1" in the GODEBUG environment variable.
// Note that this option will be removed in Go 1.19 (but it will still be
// possible to set this field to VersionTLS10 explicitly).
MinVersion uint16
// MaxVersion contains the maximum TLS version that is acceptable.
// If zero, the maximum version supported by this package is used,
//
// By default, the maximum version supported by this package is used,
// which is currently TLS 1.3.
MaxVersion uint16
@ -967,12 +977,24 @@ var supportedVersions = []uint16{
VersionTLS10,
}
func (c *Config) supportedVersions() []uint16 {
// debugEnableTLS10 enables TLS 1.0. See issue 45428.
var debugEnableTLS10 = godebug.Get("tls10default") == "1"
// roleClient and roleServer are meant to call supportedVersions and parents
// with more readability at the callsite.
const roleClient = true
const roleServer = false
func (c *Config) supportedVersions(isClient bool) []uint16 {
versions := make([]uint16, 0, len(supportedVersions))
for _, v := range supportedVersions {
if needFIPS() && (v < fipsMinVersion(c) || v > fipsMaxVersion(c)) {
continue
}
if (c == nil || c.MinVersion == 0) && !debugEnableTLS10 &&
isClient && v < VersionTLS12 {
continue
}
if c != nil && c.MinVersion != 0 && v < c.MinVersion {
continue
}
@ -984,8 +1006,8 @@ func (c *Config) supportedVersions() []uint16 {
return versions
}
func (c *Config) maxSupportedVersion() uint16 {
supportedVersions := c.supportedVersions()
func (c *Config) maxSupportedVersion(isClient bool) uint16 {
supportedVersions := c.supportedVersions(isClient)
if len(supportedVersions) == 0 {
return 0
}
@ -1029,8 +1051,8 @@ func (c *Config) supportsCurve(curve CurveID) bool {
// mutualVersion returns the protocol version to use given the advertised
// versions of the peer. Priority is given to the peer preference order.
func (c *Config) mutualVersion(peerVersions []uint16) (uint16, bool) {
supportedVersions := c.supportedVersions()
func (c *Config) mutualVersion(isClient bool, peerVersions []uint16) (uint16, bool) {
supportedVersions := c.supportedVersions(isClient)
for _, peerVersion := range peerVersions {
for _, v := range supportedVersions {
if v == peerVersion {
@ -1109,7 +1131,7 @@ func (chi *ClientHelloInfo) SupportsCertificate(c *Certificate) error {
if config == nil {
config = &Config{}
}
vers, ok := config.mutualVersion(chi.SupportedVersions)
vers, ok := config.mutualVersion(roleServer, chi.SupportedVersions)
if !ok {
return errors.New("no mutually supported protocol versions")
}

View File

@ -52,12 +52,12 @@ func (c *Conn) makeClientHello() (*clientHelloMsg, ecdheParameters, error) {
return nil, nil, errors.New("tls: NextProtos values too large")
}
supportedVersions := config.supportedVersions()
supportedVersions := config.supportedVersions(roleClient)
if len(supportedVersions) == 0 {
return nil, nil, errors.New("tls: no supported versions satisfy MinVersion and MaxVersion")
}
clientHelloVersion := config.maxSupportedVersion()
clientHelloVersion := config.maxSupportedVersion(roleClient)
// The version at the beginning of the ClientHello was capped at TLS 1.2
// for compatibility reasons. The supported_versions extension is used
// to negotiate versions now. See RFC 8446, Section 4.2.1.
@ -197,7 +197,7 @@ func (c *Conn) clientHandshake(ctx context.Context) (err error) {
// If we are negotiating a protocol version that's lower than what we
// support, check for the server downgrade canaries.
// See RFC 8446, Section 4.1.3.
maxVers := c.config.maxSupportedVersion()
maxVers := c.config.maxSupportedVersion(roleClient)
tls12Downgrade := string(serverHello.random[24:]) == downgradeCanaryTLS12
tls11Downgrade := string(serverHello.random[24:]) == downgradeCanaryTLS11
if maxVers == VersionTLS13 && c.vers <= VersionTLS12 && (tls12Downgrade || tls11Downgrade) ||
@ -365,7 +365,7 @@ func (c *Conn) pickTLSVersion(serverHello *serverHelloMsg) error {
peerVersion = serverHello.supportedVersion
}
vers, ok := c.config.mutualVersion([]uint16{peerVersion})
vers, ok := c.config.mutualVersion(roleClient, []uint16{peerVersion})
if !ok {
c.sendAlert(alertProtocolVersion)
return fmt.Errorf("tls: server selected unsupported protocol version %x", peerVersion)

View File

@ -156,7 +156,7 @@ func (c *Conn) readClientHello(ctx context.Context) (*clientHelloMsg, error) {
if len(clientHello.supportedVersions) == 0 {
clientVersions = supportedVersionsFromMax(clientHello.vers)
}
c.vers, ok = c.config.mutualVersion(clientVersions)
c.vers, ok = c.config.mutualVersion(roleServer, clientVersions)
if !ok {
c.sendAlert(alertProtocolVersion)
return nil, fmt.Errorf("tls: client offered only unsupported versions: %x", clientVersions)
@ -191,7 +191,7 @@ func (hs *serverHandshakeState) processClientHello() error {
hs.hello.random = make([]byte, 32)
serverRandom := hs.hello.random
// Downgrade protection canaries. See RFC 8446, Section 4.1.3.
maxVers := c.config.maxSupportedVersion()
maxVers := c.config.maxSupportedVersion(roleServer)
if maxVers >= VersionTLS12 && c.vers < maxVers || testingOnlyForceDowngradeCanary {
if c.vers == VersionTLS12 {
copy(serverRandom[24:], downgradeCanaryTLS12)
@ -354,7 +354,7 @@ func (hs *serverHandshakeState) pickCipherSuite() error {
for _, id := range hs.clientHello.cipherSuites {
if id == TLS_FALLBACK_SCSV {
// The client is doing a fallback connection. See RFC 7507.
if hs.clientHello.vers < c.config.maxSupportedVersion() {
if hs.clientHello.vers < c.config.maxSupportedVersion(roleServer) {
c.sendAlert(alertInappropriateFallback)
return errors.New("tls: client using inappropriate protocol fallback")
}

View File

@ -385,13 +385,30 @@ func TestVersion(t *testing.T) {
}
clientConfig := &Config{
InsecureSkipVerify: true,
MinVersion: VersionTLS10,
}
state, _, err := testHandshake(t, clientConfig, serverConfig)
if err != nil {
t.Fatalf("handshake failed: %s", err)
}
if state.Version != VersionTLS11 {
t.Fatalf("Incorrect version %x, should be %x", state.Version, VersionTLS11)
t.Fatalf("incorrect version %x, should be %x", state.Version, VersionTLS11)
}
clientConfig.MinVersion = 0
_, _, err = testHandshake(t, clientConfig, serverConfig)
if err == nil {
t.Fatalf("expected failure to connect with TLS 1.0/1.1")
}
defer func(old bool) { debugEnableTLS10 = old }(debugEnableTLS10)
debugEnableTLS10 = true
_, _, err = testHandshake(t, clientConfig, serverConfig)
if err != nil {
t.Fatalf("handshake failed: %s", err)
}
if state.Version != VersionTLS11 {
t.Fatalf("incorrect version %x, should be %x", state.Version, VersionTLS11)
}
}
@ -472,6 +489,7 @@ func testCrossVersionResume(t *testing.T, version uint16) {
InsecureSkipVerify: true,
ClientSessionCache: NewLRUClientSessionCache(1),
ServerName: "servername",
MinVersion: VersionTLS10,
}
// Establish a session at TLS 1.1.

View File

@ -114,7 +114,7 @@ func (hs *serverHandshakeStateTLS13) processClientHello() error {
if id == TLS_FALLBACK_SCSV {
// Use c.vers instead of max(supported_versions) because an attacker
// could defeat this by adding an arbitrary high version otherwise.
if c.vers < c.config.maxSupportedVersion() {
if c.vers < c.config.maxSupportedVersion(roleServer) {
c.sendAlert(alertInappropriateFallback)
return errors.New("tls: client using inappropriate protocol fallback")
}

View File

@ -363,6 +363,8 @@ func runMain(m *testing.M) int {
Certificates: make([]Certificate, 2),
InsecureSkipVerify: true,
CipherSuites: allCipherSuites(),
MinVersion: VersionTLS10,
MaxVersion: VersionTLS13,
}
testConfig.Certificates[0].Certificate = [][]byte{testRSACertificate}
testConfig.Certificates[0].PrivateKey = testRSAPrivateKey

View File

@ -534,6 +534,10 @@ func testVerify(t *testing.T, test verifyTest, useSystemRoots bool) {
}
func TestGoVerify(t *testing.T) {
// Temporarily enable SHA-1 verification since a number of test chains
// require it. TODO(filippo): regenerate test chains.
defer func(old bool) { debugAllowSHA1 = old }(debugAllowSHA1)
debugAllowSHA1 = true
for _, test := range verifyTests {
t.Run(test.name, func(t *testing.T) {
testVerify(t, test, false)

View File

@ -18,6 +18,7 @@ import (
"encoding/pem"
"errors"
"fmt"
"internal/godebug"
"io"
"math/big"
"net"
@ -181,15 +182,15 @@ type SignatureAlgorithm int
const (
UnknownSignatureAlgorithm SignatureAlgorithm = iota
MD2WithRSA // Unsupported.
MD5WithRSA // Only supported for signing, not verification.
SHA1WithRSA
MD2WithRSA // Unsupported.
MD5WithRSA // Only supported for signing, not verification.
SHA1WithRSA // Only supported for signing, not verification.
SHA256WithRSA
SHA384WithRSA
SHA512WithRSA
DSAWithSHA1 // Unsupported.
DSAWithSHA256 // Unsupported.
ECDSAWithSHA1
ECDSAWithSHA1 // Only supported for signing, not verification.
ECDSAWithSHA256
ECDSAWithSHA384
ECDSAWithSHA512
@ -729,11 +730,23 @@ type Certificate struct {
// involves algorithms that are not currently implemented.
var ErrUnsupportedAlgorithm = errors.New("x509: cannot verify signature: algorithm unimplemented")
// An InsecureAlgorithmError
// debugAllowSHA1 allows SHA-1 signatures. See issue 41682.
var debugAllowSHA1 = godebug.Get("x509sha1") == "1"
// An InsecureAlgorithmError indicates that the SignatureAlgorithm used to
// generate the signature is not secure, and the signature has been rejected.
//
// To temporarily restore support for SHA-1 signatures, include the value
// "x509sha1=1" in the GODEBUG environment variable. Note that this option will
// be removed in Go 1.19.
type InsecureAlgorithmError SignatureAlgorithm
func (e InsecureAlgorithmError) Error() string {
return fmt.Sprintf("x509: cannot verify signature: insecure algorithm %v", SignatureAlgorithm(e))
var override string
if SignatureAlgorithm(e) == SHA1WithRSA || SignatureAlgorithm(e) == ECDSAWithSHA1 {
override = " (temporarily override with GODEBUG=x509sha1=1)"
}
return fmt.Sprintf("x509: cannot verify signature: insecure algorithm %v", SignatureAlgorithm(e)) + override
}
// ConstraintViolationError results when a requested usage is not permitted by
@ -825,6 +838,11 @@ func checkSignature(algo SignatureAlgorithm, signed, signature []byte, publicKey
}
case crypto.MD5:
return InsecureAlgorithmError(algo)
case crypto.SHA1:
if !debugAllowSHA1 {
return InsecureAlgorithmError(algo)
}
fallthrough
default:
if !hashType.Available() {
return ErrUnsupportedAlgorithm
@ -1579,9 +1597,12 @@ func CreateCertificate(rand io.Reader, template, parent *Certificate, pub, priv
}
// Check the signature to ensure the crypto.Signer behaved correctly.
// We skip this check if the signature algorithm is MD5WithRSA as we
// only support this algorithm for signing, and not verification.
if sigAlg := getSignatureAlgorithmFromAI(signatureAlgorithm); sigAlg != MD5WithRSA {
sigAlg := getSignatureAlgorithmFromAI(signatureAlgorithm)
switch sigAlg {
case MD5WithRSA, SHA1WithRSA, ECDSAWithSHA1:
// We skip the check if the signature algorithm is only supported for
// signing, not verification.
default:
if err := checkSignature(sigAlg, c.Raw, signature, key.Public()); err != nil {
return nil, fmt.Errorf("x509: signature over certificate returned by signer is invalid: %w", err)
}

View File

@ -585,10 +585,10 @@ func TestCreateSelfSignedCertificate(t *testing.T) {
checkSig bool
sigAlgo SignatureAlgorithm
}{
{"RSA/RSA", &testPrivateKey.PublicKey, testPrivateKey, true, SHA1WithRSA},
{"RSA/RSA", &testPrivateKey.PublicKey, testPrivateKey, true, SHA384WithRSA},
{"RSA/ECDSA", &testPrivateKey.PublicKey, ecdsaPriv, false, ECDSAWithSHA384},
{"ECDSA/RSA", &ecdsaPriv.PublicKey, testPrivateKey, false, SHA256WithRSA},
{"ECDSA/ECDSA", &ecdsaPriv.PublicKey, ecdsaPriv, true, ECDSAWithSHA1},
{"ECDSA/ECDSA", &ecdsaPriv.PublicKey, ecdsaPriv, true, ECDSAWithSHA256},
{"RSAPSS/RSAPSS", &testPrivateKey.PublicKey, testPrivateKey, true, SHA256WithRSAPSS},
{"ECDSA/RSAPSS", &ecdsaPriv.PublicKey, testPrivateKey, false, SHA256WithRSAPSS},
{"RSAPSS/ECDSA", &testPrivateKey.PublicKey, ecdsaPriv, false, ECDSAWithSHA384},
@ -886,7 +886,6 @@ var ecdsaTests = []struct {
sigAlgo SignatureAlgorithm
pemCert string
}{
{ECDSAWithSHA1, ecdsaSHA1CertPem},
{ECDSAWithSHA256, ecdsaSHA256p256CertPem},
{ECDSAWithSHA256, ecdsaSHA256p384CertPem},
{ECDSAWithSHA384, ecdsaSHA384p521CertPem},
@ -1389,10 +1388,10 @@ func TestCreateCertificateRequest(t *testing.T) {
priv interface{}
sigAlgo SignatureAlgorithm
}{
{"RSA", testPrivateKey, SHA1WithRSA},
{"ECDSA-256", ecdsa256Priv, ECDSAWithSHA1},
{"ECDSA-384", ecdsa384Priv, ECDSAWithSHA1},
{"ECDSA-521", ecdsa521Priv, ECDSAWithSHA1},
{"RSA", testPrivateKey, SHA256WithRSA},
{"ECDSA-256", ecdsa256Priv, ECDSAWithSHA256},
{"ECDSA-384", ecdsa384Priv, ECDSAWithSHA256},
{"ECDSA-521", ecdsa521Priv, ECDSAWithSHA256},
{"Ed25519", ed25519Priv, PureEd25519},
}
@ -1783,6 +1782,9 @@ func TestInsecureAlgorithmErrorString(t *testing.T) {
sa SignatureAlgorithm
want string
}{
{MD5WithRSA, "x509: cannot verify signature: insecure algorithm MD5-RSA"},
{SHA1WithRSA, "x509: cannot verify signature: insecure algorithm SHA1-RSA (temporarily override with GODEBUG=x509sha1=1)"},
{ECDSAWithSHA1, "x509: cannot verify signature: insecure algorithm ECDSA-SHA1 (temporarily override with GODEBUG=x509sha1=1)"},
{MD2WithRSA, "x509: cannot verify signature: insecure algorithm MD2-RSA"},
{-1, "x509: cannot verify signature: insecure algorithm -1"},
{0, "x509: cannot verify signature: insecure algorithm 0"},
@ -1846,6 +1848,30 @@ func TestMD5(t *testing.T) {
}
}
// TestSHA1 parses a certificate signed with ECDSA-SHA1 and checks that its
// self-signature is rejected with an InsecureAlgorithmError while
// debugAllowSHA1 keeps whatever value it had on entry, and that the same
// signature verifies once debugAllowSHA1 is switched on.
func TestSHA1(t *testing.T) {
	block, _ := pem.Decode([]byte(ecdsaSHA1CertPem))
	c, parseErr := ParseCertificate(block.Bytes)
	if parseErr != nil {
		t.Fatalf("failed to parse certificate: %s", parseErr)
	}
	if c.SignatureAlgorithm != ECDSAWithSHA1 {
		t.Errorf("signature algorithm is %v, want %v", c.SignatureAlgorithm, ECDSAWithSHA1)
	}

	// First pass: verification must fail, and with the dedicated error type.
	switch verifyErr := c.CheckSignatureFrom(c); verifyErr.(type) {
	case nil:
		t.Fatalf("certificate verification succeeded incorrectly")
	case InsecureAlgorithmError:
		// This is the rejection we are looking for.
	default:
		t.Fatalf("certificate verification returned %v (%T), wanted InsecureAlgorithmError", verifyErr, verifyErr)
	}

	// Second pass: flip the override for the remainder of the test and put
	// the previous value back when the test returns.
	saved := debugAllowSHA1
	defer func() { debugAllowSHA1 = saved }()
	debugAllowSHA1 = true

	if verifyErr := c.CheckSignatureFrom(c); verifyErr != nil {
		t.Fatalf("SHA-1 certificate did not verify with GODEBUG=x509sha1=1: %v", verifyErr)
	}
}
// certMissingRSANULL contains an RSA public key where the AlgorithmIdentifier
// parameters are omitted rather than being an ASN.1 NULL.
const certMissingRSANULL = `
@ -2897,19 +2923,31 @@ func TestCreateCertificateBrokenSigner(t *testing.T) {
}
}
func TestCreateCertificateMD5(t *testing.T) {
template := &Certificate{
SerialNumber: big.NewInt(10),
DNSNames: []string{"example.com"},
SignatureAlgorithm: MD5WithRSA,
}
k, err := rsa.GenerateKey(rand.Reader, 1024)
func TestCreateCertificateLegacy(t *testing.T) {
ecdsaPriv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
if err != nil {
t.Fatalf("failed to generate test key: %s", err)
t.Fatalf("Failed to generate ECDSA key: %s", err)
}
_, err = CreateCertificate(rand.Reader, template, template, k.Public(), &brokenSigner{k.Public()})
if err != nil {
t.Fatalf("CreateCertificate failed when SignatureAlgorithm = MD5WithRSA: %s", err)
for _, sigAlg := range []SignatureAlgorithm{
MD5WithRSA, SHA1WithRSA, ECDSAWithSHA1,
} {
template := &Certificate{
SerialNumber: big.NewInt(10),
DNSNames: []string{"example.com"},
SignatureAlgorithm: sigAlg,
}
var k crypto.Signer
switch sigAlg {
case MD5WithRSA, SHA1WithRSA:
k = testPrivateKey
case ECDSAWithSHA1:
k = ecdsaPriv
}
_, err := CreateCertificate(rand.Reader, template, template, k.Public(), &brokenSigner{k.Public()})
if err != nil {
t.Fatalf("CreateCertificate failed when SignatureAlgorithm = %v: %s", sigAlg, err)
}
}
}
@ -3131,7 +3169,6 @@ func TestParseCertificateRawEquals(t *testing.T) {
if !bytes.Equal(p.Bytes, cert.Raw) {
t.Fatalf("unexpected Certificate.Raw\ngot: %x\nwant: %x\n", cert.Raw, p.Bytes)
}
fmt.Printf("in: %x\nout: %x\n", p.Bytes, cert.Raw)
}
// mismatchingSigAlgIDPEM contains a certificate where the Certificate

View File

@ -2349,6 +2349,7 @@ const (
R_PPC64_GOT16_HI R_PPC64 = 16 // R_POWERPC_GOT16_HI
R_PPC64_GOT16_HA R_PPC64 = 17 // R_POWERPC_GOT16_HA
R_PPC64_JMP_SLOT R_PPC64 = 21 // R_POWERPC_JMP_SLOT
R_PPC64_RELATIVE R_PPC64 = 22 // R_POWERPC_RELATIVE
R_PPC64_REL32 R_PPC64 = 26 // R_POWERPC_REL32
R_PPC64_ADDR64 R_PPC64 = 38
R_PPC64_ADDR16_HIGHER R_PPC64 = 39
@ -2457,6 +2458,7 @@ var rppc64Strings = []intName{
{16, "R_PPC64_GOT16_HI"},
{17, "R_PPC64_GOT16_HA"},
{21, "R_PPC64_JMP_SLOT"},
{22, "R_PPC64_RELATIVE"},
{26, "R_PPC64_REL32"},
{38, "R_PPC64_ADDR64"},
{39, "R_PPC64_ADDR16_HIGHER"},

View File

@ -301,11 +301,15 @@ func newTable(symtab []byte, ptrsz int) ([]Sym, error) {
return syms, nil
}
// ErrNoSymbols is returned by File.Symbols if there is no such section
// in the File.
var ErrNoSymbols = errors.New("no symbol section")
// Symbols returns the symbol table for f.
func (f *File) Symbols() ([]Sym, error) {
symtabSection := f.Section("syms")
if symtabSection == nil {
return nil, errors.New("no symbol section")
return nil, ErrNoSymbols
}
symtab, err := symtabSection.Data()

View File

@ -265,3 +265,13 @@ func ExampleAs() {
// Output:
// Failed at path: non-existing
}
// ExampleUnwrap shows that Unwrap returns the error that fmt.Errorf wrapped
// with the %w verb.
func ExampleUnwrap() {
	err1 := errors.New("error1")
	err2 := fmt.Errorf("error2: [%w]", err1)
	fmt.Println(err2)
	fmt.Println(errors.Unwrap(err2))
	// Output:
	// error2: [error1]
	// error1
}

View File

@ -593,7 +593,7 @@ func (fd *FD) ReadFrom(buf []byte) (int, syscall.Sockaddr, error) {
return n, sa, nil
}
// ReadFrom wraps the recvfrom network call for IPv4.
// ReadFromInet4 wraps the recvfrom network call for IPv4.
func (fd *FD) ReadFromInet4(buf []byte, sa4 *syscall.SockaddrInet4) (int, error) {
if len(buf) == 0 {
return 0, nil
@ -622,7 +622,7 @@ func (fd *FD) ReadFromInet4(buf []byte, sa4 *syscall.SockaddrInet4) (int, error)
return n, err
}
// ReadFrom wraps the recvfrom network call for IPv6.
// ReadFromInet6 wraps the recvfrom network call for IPv6.
func (fd *FD) ReadFromInet6(buf []byte, sa6 *syscall.SockaddrInet6) (int, error) {
if len(buf) == 0 {
return 0, nil

View File

@ -88,12 +88,7 @@ func SetPendingDialHooks(before, after func()) {
func SetTestHookServerServe(fn func(*Server, net.Listener)) { testHookServerServe = fn }
func NewTestTimeoutHandler(handler Handler, ch <-chan time.Time) Handler {
ctx, cancel := context.WithCancel(context.Background())
go func() {
<-ch
cancel()
}()
func NewTestTimeoutHandler(handler Handler, ctx context.Context) Handler {
return &timeoutHandler{
handler: handler,
testContext: ctx,

View File

@ -43,7 +43,7 @@ func interestingGoroutines() (gs []string) {
// These only show up with GOTRACEBACK=2; Issue 5005 (comment 28)
strings.Contains(stack, "runtime.goexit") ||
strings.Contains(stack, "created by runtime.gc") ||
strings.Contains(stack, "net/http_test.interestingGoroutines") ||
strings.Contains(stack, "interestingGoroutines") ||
strings.Contains(stack, "runtime.MHeap_Scavenger") {
continue
}

View File

@ -2274,6 +2274,18 @@ func TestRequestBodyTimeoutClosesConnection(t *testing.T) {
}
}
// cancelableTimeoutContext wraps a Context and changes only what Err reports:
// as soon as the wrapped context has any error, Err returns
// context.DeadlineExceeded instead of that error. This lets a test trigger
// the "deadline exceeded" path by simply canceling the wrapped context.
type cancelableTimeoutContext struct {
	context.Context
}

// Err returns nil while the wrapped context has no error, and
// context.DeadlineExceeded afterwards, whatever the underlying error is.
func (c cancelableTimeoutContext) Err() error {
	if underlying := c.Context.Err(); underlying == nil {
		return nil
	}
	return context.DeadlineExceeded
}
func TestTimeoutHandler_h1(t *testing.T) { testTimeoutHandler(t, h1Mode) }
func TestTimeoutHandler_h2(t *testing.T) { testTimeoutHandler(t, h2Mode) }
func testTimeoutHandler(t *testing.T, h2 bool) {
@ -2286,8 +2298,9 @@ func testTimeoutHandler(t *testing.T, h2 bool) {
_, werr := w.Write([]byte("hi"))
writeErrors <- werr
})
timeout := make(chan time.Time, 1) // write to this to force timeouts
cst := newClientServerTest(t, h2, NewTestTimeoutHandler(sayHi, timeout))
ctx, cancel := context.WithCancel(context.Background())
h := NewTestTimeoutHandler(sayHi, cancelableTimeoutContext{ctx})
cst := newClientServerTest(t, h2, h)
defer cst.close()
// Succeed without timing out:
@ -2308,7 +2321,8 @@ func testTimeoutHandler(t *testing.T, h2 bool) {
}
// Times out:
timeout <- time.Time{}
cancel()
res, err = cst.c.Get(cst.ts.URL)
if err != nil {
t.Error(err)
@ -2429,8 +2443,9 @@ func TestTimeoutHandlerRaceHeaderTimeout(t *testing.T) {
_, werr := w.Write([]byte("hi"))
writeErrors <- werr
})
timeout := make(chan time.Time, 1) // write to this to force timeouts
cst := newClientServerTest(t, h1Mode, NewTestTimeoutHandler(sayHi, timeout))
ctx, cancel := context.WithCancel(context.Background())
h := NewTestTimeoutHandler(sayHi, cancelableTimeoutContext{ctx})
cst := newClientServerTest(t, h1Mode, h)
defer cst.close()
// Succeed without timing out:
@ -2451,7 +2466,8 @@ func TestTimeoutHandlerRaceHeaderTimeout(t *testing.T) {
}
// Times out:
timeout <- time.Time{}
cancel()
res, err = cst.c.Get(cst.ts.URL)
if err != nil {
t.Error(err)
@ -2501,6 +2517,41 @@ func TestTimeoutHandlerStartTimerWhenServing(t *testing.T) {
}
}
// TestTimeoutHandlerContextCanceled checks the timeout handler's behavior
// when its context is canceled rather than timed out: the client must get a
// 503 with an empty body, and the wrapped handler's Write must fail with
// context.Canceled.
func TestTimeoutHandlerContextCanceled(t *testing.T) {
	setParallel(t)
	defer afterTest(t)
	sendHi := make(chan bool, 1)
	writeErrors := make(chan error, 1)
	sayHi := HandlerFunc(func(w ResponseWriter, r *Request) {
		w.Header().Set("Content-Type", "text/plain")
		<-sendHi
		_, werr := w.Write([]byte("hi"))
		writeErrors <- werr
	})
	// The deadline is far in the future, so the only way this context ends
	// is the explicit cancel below, which happens before any request is served.
	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Hour)
	h := NewTestTimeoutHandler(sayHi, ctx)
	cancel()
	cst := newClientServerTest(t, h1Mode, h)
	defer cst.close()

	// Request served under an already-canceled context:
	sendHi <- true
	res, err := cst.c.Get(cst.ts.URL)
	if err != nil {
		// res is nil on error; stop here instead of dereferencing it below.
		t.Fatal(err)
	}
	defer res.Body.Close()
	if g, e := res.StatusCode, StatusServiceUnavailable; g != e {
		t.Errorf("got res.StatusCode %d; expected %d", g, e)
	}
	body, err := io.ReadAll(res.Body)
	if err != nil {
		t.Errorf("reading response body: %v", err)
	}
	if g, e := string(body), ""; g != e {
		t.Errorf("got body %q; expected %q", g, e)
	}
	if g, e := <-writeErrors, context.Canceled; g != e {
		t.Errorf("got unexpected Write error: %v; expected %v", g, e)
	}
}
// https://golang.org/issue/15948
func TestTimeoutHandlerEmptyResponse(t *testing.T) {
setParallel(t)

View File

@ -3391,9 +3391,15 @@ func (h *timeoutHandler) ServeHTTP(w ResponseWriter, r *Request) {
case <-ctx.Done():
tw.mu.Lock()
defer tw.mu.Unlock()
w.WriteHeader(StatusServiceUnavailable)
io.WriteString(w, h.errorBody())
tw.timedOut = true
switch err := ctx.Err(); err {
case context.DeadlineExceeded:
w.WriteHeader(StatusServiceUnavailable)
io.WriteString(w, h.errorBody())
tw.err = ErrHandlerTimeout
default:
w.WriteHeader(StatusServiceUnavailable)
tw.err = err
}
}
}
@ -3404,7 +3410,7 @@ type timeoutWriter struct {
req *Request
mu sync.Mutex
timedOut bool
err error
wroteHeader bool
code int
}
@ -3424,8 +3430,8 @@ func (tw *timeoutWriter) Header() Header { return tw.h }
func (tw *timeoutWriter) Write(p []byte) (int, error) {
tw.mu.Lock()
defer tw.mu.Unlock()
if tw.timedOut {
return 0, ErrHandlerTimeout
if tw.err != nil {
return 0, tw.err
}
if !tw.wroteHeader {
tw.writeHeaderLocked(StatusOK)
@ -3437,7 +3443,7 @@ func (tw *timeoutWriter) writeHeaderLocked(code int) {
checkWriteHeaderCode(code)
switch {
case tw.timedOut:
case tw.err != nil:
return
case tw.wroteHeader:
if tw.req != nil {

View File

@ -2481,7 +2481,7 @@ type requestAndChan struct {
callerGone <-chan struct{} // closed when roundTrip caller has returned
}
// A writeRequest is sent by the readLoop's goroutine to the
// A writeRequest is sent by the caller's goroutine to the
// writeLoop's goroutine to write a request while the read loop
// concurrently waits on both the write response and the server's
// reply.

View File

@ -155,9 +155,14 @@ func (err parseAddrError) Error() string {
func parseIPv4(s string) (ip Addr, err error) {
var fields [4]uint8
var val, pos int
var digLen int // number of digits in current octet
for i := 0; i < len(s); i++ {
if s[i] >= '0' && s[i] <= '9' {
if digLen == 1 && val == 0 {
return Addr{}, parseAddrError{in: s, msg: "IPv4 field has octet with leading zero"}
}
val = val*10 + int(s[i]) - '0'
digLen++
if val > 255 {
return Addr{}, parseAddrError{in: s, msg: "IPv4 field has value >255"}
}
@ -175,6 +180,7 @@ func parseIPv4(s string) (ip Addr, err error) {
fields[pos] = uint8(val)
pos++
val = 0
digLen = 0
} else {
return Addr{}, parseAddrError{in: s, msg: "unexpected character", at: s[i:]}
}
@ -692,21 +698,19 @@ const (
// IPv6 addresses with zones are returned without their zone (use the
// Zone method to get it).
// The ip zero value returns all zeroes.
func (ip Addr) As16() [16]byte {
var ret [16]byte
bePutUint64(ret[:8], ip.addr.hi)
bePutUint64(ret[8:], ip.addr.lo)
return ret
func (ip Addr) As16() (a16 [16]byte) {
bePutUint64(a16[:8], ip.addr.hi)
bePutUint64(a16[8:], ip.addr.lo)
return a16
}
// As4 returns an IPv4 or IPv4-in-IPv6 address in its 4-byte representation.
// If ip is the zero Addr or an IPv6 address, As4 panics.
// Note that 0.0.0.0 is not the zero Addr.
func (ip Addr) As4() [4]byte {
func (ip Addr) As4() (a4 [4]byte) {
if ip.z == z4 || ip.Is4In6() {
var ret [4]byte
bePutUint32(ret[:], uint32(ip.addr.lo))
return ret
bePutUint32(a4[:], uint32(ip.addr.lo))
return a4
}
if ip.z == z0 {
panic("As4 called on IP zero value")

View File

@ -29,9 +29,10 @@ var (
func TestParseAddr(t *testing.T) {
var validIPs = []struct {
in string
ip Addr // output of ParseAddr()
str string // output of String(). If "", use in.
in string
ip Addr // output of ParseAddr()
str string // output of String(). If "", use in.
wantErr string
}{
// Basic zero IPv4 address.
{
@ -45,15 +46,18 @@ func TestParseAddr(t *testing.T) {
},
// IPv4 address in windows-style "print all the digits" form.
{
in: "010.000.015.001",
ip: MkAddr(Mk128(0, 0xffff0a000f01), Z4),
str: "10.0.15.1",
in: "010.000.015.001",
wantErr: `ParseAddr("010.000.015.001"): IPv4 field has octet with leading zero`,
},
// IPv4 address with a silly amount of leading zeros.
{
in: "000001.00000002.00000003.000000004",
ip: MkAddr(Mk128(0, 0xffff01020304), Z4),
str: "1.2.3.4",
in: "000001.00000002.00000003.000000004",
wantErr: `ParseAddr("000001.00000002.00000003.000000004"): IPv4 field has octet with leading zero`,
},
// 4-in-6 with octet with leading zero
{
in: "::ffff:1.2.03.4",
wantErr: `ParseAddr("::ffff:1.2.03.4"): ParseAddr("1.2.03.4"): IPv4 field has octet with leading zero (at "1.2.03.4")`,
},
// Basic zero IPv6 address.
{
@ -121,10 +125,16 @@ func TestParseAddr(t *testing.T) {
t.Run(test.in, func(t *testing.T) {
got, err := ParseAddr(test.in)
if err != nil {
if err.Error() == test.wantErr {
return
}
t.Fatal(err)
}
if test.wantErr != "" {
t.Fatalf("wanted error %q; got none", test.wantErr)
}
if got != test.ip {
t.Errorf("ParseAddr(%q) got %#v, want %#v", test.in, got, test.ip)
t.Errorf("got %#v, want %#v", got, test.ip)
}
// Check that ParseAddr is a pure function.
@ -963,7 +973,7 @@ func TestIs4In6(t *testing.T) {
{mustIP("::ffff:192.0.2.128"), true, mustIP("192.0.2.128")},
{mustIP("::ffff:192.0.2.128%eth0"), true, mustIP("192.0.2.128")},
{mustIP("::fffe:c000:0280"), false, mustIP("::fffe:c000:0280")},
{mustIP("::ffff:127.001.002.003"), true, mustIP("127.1.2.3")},
{mustIP("::ffff:127.1.2.3"), true, mustIP("127.1.2.3")},
{mustIP("::ffff:7f01:0203"), true, mustIP("127.1.2.3")},
{mustIP("0:0:0:0:0000:ffff:127.1.2.3"), true, mustIP("127.1.2.3")},
{mustIP("0:0:0:0:000000:ffff:127.1.2.3"), true, mustIP("127.1.2.3")},
@ -1796,3 +1806,12 @@ func TestInvalidAddrPortString(t *testing.T) {
}
}
}
// sink16 is a package-level destination for As16 results. Storing each result
// here gives the call an observable effect (presumably so the compiler does
// not discard it inside the benchmark loop).
var sink16 [16]byte

// BenchmarkAs16 measures Addr.As16 on the IPv6 address "1::10".
func BenchmarkAs16(b *testing.B) {
	ip := MustParseAddr("1::10")
	for n := 0; n < b.N; n++ {
		sink16 = ip.As16()
	}
}

View File

@ -475,6 +475,17 @@ func TestUDPReadTimeout(t *testing.T) {
}
func TestAllocs(t *testing.T) {
switch runtime.GOOS {
case "plan9":
// Plan9 wasn't optimized.
t.Skipf("skipping on %v", runtime.GOOS)
}
builder := os.Getenv("GO_BUILDER_NAME")
switch builder {
case "linux-amd64-noopt":
// Optimizations are required to remove the allocs.
t.Skipf("skipping on %v", builder)
}
conn, err := ListenUDP("udp4", &UDPAddr{IP: IPv4(127, 0, 0, 1)})
if err != nil {
t.Fatal(err)

View File

@ -624,6 +624,10 @@ func TestShrinkStackDuringBlockedSend(t *testing.T) {
}
func TestNoShrinkStackWhileParking(t *testing.T) {
if runtime.GOOS == "netbsd" && runtime.GOARCH == "arm64" {
testenv.SkipFlaky(t, 49382)
}
// The goal of this test is to trigger a "racy sudog adjustment"
// throw. Basically, there's a window between when a goroutine
// becomes available for preemption for stack scanning (and thus,

View File

@ -796,21 +796,17 @@ func (p *PageAlloc) Free(base, npages uintptr) {
// None of the tests need any higher-level locking, so we just
// take the lock internally.
lock(pp.mheapLock)
pp.free(base, npages)
pp.free(base, npages, true)
unlock(pp.mheapLock)
})
}
func (p *PageAlloc) Bounds() (ChunkIdx, ChunkIdx) {
return ChunkIdx((*pageAlloc)(p).start), ChunkIdx((*pageAlloc)(p).end)
}
func (p *PageAlloc) Scavenge(nbytes uintptr, mayUnlock bool) (r uintptr) {
func (p *PageAlloc) Scavenge(nbytes uintptr) (r uintptr) {
pp := (*pageAlloc)(p)
systemstack(func() {
// None of the tests need any higher-level locking, so we just
// take the lock internally.
lock(pp.mheapLock)
r = pp.scavenge(nbytes, mayUnlock)
unlock(pp.mheapLock)
r = pp.scavenge(nbytes)
})
return
}

View File

@ -78,6 +78,11 @@ It is a comma-separated list of name=val pairs setting these named variables:
If the line ends with "(forced)", this GC was forced by a
runtime.GC() call.
harddecommit: setting harddecommit=1 causes memory that is returned to the OS to
also have protections removed on it. This is the only mode of operation on Windows,
but is helpful in debugging scavenger-related issues on other platforms. Currently,
only supported on Linux.
inittrace: setting inittrace=1 causes the runtime to emit a single line to standard
error for each package with init work, summarizing the execution time and memory
allocation. No information is printed for inits executed as part of plugin loading

View File

@ -51,9 +51,9 @@ const (
lockRankItab
lockRankReflectOffs
lockRankHchan // Multiple hchans acquired in lock order in syncadjustsudogs()
lockRankTraceBuf
lockRankFin
lockRankNotifyList
lockRankTraceBuf
lockRankTraceStrings
lockRankMspanSpecial
lockRankProf
@ -80,6 +80,7 @@ const (
// Memory-related leaf locks
lockRankGlobalAlloc
lockRankPageAllocScav
// Other leaf locks
lockRankGFree
@ -131,9 +132,9 @@ var lockNames = []string{
lockRankReflectOffs: "reflectOffs",
lockRankHchan: "hchan",
lockRankTraceBuf: "traceBuf",
lockRankFin: "fin",
lockRankNotifyList: "notifyList",
lockRankTraceBuf: "traceBuf",
lockRankTraceStrings: "traceStrings",
lockRankMspanSpecial: "mspanSpecial",
lockRankProf: "prof",
@ -157,7 +158,8 @@ var lockNames = []string{
lockRankMheap: "mheap",
lockRankMheapSpecial: "mheapSpecial",
lockRankGlobalAlloc: "globalAlloc.mutex",
lockRankGlobalAlloc: "globalAlloc.mutex",
lockRankPageAllocScav: "pageAlloc.scav.lock",
lockRankGFree: "gFree",
lockRankHchanLeaf: "hchanLeaf",
@ -208,31 +210,32 @@ var lockPartialOrder [][]lockRank = [][]lockRank{
lockRankItab: {},
lockRankReflectOffs: {lockRankItab},
lockRankHchan: {lockRankScavenge, lockRankSweep, lockRankHchan},
lockRankFin: {lockRankSysmon, lockRankScavenge, lockRankSched, lockRankAllg, lockRankTimers, lockRankHchan},
lockRankNotifyList: {},
lockRankTraceBuf: {lockRankSysmon, lockRankScavenge},
lockRankFin: {lockRankSysmon, lockRankScavenge, lockRankSched, lockRankAllg, lockRankTimers, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf},
lockRankNotifyList: {},
lockRankTraceStrings: {lockRankTraceBuf},
lockRankMspanSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings},
lockRankProf: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings},
lockRankGcBitsArenas: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings},
lockRankMspanSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
lockRankProf: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
lockRankGcBitsArenas: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
lockRankRoot: {},
lockRankTrace: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankHchan, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot},
lockRankTraceStackTab: {lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankAllg, lockRankTimers, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankTrace},
lockRankTraceStackTab: {lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankAllg, lockRankTimers, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankRoot, lockRankTrace},
lockRankNetpollInit: {lockRankTimers},
lockRankRwmutexW: {},
lockRankRwmutexR: {lockRankSysmon, lockRankRwmutexW},
lockRankSpanSetSpine: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings},
lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankSpanSetSpine},
lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankSpanSetSpine, lockRankGscan},
lockRankStackLarge: {lockRankSysmon, lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan},
lockRankDefer: {},
lockRankSudog: {lockRankHchan, lockRankNotifyList},
lockRankWbufSpans: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankRoot, lockRankGscan, lockRankDefer, lockRankSudog},
lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans},
lockRankMheapSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings},
lockRankGlobalAlloc: {lockRankProf, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial},
lockRankSpanSetSpine: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankSpanSetSpine},
lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankSpanSetSpine, lockRankGscan},
lockRankStackLarge: {lockRankSysmon, lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan},
lockRankDefer: {},
lockRankSudog: {lockRankHchan, lockRankNotifyList},
lockRankWbufSpans: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankRoot, lockRankGscan, lockRankDefer, lockRankSudog},
lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans},
lockRankMheapSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
lockRankGlobalAlloc: {lockRankProf, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial},
lockRankPageAllocScav: {lockRankMheap},
lockRankGFree: {lockRankSched},
lockRankHchanLeaf: {lockRankGscan, lockRankHchanLeaf},

View File

@ -114,9 +114,29 @@ func sysUnused(v unsafe.Pointer, n uintptr) {
atomic.Store(&adviseUnused, _MADV_DONTNEED)
madvise(v, n, _MADV_DONTNEED)
}
if debug.harddecommit > 0 {
p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
if p != v || err != 0 {
throw("runtime: cannot disable permissions in address space")
}
}
}
func sysUsed(v unsafe.Pointer, n uintptr) {
if debug.harddecommit > 0 {
p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
if err == _ENOMEM {
throw("runtime: out of memory")
}
if p != v || err != 0 {
throw("runtime: cannot remap pages in address space")
}
return
// Don't do the sysHugePage optimization in hard decommit mode.
// We're breaking up pages everywhere, there's no point.
}
// Partially undo the NOHUGEPAGE marks from sysUnused
// for whole huge pages between v and v+n. This may
// leave huge pages off at the end points v and v+n

View File

@ -349,9 +349,6 @@ func (c *gcControllerState) init(gcPercent int32) {
kp: 0.9,
ti: 4.0,
// An update is done once per GC cycle.
period: 1,
// Set a high reset time in GC cycles.
// This is inversely proportional to the rate at which we
// accumulate error from clipping. By making this very high
@ -677,8 +674,9 @@ func (c *gcControllerState) endCycle(now int64, procs int, userForced bool) floa
(float64(scanWork) * (1 - utilization))
// Update cons/mark controller.
// Period for this is 1 GC cycle.
oldConsMark := c.consMark
c.consMark = c.consMarkController.next(c.consMark, currentConsMark)
c.consMark = c.consMarkController.next(c.consMark, currentConsMark, 1.0)
if debug.gcpacertrace > 0 {
printlock()
@ -1259,10 +1257,7 @@ func readGOGC() int32 {
type piController struct {
kp float64 // Proportional constant.
ti float64 // Integral time constant.
tt float64 // Reset time in GC cyles.
// Period in GC cycles between updates.
period float64
tt float64 // Reset time.
min, max float64 // Output boundaries.
@ -1271,7 +1266,7 @@ type piController struct {
errIntegral float64 // Integral of the error from t=0 to now.
}
func (c *piController) next(input, setpoint float64) float64 {
func (c *piController) next(input, setpoint, period float64) float64 {
// Compute the raw output value.
prop := c.kp * (setpoint - input)
rawOutput := prop + c.errIntegral
@ -1286,7 +1281,7 @@ func (c *piController) next(input, setpoint float64) float64 {
// Update the controller's state.
if c.ti != 0 && c.tt != 0 {
c.errIntegral += (c.kp*c.period/c.ti)*(setpoint-input) + (c.period/c.tt)*(output-rawOutput)
c.errIntegral += (c.kp*period/c.ti)*(setpoint-input) + (period/c.tt)*(output-rawOutput)
}
return output
}

View File

@ -270,49 +270,80 @@ func bgscavenge(c chan int) {
c <- 1
goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
// Exponentially-weighted moving average of the fraction of time this
// goroutine spends scavenging (that is, percent of a single CPU).
// It represents a measure of scheduling overheads which might extend
// the sleep or the critical time beyond what's expected. Assume no
// overhead to begin with.
//
// TODO(mknyszek): Consider making this based on total CPU time of the
// application (i.e. scavengePercent * GOMAXPROCS). This isn't really
// feasible now because the scavenger acquires the heap lock over the
// scavenging operation, which means scavenging effectively blocks
// allocators and isn't scalable. However, given a scalable allocator,
// it makes sense to also make the scavenger scale with it; if you're
// allocating more frequently, then presumably you're also generating
// more work for the scavenger.
const idealFraction = scavengePercent / 100.0
scavengeEWMA := float64(idealFraction)
// idealFraction is the ideal % of overall application CPU time that we
// spend scavenging.
idealFraction := float64(scavengePercent) / 100.0
// Input: fraction of CPU time used.
// Setpoint: idealFraction.
// Output: ratio of critical time to sleep time (determines sleep time).
//
// The output of this controller is somewhat indirect to what we actually
// want to achieve: how much time to sleep for. The reason for this definition
// is to ensure that the controller's outputs have a direct relationship with
// its inputs (as opposed to an inverse relationship), making it somewhat
// easier to reason about for tuning purposes.
critSleepController := piController{
// Tuned loosely via Ziegler-Nichols process.
kp: 0.3375,
ti: 3.2e6,
tt: 1e9, // 1 second reset time.
// These ranges seem wide, but we want to give the controller plenty of
// room to hunt for the optimal value.
min: 0.001, // 1:1000
max: 1000.0, // 1000:1
}
// It doesn't really matter what value we start at, but we can't be zero, because
// that'll cause divide-by-zero issues.
critSleepRatio := 0.001
for {
released := uintptr(0)
// Time in scavenging critical section.
crit := float64(0)
// Run on the system stack since we grab the heap lock,
// and a stack growth with the heap lock means a deadlock.
systemstack(func() {
lock(&mheap_.lock)
// Spend at least 1 ms scavenging, otherwise the corresponding
// sleep time to maintain our desired utilization is too low to
// be reliable.
const minCritTime = 1e6
for crit < minCritTime {
// If background scavenging is disabled or if there's no work to do just park.
retained, goal := heapRetained(), atomic.Load64(&mheap_.scavengeGoal)
if retained <= goal {
unlock(&mheap_.lock)
return
break
}
// Scavenge one page, and measure the amount of time spent scavenging.
start := nanotime()
released = mheap_.pages.scavenge(physPageSize, true)
mheap_.pages.scav.released += released
crit = float64(nanotime() - start)
// scavengeQuantum is the amount of memory we try to scavenge
// in one go. A smaller value means the scavenger is more responsive
// to the scheduler in case of e.g. preemption. A larger value means
// that the overheads of scavenging are better amortized, so better
// scavenging throughput.
//
// The current value is chosen assuming a cost of ~10µs/physical page
// (this is somewhat pessimistic), which implies a worst-case latency of
// about 160µs for 4 KiB physical pages. The current value is biased
// toward latency over throughput.
const scavengeQuantum = 64 << 10
unlock(&mheap_.lock)
})
// Accumulate the amount of time spent scavenging.
start := nanotime()
released = mheap_.pages.scavenge(scavengeQuantum)
atomic.Xadduintptr(&mheap_.pages.scav.released, released)
end := nanotime()
// On some platforms we may see end <= start if the time it takes to scavenge
// memory is less than the minimum granularity of its clock (e.g. Windows) or
// due to clock bugs.
//
// In this case, just assume scavenging takes 10 µs per regular physical page
// (determined empirically), and conservatively ignore the impact of huge pages
// on timing.
const approxCritNSPerPhysicalPage = 10e3
if end <= start {
crit += approxCritNSPerPhysicalPage * float64(released/physPageSize)
} else {
crit += float64(end - start)
}
}
if released == 0 {
lock(&scavenge.lock)
@ -329,18 +360,13 @@ func bgscavenge(c chan int) {
throw("released less than one physical page of memory")
}
// On some platforms we may see crit as zero if the time it takes to scavenge
// memory is less than the minimum granularity of its clock (e.g. Windows).
// In this case, just assume scavenging takes 10 µs per regular physical page
// (determined empirically), and conservatively ignore the impact of huge pages
// on timing.
//
// We shouldn't ever see a crit value less than zero unless there's a bug of
// some kind, either on our side or in the platform we're running on, but be
// defensive in that case as well.
const approxCritNSPerPhysicalPage = 10e3
if crit <= 0 {
crit = approxCritNSPerPhysicalPage * float64(released/physPageSize)
if crit < minCritTime {
// This means there wasn't enough work to actually fill up minCritTime.
// That's fine; we shouldn't try to do anything with this information
// because it's going to result in a short enough sleep request that things
// will get messy. Just assume we did at least this much work.
// All this means is that we'll sleep longer than we otherwise would have.
crit = minCritTime
}
// Multiply the critical time by 1 + the ratio of the costs of using
@ -351,41 +377,19 @@ func bgscavenge(c chan int) {
// because of the additional overheads of using scavenged memory.
crit *= 1 + scavengeCostRatio
// If we spent more than 10 ms (for example, if the OS scheduled us away, or someone
// put their machine to sleep) in the critical section, bound the time we use to
// calculate at 10 ms to avoid letting the sleep time get arbitrarily high.
const maxCrit = 10e6
if crit > maxCrit {
crit = maxCrit
}
// Go to sleep for our current sleepNS.
slept := scavengeSleep(int64(crit / critSleepRatio))
// Compute the amount of time to sleep, assuming we want to use at most
// scavengePercent of CPU time. Take into account scheduling overheads
// that may extend the length of our sleep by multiplying by how far
// off we are from the ideal ratio. For example, if we're sleeping too
// much, then scavengeEWMA < idealFraction, so we'll adjust the sleep time
// down.
adjust := scavengeEWMA / idealFraction
sleepTime := int64(adjust * crit / (scavengePercent / 100.0))
// Calculate the CPU time spent.
//
// This may be slightly inaccurate with respect to GOMAXPROCS, but we're
// recomputing this often enough relative to GOMAXPROCS changes in general
// (it only changes when the world is stopped, and not during a GC) that
// that small inaccuracy is in the noise.
cpuFraction := float64(crit) / ((float64(slept) + crit) * float64(gomaxprocs))
// Go to sleep.
slept := scavengeSleep(sleepTime)
// Compute the new ratio.
fraction := crit / (crit + float64(slept))
// Set a lower bound on the fraction.
// Due to OS-related anomalies we may "sleep" for an inordinate amount
// of time. Let's avoid letting the ratio get out of hand by bounding
// the sleep time we use in our EWMA.
const minFraction = 1.0 / 1000.0
if fraction < minFraction {
fraction = minFraction
}
// Update scavengeEWMA by merging in the new crit/slept ratio.
const alpha = 0.5
scavengeEWMA = alpha*fraction + (1-alpha)*scavengeEWMA
// Update the critSleepRatio, adjusting until we reach our ideal fraction.
critSleepRatio = critSleepController.next(cpuFraction, idealFraction, float64(slept)+crit)
}
}
@ -395,16 +399,7 @@ func bgscavenge(c chan int) {
// back to the top of the heap.
//
// Returns the amount of memory scavenged in bytes.
//
// p.mheapLock must be held, but may be temporarily released if
// mayUnlock == true.
//
// Must run on the system stack because p.mheapLock must be held.
//
//go:systemstack
func (p *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr {
assertLockHeld(p.mheapLock)
func (p *pageAlloc) scavenge(nbytes uintptr) uintptr {
var (
addrs addrRange
gen uint32
@ -416,9 +411,11 @@ func (p *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr {
break
}
}
r, a := p.scavengeOne(addrs, nbytes-released, mayUnlock)
released += r
addrs = a
systemstack(func() {
r, a := p.scavengeOne(addrs, nbytes-released)
released += r
addrs = a
})
}
// Only unreserve the space which hasn't been scavenged or searched
// to ensure we always make progress.
@ -456,8 +453,9 @@ func printScavTrace(gen uint32, released uintptr, forced bool) {
func (p *pageAlloc) scavengeStartGen() {
assertLockHeld(p.mheapLock)
lock(&p.scav.lock)
if debug.scavtrace > 0 {
printScavTrace(p.scav.gen, p.scav.released, false)
printScavTrace(p.scav.gen, atomic.Loaduintptr(&p.scav.released), false)
}
p.inUse.cloneInto(&p.scav.inUse)
@ -487,9 +485,10 @@ func (p *pageAlloc) scavengeStartGen() {
// arena in size, so virtually every heap has the scavenger on.
p.scav.reservationBytes = alignUp(p.inUse.totalBytes, pallocChunkBytes) / scavengeReservationShards
p.scav.gen++
p.scav.released = 0
atomic.Storeuintptr(&p.scav.released, 0)
p.scav.freeHWM = minOffAddr
p.scav.scavLWM = maxOffAddr
unlock(&p.scav.lock)
}
// scavengeReserve reserves a contiguous range of the address space
@ -498,14 +497,9 @@ func (p *pageAlloc) scavengeStartGen() {
// first.
//
// Returns the reserved range and the scavenge generation number for it.
//
// p.mheapLock must be held.
//
// Must run on the system stack because p.mheapLock must be held.
//
//go:systemstack
func (p *pageAlloc) scavengeReserve() (addrRange, uint32) {
assertLockHeld(p.mheapLock)
lock(&p.scav.lock)
gen := p.scav.gen
// Start by reserving the minimum.
r := p.scav.inUse.removeLast(p.scav.reservationBytes)
@ -513,7 +507,8 @@ func (p *pageAlloc) scavengeReserve() (addrRange, uint32) {
// Return early if the size is zero; we don't want to use
// the bogus address below.
if r.size() == 0 {
return r, p.scav.gen
unlock(&p.scav.lock)
return r, gen
}
// The scavenger requires that base be aligned to a
@ -524,28 +519,26 @@ func (p *pageAlloc) scavengeReserve() (addrRange, uint32) {
// Remove from inUse however much extra we just pulled out.
p.scav.inUse.removeGreaterEqual(newBase)
unlock(&p.scav.lock)
r.base = offAddr{newBase}
return r, p.scav.gen
return r, gen
}
// scavengeUnreserve returns an unscavenged portion of a range that was
// previously reserved with scavengeReserve.
//
// p.mheapLock must be held.
//
// Must run on the system stack because p.mheapLock must be held.
//
//go:systemstack
func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
assertLockHeld(p.mheapLock)
if r.size() == 0 || gen != p.scav.gen {
if r.size() == 0 {
return
}
if r.base.addr()%pallocChunkBytes != 0 {
throw("unreserving unaligned region")
}
p.scav.inUse.add(r)
lock(&p.scav.lock)
if gen == p.scav.gen {
p.scav.inUse.add(r)
}
unlock(&p.scav.lock)
}
// scavengeOne walks over address range work until it finds
@ -559,15 +552,10 @@ func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
//
// work's base address must be aligned to pallocChunkBytes.
//
// p.mheapLock must be held, but may be temporarily released if
// mayUnlock == true.
//
// Must run on the system stack because p.mheapLock must be held.
// Must run on the systemstack because it acquires p.mheapLock.
//
//go:systemstack
func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (uintptr, addrRange) {
assertLockHeld(p.mheapLock)
func (p *pageAlloc) scavengeOne(work addrRange, max uintptr) (uintptr, addrRange) {
// Defensively check if we've received an empty address range.
// If so, just return.
if work.size() == 0 {
@ -599,40 +587,12 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
minPages = 1
}
// Helpers for locking and unlocking only if mayUnlock == true.
lockHeap := func() {
if mayUnlock {
lock(p.mheapLock)
}
// Fast path: check the chunk containing the top-most address in work.
if r, w := p.scavengeOneFast(work, minPages, maxPages); r != 0 {
return r, w
} else {
work = w
}
unlockHeap := func() {
if mayUnlock {
unlock(p.mheapLock)
}
}
// Fast path: check the chunk containing the top-most address in work,
// starting at that address's page index in the chunk.
//
// Note that work.end() is exclusive, so get the chunk we care about
// by subtracting 1.
maxAddr := work.limit.addr() - 1
maxChunk := chunkIndex(maxAddr)
if p.summary[len(p.summary)-1][maxChunk].max() >= uint(minPages) {
// We only bother looking for a candidate if there are at least
// minPages free pages at all.
base, npages := p.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages)
// If we found something, scavenge it and return!
if npages != 0 {
work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)}
assertLockHeld(p.mheapLock) // Must be locked on return.
return uintptr(npages) * pageSize, work
}
}
// Update the limit to reflect the fact that we checked maxChunk already.
work.limit = offAddr{chunkBase(maxChunk)}
// findCandidate finds the next scavenge candidate in work optimistically.
//
@ -671,37 +631,61 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
// looking for any free and unscavenged page. If we think we see something,
// lock and verify it!
for work.size() != 0 {
unlockHeap()
// Search for the candidate.
candidateChunkIdx, ok := findCandidate(work)
// Lock the heap. We need to do this now if we found a candidate or not.
// If we did, we'll verify it. If not, we need to lock before returning
// anyway.
lockHeap()
if !ok {
// We didn't find a candidate, so we're done.
work.limit = work.base
break
}
// Lock, so we can verify what we found.
lock(p.mheapLock)
// Find, verify, and scavenge if we can.
chunk := p.chunkOf(candidateChunkIdx)
base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages)
if npages > 0 {
work.limit = offAddr{p.scavengeRangeLocked(candidateChunkIdx, base, npages)}
assertLockHeld(p.mheapLock) // Must be locked on return.
unlock(p.mheapLock)
return uintptr(npages) * pageSize, work
}
unlock(p.mheapLock)
// We were fooled, so let's continue from where we left off.
work.limit = offAddr{chunkBase(candidateChunkIdx)}
}
return 0, work
}
assertLockHeld(p.mheapLock) // Must be locked on return.
// scavengeOneFast is the fast path for scavengeOne, which just checks the top
// chunk of work for some pages to scavenge.
//
// It returns the number of bytes scavenged together with the updated work
// range. When a candidate is found, work.limit is lowered to the address
// returned by scavengeRangeLocked. When nothing is found, it returns 0 and
// lowers work.limit to the base of the chunk that was just checked, so the
// caller does not look at that chunk again.
//
// Must run on the system stack because it acquires the heap lock.
//
//go:systemstack
func (p *pageAlloc) scavengeOneFast(work addrRange, minPages, maxPages uintptr) (uintptr, addrRange) {
// Subtract 1 from work.limit to get the top-most address in work, and
// from it the chunk that contains that address.
maxAddr := work.limit.addr() - 1
maxChunk := chunkIndex(maxAddr)
// The summary and the chunk's bitmaps are read under the heap lock.
lock(p.mheapLock)
if p.summary[len(p.summary)-1][maxChunk].max() >= uint(minPages) {
// We only bother looking for a candidate if there are at least
// minPages free pages at all.
base, npages := p.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages)
// If we found something, scavenge it and return!
if npages != 0 {
work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)}
unlock(p.mheapLock)
return uintptr(npages) * pageSize, work
}
}
// Nothing to scavenge in this chunk; drop the heap lock before returning.
unlock(p.mheapLock)
// Update the limit to reflect the fact that we checked maxChunk already.
work.limit = offAddr{chunkBase(maxChunk)}
return 0, work
}
@ -712,38 +696,57 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
//
// Returns the base address of the scavenged region.
//
// p.mheapLock must be held.
// p.mheapLock must be held. Unlocks p.mheapLock but reacquires
// it before returning. Must be run on the systemstack as a result.
//
//go:systemstack
func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr {
assertLockHeld(p.mheapLock)
p.chunkOf(ci).scavenged.setRange(base, npages)
// Compute the full address for the start of the range.
addr := chunkBase(ci) + uintptr(base)*pageSize
// Mark the range we're about to scavenge as allocated, because
// we don't want any allocating goroutines to grab it while
// the scavenging is in progress.
if scav := p.allocRange(addr, uintptr(npages)); scav != 0 {
throw("double scavenge")
}
// With that done, it's safe to unlock.
unlock(p.mheapLock)
// Update the scavenge low watermark.
lock(&p.scav.lock)
if oAddr := (offAddr{addr}); oAddr.lessThan(p.scav.scavLWM) {
p.scav.scavLWM = oAddr
}
unlock(&p.scav.lock)
// Only perform the actual scavenging if we're not in a test.
// It's dangerous to do so otherwise.
if p.test {
return addr
if !p.test {
// Only perform the actual scavenging if we're not in a test.
// It's dangerous to do so otherwise.
sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
// Update global accounting only when not in test, otherwise
// the runtime's accounting will be wrong.
nbytes := int64(npages) * pageSize
atomic.Xadd64(&memstats.heap_released, nbytes)
// Update consistent accounting too.
stats := memstats.heapStats.acquire()
atomic.Xaddint64(&stats.committed, -nbytes)
atomic.Xaddint64(&stats.released, nbytes)
memstats.heapStats.release()
}
sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
// Update global accounting only when not in test, otherwise
// the runtime's accounting will be wrong.
nbytes := int64(npages) * pageSize
atomic.Xadd64(&memstats.heap_released, nbytes)
// Update consistent accounting too.
stats := memstats.heapStats.acquire()
atomic.Xaddint64(&stats.committed, -nbytes)
atomic.Xaddint64(&stats.released, nbytes)
memstats.heapStats.release()
// Relock the heap, because now we need to make these pages
// available for allocation. Free them back to the page allocator.
lock(p.mheapLock)
p.free(addr, uintptr(npages), true)
// Mark the range as scavenged.
p.chunkOf(ci).scavenged.setRange(base, npages)
return addr
}

View File

@ -430,12 +430,12 @@ func TestPageAllocScavenge(t *testing.T) {
}
for name, v := range tests {
v := v
runTest := func(t *testing.T, mayUnlock bool) {
t.Run(name, func(t *testing.T) {
b := NewPageAlloc(v.beforeAlloc, v.beforeScav)
defer FreePageAlloc(b)
for iter, h := range v.expect {
if got := b.Scavenge(h.request, mayUnlock); got != h.expect {
if got := b.Scavenge(h.request); got != h.expect {
t.Fatalf("bad scavenge #%d: want %d, got %d", iter+1, h.expect, got)
}
}
@ -443,12 +443,6 @@ func TestPageAllocScavenge(t *testing.T) {
defer FreePageAlloc(want)
checkPageAlloc(t, want, b)
}
t.Run(name, func(t *testing.T) {
runTest(t, false)
})
t.Run(name+"MayUnlock", func(t *testing.T) {
runTest(t, true)
})
}
}

View File

@ -80,7 +80,7 @@ type mheap struct {
// access (since that may free the backing store).
allspans []*mspan // all spans out there
_ uint32 // align uint64 fields on 32-bit for atomics
// _ uint32 // align uint64 fields on 32-bit for atomics
// Proportional sweep
//
@ -1120,6 +1120,7 @@ func (h *mheap) allocSpan(npages uintptr, typ spanAllocType, spanclass spanClass
// Function-global state.
gp := getg()
base, scav := uintptr(0), uintptr(0)
growth := uintptr(0)
// On some platforms we need to provide physical page aligned stack
// allocations. Where the page size is less than the physical page
@ -1165,7 +1166,9 @@ func (h *mheap) allocSpan(npages uintptr, typ spanAllocType, spanclass spanClass
// Try to acquire a base address.
base, scav = h.pages.alloc(npages)
if base == 0 {
if !h.grow(npages) {
var ok bool
growth, ok = h.grow(npages)
if !ok {
unlock(&h.lock)
return nil
}
@ -1189,16 +1192,35 @@ func (h *mheap) allocSpan(npages uintptr, typ spanAllocType, spanclass spanClass
// Return memory around the aligned allocation.
spaceBefore := base - allocBase
if spaceBefore > 0 {
h.pages.free(allocBase, spaceBefore/pageSize)
h.pages.free(allocBase, spaceBefore/pageSize, false)
}
spaceAfter := (allocPages-npages)*pageSize - spaceBefore
if spaceAfter > 0 {
h.pages.free(base+npages*pageSize, spaceAfter/pageSize)
h.pages.free(base+npages*pageSize, spaceAfter/pageSize, false)
}
}
unlock(&h.lock)
if growth > 0 {
// We just caused a heap growth, so scavenge down what will soon be used.
// By scavenging inline we deal with the failure to allocate out of
// memory fragments by scavenging the memory fragments that are least
// likely to be re-used.
scavengeGoal := atomic.Load64(&h.scavengeGoal)
if retained := heapRetained(); retained+uint64(growth) > scavengeGoal {
// The scavenging algorithm requires the heap lock to be dropped so it
// can acquire it only sparingly. This is a potentially expensive operation
// so it frees up other goroutines to allocate in the meanwhile. In fact,
// they can make use of the growth we just created.
todo := growth
if overage := uintptr(retained + uint64(growth) - scavengeGoal); todo > overage {
todo = overage
}
h.pages.scavenge(todo)
}
}
HaveSpan:
// At this point, both s != nil and base != 0, and the heap
// lock is no longer held. Initialize the span.
@ -1311,10 +1333,10 @@ HaveSpan:
}
// Try to add at least npage pages of memory to the heap,
// returning whether it worked.
// returning how much the heap grew by and whether it worked.
//
// h.lock must be held.
func (h *mheap) grow(npage uintptr) bool {
func (h *mheap) grow(npage uintptr) (uintptr, bool) {
assertLockHeld(&h.lock)
// We must grow the heap in whole palloc chunks.
@ -1336,7 +1358,7 @@ func (h *mheap) grow(npage uintptr) bool {
av, asize := h.sysAlloc(ask)
if av == nil {
print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
return false
return 0, false
}
if uintptr(av) == h.curArena.end {
@ -1396,20 +1418,7 @@ func (h *mheap) grow(npage uintptr) bool {
// space ready for allocation.
h.pages.grow(v, nBase-v)
totalGrowth += nBase - v
// We just caused a heap growth, so scavenge down what will soon be used.
// By scavenging inline we deal with the failure to allocate out of
// memory fragments by scavenging the memory fragments that are least
// likely to be re-used.
scavengeGoal := atomic.Load64(&h.scavengeGoal)
if retained := heapRetained(); retained+uint64(totalGrowth) > scavengeGoal {
todo := totalGrowth
if overage := uintptr(retained + uint64(totalGrowth) - scavengeGoal); todo > overage {
todo = overage
}
h.pages.scavenge(todo, false)
}
return true
return totalGrowth, true
}
// Free the span back into the heap.
@ -1499,7 +1508,7 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) {
memstats.heapStats.release()
// Mark the space as free.
h.pages.free(s.base(), s.npages)
h.pages.free(s.base(), s.npages, false)
// Free the span structure. We no longer have a use for it.
s.state.set(mSpanDead)
@ -1515,13 +1524,19 @@ func (h *mheap) scavengeAll() {
// the mheap API.
gp := getg()
gp.m.mallocing++
lock(&h.lock)
// Start a new scavenge generation so we have a chance to walk
// over the whole heap.
h.pages.scavengeStartGen()
released := h.pages.scavenge(^uintptr(0), false)
gen := h.pages.scav.gen
unlock(&h.lock)
released := h.pages.scavenge(^uintptr(0))
lock(&h.pages.scav.lock)
gen := h.pages.scav.gen
unlock(&h.pages.scav.lock)
gp.m.mallocing--
if debug.scavtrace > 0 {

View File

@ -226,6 +226,8 @@ type pageAlloc struct {
// are currently available. Otherwise one might iterate over unused
// ranges.
//
// Protected by mheapLock.
//
// TODO(mknyszek): Consider changing the definition of the bitmap
// such that 1 means free and 0 means in-use so that summaries and
// the bitmaps align better on zero-values.
@ -261,29 +263,41 @@ type pageAlloc struct {
inUse addrRanges
// scav stores the scavenger state.
//
// All fields are protected by mheapLock.
scav struct {
lock mutex
// inUse is a slice of ranges of address space which have not
// yet been looked at by the scavenger.
//
// Protected by lock.
inUse addrRanges
// gen is the scavenge generation number.
//
// Protected by lock.
gen uint32
// reservationBytes is how large of a reservation should be made
// in bytes of address space for each scavenge iteration.
//
// Protected by lock.
reservationBytes uintptr
// released is the amount of memory released this generation.
//
// Updated atomically.
released uintptr
// scavLWM is the lowest (offset) address that the scavenger reached this
// scavenge generation.
//
// Protected by lock.
scavLWM offAddr
// freeHWM is the highest (offset) address of a page that was freed to
// the page allocator this scavenge generation.
//
// Protected by mheapLock.
freeHWM offAddr
}
@ -864,17 +878,19 @@ Found:
// Must run on the system stack because p.mheapLock must be held.
//
//go:systemstack
func (p *pageAlloc) free(base, npages uintptr) {
func (p *pageAlloc) free(base, npages uintptr, scavenged bool) {
assertLockHeld(p.mheapLock)
// If we're freeing pages below the p.searchAddr, update searchAddr.
if b := (offAddr{base}); b.lessThan(p.searchAddr) {
p.searchAddr = b
}
// Update the free high watermark for the scavenger.
limit := base + npages*pageSize - 1
if offLimit := (offAddr{limit}); p.scav.freeHWM.lessThan(offLimit) {
p.scav.freeHWM = offLimit
if !scavenged {
// Update the free high watermark for the scavenger.
if offLimit := (offAddr{limit}); p.scav.freeHWM.lessThan(offLimit) {
p.scav.freeHWM = offLimit
}
}
if npages == 1 {
// Fast path: we're clearing a single bit, and we know exactly

View File

@ -790,7 +790,15 @@ type consistentHeapStats struct {
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
// acquire a P or release its P in between. A P also must
// not acquire a given consistentHeapStats if it hasn't
// yet released it.
//
// nosplit because a stack growth in this function could
// lead to a stack allocation that could reenter the
// function.
//
//go:nosplit
func (m *consistentHeapStats) acquire() *heapStatsDelta {
if pp := getg().m.p.ptr(); pp != nil {
seq := atomic.Xadd(&pp.statsSeq, 1)
@ -814,6 +822,12 @@ func (m *consistentHeapStats) acquire() *heapStatsDelta {
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
//
// nosplit because a stack growth in this function could
// lead to a stack allocation that causes another acquire
// before this operation has completed.
//
//go:nosplit
func (m *consistentHeapStats) release() {
if pp := getg().m.p.ptr(); pp != nil {
seq := atomic.Xadd(&pp.statsSeq, 1)

View File

@ -315,6 +315,7 @@ var debug struct {
schedtrace int32
tracebackancestors int32
asyncpreemptoff int32
harddecommit int32
// debug.malloc is used as a combined debug check
// in the malloc function and should be set
@ -344,6 +345,7 @@ var dbgvars = []dbgVar{
{"tracebackancestors", &debug.tracebackancestors},
{"asyncpreemptoff", &debug.asyncpreemptoff},
{"inittrace", &debug.inittrace},
{"harddecommit", &debug.harddecommit},
}
func parsedebugvars() {

View File

@ -1002,7 +1002,7 @@ func newstack() {
// NOTE: stackguard0 may change underfoot, if another thread
// is about to try to preempt gp. Read it just once and use that same
// value now and below.
preempt := atomic.Loaduintptr(&gp.stackguard0) == stackPreempt
stackguard0 := atomic.Loaduintptr(&gp.stackguard0)
// Be conservative about where we preempt.
// We are interested in preempting user Go code, not runtime code.
@ -1016,6 +1016,7 @@ func newstack() {
// If the GC is in some way dependent on this goroutine (for example,
// it needs a lock held by the goroutine), that small preemption turns
// into a real deadlock.
preempt := stackguard0 == stackPreempt
if preempt {
if !canPreemptM(thisg.m) {
// Let the goroutine keep running for now.
@ -1083,7 +1084,7 @@ func newstack() {
}
}
if gp.stackguard0 == stackForceMove {
if stackguard0 == stackForceMove {
// Forced stack movement used for debugging.
// Don't double the stack (or we may quickly run out
// if this is done repeatedly).

View File

@ -353,6 +353,9 @@ func testTracebackArgs8d(a testArgsType8d) int {
return n
}
// nosplit to avoid preemption or morestack spilling registers.
//
//go:nosplit
//go:noinline
func testTracebackArgs9(a int64, b int32, c int16, d int8, x [2]int, y int) int {
if a < 0 {
@ -366,6 +369,9 @@ func testTracebackArgs9(a int64, b int32, c int16, d int8, x [2]int, y int) int
return n
}
// nosplit to avoid preemption or morestack spilling registers.
//
//go:nosplit
//go:noinline
func testTracebackArgs10(a, b, c, d, e int32) int {
// no use of any args
@ -373,8 +379,10 @@ func testTracebackArgs10(a, b, c, d, e int32) int {
}
// norace to avoid race instrumentation changing spill locations.
// nosplit to avoid preemption or morestack spilling registers.
//
//go:norace
//go:nosplit
//go:noinline
func testTracebackArgs11a(a, b, c int32) int {
if a < 0 {
@ -387,8 +395,10 @@ func testTracebackArgs11a(a, b, c int32) int {
}
// norace to avoid race instrumentation changing spill locations.
// nosplit to avoid preemption or morestack spilling registers.
//
//go:norace
//go:nosplit
//go:noinline
func testTracebackArgs11b(a, b, c, d int32) int {
var x int32

View File

@ -706,7 +706,8 @@ func isSeparator(r rune) bool {
// Title returns a copy of the string s with all Unicode letters that begin words
// mapped to their Unicode title case.
//
// BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
// Deprecated: The rule Title uses for word boundaries does not handle Unicode
// punctuation properly. Use golang.org/x/text/cases instead.
func Title(s string) string {
// Use a closure here to remember state.
// Hackish but effective. Depends on Map scanning in order and calling

View File

@ -283,7 +283,7 @@ netbsd_arm64)
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
openbsd_386)
GOOSARCH_in="syscall_openbsd1.go syscall_openbsd_$GOARCH.go"
GOOSARCH_in="syscall_openbsd_libc.go syscall_openbsd_$GOARCH.go"
mkerrors="$mkerrors -m32"
mksyscall="./mksyscall.pl -l32 -openbsd -libc"
mksysctl="./mksysctl_openbsd.pl"

View File

@ -344,6 +344,23 @@ func ExampleTime_Format_pad() {
}
func ExampleTime_GoString() {
	// show prints the Go-syntax representation of one Time value per line.
	show := func(v time.Time) {
		fmt.Println(v.GoString())
	}

	start := time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)
	show(start)

	// Shifting by a duration and by calendar units both yield values that
	// GoString renders as a time.Date call.
	minuteLater := start.Add(1 * time.Minute)
	show(minuteLater)

	monthLater := minuteLater.AddDate(0, 1, 0)
	show(monthLater)

	// The layout and the value are fixed strings here, so the parse error is
	// not inspected.
	parsed, _ := time.Parse("Jan 2, 2006 at 3:04pm (MST)", "Feb 3, 2013 at 7:54pm (UTC)")
	show(parsed)
	// Output:
	// time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)
	// time.Date(2009, time.November, 10, 23, 1, 0, 0, time.UTC)
	// time.Date(2009, time.December, 10, 23, 1, 0, 0, time.UTC)
	// time.Date(2013, time.February, 3, 19, 54, 0, 0, time.UTC)
}
func ExampleParse() {
// See the example for Time.Format for a thorough description of how
// to define the layout string to parse a time.Time value; Parse and
@ -401,6 +418,39 @@ func ExampleParseInLocation() {
// 2012-07-09 00:00:00 +0200 CEST
}
func ExampleUnix() {
	// Take the Unix timestamp, in seconds, of a fixed reference instant.
	ref := time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)
	secs := ref.Unix()
	fmt.Println(secs)

	// Convert the seconds back into a Time; UTC() keeps the printed value
	// independent of the local time zone.
	fmt.Println(time.Unix(secs, 0).UTC())
	// Output:
	// 1257894000
	// 2009-11-10 23:00:00 +0000 UTC
}
func ExampleUnixMicro() {
	// Take the Unix timestamp, in microseconds, of a fixed reference instant.
	ref := time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)
	micros := ref.UnixMicro()
	fmt.Println(micros)

	// Convert the microseconds back into a Time; UTC() keeps the printed
	// value independent of the local time zone.
	fmt.Println(time.UnixMicro(micros).UTC())
	// Output:
	// 1257894000000000
	// 2009-11-10 23:00:00 +0000 UTC
}
func ExampleUnixMilli() {
	// Take the Unix timestamp, in milliseconds, of a fixed reference instant.
	ref := time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)
	millis := ref.UnixMilli()
	fmt.Println(millis)

	// Convert the milliseconds back into a Time; UTC() keeps the printed
	// value independent of the local time zone.
	fmt.Println(time.UnixMilli(millis).UTC())
	// Output:
	// 1257894000000
	// 2009-11-10 23:00:00 +0000 UTC
}
func ExampleTime_Unix() {
// 1 billion seconds of Unix, three ways.
fmt.Println(time.Unix(1e9, 0).UTC()) // 1e9 seconds

View File

@ -48,8 +48,12 @@ func (t *Ticker) Stop() {
}
// Reset stops a ticker and resets its period to the specified duration.
// The next tick will arrive after the new period elapses.
// The next tick will arrive after the new period elapses. The duration d
// must be greater than zero; if not, Reset will panic.
func (t *Ticker) Reset(d Duration) {
if d <= 0 {
panic("non-positive interval for Ticker.Reset")
}
if t.r.f == nil {
panic("time: Reset called on uninitialized Ticker")
}

View File

@ -134,6 +134,17 @@ func TestNewTickerLtZeroDuration(t *testing.T) {
NewTicker(-1)
}
// Test that Ticker.Reset panics when given a non-positive duration.
// The boundary value zero is used, since Reset rejects any d <= 0.
func TestTickerResetLtZeroDuration(t *testing.T) {
	defer func() {
		if err := recover(); err == nil {
			t.Errorf("Ticker.Reset(0) should have panicked")
		}
	}()
	tk := NewTicker(Second)
	// Reset is expected to panic, so stop the ticker from a deferred call;
	// otherwise the still-running ticker outlives the test.
	defer tk.Stop()
	tk.Reset(0)
}
func BenchmarkTicker(b *testing.B) {
benchmark(b, func(n int) {
ticker := NewTicker(Nanosecond)

View File

@ -1433,17 +1433,17 @@ func Date(year int, month Month, day, hour, min, sec, nsec int, loc *Location) T
unix := int64(abs) + (absoluteToInternal + internalToUnix)
// Look for zone offset for t, so we can adjust to UTC.
// The lookup function expects UTC, so we pass t in the
// Look for zone offset for expected time, so we can adjust to UTC.
// The lookup function expects UTC, so first we pass unix in the
// hope that it will not be too close to a zone transition,
// and then adjust if it is.
_, offset, start, end, _ := loc.lookup(unix)
if offset != 0 {
switch utc := unix - int64(offset); {
case utc < start:
_, offset, _, _, _ = loc.lookup(start - 1)
case utc >= end:
_, offset, _, _, _ = loc.lookup(end)
utc := unix - int64(offset)
// If utc is valid for the time zone we found, then we have the right offset.
// If not, we get the correct offset by looking up utc in the location.
if utc < start || utc >= end {
_, offset, _, _, _ = loc.lookup(utc)
}
unix -= int64(offset)
}

View File

@ -1616,3 +1616,45 @@ func TestTimeAddSecOverflow(t *testing.T) {
}
}
}
// Issue 49284: ParseInLocation returned an incorrect time around Daylight
// Saving Time transitions.
//
// The test builds wall-clock times with Date on both sides of the 1991
// "Asia/Shanghai" transitions and checks each one against the equivalent
// instant expressed in UTC.
func TestTimeWithZoneTransition(t *testing.T) {
	// NOTE(review): presumably this makes LoadLocation read the zone data
	// from the zip file instead of the host's zoneinfo; confirm against
	// ForceZipFileForTesting.
	ForceZipFileForTesting(true)
	defer ForceZipFileForTesting(false)

	loc, err := LoadLocation("Asia/Shanghai")
	if err != nil {
		t.Fatal(err)
	}

	tests := [...]struct {
		give Time // wall-clock time constructed in loc
		want Time // the same instant constructed in UTC
	}{
		// 14 Apr 1991 - Daylight Saving Time Started
		// When time of "Asia/Shanghai" was about to reach
		// Sunday, 14 April 1991, 02:00:00 clocks were turned forward 1 hour to
		// Sunday, 14 April 1991, 03:00:00 local daylight time instead.
		// The UTC time was 13 April 1991, 18:00:00
		0: {Date(1991, April, 13, 17, 50, 0, 0, loc), Date(1991, April, 13, 9, 50, 0, 0, UTC)},
		1: {Date(1991, April, 13, 18, 0, 0, 0, loc), Date(1991, April, 13, 10, 0, 0, 0, UTC)},
		2: {Date(1991, April, 14, 1, 50, 0, 0, loc), Date(1991, April, 13, 17, 50, 0, 0, UTC)},
		3: {Date(1991, April, 14, 3, 0, 0, 0, loc), Date(1991, April, 13, 18, 0, 0, 0, UTC)},

		// 15 Sep 1991 - Daylight Saving Time Ended
		// When local daylight time of "Asia/Shanghai" was about to reach
		// Sunday, 15 September 1991, 02:00:00 clocks were turned backward 1 hour to
		// Sunday, 15 September 1991, 01:00:00 local standard time instead.
		// The UTC time was 14 September 1991, 17:00:00
		4: {Date(1991, September, 14, 16, 50, 0, 0, loc), Date(1991, September, 14, 7, 50, 0, 0, UTC)},
		5: {Date(1991, September, 14, 17, 0, 0, 0, loc), Date(1991, September, 14, 8, 0, 0, 0, UTC)},
		6: {Date(1991, September, 15, 0, 50, 0, 0, loc), Date(1991, September, 14, 15, 50, 0, 0, UTC)},
		7: {Date(1991, September, 15, 2, 0, 0, 0, loc), Date(1991, September, 14, 18, 0, 0, 0, UTC)},
	}

	for i, tt := range tests {
		// Equal compares instants, so differing Locations do not matter here.
		if !tt.give.Equal(tt.want) {
			t.Errorf("#%d: %s is not equal to %s", i, tt.give.Format(RFC3339), tt.want.Format(RFC3339))
		}
	}
}

View File

@ -214,3 +214,13 @@ func ExampleValidString() {
// true
// false
}
func ExampleAppendRune() {
	// U+10000 needs four bytes in UTF-8.
	const r rune = 0x10000

	// Appending to a nil slice allocates a new one; appending to an existing
	// slice keeps its current contents in front of the encoded rune.
	fromNil := utf8.AppendRune(nil, r)
	fromInit := utf8.AppendRune([]byte("init"), r)

	fmt.Printf("%s\n%s\n", fromNil, fromInit)
	// Output:
	// 𐀀
	// init𐀀
}

View File

@ -133,7 +133,7 @@ func TestAppendRune(t *testing.T) {
t.Errorf("AppendRune(nil, %#04x) = %s, want %s", m.r, buf, m.str)
}
if buf := AppendRune([]byte("init"), m.r); string(buf) != "init"+m.str {
t.Errorf("AppendRune(nil, %#04x) = %s, want %s", m.r, buf, "init"+m.str)
t.Errorf("AppendRune(init, %#04x) = %s, want %s", m.r, buf, "init"+m.str)
}
}
}

View File

@ -0,0 +1,25 @@
// compile
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package p
// f is not called anywhere in this file; the file carries a "// compile"
// directive, so it only has to compile.
//
// NOTE(review): presumably the exact nesting of anonymous structs and
// closures below is what the test exercises, so do not simplify the body.
func f(i int) {
// s1 is a struct nested three levels deep; only s1.s.s.i is read below.
var s1 struct {
s struct{ s struct{ i int } }
}
// s2 and s3 share one anonymous struct type, so their fields are assignable
// to one another.
var s2, s3 struct {
a struct{ i int }
b int
}
// The outer closure captures i, s1, s2 and s3 and is invoked immediately.
func() {
// A single assignment to i whose right-hand side calls two further closures.
i = 1 + 2*i + s3.a.i + func() int {
// Copies both fields of s3 into s2 and contributes 0 to the sum.
s2.a, s2.b = s3.a, s3.b
return 0
}() + func(*int) int {
// Ignores its *int argument and contributes s1.s.s.i to the sum.
return s1.s.s.i
}(new(int))
}()
}