Russ Cox 7510e597de cmd/go: make module index loading O(1)
For a large module, opening the index was populating tables with
entries for every package in the module. If we are only using a small
number of those packages, this is wasted work that can dwarf the
benefit from the index.

This CL changes the index reader to avoid loading all packages
at module index open time. It also refactors the code somewhat
for clarity.

It also removes some duplication by defining that a per-package
index is a per-module index containing a single package, rather
than having two different formats and two different decoders.

It also changes the string table to use uvarint-prefixed data
instead of having to scan for a NUL byte. This makes random access
to long strings more efficient - O(1) instead of O(n) - and can significantly
speed up the strings.Compare operation in the binary search looking
for a given package.

Also add a direct test of the indexing code.

For #53577.

Change-Id: I7428d28133e4e7fe2d2993fa014896cd15af48af
Reviewed-on: https://go-review.googlesource.com/c/go/+/416178
Reviewed-by: Bryan Mills <bcmills@google.com>
2022-07-11 19:09:00 +00:00

159 lines
3.6 KiB
Go

// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package modindex
import (
"cmd/go/internal/base"
"encoding/binary"
"go/token"
"sort"
)
// indexVersion is the header string that encodeModuleBytes writes, followed
// by a newline, at the very start of every encoded index.
const indexVersion = "go index v1" // 11 bytes (plus \n), to align uint32s in index
// encodeModuleBytes serializes packages into the module index format and
// returns the encoded bytes.
//
// The output is laid out in the order it is emitted:
//   - the indexVersion header followed by a newline
//   - a uint32 holding the offset of the string table
//   - a uint32 package count
//   - one 8-byte entry per package: the string-table reference of its
//     directory, then the offset of its encoded package data
//   - the encoded packages themselves
//   - the string table, terminated by a single 0xFF byte
//
// The packages slice is sorted by directory in place, so the caller's slice
// may be reordered.
func encodeModuleBytes(packages []*rawPackage) []byte {
	const (
		entrySize   = 8 // uint32 directory reference + uint32 package offset
		offsetField = 4 // position of the package offset within an entry
	)

	// Entries are emitted in directory order.
	sort.Slice(packages, func(a, b int) bool {
		return packages[a].dir < packages[b].dir
	})

	enc := newEncoder()
	enc.Bytes([]byte(indexVersion + "\n"))

	// Reserve the string table offset; it is patched at the end, once the
	// table's final position is known.
	strTabSlot := enc.Pos()
	enc.Uint32(0)

	// Package count, then the directory table with zeroed package offsets.
	enc.Int(len(packages))
	dirTable := enc.Pos()
	for i := range packages {
		enc.String(packages[i].dir)
		enc.Int(0)
	}

	// Emit each package, back-patching its offset into the directory table.
	for i := range packages {
		enc.IntAt(enc.Pos(), dirTable+i*entrySize+offsetField)
		encodePackage(enc, packages[i])
	}

	// Finally the string table and its end marker.
	enc.IntAt(enc.Pos(), strTabSlot)
	enc.Bytes(enc.stringTable)
	enc.Bytes([]byte{0xFF})
	return enc.b
}
// encodePackageBytes encodes p as a module index that contains exactly one
// package, so a lone package uses the same encoding as a whole module.
func encodePackageBytes(p *rawPackage) []byte {
	single := []*rawPackage{p}
	return encodeModuleBytes(single)
}
// encodePackage appends the encoding of p to e: its error string, its
// directory, the number of source files, a table of one uint32 offset per
// source file, and then each encoded source file.
func encodePackage(e *encoder, p *rawPackage) {
	e.String(p.error)
	e.String(p.dir)

	files := p.sourceFiles
	e.Int(len(files))

	// Reserve one zeroed offset slot per file; each slot is patched with the
	// file's position just before that file is written.
	offsetTable := e.Pos()
	for i := 0; i < len(files); i++ {
		e.Int(0)
	}
	for i := range files {
		slot := offsetTable + i*4
		e.IntAt(e.Pos(), slot)
		encodeFile(e, files[i])
	}
}
// encodeFile appends the encoding of f to e. Fields are written in a fixed
// order: the error, parseError, synopsis, name and pkgName strings; the
// ignoreFile and binaryOnly flags; the cgoDirectives and goBuildConstraint
// strings; and then three count-prefixed lists holding plusBuildConstraints,
// imports and embeds. Each import and each embed is written as a string
// followed by its source position.
func encodeFile(e *encoder, f *rawFile) {
	for _, s := range []string{f.error, f.parseError, f.synopsis, f.name, f.pkgName} {
		e.String(s)
	}
	for _, flag := range []bool{f.ignoreFile, f.binaryOnly} {
		e.Bool(flag)
	}
	for _, s := range []string{f.cgoDirectives, f.goBuildConstraint} {
		e.String(s)
	}

	constraints := f.plusBuildConstraints
	e.Int(len(constraints))
	for i := range constraints {
		e.String(constraints[i])
	}

	e.Int(len(f.imports))
	for i := range f.imports {
		imp := f.imports[i]
		e.String(imp.path)
		e.Position(imp.position)
	}

	e.Int(len(f.embeds))
	for i := range f.embeds {
		emb := f.embeds[i]
		e.String(emb.pattern)
		e.Position(emb.position)
	}
}
// newEncoder returns an encoder whose string table already contains the
// empty string at offset 0.
func newEncoder() *encoder {
	// The single zero byte is the empty string's entry: a uvarint length of
	// 0 with no data after it.
	return &encoder{
		stringTable: []byte{0},
		strings:     map[string]int{"": 0},
	}
}
// Position writes position as four values: the filename as a string
// reference, followed by the offset, line and column as ints.
func (e *encoder) Position(position token.Position) {
	e.String(position.Filename)
	for _, v := range [...]int{position.Offset, position.Line, position.Column} {
		e.Int(v)
	}
}
// encoder accumulates the encoded index in b and collects every distinct
// string once in stringTable.
type encoder struct {
	// b is the encoded output produced so far.
	b []byte
	// stringTable holds the string data, each entry prefixed by its uvarint length.
	stringTable []byte
	// strings maps each string already in stringTable to its offset there.
	strings map[string]int
}
// Pos reports the current write offset, that is, the number of bytes
// encoded so far.
func (e *encoder) Pos() int {
	written := len(e.b)
	return written
}
// Bytes appends the raw contents of b to the encoded output.
func (e *encoder) Bytes(b []byte) {
	out := append(e.b, b...)
	e.b = out
}
// String writes a reference to s: the uint32 offset of s within the string
// table. The first time a given string is seen it is added to the table as a
// uvarint length followed by the string's bytes; later occurrences reuse the
// recorded offset, so each distinct string is stored only once.
func (e *encoder) String(s string) {
	if n, ok := e.strings[s]; ok {
		e.Int(n)
		return
	}
	pos := len(e.stringTable)
	e.strings[s] = pos
	e.Int(pos)
	e.stringTable = binary.AppendUvarint(e.stringTable, uint64(len(s)))
	// append accepts a string directly when the destination is a []byte, so
	// no intermediate []byte(s) conversion is needed.
	e.stringTable = append(e.stringTable, s...)
}
// Bool writes b as a uint32: 1 for true, 0 for false.
func (e *encoder) Bool(b bool) {
	var v uint32
	if b {
		v = 1
	}
	e.Uint32(v)
}
// Uint32 appends n to the encoded output as four little-endian bytes.
func (e *encoder) Uint32(n uint32) {
	var word [4]byte
	binary.LittleEndian.PutUint32(word[:], n)
	e.b = append(e.b, word[:]...)
}
// Int writes n as a uint32. Every int in the index is stored as a uint32,
// and to stay safe on 32-bit systems the value must be non-negative and
// representable as an int32; any other value is reported via base.Fatalf.
func (e *encoder) Int(n int) {
	fits := n >= 0 && int(int32(n)) == n
	if !fits {
		base.Fatalf("go: attempting to write an int to the index that overflows int32")
	}
	e.Uint32(uint32(n))
}
// IntAt overwrites the four bytes of already-encoded output that start at
// offset at with n as a little-endian uint32. It enforces the same range
// check as Int: n must be non-negative and fit in an int32, otherwise the
// problem is reported via base.Fatalf.
func (e *encoder) IntAt(n int, at int) {
	if fits := n >= 0 && int(int32(n)) == n; !fits {
		base.Fatalf("go: attempting to write an int to the index that overflows int32")
	}
	dst := e.b[at:]
	binary.LittleEndian.PutUint32(dst, uint32(n))
}