mirror of
https://github.com/golang/go.git
synced 2025-05-18 22:04:38 +00:00
For a large module, opening the index was populating tables with entries for every package in the module. If we are only using a small number of those packages, this is wasted work that can dwarf the benefit from the index. This CL changes the index reader to avoid loading all packages at module index open time. It also refactors the code somewhat for clarity. It also removes some duplication by defining that a per-package index is a per-module index containing a single package, rather than having two different formats and two different decoders. It also changes the string table to use uvarint-prefixed data instead of having to scan for a NUL byte. This makes random access to long strings more efficient - O(1) instead of O(n) - and can significantly speed up the strings.Compare operation in the binary search looking for a given package. Also add a direct test of the indexing code. For #53577. Change-Id: I7428d28133e4e7fe2d2993fa014896cd15af48af Reviewed-on: https://go-review.googlesource.com/c/go/+/416178 Reviewed-by: Bryan Mills <bcmills@google.com>
159 lines
3.6 KiB
Go
159 lines
3.6 KiB
Go
// Copyright 2022 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package modindex
|
|
|
|
import (
|
|
"cmd/go/internal/base"
|
|
"encoding/binary"
|
|
"go/token"
|
|
"sort"
|
|
)
|
|
|
|
// indexVersion is the header string written, followed by a newline, at the
// start of every encoded index.
const indexVersion = "go index v1" // 11 bytes (plus \n), to align uint32s in index
|
|
|
|
// encodeModuleBytes produces the encoded representation of the module index.
|
|
// encodeModuleBytes may modify the packages slice.
|
|
func encodeModuleBytes(packages []*rawPackage) []byte {
|
|
e := newEncoder()
|
|
e.Bytes([]byte(indexVersion + "\n"))
|
|
stringTableOffsetPos := e.Pos() // fill this at the end
|
|
e.Uint32(0) // string table offset
|
|
sort.Slice(packages, func(i, j int) bool {
|
|
return packages[i].dir < packages[j].dir
|
|
})
|
|
e.Int(len(packages))
|
|
packagesPos := e.Pos()
|
|
for _, p := range packages {
|
|
e.String(p.dir)
|
|
e.Int(0)
|
|
}
|
|
for i, p := range packages {
|
|
e.IntAt(e.Pos(), packagesPos+8*i+4)
|
|
encodePackage(e, p)
|
|
}
|
|
e.IntAt(e.Pos(), stringTableOffsetPos)
|
|
e.Bytes(e.stringTable)
|
|
e.Bytes([]byte{0xFF}) // end of string table marker
|
|
return e.b
|
|
}
|
|
|
|
func encodePackageBytes(p *rawPackage) []byte {
|
|
return encodeModuleBytes([]*rawPackage{p})
|
|
}
|
|
|
|
func encodePackage(e *encoder, p *rawPackage) {
|
|
e.String(p.error)
|
|
e.String(p.dir)
|
|
e.Int(len(p.sourceFiles)) // number of source files
|
|
sourceFileOffsetPos := e.Pos() // the pos of the start of the source file offsets
|
|
for range p.sourceFiles {
|
|
e.Int(0)
|
|
}
|
|
for i, f := range p.sourceFiles {
|
|
e.IntAt(e.Pos(), sourceFileOffsetPos+4*i)
|
|
encodeFile(e, f)
|
|
}
|
|
}
|
|
|
|
func encodeFile(e *encoder, f *rawFile) {
|
|
e.String(f.error)
|
|
e.String(f.parseError)
|
|
e.String(f.synopsis)
|
|
e.String(f.name)
|
|
e.String(f.pkgName)
|
|
e.Bool(f.ignoreFile)
|
|
e.Bool(f.binaryOnly)
|
|
e.String(f.cgoDirectives)
|
|
e.String(f.goBuildConstraint)
|
|
|
|
e.Int(len(f.plusBuildConstraints))
|
|
for _, s := range f.plusBuildConstraints {
|
|
e.String(s)
|
|
}
|
|
|
|
e.Int(len(f.imports))
|
|
for _, m := range f.imports {
|
|
e.String(m.path)
|
|
e.Position(m.position)
|
|
}
|
|
|
|
e.Int(len(f.embeds))
|
|
for _, embed := range f.embeds {
|
|
e.String(embed.pattern)
|
|
e.Position(embed.position)
|
|
}
|
|
}
|
|
|
|
func newEncoder() *encoder {
|
|
e := &encoder{strings: make(map[string]int)}
|
|
|
|
// place the empty string at position 0 in the string table
|
|
e.stringTable = append(e.stringTable, 0)
|
|
e.strings[""] = 0
|
|
|
|
return e
|
|
}
|
|
|
|
func (e *encoder) Position(position token.Position) {
|
|
e.String(position.Filename)
|
|
e.Int(position.Offset)
|
|
e.Int(position.Line)
|
|
e.Int(position.Column)
|
|
}
|
|
|
|
// encoder accumulates the bytes of an index along with the string table
// that the index refers to.
type encoder struct {
	// b is the encoded output produced so far.
	b []byte

	// stringTable holds every distinct string written so far, each stored
	// as a uvarint length followed by the string's bytes.
	stringTable []byte

	// strings maps a string to the offset of its entry in stringTable.
	strings map[string]int
}
|
|
|
|
func (e *encoder) Pos() int {
|
|
return len(e.b)
|
|
}
|
|
|
|
func (e *encoder) Bytes(b []byte) {
|
|
e.b = append(e.b, b...)
|
|
}
|
|
|
|
func (e *encoder) String(s string) {
|
|
if n, ok := e.strings[s]; ok {
|
|
e.Int(n)
|
|
return
|
|
}
|
|
pos := len(e.stringTable)
|
|
e.strings[s] = pos
|
|
e.Int(pos)
|
|
e.stringTable = binary.AppendUvarint(e.stringTable, uint64(len(s)))
|
|
e.stringTable = append(e.stringTable, []byte(s)...)
|
|
}
|
|
|
|
func (e *encoder) Bool(b bool) {
|
|
if b {
|
|
e.Uint32(1)
|
|
} else {
|
|
e.Uint32(0)
|
|
}
|
|
}
|
|
|
|
func (e *encoder) Uint32(n uint32) {
|
|
e.b = binary.LittleEndian.AppendUint32(e.b, n)
|
|
}
|
|
|
|
// Int encodes n. Note that all ints are written to the index as uint32s,
|
|
// and to avoid problems on 32-bit systems we require fitting into a 32-bit int.
|
|
func (e *encoder) Int(n int) {
|
|
if n < 0 || int(int32(n)) != n {
|
|
base.Fatalf("go: attempting to write an int to the index that overflows int32")
|
|
}
|
|
e.Uint32(uint32(n))
|
|
}
|
|
|
|
func (e *encoder) IntAt(n int, at int) {
|
|
if n < 0 || int(int32(n)) != n {
|
|
base.Fatalf("go: attempting to write an int to the index that overflows int32")
|
|
}
|
|
binary.LittleEndian.PutUint32(e.b[at:], uint32(n))
|
|
}
|