unicode/utf8: optimize Valid to parity with ValidString

The benchmarks added in this change revealed that ValidString
runs ~17% faster than Valid([]byte) on the ASCII prefix
of the input. Inspection of the assembly revealed that the
code generated for p[8:] required recomputing the slice capacity
to handle the cap=0 special case, which added an ADD -8 instruction.
By making len=cap, the capacity becomes a common subexpression
with the length, saving the ADD instruction.
(Thanks to khr for the tip.)

Incidentally, I tried a number of other optimizations but was
unable to make consistent gains across all benchmarks. The most
promising was to retain the bitmask of non-ASCII bytes from the
fast loop; the slow loop would shift it, and when it becomes zero,
return to the fast loop. This made the MostlyASCII benchmark 4x
faster, but made the other cases slower by up to 10%.

cpu: Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
benchmark                                   old ns/op     new ns/op     delta
BenchmarkValidTenASCIIChars-16              4.09          4.06          -0.85%
BenchmarkValid100KASCIIChars-16             9325          7747          -16.92%
BenchmarkValidTenJapaneseChars-16           27.0          27.2          +0.85%
BenchmarkValidLongMostlyASCII-16            57277         58361         +1.89%
BenchmarkValidLongJapanese-16               94002         93131         -0.93%
BenchmarkValidStringTenASCIIChars-16        4.15          4.07          -1.74%
BenchmarkValidString100KASCIIChars-16       7980          8019          +0.49%
BenchmarkValidStringTenJapaneseChars-16     26.0          25.9          -0.38%
BenchmarkValidStringLongMostlyASCII-16      58550         58006         -0.93%
BenchmarkValidStringLongJapanese-16         97964         100038        +2.12%

Change-Id: Ic9d585dedd9af83c27dd791ecd805150ac949f15
Reviewed-on: https://go-review.googlesource.com/c/go/+/375594
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Trust: Alex Rakoczy <alex@golang.org>
This commit is contained in:
Alan Donovan 2022-01-05 09:20:15 -05:00 committed by Daniel Martí
parent bebe9aa423
commit fd2e1e743a
2 changed files with 63 additions and 0 deletions

View File

@ -475,6 +475,11 @@ func RuneStart(b byte) bool { return b&0xC0 != 0x80 }
// Valid reports whether p consists entirely of valid UTF-8-encoded runes. // Valid reports whether p consists entirely of valid UTF-8-encoded runes.
func Valid(p []byte) bool { func Valid(p []byte) bool {
// This optimization avoids the need to recompute the capacity
// when generating code for p[8:], bringing it to parity with
// ValidString, which was 20% faster on long ASCII strings.
p = p[:len(p):len(p)]
// Fast path. Check for and skip 8 bytes of ASCII characters per iteration. // Fast path. Check for and skip 8 bytes of ASCII characters per iteration.
for len(p) >= 8 { for len(p) >= 8 {
// Combining two 32 bit loads allows the same code to be used // Combining two 32 bit loads allows the same code to be used

View File

@ -6,6 +6,7 @@ package utf8_test
import ( import (
"bytes" "bytes"
"strings"
"testing" "testing"
"unicode" "unicode"
. "unicode/utf8" . "unicode/utf8"
@ -554,6 +555,8 @@ func BenchmarkRuneCountInStringTenJapaneseChars(b *testing.B) {
} }
} }
// ascii100000 is a 100,000-byte, ASCII-only benchmark input: the ten decimal
// digits repeated 10,000 times.
var ascii100000 = strings.Repeat("0123456789", 10_000)
func BenchmarkValidTenASCIIChars(b *testing.B) { func BenchmarkValidTenASCIIChars(b *testing.B) {
s := []byte("0123456789") s := []byte("0123456789")
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
@ -561,12 +564,32 @@ func BenchmarkValidTenASCIIChars(b *testing.B) {
} }
} }
// BenchmarkValid100KASCIIChars measures Valid on the byte-slice form of
// ascii100000. The string-to-slice conversion is done once, before the loop.
func BenchmarkValid100KASCIIChars(b *testing.B) {
	data := []byte(ascii100000)
	for iter := 0; iter < b.N; iter++ {
		Valid(data)
	}
}
// BenchmarkValidTenJapaneseChars measures Valid on a short input made up
// entirely of non-ASCII text: ten Japanese characters.
func BenchmarkValidTenJapaneseChars(b *testing.B) {
	// Convert once, outside the loop, so each iteration only runs Valid.
	s := []byte("日本語日本語日本語日")
	for i := 0; i < b.N; i++ {
		Valid(s)
	}
}
// BenchmarkValidLongMostlyASCII measures Valid on the byte-slice form of
// longStringMostlyASCII. The conversion happens once, before the loop.
func BenchmarkValidLongMostlyASCII(b *testing.B) {
	input := []byte(longStringMostlyASCII)
	for iter := 0; iter < b.N; iter++ {
		Valid(input)
	}
}
// BenchmarkValidLongJapanese measures Valid on the byte-slice form of
// longStringJapanese. The conversion happens once, before the loop.
func BenchmarkValidLongJapanese(b *testing.B) {
	input := []byte(longStringJapanese)
	for iter := 0; iter < b.N; iter++ {
		Valid(input)
	}
}
func BenchmarkValidStringTenASCIIChars(b *testing.B) { func BenchmarkValidStringTenASCIIChars(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
@ -574,12 +597,47 @@ func BenchmarkValidStringTenASCIIChars(b *testing.B) {
} }
} }
// BenchmarkValidString100KASCIIChars measures ValidString on ascii100000,
// the string counterpart of BenchmarkValid100KASCIIChars.
func BenchmarkValidString100KASCIIChars(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		ValidString(ascii100000)
	}
}
// BenchmarkValidStringTenJapaneseChars measures ValidString on a short
// input made up entirely of non-ASCII text: ten Japanese characters.
func BenchmarkValidStringTenJapaneseChars(b *testing.B) {
	for i := 0; i < b.N; i++ {
		ValidString("日本語日本語日本語日")
	}
}
// BenchmarkValidStringLongMostlyASCII measures ValidString on
// longStringMostlyASCII.
func BenchmarkValidStringLongMostlyASCII(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		ValidString(longStringMostlyASCII)
	}
}
// BenchmarkValidStringLongJapanese measures ValidString on
// longStringJapanese.
func BenchmarkValidStringLongJapanese(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		ValidString(longStringJapanese)
	}
}
// Long benchmark inputs. Both are filled in once by init below.
var (
	longStringMostlyASCII string // ~100KB, ~97% ASCII
	longStringJapanese    string // ~100KB, non-ASCII
)

// init builds the two long benchmark inputs.
//
// longStringMostlyASCII is assembled chunk by chunk until it holds at least
// 100,000 bytes: every hundredth chunk (starting with the first) is the
// Japanese sample and every other chunk is the ten ASCII digits.
// longStringJapanese is the Japanese sample repeated as many whole times as
// fit in 100,000 bytes.
func init() {
	const (
		japanese = "日本語日本語日本語日"
		digits   = "0123456789"
		target   = 100_000
	)

	var buf bytes.Buffer
	for chunk := 0; buf.Len() < target; chunk++ {
		piece := digits
		if chunk%100 == 0 {
			piece = japanese
		}
		buf.WriteString(piece)
	}

	longStringMostlyASCII = buf.String()
	longStringJapanese = strings.Repeat(japanese, target/len(japanese))
}
func BenchmarkEncodeASCIIRune(b *testing.B) { func BenchmarkEncodeASCIIRune(b *testing.B) {
buf := make([]byte, UTFMax) buf := make([]byte, UTFMax)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {