From fd2e1e743a86a53a30427cf3606543ecc9bd60bd Mon Sep 17 00:00:00 2001 From: Alan Donovan Date: Wed, 5 Jan 2022 09:20:15 -0500 Subject: [PATCH] unicode/utf8: optimize Valid to parity with ValidString The benchmarks added in this change revealed that ValidString runs ~17% faster than Valid([]byte) on the ASCII prefix of the input. Inspection of the assembly revealed that the code generated for p[8:] required recomputing the slice capacity to handle the cap=0 special case, which added an ADD -8 instruction. By making len=cap, the capacity becomes a common subexpression with the length, saving the ADD instruction. (Thanks to khr for the tip.) Incidentally, I tried a number of other optimizations but was unable to make consistent gains across all benchmarks. The most promising was to retain the bitmask of non-ASCII bytes from the fast loop; the slow loop would shift it, and when it becomes zero, return to the fast loop. This made the MostlyASCII benchmark 4x faster, but made the other cases slower by up to 10%. cpu: Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz benchmark old ns/op new ns/op delta BenchmarkValidTenASCIIChars-16 4.09 4.06 -0.85% BenchmarkValid100KASCIIChars-16 9325 7747 -16.92% BenchmarkValidTenJapaneseChars-16 27.0 27.2 +0.85% BenchmarkValidLongMostlyASCII-16 57277 58361 +1.89% BenchmarkValidLongJapanese-16 94002 93131 -0.93% BenchmarkValidStringTenASCIIChars-16 4.15 4.07 -1.74% BenchmarkValidString100KASCIIChars-16 7980 8019 +0.49% BenchmarkValidStringTenJapaneseChars-16 26.0 25.9 -0.38% BenchmarkValidStringLongMostlyASCII-16 58550 58006 -0.93% BenchmarkValidStringLongJapanese-16 97964 100038 +2.12% Change-Id: Ic9d585dedd9af83c27dd791ecd805150ac949f15 Reviewed-on: https://go-review.googlesource.com/c/go/+/375594 Reviewed-by: Keith Randall Run-TryBot: Keith Randall TryBot-Result: Gopher Robot Trust: Alex Rakoczy --- src/unicode/utf8/utf8.go | 5 +++ src/unicode/utf8/utf8_test.go | 58 +++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go index 6938c7e6a7..1e9f666e23 100644 --- a/src/unicode/utf8/utf8.go +++ b/src/unicode/utf8/utf8.go @@ -475,6 +475,11 @@ func RuneStart(b byte) bool { return b&0xC0 != 0x80 } // Valid reports whether p consists entirely of valid UTF-8-encoded runes. func Valid(p []byte) bool { + // This optimization avoids the need to recompute the capacity + // when generating code for p[8:], bringing it to parity with + // ValidString, which was 20% faster on long ASCII strings. + p = p[:len(p):len(p)] + // Fast path. Check for and skip 8 bytes of ASCII characters per iteration. for len(p) >= 8 { // Combining two 32 bit loads allows the same code to be used diff --git a/src/unicode/utf8/utf8_test.go b/src/unicode/utf8/utf8_test.go index e9be4d2d63..e7c31222cc 100644 --- a/src/unicode/utf8/utf8_test.go +++ b/src/unicode/utf8/utf8_test.go @@ -6,6 +6,7 @@ package utf8_test import ( "bytes" + "strings" "testing" "unicode" . "unicode/utf8" @@ -554,6 +555,8 @@ func BenchmarkRuneCountInStringTenJapaneseChars(b *testing.B) { } } +var ascii100000 = strings.Repeat("0123456789", 10000) + func BenchmarkValidTenASCIIChars(b *testing.B) { s := []byte("0123456789") for i := 0; i < b.N; i++ { @@ -561,12 +564,32 @@ func BenchmarkValidTenASCIIChars(b *testing.B) { } } +func BenchmarkValid100KASCIIChars(b *testing.B) { + s := []byte(ascii100000) + for i := 0; i < b.N; i++ { + Valid(s) + } +} + func BenchmarkValidTenJapaneseChars(b *testing.B) { s := []byte("日本語日本語日本語日") for i := 0; i < b.N; i++ { Valid(s) } } +func BenchmarkValidLongMostlyASCII(b *testing.B) { + longMostlyASCII := []byte(longStringMostlyASCII) + for i := 0; i < b.N; i++ { + Valid(longMostlyASCII) + } +} + +func BenchmarkValidLongJapanese(b *testing.B) { + longJapanese := []byte(longStringJapanese) + for i := 0; i < b.N; i++ { + Valid(longJapanese) + } +} func BenchmarkValidStringTenASCIIChars(b *testing.B) { for i := 0; i < b.N; i++ { @@ -574,12 +597,47 @@ func BenchmarkValidStringTenASCIIChars(b *testing.B) { } } +func BenchmarkValidString100KASCIIChars(b *testing.B) { + for i := 0; i < b.N; i++ { + ValidString(ascii100000) + } +} + func BenchmarkValidStringTenJapaneseChars(b *testing.B) { for i := 0; i < b.N; i++ { ValidString("日本語日本語日本語日") } } +func BenchmarkValidStringLongMostlyASCII(b *testing.B) { + for i := 0; i < b.N; i++ { + ValidString(longStringMostlyASCII) + } +} + +func BenchmarkValidStringLongJapanese(b *testing.B) { + for i := 0; i < b.N; i++ { + ValidString(longStringJapanese) + } +} + +var longStringMostlyASCII string // ~100KB, ~97% ASCII +var longStringJapanese string // ~100KB, non-ASCII + +func init() { + const japanese = "日本語日本語日本語日" + var b bytes.Buffer + for i := 0; b.Len() < 100_000; i++ { + if i%100 == 0 { + b.WriteString(japanese) + } else { + b.WriteString("0123456789") + } + } + longStringMostlyASCII = b.String() + longStringJapanese = strings.Repeat(japanese, 100_000/len(japanese)) +} + func BenchmarkEncodeASCIIRune(b *testing.B) { buf := make([]byte, UTFMax) for i := 0; i < b.N; i++ {