mirror of
https://github.com/golang/go.git
synced 2025-05-17 13:24:38 +00:00
bytes, strings: moves indexRabinKarp function to internal/bytealg
In order to facilitate optimization of IndexAny and LastIndexAny, this patch moves three Rabin-Karp related functions indexRabinKarp, hashStr and hashStrRev in strings package to initernal/bytealg. There are also three functions in the bytes package with the same names and functions but different parameter types. To highlight this, this patch also moves them to internal/bytealg and gives them slightly different names. Related benchmark changes on amd64 and arm64: name old time/op new time/op delta pkg:strings goos:linux goarch:amd64 Index-16 14.0ns ± 1% 14.1ns ± 2% ~ (p=0.738 n=5+5) LastIndex-16 15.5ns ± 1% 15.7ns ± 4% ~ (p=0.897 n=5+5) pkg:bytes goos:linux goarch:amd64 Index/10-16 26.5ns ± 1% 26.5ns ± 0% ~ (p=0.873 n=5+5) Index/32-16 26.2ns ± 0% 25.7ns ± 0% -1.68% (p=0.008 n=5+5) Index/4K-16 5.12µs ± 4% 5.14µs ± 2% ~ (p=0.841 n=5+5) Index/4M-16 5.44ms ± 3% 5.34ms ± 2% ~ (p=0.056 n=5+5) Index/64M-16 85.8ms ± 3% 84.6ms ± 0% -1.37% (p=0.016 n=5+5) name old speed new speed delta pkg:bytes goos:linux goarch:amd64 Index/10-16 377MB/s ± 1% 377MB/s ± 0% ~ (p=1.000 n=5+5) Index/32-16 1.22GB/s ± 1% 1.24GB/s ± 0% +1.66% (p=0.008 n=5+5) Index/4K-16 800MB/s ± 4% 797MB/s ± 2% ~ (p=0.841 n=5+5) Index/4M-16 771MB/s ± 3% 786MB/s ± 2% ~ (p=0.056 n=5+5) Index/64M-16 783MB/s ± 3% 793MB/s ± 0% +1.36% (p=0.016 n=5+5) name old time/op new time/op delta pkg:strings goos:linux goarch:arm64 Index-8 22.6ns ± 0% 22.5ns ± 0% ~ (p=0.167 n=5+5) LastIndex-8 17.5ns ± 0% 17.5ns ± 0% ~ (all equal) pkg:bytes goos:linux goarch:arm64 Index/10-8 25.0ns ± 0% 25.0ns ± 0% ~ (all equal) Index/32-8 160ns ± 0% 160ns ± 0% ~ (all equal) Index/4K-8 6.26µs ± 0% 6.26µs ± 0% ~ (p=0.167 n=5+5) Index/4M-8 6.30ms ± 0% 6.31ms ± 0% ~ (p=1.000 n=5+5) Index/64M-8 101ms ± 0% 101ms ± 0% ~ (p=0.690 n=5+5) name old speed new speed delta pkg:bytes goos:linux goarch:arm64 Index/10-8 399MB/s ± 0% 400MB/s ± 0% +0.08% (p=0.008 n=5+5) Index/32-8 200MB/s ± 0% 200MB/s ± 0% ~ (p=0.127 n=4+5) Index/4K-8 654MB/s ± 0% 654MB/s ± 0% +0.01% (p=0.016 n=5+5) Index/4M-8 665MB/s ± 0% 665MB/s ± 0% ~ (p=0.833 n=5+5) Index/64M-8 665MB/s ± 0% 665MB/s ± 0% ~ (p=0.913 n=5+5) Change-Id: Icce3bc162bb8613ac36dc963a46c51f8e82ab842 Reviewed-on: https://go-review.googlesource.com/c/go/+/208638 Run-TryBot: eric fang <eric.fang@arm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
parent
cec08794ef
commit
18a6fd44bb
@ -117,17 +117,17 @@ func LastIndex(s, sep []byte) int {
|
|||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
// Rabin-Karp search from the end of the string
|
// Rabin-Karp search from the end of the string
|
||||||
hashss, pow := hashStrRev(sep)
|
hashss, pow := bytealg.HashStrRevBytes(sep)
|
||||||
last := len(s) - n
|
last := len(s) - n
|
||||||
var h uint32
|
var h uint32
|
||||||
for i := len(s) - 1; i >= last; i-- {
|
for i := len(s) - 1; i >= last; i-- {
|
||||||
h = h*primeRK + uint32(s[i])
|
h = h*bytealg.PrimeRK + uint32(s[i])
|
||||||
}
|
}
|
||||||
if h == hashss && Equal(s[last:], sep) {
|
if h == hashss && Equal(s[last:], sep) {
|
||||||
return last
|
return last
|
||||||
}
|
}
|
||||||
for i := last - 1; i >= 0; i-- {
|
for i := last - 1; i >= 0; i-- {
|
||||||
h *= primeRK
|
h *= bytealg.PrimeRK
|
||||||
h += uint32(s[i])
|
h += uint32(s[i])
|
||||||
h -= pow * uint32(s[i+n])
|
h -= pow * uint32(s[i+n])
|
||||||
if h == hashss && Equal(s[i:i+n], sep) {
|
if h == hashss && Equal(s[i:i+n], sep) {
|
||||||
@ -1068,7 +1068,7 @@ func Index(s, sep []byte) int {
|
|||||||
// we should cutover at even larger average skips,
|
// we should cutover at even larger average skips,
|
||||||
// because Equal becomes that much more expensive.
|
// because Equal becomes that much more expensive.
|
||||||
// This code does not take that effect into account.
|
// This code does not take that effect into account.
|
||||||
j := indexRabinKarp(s[i:], sep)
|
j := bytealg.IndexRabinKarpBytes(s[i:], sep)
|
||||||
if j < 0 {
|
if j < 0 {
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
@ -1077,63 +1077,3 @@ func Index(s, sep []byte) int {
|
|||||||
}
|
}
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
func indexRabinKarp(s, sep []byte) int {
|
|
||||||
// Rabin-Karp search
|
|
||||||
hashsep, pow := hashStr(sep)
|
|
||||||
n := len(sep)
|
|
||||||
var h uint32
|
|
||||||
for i := 0; i < n; i++ {
|
|
||||||
h = h*primeRK + uint32(s[i])
|
|
||||||
}
|
|
||||||
if h == hashsep && Equal(s[:n], sep) {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
for i := n; i < len(s); {
|
|
||||||
h *= primeRK
|
|
||||||
h += uint32(s[i])
|
|
||||||
h -= pow * uint32(s[i-n])
|
|
||||||
i++
|
|
||||||
if h == hashsep && Equal(s[i-n:i], sep) {
|
|
||||||
return i - n
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
|
|
||||||
// primeRK is the prime base used in Rabin-Karp algorithm.
|
|
||||||
const primeRK = 16777619
|
|
||||||
|
|
||||||
// hashStr returns the hash and the appropriate multiplicative
|
|
||||||
// factor for use in Rabin-Karp algorithm.
|
|
||||||
func hashStr(sep []byte) (uint32, uint32) {
|
|
||||||
hash := uint32(0)
|
|
||||||
for i := 0; i < len(sep); i++ {
|
|
||||||
hash = hash*primeRK + uint32(sep[i])
|
|
||||||
}
|
|
||||||
var pow, sq uint32 = 1, primeRK
|
|
||||||
for i := len(sep); i > 0; i >>= 1 {
|
|
||||||
if i&1 != 0 {
|
|
||||||
pow *= sq
|
|
||||||
}
|
|
||||||
sq *= sq
|
|
||||||
}
|
|
||||||
return hash, pow
|
|
||||||
}
|
|
||||||
|
|
||||||
// hashStrRev returns the hash of the reverse of sep and the
|
|
||||||
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
|
|
||||||
func hashStrRev(sep []byte) (uint32, uint32) {
|
|
||||||
hash := uint32(0)
|
|
||||||
for i := len(sep) - 1; i >= 0; i-- {
|
|
||||||
hash = hash*primeRK + uint32(sep[i])
|
|
||||||
}
|
|
||||||
var pow, sq uint32 = 1, primeRK
|
|
||||||
for i := len(sep); i > 0; i >>= 1 {
|
|
||||||
if i&1 != 0 {
|
|
||||||
pow *= sq
|
|
||||||
}
|
|
||||||
sq *= sq
|
|
||||||
}
|
|
||||||
return hash, pow
|
|
||||||
}
|
|
||||||
|
@ -141,9 +141,10 @@ var indexTests = []BinOpTest{
|
|||||||
{"barfoobarfooyyyzzzyyyzzzyyyzzzyyyxxxzzzyyy", "x", 33},
|
{"barfoobarfooyyyzzzyyyzzzyyyzzzyyyxxxzzzyyy", "x", 33},
|
||||||
{"foofyfoobarfoobar", "y", 4},
|
{"foofyfoobarfoobar", "y", 4},
|
||||||
{"oooooooooooooooooooooo", "r", -1},
|
{"oooooooooooooooooooooo", "r", -1},
|
||||||
// test fallback to Rabin-Karp.
|
|
||||||
{"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
|
{"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
|
||||||
{"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
|
{"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
|
||||||
|
// test fallback to Rabin-Karp.
|
||||||
|
{"000000000000000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000000001", 5},
|
||||||
}
|
}
|
||||||
|
|
||||||
var lastIndexTests = []BinOpTest{
|
var lastIndexTests = []BinOpTest{
|
||||||
@ -209,6 +210,27 @@ func runIndexTests(t *testing.T, f func(s, sep []byte) int, funcName string, tes
|
|||||||
t.Errorf("%s(%q,%q) = %v; want %v", funcName, a, b, actual, test.i)
|
t.Errorf("%s(%q,%q) = %v; want %v", funcName, a, b, actual, test.i)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
var allocTests = []struct {
|
||||||
|
a []byte
|
||||||
|
b []byte
|
||||||
|
i int
|
||||||
|
}{
|
||||||
|
// case for function Index.
|
||||||
|
{[]byte("000000000000000000000000000000000000000000000000000000000000000000000001"), []byte("0000000000000000000000000000000000000000000000000000000000000000001"), 5},
|
||||||
|
// case for function LastIndex.
|
||||||
|
{[]byte("000000000000000000000000000000000000000000000000000000000000000010000"), []byte("00000000000000000000000000000000000000000000000000000000000001"), 3},
|
||||||
|
}
|
||||||
|
allocs := testing.AllocsPerRun(100, func() {
|
||||||
|
if i := Index(allocTests[1].a, allocTests[1].b); i != allocTests[1].i {
|
||||||
|
t.Errorf("Index([]byte(%q), []byte(%q)) = %v; want %v", allocTests[1].a, allocTests[1].b, i, allocTests[1].i)
|
||||||
|
}
|
||||||
|
if i := LastIndex(allocTests[0].a, allocTests[0].b); i != allocTests[0].i {
|
||||||
|
t.Errorf("LastIndex([]byte(%q), []byte(%q)) = %v; want %v", allocTests[0].a, allocTests[0].b, i, allocTests[0].i)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
if allocs != 0 {
|
||||||
|
t.Errorf("expected no allocations, got %f", allocs)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func runIndexAnyTests(t *testing.T, f func(s []byte, chars string) int, funcName string, testCases []BinOpTest) {
|
func runIndexAnyTests(t *testing.T, f func(s []byte, chars string) int, funcName string, testCases []BinOpTest) {
|
||||||
|
@ -21,3 +21,128 @@ const (
|
|||||||
|
|
||||||
// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
|
// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
|
||||||
var MaxLen int
|
var MaxLen int
|
||||||
|
|
||||||
|
// FIXME: the logic of HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes and HashStr, HashStrRev,
|
||||||
|
// IndexRabinKarp are exactly the same, except that the types are different. Can we eliminate
|
||||||
|
// three of them without causing allocation?
|
||||||
|
|
||||||
|
// PrimeRK is the prime base used in Rabin-Karp algorithm.
|
||||||
|
const PrimeRK = 16777619
|
||||||
|
|
||||||
|
// HashStrBytes returns the hash and the appropriate multiplicative
|
||||||
|
// factor for use in Rabin-Karp algorithm.
|
||||||
|
func HashStrBytes(sep []byte) (uint32, uint32) {
|
||||||
|
hash := uint32(0)
|
||||||
|
for i := 0; i < len(sep); i++ {
|
||||||
|
hash = hash*PrimeRK + uint32(sep[i])
|
||||||
|
}
|
||||||
|
var pow, sq uint32 = 1, PrimeRK
|
||||||
|
for i := len(sep); i > 0; i >>= 1 {
|
||||||
|
if i&1 != 0 {
|
||||||
|
pow *= sq
|
||||||
|
}
|
||||||
|
sq *= sq
|
||||||
|
}
|
||||||
|
return hash, pow
|
||||||
|
}
|
||||||
|
|
||||||
|
// HashStr returns the hash and the appropriate multiplicative
|
||||||
|
// factor for use in Rabin-Karp algorithm.
|
||||||
|
func HashStr(sep string) (uint32, uint32) {
|
||||||
|
hash := uint32(0)
|
||||||
|
for i := 0; i < len(sep); i++ {
|
||||||
|
hash = hash*PrimeRK + uint32(sep[i])
|
||||||
|
}
|
||||||
|
var pow, sq uint32 = 1, PrimeRK
|
||||||
|
for i := len(sep); i > 0; i >>= 1 {
|
||||||
|
if i&1 != 0 {
|
||||||
|
pow *= sq
|
||||||
|
}
|
||||||
|
sq *= sq
|
||||||
|
}
|
||||||
|
return hash, pow
|
||||||
|
}
|
||||||
|
|
||||||
|
// HashStrRevBytes returns the hash of the reverse of sep and the
|
||||||
|
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
|
||||||
|
func HashStrRevBytes(sep []byte) (uint32, uint32) {
|
||||||
|
hash := uint32(0)
|
||||||
|
for i := len(sep) - 1; i >= 0; i-- {
|
||||||
|
hash = hash*PrimeRK + uint32(sep[i])
|
||||||
|
}
|
||||||
|
var pow, sq uint32 = 1, PrimeRK
|
||||||
|
for i := len(sep); i > 0; i >>= 1 {
|
||||||
|
if i&1 != 0 {
|
||||||
|
pow *= sq
|
||||||
|
}
|
||||||
|
sq *= sq
|
||||||
|
}
|
||||||
|
return hash, pow
|
||||||
|
}
|
||||||
|
|
||||||
|
// HashStrRev returns the hash of the reverse of sep and the
|
||||||
|
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
|
||||||
|
func HashStrRev(sep string) (uint32, uint32) {
|
||||||
|
hash := uint32(0)
|
||||||
|
for i := len(sep) - 1; i >= 0; i-- {
|
||||||
|
hash = hash*PrimeRK + uint32(sep[i])
|
||||||
|
}
|
||||||
|
var pow, sq uint32 = 1, PrimeRK
|
||||||
|
for i := len(sep); i > 0; i >>= 1 {
|
||||||
|
if i&1 != 0 {
|
||||||
|
pow *= sq
|
||||||
|
}
|
||||||
|
sq *= sq
|
||||||
|
}
|
||||||
|
return hash, pow
|
||||||
|
}
|
||||||
|
|
||||||
|
// IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the
|
||||||
|
// first occurence of substr in s, or -1 if not present.
|
||||||
|
func IndexRabinKarpBytes(s, sep []byte) int {
|
||||||
|
// Rabin-Karp search
|
||||||
|
hashsep, pow := HashStrBytes(sep)
|
||||||
|
n := len(sep)
|
||||||
|
var h uint32
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
h = h*PrimeRK + uint32(s[i])
|
||||||
|
}
|
||||||
|
if h == hashsep && Equal(s[:n], sep) {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
for i := n; i < len(s); {
|
||||||
|
h *= PrimeRK
|
||||||
|
h += uint32(s[i])
|
||||||
|
h -= pow * uint32(s[i-n])
|
||||||
|
i++
|
||||||
|
if h == hashsep && Equal(s[i-n:i], sep) {
|
||||||
|
return i - n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
|
||||||
|
// IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
|
||||||
|
// first occurence of substr in s, or -1 if not present.
|
||||||
|
func IndexRabinKarp(s, substr string) int {
|
||||||
|
// Rabin-Karp search
|
||||||
|
hashss, pow := HashStr(substr)
|
||||||
|
n := len(substr)
|
||||||
|
var h uint32
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
h = h*PrimeRK + uint32(s[i])
|
||||||
|
}
|
||||||
|
if h == hashss && s[:n] == substr {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
for i := n; i < len(s); {
|
||||||
|
h *= PrimeRK
|
||||||
|
h += uint32(s[i])
|
||||||
|
h -= pow * uint32(s[i-n])
|
||||||
|
i++
|
||||||
|
if h == hashss && s[i-n:i] == substr {
|
||||||
|
return i - n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
@ -36,43 +36,6 @@ func explode(s string, n int) []string {
|
|||||||
return a
|
return a
|
||||||
}
|
}
|
||||||
|
|
||||||
// primeRK is the prime base used in Rabin-Karp algorithm.
|
|
||||||
const primeRK = 16777619
|
|
||||||
|
|
||||||
// hashStr returns the hash and the appropriate multiplicative
|
|
||||||
// factor for use in Rabin-Karp algorithm.
|
|
||||||
func hashStr(sep string) (uint32, uint32) {
|
|
||||||
hash := uint32(0)
|
|
||||||
for i := 0; i < len(sep); i++ {
|
|
||||||
hash = hash*primeRK + uint32(sep[i])
|
|
||||||
}
|
|
||||||
var pow, sq uint32 = 1, primeRK
|
|
||||||
for i := len(sep); i > 0; i >>= 1 {
|
|
||||||
if i&1 != 0 {
|
|
||||||
pow *= sq
|
|
||||||
}
|
|
||||||
sq *= sq
|
|
||||||
}
|
|
||||||
return hash, pow
|
|
||||||
}
|
|
||||||
|
|
||||||
// hashStrRev returns the hash of the reverse of sep and the
|
|
||||||
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
|
|
||||||
func hashStrRev(sep string) (uint32, uint32) {
|
|
||||||
hash := uint32(0)
|
|
||||||
for i := len(sep) - 1; i >= 0; i-- {
|
|
||||||
hash = hash*primeRK + uint32(sep[i])
|
|
||||||
}
|
|
||||||
var pow, sq uint32 = 1, primeRK
|
|
||||||
for i := len(sep); i > 0; i >>= 1 {
|
|
||||||
if i&1 != 0 {
|
|
||||||
pow *= sq
|
|
||||||
}
|
|
||||||
sq *= sq
|
|
||||||
}
|
|
||||||
return hash, pow
|
|
||||||
}
|
|
||||||
|
|
||||||
// Count counts the number of non-overlapping instances of substr in s.
|
// Count counts the number of non-overlapping instances of substr in s.
|
||||||
// If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
|
// If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
|
||||||
func Count(s, substr string) int {
|
func Count(s, substr string) int {
|
||||||
@ -126,17 +89,17 @@ func LastIndex(s, substr string) int {
|
|||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
// Rabin-Karp search from the end of the string
|
// Rabin-Karp search from the end of the string
|
||||||
hashss, pow := hashStrRev(substr)
|
hashss, pow := bytealg.HashStrRev(substr)
|
||||||
last := len(s) - n
|
last := len(s) - n
|
||||||
var h uint32
|
var h uint32
|
||||||
for i := len(s) - 1; i >= last; i-- {
|
for i := len(s) - 1; i >= last; i-- {
|
||||||
h = h*primeRK + uint32(s[i])
|
h = h*bytealg.PrimeRK + uint32(s[i])
|
||||||
}
|
}
|
||||||
if h == hashss && s[last:] == substr {
|
if h == hashss && s[last:] == substr {
|
||||||
return last
|
return last
|
||||||
}
|
}
|
||||||
for i := last - 1; i >= 0; i-- {
|
for i := last - 1; i >= 0; i-- {
|
||||||
h *= primeRK
|
h *= bytealg.PrimeRK
|
||||||
h += uint32(s[i])
|
h += uint32(s[i])
|
||||||
h -= pow * uint32(s[i+n])
|
h -= pow * uint32(s[i+n])
|
||||||
if h == hashss && s[i:i+n] == substr {
|
if h == hashss && s[i:i+n] == substr {
|
||||||
@ -1095,7 +1058,7 @@ func Index(s, substr string) int {
|
|||||||
fails++
|
fails++
|
||||||
if fails >= 4+i>>4 && i < t {
|
if fails >= 4+i>>4 && i < t {
|
||||||
// See comment in ../bytes/bytes.go.
|
// See comment in ../bytes/bytes.go.
|
||||||
j := indexRabinKarp(s[i:], substr)
|
j := bytealg.IndexRabinKarp(s[i:], substr)
|
||||||
if j < 0 {
|
if j < 0 {
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
@ -1104,26 +1067,3 @@ func Index(s, substr string) int {
|
|||||||
}
|
}
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
func indexRabinKarp(s, substr string) int {
|
|
||||||
// Rabin-Karp search
|
|
||||||
hashss, pow := hashStr(substr)
|
|
||||||
n := len(substr)
|
|
||||||
var h uint32
|
|
||||||
for i := 0; i < n; i++ {
|
|
||||||
h = h*primeRK + uint32(s[i])
|
|
||||||
}
|
|
||||||
if h == hashss && s[:n] == substr {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
for i := n; i < len(s); {
|
|
||||||
h *= primeRK
|
|
||||||
h += uint32(s[i])
|
|
||||||
h -= pow * uint32(s[i-n])
|
|
||||||
i++
|
|
||||||
if h == hashss && s[i-n:i] == substr {
|
|
||||||
return i - n
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user