mime: speed up ParseMediaType

Add benchmarks for ParseMediaType.

Eschew UTF-8 decoding and strings.IndexFunc where possible, and rely
on 128-bit bitmaps instead. Eliminate some bounds checks.

Some benchmark results (no changes to allocations):

goos: darwin
goarch: amd64
pkg: mime
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                      │     old     │                 new                 │
                      │   sec/op    │   sec/op     vs base                │
ParseMediaType-8        71.75µ ± 0%   55.53µ ± 0%  -22.60% (p=0.000 n=20)
ParseMediaTypeBogus-8   5.330µ ± 0%   3.603µ ± 0%  -32.41% (p=0.000 n=20)
geomean                 19.56µ        14.14µ       -27.67%

Change-Id: I324c9990fe43581484916ecff61ca6c708467a89
GitHub-Last-Rev: e2293d64b3852722bef920169eaa44e7ded3111c
GitHub-Pull-Request: golang/go#73436
Reviewed-on: https://go-review.googlesource.com/c/go/+/666655
Reviewed-by: Jorropo <jorropo.pgm@gmail.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Reviewed-by: Sean Liao <sean@liao.dev>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Sean Liao <sean@liao.dev>
This commit is contained in:
Julien Cretel 2025-04-22 16:49:42 +00:00 committed by Gopher Robot
parent 7a2689b152
commit f9ce1dddc2
3 changed files with 93 additions and 27 deletions

View File

@ -4,22 +4,68 @@
package mime package mime
import ( // isTSpecial reports whether c is in 'tspecials' as defined by RFC
"strings"
)
// isTSpecial reports whether rune is in 'tspecials' as defined by RFC
// 1521 and RFC 2045. // 1521 and RFC 2045.
func isTSpecial(r rune) bool { func isTSpecial(c byte) bool {
return strings.ContainsRune(`()<>@,;:\"/[]?=`, r) // tspecials := "(" / ")" / "<" / ">" / "@" /
// "," / ";" / ":" / "\" / <">
// "/" / "[" / "]" / "?" / "="
//
// mask is a 128-bit bitmap with 1s for allowed bytes,
// so that the byte c can be tested with a shift and an and.
// If c >= 128, then 1<<c and 1<<(c-64) will both be zero,
// and this function will return false.
const mask = 0 |
1<<'(' |
1<<')' |
1<<'<' |
1<<'>' |
1<<'@' |
1<<',' |
1<<';' |
1<<':' |
1<<'\\' |
1<<'"' |
1<<'/' |
1<<'[' |
1<<']' |
1<<'?' |
1<<'='
return ((uint64(1)<<c)&(mask&(1<<64-1)) |
(uint64(1)<<(c-64))&(mask>>64)) != 0
} }
// isTokenChar reports whether rune is in 'token' as defined by RFC // isTokenChar reports whether c is in 'token' as defined by RFC
// 1521 and RFC 2045. // 1521 and RFC 2045.
func isTokenChar(r rune) bool { func isTokenChar(c byte) bool {
// token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, // token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
// or tspecials> // or tspecials>
return r > 0x20 && r < 0x7f && !isTSpecial(r) //
// mask is a 128-bit bitmap with 1s for allowed bytes,
// so that the byte c can be tested with a shift and an and.
// If c >= 128, then 1<<c and 1<<(c-64) will both be zero,
// and this function will return false.
const mask = 0 |
(1<<(10)-1)<<'0' |
(1<<(26)-1)<<'a' |
(1<<(26)-1)<<'A' |
1<<'!' |
1<<'#' |
1<<'$' |
1<<'%' |
1<<'&' |
1<<'\'' |
1<<'*' |
1<<'+' |
1<<'-' |
1<<'.' |
1<<'^' |
1<<'_' |
1<<'`' |
1<<'|' |
1<<'~'
return ((uint64(1)<<c)&(mask&(1<<64-1)) |
(uint64(1)<<(c-64))&(mask>>64)) != 0
} }
// isToken reports whether s is a 'token' as defined by RFC 1521 // isToken reports whether s is a 'token' as defined by RFC 1521
@ -28,5 +74,10 @@ func isToken(s string) bool {
if s == "" { if s == "" {
return false return false
} }
return strings.IndexFunc(s, isNotTokenChar) < 0 for _, c := range []byte(s) {
if !isTokenChar(c) {
return false
}
}
return true
} }

View File

@ -60,7 +60,7 @@ func FormatMediaType(t string, param map[string]string) string {
// attribute-char := <any (US-ASCII) CHAR except SPACE, CTLs, "*", "'", "%", or tspecials> // attribute-char := <any (US-ASCII) CHAR except SPACE, CTLs, "*", "'", "%", or tspecials>
if ch <= ' ' || ch >= 0x7F || if ch <= ' ' || ch >= 0x7F ||
ch == '*' || ch == '\'' || ch == '%' || ch == '*' || ch == '\'' || ch == '%' ||
isTSpecial(rune(ch)) { isTSpecial(ch) {
b.WriteString(value[offset:index]) b.WriteString(value[offset:index])
offset = index + 1 offset = index + 1
@ -250,23 +250,17 @@ func decode2231Enc(v string) (string, bool) {
return encv, true return encv, true
} }
func isNotTokenChar(r rune) bool {
return !isTokenChar(r)
}
// consumeToken consumes a token from the beginning of provided // consumeToken consumes a token from the beginning of provided
// string, per RFC 2045 section 5.1 (referenced from 2183), and return // string, per RFC 2045 section 5.1 (referenced from 2183), and return
// the token consumed and the rest of the string. Returns ("", v) on // the token consumed and the rest of the string. Returns ("", v) on
// failure to consume at least one character. // failure to consume at least one character.
func consumeToken(v string) (token, rest string) { func consumeToken(v string) (token, rest string) {
notPos := strings.IndexFunc(v, isNotTokenChar) for i := range len(v) {
if notPos == -1 { if !isTokenChar(v[i]) {
return v, "" return v[:i], v[i:]
}
} }
if notPos == 0 { return v, ""
return "", v
}
return v[0:notPos], v[notPos:]
} }
// consumeValue consumes a "value" per RFC 2045, where a value is // consumeValue consumes a "value" per RFC 2045, where a value is
@ -299,7 +293,7 @@ func consumeValue(v string) (value, rest string) {
// and intended as a literal backslash. This makes Go servers deal better // and intended as a literal backslash. This makes Go servers deal better
// with MSIE without affecting the way they handle conforming MIME // with MSIE without affecting the way they handle conforming MIME
// generators. // generators.
if r == '\\' && i+1 < len(v) && isTSpecial(rune(v[i+1])) { if r == '\\' && i+1 < len(v) && isTSpecial(v[i+1]) {
buffer.WriteByte(v[i+1]) buffer.WriteByte(v[i+1])
i++ i++
continue continue

View File

@ -96,7 +96,9 @@ type mediaTypeTest struct {
p map[string]string p map[string]string
} }
func TestParseMediaType(t *testing.T) { var parseMediaTypeTests []mediaTypeTest
func init() {
// Convenience map initializer // Convenience map initializer
m := func(s ...string) map[string]string { m := func(s ...string) map[string]string {
sm := make(map[string]string) sm := make(map[string]string)
@ -107,7 +109,7 @@ func TestParseMediaType(t *testing.T) {
} }
nameFoo := map[string]string{"name": "foo"} nameFoo := map[string]string{"name": "foo"}
tests := []mediaTypeTest{ parseMediaTypeTests = []mediaTypeTest{
{`form-data; name="foo"`, "form-data", nameFoo}, {`form-data; name="foo"`, "form-data", nameFoo},
{` form-data ; name=foo`, "form-data", nameFoo}, {` form-data ; name=foo`, "form-data", nameFoo},
{`FORM-DATA;name="foo"`, "form-data", nameFoo}, {`FORM-DATA;name="foo"`, "form-data", nameFoo},
@ -412,7 +414,10 @@ func TestParseMediaType(t *testing.T) {
{`text; charset=utf-8; charset=utf-8; format=fixed`, "text", m("charset", "utf-8", "format", "fixed")}, {`text; charset=utf-8; charset=utf-8; format=fixed`, "text", m("charset", "utf-8", "format", "fixed")},
{`text; charset=utf-8; format=flowed; charset=utf-8`, "text", m("charset", "utf-8", "format", "flowed")}, {`text; charset=utf-8; format=flowed; charset=utf-8`, "text", m("charset", "utf-8", "format", "flowed")},
} }
for _, test := range tests { }
func TestParseMediaType(t *testing.T) {
for _, test := range parseMediaTypeTests {
mt, params, err := ParseMediaType(test.in) mt, params, err := ParseMediaType(test.in)
if err != nil { if err != nil {
if test.t != "" { if test.t != "" {
@ -438,6 +443,14 @@ func TestParseMediaType(t *testing.T) {
} }
} }
func BenchmarkParseMediaType(b *testing.B) {
for range b.N {
for _, test := range parseMediaTypeTests {
ParseMediaType(test.in)
}
}
}
type badMediaTypeTest struct { type badMediaTypeTest struct {
in string in string
mt string mt string
@ -486,6 +499,14 @@ func TestParseMediaTypeBogus(t *testing.T) {
} }
} }
func BenchmarkParseMediaTypeBogus(b *testing.B) {
for range b.N {
for _, test := range badMediaTypeTests {
ParseMediaType(test.in)
}
}
}
type formatTest struct { type formatTest struct {
typ string typ string
params map[string]string params map[string]string