go/src/mime/grammar.go
Julien Cretel f9ce1dddc2 mime: speed up ParseMediaType
Add benchmarks for ParseMediaType.

Eschew UTF-8 decoding and strings.IndexFunc where possible, and rely
on 128-bit bitmaps instead. Eliminate some bounds checks.

Some benchmark results (no changes to allocations):

goos: darwin
goarch: amd64
pkg: mime
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                      │     old     │                 new                 │
                      │   sec/op    │   sec/op     vs base                │
ParseMediaType-8        71.75µ ± 0%   55.53µ ± 0%  -22.60% (p=0.000 n=20)
ParseMediaTypeBogus-8   5.330µ ± 0%   3.603µ ± 0%  -32.41% (p=0.000 n=20)
geomean                 19.56µ        14.14µ       -27.67%

Change-Id: I324c9990fe43581484916ecff61ca6c708467a89
GitHub-Last-Rev: e2293d64b3852722bef920169eaa44e7ded3111c
GitHub-Pull-Request: golang/go#73436
Reviewed-on: https://go-review.googlesource.com/c/go/+/666655
Reviewed-by: Jorropo <jorropo.pgm@gmail.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Reviewed-by: Sean Liao <sean@liao.dev>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Sean Liao <sean@liao.dev>
2025-04-26 08:01:54 -07:00

84 lines
1.9 KiB
Go

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mime
// isTSpecial reports whether c is in 'tspecials' as defined by RFC
// 1521 and RFC 2045.
func isTSpecial(c byte) bool {
// tspecials := "(" / ")" / "<" / ">" / "@" /
// "," / ";" / ":" / "\" / <">
// "/" / "[" / "]" / "?" / "="
//
// mask is a 128-bit bitmap with 1s for allowed bytes,
// so that the byte c can be tested with a shift and an and.
// If c >= 128, then 1<<c and 1<<(c-64) will both be zero,
// and this function will return false.
const mask = 0 |
1<<'(' |
1<<')' |
1<<'<' |
1<<'>' |
1<<'@' |
1<<',' |
1<<';' |
1<<':' |
1<<'\\' |
1<<'"' |
1<<'/' |
1<<'[' |
1<<']' |
1<<'?' |
1<<'='
return ((uint64(1)<<c)&(mask&(1<<64-1)) |
(uint64(1)<<(c-64))&(mask>>64)) != 0
}
// isTokenChar reports whether c is in 'token' as defined by RFC
// 1521 and RFC 2045.
func isTokenChar(c byte) bool {
// token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
// or tspecials>
//
// mask is a 128-bit bitmap with 1s for allowed bytes,
// so that the byte c can be tested with a shift and an and.
// If c >= 128, then 1<<c and 1<<(c-64) will both be zero,
// and this function will return false.
const mask = 0 |
(1<<(10)-1)<<'0' |
(1<<(26)-1)<<'a' |
(1<<(26)-1)<<'A' |
1<<'!' |
1<<'#' |
1<<'$' |
1<<'%' |
1<<'&' |
1<<'\'' |
1<<'*' |
1<<'+' |
1<<'-' |
1<<'.' |
1<<'^' |
1<<'_' |
1<<'`' |
1<<'|' |
1<<'~'
return ((uint64(1)<<c)&(mask&(1<<64-1)) |
(uint64(1)<<(c-64))&(mask>>64)) != 0
}
// isToken reports whether s is a 'token' as defined by RFC 1521
// and RFC 2045.
func isToken(s string) bool {
if s == "" {
return false
}
for _, c := range []byte(s) {
if !isTokenChar(c) {
return false
}
}
return true
}