diff --git a/src/mime/grammar.go b/src/mime/grammar.go index 6a6f71dbd4..cc578fbcfd 100644 --- a/src/mime/grammar.go +++ b/src/mime/grammar.go @@ -4,22 +4,68 @@ package mime -import ( - "strings" -) - -// isTSpecial reports whether rune is in 'tspecials' as defined by RFC +// isTSpecial reports whether c is in 'tspecials' as defined by RFC // 1521 and RFC 2045. -func isTSpecial(r rune) bool { - return strings.ContainsRune(`()<>@,;:\"/[]?=`, r) +func isTSpecial(c byte) bool { + // tspecials := "(" / ")" / "<" / ">" / "@" / + // "," / ";" / ":" / "\" / <"> + // "/" / "[" / "]" / "?" / "=" + // + // mask is a 128-bit bitmap with 1s for allowed bytes, + // so that the byte c can be tested with a shift and an and. + // If c >= 128, then 1<' | + 1<<'@' | + 1<<',' | + 1<<';' | + 1<<':' | + 1<<'\\' | + 1<<'"' | + 1<<'/' | + 1<<'[' | + 1<<']' | + 1<<'?' | + 1<<'=' + return ((uint64(1)<>64)) != 0 } -// isTokenChar reports whether rune is in 'token' as defined by RFC +// isTokenChar reports whether c is in 'token' as defined by RFC // 1521 and RFC 2045. -func isTokenChar(r rune) bool { +func isTokenChar(c byte) bool { // token := 1* - return r > 0x20 && r < 0x7f && !isTSpecial(r) + // + // mask is a 128-bit bitmap with 1s for allowed bytes, + // so that the byte c can be tested with a shift and an and. + // If c >= 128, then 1<>64)) != 0 } // isToken reports whether s is a 'token' as defined by RFC 1521 @@ -28,5 +74,10 @@ func isToken(s string) bool { if s == "" { return false } - return strings.IndexFunc(s, isNotTokenChar) < 0 + for _, c := range []byte(s) { + if !isTokenChar(c) { + return false + } + } + return true } diff --git a/src/mime/mediatype.go b/src/mime/mediatype.go index f0a0be2155..66684a68b2 100644 --- a/src/mime/mediatype.go +++ b/src/mime/mediatype.go @@ -60,7 +60,7 @@ func FormatMediaType(t string, param map[string]string) string { // attribute-char := if ch <= ' ' || ch >= 0x7F || ch == '*' || ch == '\'' || ch == '%' || - isTSpecial(rune(ch)) { + isTSpecial(ch) { b.WriteString(value[offset:index]) offset = index + 1 @@ -250,23 +250,17 @@ func decode2231Enc(v string) (string, bool) { return encv, true } -func isNotTokenChar(r rune) bool { - return !isTokenChar(r) -} - // consumeToken consumes a token from the beginning of provided // string, per RFC 2045 section 5.1 (referenced from 2183), and return // the token consumed and the rest of the string. Returns ("", v) on // failure to consume at least one character. func consumeToken(v string) (token, rest string) { - notPos := strings.IndexFunc(v, isNotTokenChar) - if notPos == -1 { - return v, "" + for i := range len(v) { + if !isTokenChar(v[i]) { + return v[:i], v[i:] + } } - if notPos == 0 { - return "", v - } - return v[0:notPos], v[notPos:] + return v, "" } // consumeValue consumes a "value" per RFC 2045, where a value is @@ -299,7 +293,7 @@ func consumeValue(v string) (value, rest string) { // and intended as a literal backslash. This makes Go servers deal better // with MSIE without affecting the way they handle conforming MIME // generators. - if r == '\\' && i+1 < len(v) && isTSpecial(rune(v[i+1])) { + if r == '\\' && i+1 < len(v) && isTSpecial(v[i+1]) { buffer.WriteByte(v[i+1]) i++ continue diff --git a/src/mime/mediatype_test.go b/src/mime/mediatype_test.go index 1731f7361e..251df8d669 100644 --- a/src/mime/mediatype_test.go +++ b/src/mime/mediatype_test.go @@ -96,7 +96,9 @@ type mediaTypeTest struct { p map[string]string } -func TestParseMediaType(t *testing.T) { +var parseMediaTypeTests []mediaTypeTest + +func init() { // Convenience map initializer m := func(s ...string) map[string]string { sm := make(map[string]string) @@ -107,7 +109,7 @@ func TestParseMediaType(t *testing.T) { } nameFoo := map[string]string{"name": "foo"} - tests := []mediaTypeTest{ + parseMediaTypeTests = []mediaTypeTest{ {`form-data; name="foo"`, "form-data", nameFoo}, {` form-data ; name=foo`, "form-data", nameFoo}, {`FORM-DATA;name="foo"`, "form-data", nameFoo}, @@ -412,7 +414,10 @@ func TestParseMediaType(t *testing.T) { {`text; charset=utf-8; charset=utf-8; format=fixed`, "text", m("charset", "utf-8", "format", "fixed")}, {`text; charset=utf-8; format=flowed; charset=utf-8`, "text", m("charset", "utf-8", "format", "flowed")}, } - for _, test := range tests { +} + +func TestParseMediaType(t *testing.T) { + for _, test := range parseMediaTypeTests { mt, params, err := ParseMediaType(test.in) if err != nil { if test.t != "" { @@ -438,6 +443,14 @@ func TestParseMediaType(t *testing.T) { } } +func BenchmarkParseMediaType(b *testing.B) { + for range b.N { + for _, test := range parseMediaTypeTests { + ParseMediaType(test.in) + } + } +} + type badMediaTypeTest struct { in string mt string @@ -486,6 +499,14 @@ func TestParseMediaTypeBogus(t *testing.T) { } } +func BenchmarkParseMediaTypeBogus(b *testing.B) { + for range b.N { + for _, test := range badMediaTypeTests { + ParseMediaType(test.in) + } + } +} + type formatTest struct { typ string params map[string]string