mirror of
https://github.com/golang/go.git
synced 2025-05-31 23:25:39 +00:00
html: simplify and optimize escape/unescape
The html package uses some specific code to escape special characters. Actually, the strings.Replacer can be used instead, and is much more efficient. The converse operation is more complex but can still be slightly optimized. Credits to Ken Bloom (kabloom@google.com), who first submitted a similar patch at https://codereview.appspot.com/141930043 Added benchmarks and slightly optimized UnescapeString. benchmark old ns/op new ns/op delta BenchmarkEscape-4 118713 19825 -83.30% BenchmarkEscapeNone-4 87653 3784 -95.68% BenchmarkUnescape-4 24888 23417 -5.91% BenchmarkUnescapeNone-4 14423 157 -98.91% benchmark old allocs new allocs delta BenchmarkEscape-4 9 2 -77.78% BenchmarkEscapeNone-4 0 0 +0.00% BenchmarkUnescape-4 2 2 +0.00% BenchmarkUnescapeNone-4 0 0 +0.00% benchmark old bytes new bytes delta BenchmarkEscape-4 24800 12288 -50.45% BenchmarkEscapeNone-4 0 0 +0.00% BenchmarkUnescape-4 10240 10240 +0.00% BenchmarkUnescapeNone-4 0 0 +0.00% Fixes #8697 Change-Id: I208261ed7cbe9b3dee6317851f8c0cf15528bce4 Reviewed-on: https://go-review.googlesource.com/9808 Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
2b833666f1
commit
2d9a50b97f
@ -6,7 +6,6 @@
|
|||||||
package html
|
package html
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"strings"
|
"strings"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
@ -187,52 +186,20 @@ func unescape(b []byte) []byte {
|
|||||||
return b
|
return b
|
||||||
}
|
}
|
||||||
|
|
||||||
const escapedChars = `&'<>"`
|
var htmlEscaper = strings.NewReplacer(
|
||||||
|
`&`, "&amp;",
|
||||||
func escape(w writer, s string) error {
|
`'`, "&#39;", // "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
|
||||||
i := strings.IndexAny(s, escapedChars)
|
`<`, "&lt;",
|
||||||
for i != -1 {
|
`>`, "&gt;",
|
||||||
if _, err := w.WriteString(s[:i]); err != nil {
|
`"`, "&#34;", // "&#34;" is shorter than "&quot;".
|
||||||
return err
|
)
|
||||||
}
|
|
||||||
var esc string
|
|
||||||
switch s[i] {
|
|
||||||
case '&':
|
|
||||||
esc = "&amp;"
|
|
||||||
case '\'':
|
|
||||||
// "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
|
|
||||||
esc = "&#39;"
|
|
||||||
case '<':
|
|
||||||
esc = "&lt;"
|
|
||||||
case '>':
|
|
||||||
esc = "&gt;"
|
|
||||||
case '"':
|
|
||||||
// "&#34;" is shorter than "&quot;".
|
|
||||||
esc = "&#34;"
|
|
||||||
default:
|
|
||||||
panic("unrecognized escape character")
|
|
||||||
}
|
|
||||||
s = s[i+1:]
|
|
||||||
if _, err := w.WriteString(esc); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
i = strings.IndexAny(s, escapedChars)
|
|
||||||
}
|
|
||||||
_, err := w.WriteString(s)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// EscapeString escapes special characters like "<" to become "&lt;". It
|
// EscapeString escapes special characters like "<" to become "&lt;". It
|
||||||
// escapes only five such characters: <, >, &, ' and ".
|
// escapes only five such characters: <, >, &, ' and ".
|
||||||
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
|
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
|
||||||
// always true.
|
// always true.
|
||||||
func EscapeString(s string) string {
|
func EscapeString(s string) string {
|
||||||
if strings.IndexAny(s, escapedChars) == -1 {
|
return htmlEscaper.Replace(s)
|
||||||
return s
|
|
||||||
}
|
|
||||||
var buf bytes.Buffer
|
|
||||||
escape(&buf, s)
|
|
||||||
return buf.String()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
|
// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
|
||||||
@ -241,10 +208,8 @@ func EscapeString(s string) string {
|
|||||||
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
|
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
|
||||||
// always true.
|
// always true.
|
||||||
func UnescapeString(s string) string {
|
func UnescapeString(s string) string {
|
||||||
for _, c := range s {
|
if !strings.Contains(s, "&") {
|
||||||
if c == '&' {
|
return s
|
||||||
return string(unescape([]byte(s)))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return s
|
return string(unescape([]byte(s)))
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,10 @@
|
|||||||
|
|
||||||
package html
|
package html
|
||||||
|
|
||||||
import "testing"
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
type unescapeTest struct {
|
type unescapeTest struct {
|
||||||
// A short description of the test case.
|
// A short description of the test case.
|
||||||
@ -113,3 +116,38 @@ func TestUnescapeEscape(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
benchEscapeData = strings.Repeat("AAAAA < BBBBB > CCCCC & DDDDD ' EEEEE \" ", 100)
|
||||||
|
benchEscapeNone = strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 100)
|
||||||
|
)
|
||||||
|
|
||||||
|
func BenchmarkEscape(b *testing.B) {
|
||||||
|
n := 0
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
n += len(EscapeString(benchEscapeData))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkEscapeNone(b *testing.B) {
|
||||||
|
n := 0
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
n += len(EscapeString(benchEscapeNone))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkUnescape(b *testing.B) {
|
||||||
|
s := EscapeString(benchEscapeData)
|
||||||
|
n := 0
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
n += len(UnescapeString(s))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkUnescapeNone(b *testing.B) {
|
||||||
|
s := EscapeString(benchEscapeNone)
|
||||||
|
n := 0
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
n += len(UnescapeString(s))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user