encoding/json: add json/v2 with GOEXPERIMENT=jsonv2 guard

This imports the proposed new v2 JSON API implemented in
github.com/go-json-experiment/json as of commit
d3c622f1b874954c355e60c8e6b6baa5f60d2fed.

When GOEXPERIMENT=jsonv2 is set, the encoding/json/v2 and
encoding/jsontext packages are visible, the encoding/json
package is implemented in terms of encoding/json/v2, and
the encoding/json package includes various additional APIs.
(See #71497 for details.)

When GOEXPERIMENT=jsonv2 is not set, the new API is not
present and the encoding/json package is unchanged.

The experimental API is not bound by the Go compatibility
promise and is expected to evolve as updates are made to
the json/v2 proposal.

The contents of encoding/json/internal/jsontest/testdata
are compressed with zstd v1.5.7 with the -19 option.

Fixes #71845
For #71497

Change-Id: Ib8c94e5f0586b6aaa22833190b41cf6ef59f4f01
Reviewed-on: https://go-review.googlesource.com/c/go/+/665796
Auto-Submit: Damien Neil <dneil@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Joseph Tsai <joetsai@digital-static.net>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
This commit is contained in:
Damien Neil 2025-04-11 14:19:51 -07:00 committed by Gopher Robot
parent c889004615
commit 0e17905793
107 changed files with 39814 additions and 3 deletions

View File

@ -8,6 +8,8 @@
// We benchmark converting between the JSON form
// and in-memory data structures.
//go:build !goexperiment.jsonv2
package json
import (

View File

@ -5,6 +5,8 @@
// Represents JSON data structure using native Go types: booleans, floats,
// strings, arrays, and maps.
//go:build !goexperiment.jsonv2
package json
import (

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import (

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
// Package json implements encoding and decoding of JSON as defined in
// RFC 7159. The mapping between JSON and Go values is described
// in the documentation for the Marshal and Unmarshal functions.

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import (

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json_test
import (

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json_test
import (

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json_test
import (

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import (

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import (

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import (

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import "bytes"

View File

@ -0,0 +1,41 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package internal
import "errors"
// NotForPublicUse is a marker type indicating that an API is for internal use only.
// It does not perfectly prevent usage of that API, but helps to restrict usage.
// Anything with this marker is not covered by the Go compatibility agreement.
//
// The type carries no data (it is an empty struct); it exists only so that
// it can appear in the signature of methods such as JSONOptions.
type NotForPublicUse struct{}
// AllowInternalUse is passed from "json" to "jsontext" to authenticate
// that the caller can have access to internal functionality.
// It is the zero value of NotForPublicUse.
var AllowInternalUse NotForPublicUse
// Sentinel error values internally shared between jsonv1 and jsonv2.
// Each is a distinct value created with errors.New.
var (
// ErrCycle is the sentinel whose message is "encountered a cycle".
ErrCycle = errors.New("encountered a cycle")
// ErrNonNilReference is the sentinel whose message states that a value
// must be passed as a non-nil pointer reference.
ErrNonNilReference = errors.New("value must be passed as a non-nil pointer reference")
)
// Function hooks referring to v1 functionality.
// They are declared here without initial values, so each one is nil
// until some other package assigns it.
// NOTE(review): presumably the v1 "json" package installs these during
// initialization; confirm against the assigning code.
var (
// TransformMarshalError converts a v2 error into a v1 error.
// It is called only at the top-level of a Marshal function.
TransformMarshalError func(any, error) error
// NewMarshalerError constructs a jsonv1.MarshalerError.
// It is called after a user-defined Marshal method/function fails.
NewMarshalerError func(any, error, string) error
// TransformUnmarshalError converts a v2 error into a v1 error.
// It is called only at the top-level of an Unmarshal function.
TransformUnmarshalError func(any, error) error
// NewRawNumber returns new(jsonv1.Number).
NewRawNumber func() any
// RawNumberOf returns jsonv1.Number(b).
RawNumberOf func(b []byte) any
)

View File

@ -0,0 +1,205 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
// Package jsonflags implements all the optional boolean flags.
// These flags are shared across the "json", "jsontext", and "jsonopts" packages.
package jsonflags
import "encoding/json/internal"
// Bools represents zero or more boolean flags, all set to true or false.
// The least-significant bit is the boolean value of all flags in the set.
// The remaining bits identify which particular flags.
//
// In common usage, this is OR'd with 0 or 1. For example:
// - (AllowInvalidUTF8 | 0) means "AllowInvalidUTF8 is false"
// - (Multiline | Indent | 1) means "Multiline and Indent are true"
type Bools uint64
// JSONOptions is a no-op marker method. Its signature matches the
// JSONOptions method required by the jsonopts.Options interface,
// which is what allows a Bools value to be passed around as an option.
func (Bools) JSONOptions(internal.NotForPublicUse) {}
// Derived flag sets. Every constant below is a bitwise combination of the
// individual flag bits declared in the iota blocks of this package.
const (
// AllFlags is the set of all flags.
AllFlags = AllCoderFlags | AllArshalV2Flags | AllArshalV1Flags
// AllCoderFlags is the set of all encoder/decoder flags.
// (maxCoderFlag - 1) has every bit below maxCoderFlag set; subtracting
// initFlag removes bit 0, which is reserved for the boolean value itself.
AllCoderFlags = (maxCoderFlag - 1) - initFlag
// AllArshalV2Flags is the set of all v2 marshal/unmarshal flags,
// i.e. the bits from maxCoderFlag up to, but excluding, maxArshalV2Flag.
AllArshalV2Flags = (maxArshalV2Flag - 1) - (maxCoderFlag - 1)
// AllArshalV1Flags is the set of all v1 marshal/unmarshal flags,
// i.e. the bits from maxArshalV2Flag up to, but excluding, maxArshalV1Flag.
AllArshalV1Flags = (maxArshalV1Flag - 1) - (maxArshalV2Flag - 1)
// NonBooleanFlags is the set of non-boolean flags,
// where the value is some other concrete Go type.
// The value of the flag is stored within jsonopts.Struct.
NonBooleanFlags = 0 |
Indent |
IndentPrefix |
ByteLimit |
DepthLimit |
Marshalers |
Unmarshalers
// DefaultV1Flags is the set of boolean flags that default to true under
// v1 semantics. None of the non-boolean flags differ between v1 and v2.
DefaultV1Flags = 0 |
AllowDuplicateNames |
AllowInvalidUTF8 |
EscapeForHTML |
EscapeForJS |
EscapeInvalidUTF8 |
PreserveRawStrings |
Deterministic |
FormatNilMapAsNull |
FormatNilSliceAsNull |
MatchCaseInsensitiveNames |
CallMethodsWithLegacySemantics |
FormatBytesWithLegacySemantics |
FormatTimeWithLegacySemantics |
MatchCaseSensitiveDelimiter |
MergeWithLegacySemantics |
OmitEmptyWithLegacyDefinition |
ReportErrorsWithLegacySemantics |
StringifyWithLegacySemantics |
UnmarshalArrayFromAnyLength
// AnyWhitespace is the set of flags for which a true value means
// the encoded output might have any whitespace.
AnyWhitespace = Multiline | SpaceAfterColon | SpaceAfterComma
// WhitespaceFlags is the set of flags related to whitespace formatting.
// In contrast to AnyWhitespace, this includes Indent and IndentPrefix
// as those settings take no effect if Multiline is false.
WhitespaceFlags = AnyWhitespace | Indent | IndentPrefix
// AnyEscape is the set of flags related to escaping in a JSON string.
AnyEscape = EscapeForHTML | EscapeForJS | EscapeInvalidUTF8
// CanonicalizeNumbers is the set of flags related to raw number canonicalization.
CanonicalizeNumbers = CanonicalizeRawInts | CanonicalizeRawFloats
)
// Encoder and decoder flags.
//
// Each constant occupies its own bit: initFlag is bit 0 and every
// following name takes the next higher bit via iota.
// maxCoderFlag is one bit past the last coder flag; it is used as the
// upper bound when computing AllCoderFlags and as the starting point
// for the v2 marshal/unmarshal flag bits.
const (
initFlag Bools = 1 << iota // reserved for the boolean value itself
AllowDuplicateNames // encode or decode
AllowInvalidUTF8 // encode or decode
WithinArshalCall // encode or decode; for internal use by json.Marshal and json.Unmarshal
OmitTopLevelNewline // encode only; for internal use by json.Marshal and json.MarshalWrite
PreserveRawStrings // encode only
CanonicalizeRawInts // encode only
CanonicalizeRawFloats // encode only
ReorderRawObjects // encode only
EscapeForHTML // encode only
EscapeForJS // encode only
EscapeInvalidUTF8 // encode only; only exposed in v1
Multiline // encode only
SpaceAfterColon // encode only
SpaceAfterComma // encode only
Indent // encode only; non-boolean flag
IndentPrefix // encode only; non-boolean flag
ByteLimit // encode or decode; non-boolean flag
DepthLimit // encode or decode; non-boolean flag
maxCoderFlag
)
// Marshal and Unmarshal flags (for v2).
//
// The blank first entry evaluates to maxCoderFlag >> 1, which is the bit of
// the last coder flag. StringifyNumbers, one iota step later, therefore
// lands exactly on maxCoderFlag, so the v2 bits continue directly after
// the coder bits without a gap or an overlap.
// maxArshalV2Flag is one bit past the last v2 flag.
const (
_ Bools = (maxCoderFlag >> 1) << iota
StringifyNumbers // marshal or unmarshal
Deterministic // marshal only
FormatNilMapAsNull // marshal only
FormatNilSliceAsNull // marshal only
OmitZeroStructFields // marshal only
MatchCaseInsensitiveNames // marshal or unmarshal
DiscardUnknownMembers // marshal only
RejectUnknownMembers // unmarshal only
Marshalers // marshal only; non-boolean flag
Unmarshalers // unmarshal only; non-boolean flag
maxArshalV2Flag
)
// Marshal and Unmarshal flags (for v1).
//
// The blank first entry evaluates to maxArshalV2Flag >> 1, which is the bit
// of the last v2 flag. CallMethodsWithLegacySemantics, one iota step later,
// therefore lands exactly on maxArshalV2Flag, so the v1 bits continue
// directly after the v2 bits.
// maxArshalV1Flag is one bit past the last v1 flag.
const (
_ Bools = (maxArshalV2Flag >> 1) << iota
CallMethodsWithLegacySemantics // marshal or unmarshal
FormatBytesWithLegacySemantics // marshal or unmarshal
FormatTimeWithLegacySemantics // marshal or unmarshal
MatchCaseSensitiveDelimiter // marshal or unmarshal
MergeWithLegacySemantics // unmarshal
OmitEmptyWithLegacyDefinition // marshal
ReportErrorsWithLegacySemantics // marshal or unmarshal
StringifyWithLegacySemantics // marshal or unmarshal
StringifyBoolsAndStrings // marshal or unmarshal; for internal use by jsonv2.makeStructArshaler
UnmarshalAnyWithRawNumber // unmarshal; for internal use by jsonv1.Decoder.UseNumber
UnmarshalArrayFromAnyLength // unmarshal
maxArshalV1Flag
)
// Flags is a set of boolean flags.
// If the presence bit is zero, then the value bit must also be zero.
// The least-significant bit of both fields is always zero.
//
// Unlike Bools, which can represent a set of bools that are all true or false,
// Flags represents a set of bools, each individually may be true or false.
//
// Presence has a bit set for every flag that has been specified (see Has);
// Values has a bit set for every specified flag whose value is true (see Get).
// Bit positions are those of the flag identifiers declared as Bools constants.
type Flags struct{ Presence, Values uint64 }
// Join joins two sets of flags such that the latter takes precedence.
// Flags present only in dst keep their value; flags present in src
// take the value recorded in src.
func (dst *Flags) Join(src Flags) {
// Copy all source presence bits over to the destination (using OR),
// then clear the destination's value bits for every flag present
// in the source (using AND with the inverted source presence),
// then copy the source value bits over to the destination (using OR).
// Note that src.Values is OR'd in unconditionally, so src is expected
// to be well-formed (no value bit set without its presence bit).
// e.g., dst := Flags{Presence: 0b_1100_0011, Values: 0b_1000_0011}
// e.g., src := Flags{Presence: 0b_0101_1010, Values: 0b_1001_0010}
dst.Presence |= src.Presence // e.g., 0b_1100_0011 | 0b_0101_1010 -> 0b_1101_1011
dst.Values &= ^src.Presence // e.g., 0b_1000_0011 & 0b_1010_0101 -> 0b_1000_0001
dst.Values |= src.Values // e.g., 0b_1000_0001 | 0b_1001_0010 -> 0b_1001_0011
}
// Set sets both the presence and value for the provided bool (or set of bools).
// Every flag identified in f is marked present, and its value becomes
// the least-significant bit of f.
func (fs *Flags) Set(f Bools) {
// Select out the bits for the flag identifiers (everything except LSB),
// then set the presence for all the identifier bits (using OR),
// then invert the identifier bits to clear out the values (using AND-NOT),
// then copy over all the identifier bits to the value if LSB is 1.
// e.g., fs := Flags{Presence: 0b_0101_0010, Values: 0b_0001_0010}
// e.g., f := 0b_1001_0001
id := uint64(f) &^ uint64(1) // e.g., 0b_1001_0001 & 0b_1111_1110 -> 0b_1001_0000
fs.Presence |= id // e.g., 0b_0101_0010 | 0b_1001_0000 -> 0b_1101_0010
fs.Values &= ^id // e.g., 0b_0001_0010 & 0b_0110_1111 -> 0b_0000_0010
// uint64(f&1) is 0 or 1, so the product is either 0 or id itself.
fs.Values |= uint64(f&1) * id // e.g., 0b_0000_0010 | 0b_1001_0000 -> 0b_1001_0010
}
// Get reports whether at least one of the flags identified by f
// currently has the value true.
// It is normally called with a single flag.
// The value bit of f (i.e., the LSB) is ignored.
func (fs Flags) Get(f Bools) bool {
	trueBits := fs.Values & uint64(f)
	return trueBits != 0
}
// Has reports whether at least one of the flags identified by f
// has been specified, regardless of its value.
// The value bit of f (i.e., the LSB) is ignored.
func (fs Flags) Has(f Bools) bool {
	presentBits := fs.Presence & uint64(f)
	return presentBits != 0
}
// Clear removes the flags identified by f from the set:
// afterwards they are neither present nor true.
// The value bit of f (i.e., the LSB) is ignored.
func (fs *Flags) Clear(f Bools) {
	// Bit-clear (AND-NOT) every bit of f from both fields.
	// e.g., fs := Flags{Presence: 0b_0101_0010, Values: 0b_0001_0010}
	// e.g., f := 0b_0001_1000
	drop := uint64(f)
	fs.Presence &^= drop // e.g., 0b_0101_0010 &^ 0b_0001_1000 -> 0b_0100_0010
	fs.Values &^= drop   // e.g., 0b_0001_0010 &^ 0b_0001_1000 -> 0b_0000_0010
}

View File

@ -0,0 +1,75 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsonflags
import "testing"
// TestFlags drives a single Flags value through a scripted sequence of
// Join, Set, Clear, Get, and Check steps.
// The steps are order-dependent: they all operate on the same fs variable,
// so each Get/Check expectation reflects every mutation that precedes it.
func TestFlags(t *testing.T) {
// Step kinds. Check compares the whole Flags value; Get checks both
// the value (Flags.Get) and the presence (Flags.Has) of a flag.
type Check struct{ want Flags }
type Join struct{ in Flags }
type Set struct{ in Bools }
type Clear struct{ in Bools }
type Get struct {
in Bools
want bool
wantOk bool
}
calls := []any{
Get{in: AllowDuplicateNames, want: false, wantOk: false},
Set{in: AllowDuplicateNames | 0},
Get{in: AllowDuplicateNames, want: false, wantOk: true},
Set{in: AllowDuplicateNames | 1},
Get{in: AllowDuplicateNames, want: true, wantOk: true},
Check{want: Flags{Presence: uint64(AllowDuplicateNames), Values: uint64(AllowDuplicateNames)}},
Get{in: AllowInvalidUTF8, want: false, wantOk: false},
Set{in: AllowInvalidUTF8 | 1},
Get{in: AllowInvalidUTF8, want: true, wantOk: true},
Set{in: AllowInvalidUTF8 | 0},
Get{in: AllowInvalidUTF8, want: false, wantOk: true},
Get{in: AllowDuplicateNames, want: true, wantOk: true},
Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(AllowDuplicateNames)}},
Set{in: AllowDuplicateNames | AllowInvalidUTF8 | 0},
Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(0)}},
Set{in: AllowDuplicateNames | AllowInvalidUTF8 | 0},
Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(0)}},
Set{in: AllowDuplicateNames | AllowInvalidUTF8 | 1},
Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(AllowDuplicateNames | AllowInvalidUTF8)}},
Join{in: Flags{Presence: 0, Values: 0}},
Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(AllowDuplicateNames | AllowInvalidUTF8)}},
Join{in: Flags{Presence: uint64(Multiline | AllowInvalidUTF8), Values: uint64(AllowDuplicateNames)}},
Check{want: Flags{Presence: uint64(Multiline | AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(AllowDuplicateNames)}},
Clear{in: AllowDuplicateNames | AllowInvalidUTF8},
Check{want: Flags{Presence: uint64(Multiline), Values: uint64(0)}},
Set{in: AllowInvalidUTF8 | Deterministic | ReportErrorsWithLegacySemantics | 1},
Set{in: Multiline | StringifyNumbers | 0},
Check{want: Flags{Presence: uint64(AllowInvalidUTF8 | Deterministic | ReportErrorsWithLegacySemantics | Multiline | StringifyNumbers), Values: uint64(AllowInvalidUTF8 | Deterministic | ReportErrorsWithLegacySemantics)}},
Clear{in: ^AllCoderFlags},
Check{want: Flags{Presence: uint64(AllowInvalidUTF8 | Multiline), Values: uint64(AllowInvalidUTF8)}},
}
// fs is shared by every step; i identifies the failing step in messages.
var fs Flags
for i, call := range calls {
switch call := call.(type) {
case Join:
fs.Join(call.in)
case Set:
fs.Set(call.in)
case Clear:
fs.Clear(call.in)
case Get:
got := fs.Get(call.in)
gotOk := fs.Has(call.in)
if got != call.want || gotOk != call.wantOk {
t.Fatalf("%d: GetOk = (%v, %v), want (%v, %v)", i, got, gotOk, call.want, call.wantOk)
}
case Check:
if fs != call.want {
t.Fatalf("%d: got %x, want %x", i, fs, call.want)
}
}
}
}

View File

@ -0,0 +1,202 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsonopts
import (
"encoding/json/internal"
"encoding/json/internal/jsonflags"
)
// Options is the common options type shared across json packages.
// Within this package it is implemented by *Struct and by the option value
// types Indent, IndentPrefix, ByteLimit, and DepthLimit; jsonflags.Bools
// declares a method with the same signature as well.
type Options interface {
// JSONOptions is exported so related json packages can implement Options.
// The internal.NotForPublicUse parameter marks the method as internal.
JSONOptions(internal.NotForPublicUse)
}
// Struct is the combination of all options in struct form.
// This is efficient to pass down the call stack and to query.
//
// Flags records, per flag, whether it has been specified and its boolean
// value. For the non-boolean flags the presence bit in Flags indicates
// whether the corresponding field in CoderValues or ArshalValues is set.
type Struct struct {
Flags jsonflags.Flags
CoderValues
ArshalValues
}
// CoderValues holds the values of the non-boolean encoder/decoder options.
// The trailing comment on each field names the flag that tracks its presence.
type CoderValues struct {
Indent string // jsonflags.Indent
IndentPrefix string // jsonflags.IndentPrefix
ByteLimit int64 // jsonflags.ByteLimit
DepthLimit int // jsonflags.DepthLimit
}
// ArshalValues holds the values of the non-boolean marshal/unmarshal options.
type ArshalValues struct {
// The Marshalers and Unmarshalers fields use the any type to avoid a
// concrete dependency on *json.Marshalers and *json.Unmarshalers,
// which would in turn create a dependency on the "reflect" package.
Marshalers any // jsonflags.Marshalers
Unmarshalers any // jsonflags.Unmarshalers
// NOTE(review): Format and FormatDepth are not read or written anywhere
// in this file and have no associated flag here; their meaning must be
// confirmed against the code that uses them.
Format string
FormatDepth int
}
// DefaultOptionsV2 is the set of all options that define default v2 behavior.
// Every flag except those in jsonflags.WhitespaceFlags is marked present,
// and every present flag has the value false. The non-boolean option
// fields are left at their zero values.
var DefaultOptionsV2 = Struct{
Flags: jsonflags.Flags{
Presence: uint64(jsonflags.AllFlags & ^jsonflags.WhitespaceFlags),
Values: uint64(0),
},
}
// DefaultOptionsV1 is the set of all options that define default v1 behavior.
// It marks the same flags present as DefaultOptionsV2, but the flags in
// jsonflags.DefaultV1Flags have the value true.
var DefaultOptionsV1 = Struct{
Flags: jsonflags.Flags{
Presence: uint64(jsonflags.AllFlags & ^jsonflags.WhitespaceFlags),
Values: uint64(jsonflags.DefaultV1Flags),
},
}
// JSONOptions is a no-op method whose presence makes *Struct implement Options.
func (*Struct) JSONOptions(internal.NotForPublicUse) {}
// GetUnknownOption is injected by the "json" package to handle Options
// declared in that package so that "jsonopts" can handle them.
// GetOption calls it for any option type it does not recognize itself.
// Until it is replaced, the default implementation panics with "unknown option".
var GetUnknownOption = func(*Struct, Options) (any, bool) { panic("unknown option") }
// GetOption looks up, within opts, the option that setter constructs.
//
// The kind of option is discovered by calling setter with the zero value
// of T and inspecting the dynamic type of the result. The returned bool
// reports whether that option is specified in opts; when it is not,
// the zero value of T is returned for the options handled here.
// Option types not known to this package are delegated to GetUnknownOption.
func GetOption[T any](opts Options, setter func(T) Options) (T, bool) {
	// Normalize opts into a *Struct so that every lookup below is uniform.
	so, isStruct := opts.(*Struct)
	if !isStruct {
		var joined Struct
		joined.Join(opts)
		so = &joined
	}

	// Dispatch on the option kind produced by the setter.
	var zero T
	switch opt := setter(zero).(type) {
	case jsonflags.Bools:
		value := so.Flags.Get(opt)
		present := so.Flags.Has(opt)
		return any(value).(T), present
	case Indent:
		if so.Flags.Has(jsonflags.Indent) {
			return any(so.Indent).(T), true
		}
	case IndentPrefix:
		if so.Flags.Has(jsonflags.IndentPrefix) {
			return any(so.IndentPrefix).(T), true
		}
	case ByteLimit:
		if so.Flags.Has(jsonflags.ByteLimit) {
			return any(so.ByteLimit).(T), true
		}
	case DepthLimit:
		if so.Flags.Has(jsonflags.DepthLimit) {
			return any(so.DepthLimit).(T), true
		}
	default:
		value, present := GetUnknownOption(so, opt)
		return value.(T), present
	}

	// A known non-boolean option that is not specified in opts.
	return zero, false
}
// JoinUnknownOption is injected by the "json" package to handle Options
// declared in that package so that "jsonopts" can handle them.
// The join logic calls it for any option type it does not recognize itself.
// Until it is replaced, the default implementation panics with "unknown option".
var JoinUnknownOption = func(*Struct, Options) { panic("unknown option") }
// Join merges every option in srcs into dst, in order, so that later
// options take precedence over earlier ones and over what dst already holds.
// Coder options are included.
func (dst *Struct) Join(srcs ...Options) {
dst.join(false, srcs...)
}
// JoinWithoutCoderOptions is like Join, but ignores encoder/decoder options:
// flags within jsonflags.AllCoderFlags are stripped from Bools and *Struct
// sources, and Indent, IndentPrefix, ByteLimit, and DepthLimit values
// are skipped entirely.
func (dst *Struct) JoinWithoutCoderOptions(srcs ...Options) {
dst.join(true, srcs...)
}
// join merges each option of srcs into dst in the order given, so that
// later options override earlier ones.
//
// When excludeCoderOptions is true, anything belonging to the encoder or
// decoder is ignored: coder flag bits are removed from Bools and *Struct
// sources, and the Indent, IndentPrefix, ByteLimit, and DepthLimit option
// values are dropped. Option types unknown to this package are forwarded
// to JoinUnknownOption.
func (dst *Struct) join(excludeCoderOptions bool, srcs ...Options) {
	for _, opt := range srcs {
		switch o := opt.(type) {
		case nil:
			// A nil option contributes nothing.
		case jsonflags.Bools:
			if excludeCoderOptions {
				o &^= jsonflags.AllCoderFlags
			}
			dst.Flags.Set(o)
		case Indent:
			if !excludeCoderOptions {
				// Specifying an indent also turns on multiline output.
				dst.Flags.Set(jsonflags.Multiline | jsonflags.Indent | 1)
				dst.Indent = string(o)
			}
		case IndentPrefix:
			if !excludeCoderOptions {
				// Specifying an indent prefix also turns on multiline output.
				dst.Flags.Set(jsonflags.Multiline | jsonflags.IndentPrefix | 1)
				dst.IndentPrefix = string(o)
			}
		case ByteLimit:
			if !excludeCoderOptions {
				dst.Flags.Set(jsonflags.ByteLimit | 1)
				dst.ByteLimit = int64(o)
			}
		case DepthLimit:
			if !excludeCoderOptions {
				dst.Flags.Set(jsonflags.DepthLimit | 1)
				dst.DepthLimit = int(o)
			}
		case *Struct:
			// Work on a copy of the flags so the source is never modified.
			flags := o.Flags
			if excludeCoderOptions {
				flags.Clear(jsonflags.AllCoderFlags)
			}
			dst.Flags.Join(flags)

			// Copy non-boolean values only for the flags that survived.
			if !flags.Has(jsonflags.NonBooleanFlags) {
				continue
			}
			if flags.Has(jsonflags.Indent) {
				dst.Indent = o.Indent
			}
			if flags.Has(jsonflags.IndentPrefix) {
				dst.IndentPrefix = o.IndentPrefix
			}
			if flags.Has(jsonflags.ByteLimit) {
				dst.ByteLimit = o.ByteLimit
			}
			if flags.Has(jsonflags.DepthLimit) {
				dst.DepthLimit = o.DepthLimit
			}
			if flags.Has(jsonflags.Marshalers) {
				dst.Marshalers = o.Marshalers
			}
			if flags.Has(jsonflags.Unmarshalers) {
				dst.Unmarshalers = o.Unmarshalers
			}
		default:
			JoinUnknownOption(dst, o)
		}
	}
}
// Option value types for the non-boolean coder options.
// Each wraps the plain Go value of one option so that the value can be
// passed as an Options and recognized by its dynamic type.
// The trailing comment names the jsontext function associated with each.
type (
Indent string // jsontext.WithIndent
IndentPrefix string // jsontext.WithIndentPrefix
ByteLimit int64 // jsontext.WithByteLimit
DepthLimit int // jsontext.WithDepthLimit
// type for jsonflags.Marshalers declared in "json" package
// type for jsonflags.Unmarshalers declared in "json" package
)
// The JSONOptions methods below are no-ops; they exist so that each
// option value type implements Options.
func (Indent) JSONOptions(internal.NotForPublicUse) {}
func (IndentPrefix) JSONOptions(internal.NotForPublicUse) {}
func (ByteLimit) JSONOptions(internal.NotForPublicUse) {}
func (DepthLimit) JSONOptions(internal.NotForPublicUse) {}

View File

@ -0,0 +1,233 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsonopts_test
import (
"reflect"
"testing"
"encoding/json/internal/jsonflags"
. "encoding/json/internal/jsonopts"
"encoding/json/jsontext"
"encoding/json/v2"
)
// makeFlags builds a jsonflags.Flags by applying each of the given
// Bools to an initially empty set, in argument order.
func makeFlags(f ...jsonflags.Bools) (fs jsonflags.Flags) {
	for i := range f {
		fs.Set(f[i])
	}
	return fs
}
// TestJoin joins a sequence of options into one Struct and checks the
// accumulated result after every step.
// The cases are cumulative: got is created once, before the loop, so each
// want value describes the state after joining that case's input on top of
// all preceding cases. Cases therefore cannot be reordered or run alone.
func TestJoin(t *testing.T) {
tests := []struct {
in Options
excludeCoders bool
want *Struct
}{{
in: jsonflags.AllowInvalidUTF8 | 1,
want: &Struct{Flags: makeFlags(jsonflags.AllowInvalidUTF8 | 1)},
}, {
in: jsonflags.Multiline | 0,
want: &Struct{
Flags: makeFlags(jsonflags.AllowInvalidUTF8|1, jsonflags.Multiline|0)},
}, {
in: Indent("\t"), // implicitly sets Multiline=true
want: &Struct{
Flags: makeFlags(jsonflags.AllowInvalidUTF8 | jsonflags.Multiline | jsonflags.Indent | 1),
CoderValues: CoderValues{Indent: "\t"},
},
}, {
in: &Struct{
Flags: makeFlags(jsonflags.Multiline|jsonflags.EscapeForJS|0, jsonflags.AllowInvalidUTF8|1),
},
want: &Struct{
Flags: makeFlags(jsonflags.AllowInvalidUTF8|jsonflags.Indent|1, jsonflags.Multiline|jsonflags.EscapeForJS|0),
CoderValues: CoderValues{Indent: "\t"},
},
}, {
in: &DefaultOptionsV1,
want: func() *Struct {
v1 := DefaultOptionsV1
v1.Flags.Set(jsonflags.Indent | 1)
v1.Flags.Set(jsonflags.Multiline | 0)
v1.Indent = "\t"
return &v1
}(), // v1 fully replaces before (except for whitespace related flags)
}, {
in: &DefaultOptionsV2,
want: func() *Struct {
v2 := DefaultOptionsV2
v2.Flags.Set(jsonflags.Indent | 1)
v2.Flags.Set(jsonflags.Multiline | 0)
v2.Indent = "\t"
return &v2
}(), // v2 fully replaces before (except for whitespace related flags)
}, {
// With coder options excluded, only Deterministic takes effect.
in: jsonflags.Deterministic | jsonflags.AllowInvalidUTF8 | 1, excludeCoders: true,
want: func() *Struct {
v2 := DefaultOptionsV2
v2.Flags.Set(jsonflags.Deterministic | 1)
v2.Flags.Set(jsonflags.Indent | 1)
v2.Flags.Set(jsonflags.Multiline | 0)
v2.Indent = "\t"
return &v2
}(),
}, {
// With coder options excluded, the indent prefix is ignored.
in: jsontext.WithIndentPrefix(" "), excludeCoders: true,
want: func() *Struct {
v2 := DefaultOptionsV2
v2.Flags.Set(jsonflags.Deterministic | 1)
v2.Flags.Set(jsonflags.Indent | 1)
v2.Flags.Set(jsonflags.Multiline | 0)
v2.Indent = "\t"
return &v2
}(),
}, {
in: jsontext.WithIndentPrefix(" "), excludeCoders: false,
want: func() *Struct {
v2 := DefaultOptionsV2
v2.Flags.Set(jsonflags.Deterministic | 1)
v2.Flags.Set(jsonflags.Indent | 1)
v2.Flags.Set(jsonflags.IndentPrefix | 1)
v2.Flags.Set(jsonflags.Multiline | 1)
v2.Indent = "\t"
v2.IndentPrefix = " "
return &v2
}(),
}, {
in: &Struct{
Flags: jsonflags.Flags{
Presence: uint64(jsonflags.Deterministic | jsonflags.Indent | jsonflags.IndentPrefix),
Values: uint64(jsonflags.Indent | jsonflags.IndentPrefix),
},
CoderValues: CoderValues{Indent: " ", IndentPrefix: " "},
},
excludeCoders: true,
want: func() *Struct {
v2 := DefaultOptionsV2
v2.Flags.Set(jsonflags.Indent | 1)
v2.Flags.Set(jsonflags.IndentPrefix | 1)
v2.Flags.Set(jsonflags.Multiline | 1)
v2.Indent = "\t"
v2.IndentPrefix = " "
return &v2
}(),
}, {
in: &Struct{
Flags: jsonflags.Flags{
Presence: uint64(jsonflags.Deterministic | jsonflags.Indent | jsonflags.IndentPrefix),
Values: uint64(jsonflags.Indent | jsonflags.IndentPrefix),
},
CoderValues: CoderValues{Indent: " ", IndentPrefix: " "},
},
excludeCoders: false,
want: func() *Struct {
v2 := DefaultOptionsV2
v2.Flags.Set(jsonflags.Indent | 1)
v2.Flags.Set(jsonflags.IndentPrefix | 1)
v2.Flags.Set(jsonflags.Multiline | 1)
v2.Indent = " "
v2.IndentPrefix = " "
return &v2
}(),
}}
// got accumulates across all cases; it is deliberately not reset.
got := new(Struct)
for i, tt := range tests {
if tt.excludeCoders {
got.JoinWithoutCoderOptions(tt.in)
} else {
got.Join(tt.in)
}
if !reflect.DeepEqual(got, tt.want) {
t.Fatalf("%d: Join:\n\tgot: %+v\n\twant: %+v", i, got, tt.want)
}
}
}
// TestGet checks json.GetOption against several kinds of Options input:
// nil, single Bools values, a single Indent value, and a populated *Struct.
// Each check verifies both the returned value and the presence result.
func TestGet(t *testing.T) {
// opts has Indent, Deterministic, and Marshalers present and true,
// and Multiline present but false; no other flag is specified.
opts := &Struct{
Flags: makeFlags(jsonflags.Indent|jsonflags.Deterministic|jsonflags.Marshalers|1, jsonflags.Multiline|0),
CoderValues: CoderValues{Indent: "\t"},
ArshalValues: ArshalValues{Marshalers: new(json.Marshalers)},
}
if v, ok := json.GetOption(nil, jsontext.AllowDuplicateNames); v || ok {
t.Errorf("GetOption(..., AllowDuplicateNames) = (%v, %v), want (false, false)", v, ok)
}
if v, ok := json.GetOption(jsonflags.AllowInvalidUTF8|0, jsontext.AllowDuplicateNames); v || ok {
t.Errorf("GetOption(..., AllowDuplicateNames) = (%v, %v), want (false, false)", v, ok)
}
if v, ok := json.GetOption(jsonflags.AllowDuplicateNames|0, jsontext.AllowDuplicateNames); v || !ok {
t.Errorf("GetOption(..., AllowDuplicateNames) = (%v, %v), want (false, true)", v, ok)
}
if v, ok := json.GetOption(jsonflags.AllowDuplicateNames|1, jsontext.AllowDuplicateNames); !v || !ok {
t.Errorf("GetOption(..., AllowDuplicateNames) = (%v, %v), want (true, true)", v, ok)
}
if v, ok := json.GetOption(Indent(""), jsontext.AllowDuplicateNames); v || ok {
t.Errorf("GetOption(..., AllowDuplicateNames) = (%v, %v), want (false, false)", v, ok)
}
if v, ok := json.GetOption(Indent(" "), jsontext.WithIndent); v != " " || !ok {
t.Errorf(`GetOption(..., WithIndent) = (%q, %v), want (" ", true)`, v, ok)
}
if v, ok := json.GetOption(jsonflags.AllowDuplicateNames|1, jsontext.WithIndent); v != "" || ok {
t.Errorf(`GetOption(..., WithIndent) = (%q, %v), want ("", false)`, v, ok)
}
// The remaining checks query the populated opts value.
if v, ok := json.GetOption(opts, jsontext.AllowDuplicateNames); v || ok {
t.Errorf("GetOption(..., AllowDuplicateNames) = (%v, %v), want (false, false)", v, ok)
}
if v, ok := json.GetOption(opts, json.Deterministic); !v || !ok {
t.Errorf("GetOption(..., Deterministic) = (%v, %v), want (true, true)", v, ok)
}
if v, ok := json.GetOption(opts, jsontext.Multiline); v || !ok {
t.Errorf("GetOption(..., Multiline) = (%v, %v), want (false, true)", v, ok)
}
if v, ok := json.GetOption(opts, jsontext.AllowInvalidUTF8); v || ok {
t.Errorf("GetOption(..., AllowInvalidUTF8) = (%v, %v), want (false, false)", v, ok)
}
if v, ok := json.GetOption(opts, jsontext.WithIndent); v != "\t" || !ok {
t.Errorf(`GetOption(..., WithIndent) = (%q, %v), want ("\t", true)`, v, ok)
}
if v, ok := json.GetOption(opts, jsontext.WithIndentPrefix); v != "" || ok {
t.Errorf(`GetOption(..., WithIndentPrefix) = (%q, %v), want ("", false)`, v, ok)
}
if v, ok := json.GetOption(opts, json.WithMarshalers); v == nil || !ok {
t.Errorf(`GetOption(..., WithMarshalers) = (%v, %v), want (non-nil, true)`, v, ok)
}
if v, ok := json.GetOption(opts, json.WithUnmarshalers); v != nil || ok {
t.Errorf(`GetOption(..., WithUnmarshalers) = (%v, %v), want (nil, false)`, v, ok)
}
}
// sink is a package-level destination for the results produced inside the
// benchmark loops below.
// NOTE(review): presumably this keeps the compiler from discarding the
// benchmarked calls as dead code; confirm if the benchmarks are changed.
var sink struct {
Bool bool
String string
Marshalers *json.Marshalers
}
// BenchmarkGetBool measures json.GetOption for a boolean option
// (AllowDuplicateNames) looked up in the default v2 options,
// reporting allocations.
func BenchmarkGetBool(b *testing.B) {
	b.ReportAllocs()
	opts := json.DefaultOptionsV2()
	for i := 0; i < b.N; i++ {
		// Both results land in sink.Bool; only the second (presence) is retained.
		sink.Bool, sink.Bool = json.GetOption(opts, jsontext.AllowDuplicateNames)
	}
}
// BenchmarkGetIndent measures json.GetOption for the string-valued
// indent option looked up in the default v2 options, reporting allocations.
func BenchmarkGetIndent(b *testing.B) {
	b.ReportAllocs()
	opts := json.DefaultOptionsV2()
	for i := 0; i < b.N; i++ {
		sink.String, sink.Bool = json.GetOption(opts, jsontext.WithIndent)
	}
}
// BenchmarkGetMarshalers measures json.GetOption for the marshalers option,
// looked up in the default v2 options joined with WithMarshalers(nil),
// reporting allocations.
func BenchmarkGetMarshalers(b *testing.B) {
	b.ReportAllocs()
	opts := json.JoinOptions(json.DefaultOptionsV2(), json.WithMarshalers(nil))
	for i := 0; i < b.N; i++ {
		sink.Marshalers, sink.Bool = json.GetOption(opts, json.WithMarshalers)
	}
}

View File

@ -0,0 +1,37 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontest
import (
"fmt"
"path"
"runtime"
)
// TODO(https://go.dev/issue/52751): Replace with native testing support.
// CaseName pairs the name of a test case with the source position
// (file and line) at which the case was declared.
type CaseName struct {
	Name  string
	Where CasePos
}

// Name returns a CaseName for s whose Where field records the call site,
// i.e. the file and line of the function that called Name.
func Name(s string) (c CaseName) {
	c = CaseName{Name: s}
	// Skip two frames (runtime.Callers itself and Name) so that the
	// single recorded program counter belongs to Name's caller.
	runtime.Callers(2, c.Where.pc[:])
	return c
}

// CasePos represents a file and line number.
// It stores one program counter and resolves it only when formatted.
type CasePos struct{ pc [1]uintptr }

// String formats the position as "file:line", where file is the base
// name of the source file.
func (pos CasePos) String() string {
	frame, _ := runtime.CallersFrames(pos.pc[:]).Next()
	file := path.Base(frame.File)
	return fmt.Sprintf("%s:%d", file, frame.Line)
}

View File

@ -0,0 +1,607 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
// Package jsontest contains functionality to assist in testing JSON.
package jsontest
import (
"bytes"
"embed"
"errors"
"internal/zstd"
"io"
"io/fs"
"path"
"slices"
"strings"
"sync"
"time"
)
// Embed the testdata directory as a fs.FS because this package is imported
// by other packages such that the location of testdata may change relative
// to the working directory of the test itself.
//
//go:embed testdata/*.json.zst
var testdataFS embed.FS
// Entry describes one item of JSON test data.
type Entry struct {
// Name is the entry's name in CamelCase, derived from the file name.
Name string
// Data returns the decompressed JSON contents of the entry.
Data func() []byte
New func() any // nil if there is no concrete type for this
}
// mustGet unwraps a (value, error) pair: it returns v when err is nil
// and panics with err otherwise.
func mustGet[T any](v T, err error) T {
	if err == nil {
		return v
	}
	panic(err)
}
// Data is a list of JSON testdata.
// It holds one Entry per file in the embedded testdata directory,
// ordered by file name. File contents are read and decompressed
// on the first call to an entry's Data function.
var Data = func() (entries []Entry) {
	files := mustGet(fs.ReadDir(testdataFS, "testdata"))
	slices.SortFunc(files, func(a, b fs.DirEntry) int {
		return strings.Compare(a.Name(), b.Name())
	})
	for _, file := range files {
		fileName := file.Name()

		// Convert snake_case file name to CamelCase.
		parts := strings.Split(strings.TrimSuffix(fileName, ".json.zst"), "_")
		for i, part := range parts {
			parts[i] = strings.Title(part)
		}
		entry := Entry{Name: strings.Join(parts, "")}

		// Lazily read and decompress the test data.
		entry.Data = sync.OnceValue(func() []byte {
			compressed := mustGet(fs.ReadFile(testdataFS, path.Join("testdata", fileName)))
			return mustGet(io.ReadAll(zstd.NewReader(bytes.NewReader(compressed))))
		})

		// Check whether there is a concrete type for this data.
		switch entry.Name {
		case "CanadaGeometry":
			entry.New = func() any { return new(canadaRoot) }
		case "CitmCatalog":
			entry.New = func() any { return new(citmRoot) }
		case "GolangSource":
			entry.New = func() any { return new(golangRoot) }
		case "StringEscaped", "StringUnicode":
			entry.New = func() any { return new(stringRoot) }
		case "SyntheaFhir":
			entry.New = func() any { return new(syntheaRoot) }
		case "TwitterStatus":
			entry.New = func() any { return new(twitterRoot) }
		}

		entries = append(entries, entry)
	}
	return entries
}()
// canadaRoot is the concrete Go type that Data associates with
// the "CanadaGeometry" testdata entry.
type (
canadaRoot struct {
Type string `json:"type"`
Features []struct {
Type string `json:"type"`
Properties struct {
Name string `json:"name"`
} `json:"properties"`
Geometry struct {
Type string `json:"type"`
// Each innermost element is a fixed pair of float64 values.
Coordinates [][][2]float64 `json:"coordinates"`
} `json:"geometry"`
} `json:"features"`
}
)
// citmRoot is the concrete Go type that Data associates with
// the "CitmCatalog" testdata entry.
// Several of its maps are keyed by integers, so the JSON object names
// for those members are decoded as numbers.
type (
citmRoot struct {
AreaNames map[int64]string `json:"areaNames"`
AudienceSubCategoryNames map[int64]string `json:"audienceSubCategoryNames"`
BlockNames map[int64]string `json:"blockNames"`
Events map[int64]struct {
Description string `json:"description"`
ID int `json:"id"`
Logo string `json:"logo"`
Name string `json:"name"`
SubTopicIds []int `json:"subTopicIds"`
SubjectCode any `json:"subjectCode"`
Subtitle any `json:"subtitle"`
TopicIds []int `json:"topicIds"`
} `json:"events"`
Performances []struct {
EventID int `json:"eventId"`
ID int `json:"id"`
Logo any `json:"logo"`
Name any `json:"name"`
Prices []struct {
Amount int `json:"amount"`
AudienceSubCategoryID int64 `json:"audienceSubCategoryId"`
SeatCategoryID int64 `json:"seatCategoryId"`
} `json:"prices"`
SeatCategories []struct {
Areas []struct {
AreaID int `json:"areaId"`
BlockIds []any `json:"blockIds"`
} `json:"areas"`
SeatCategoryID int `json:"seatCategoryId"`
} `json:"seatCategories"`
SeatMapImage any `json:"seatMapImage"`
Start int64 `json:"start"`
VenueCode string `json:"venueCode"`
} `json:"performances"`
SeatCategoryNames map[uint64]string `json:"seatCategoryNames"`
SubTopicNames map[uint64]string `json:"subTopicNames"`
SubjectNames map[uint64]string `json:"subjectNames"`
TopicNames map[uint64]string `json:"topicNames"`
TopicSubTopics map[uint64][]uint64 `json:"topicSubTopics"`
VenueNames map[string]string `json:"venueNames"`
}
)
// golangRoot and golangNode are the concrete Go types that Data associates
// with the "GolangSource" testdata entry.
type (
// golangRoot is the top-level value; it holds the root of a tree of nodes.
golangRoot struct {
Tree *golangNode `json:"tree"`
Username string `json:"username"`
}
// golangNode is one node of the tree; Kids holds its child nodes,
// making the type recursive.
golangNode struct {
Name string `json:"name"`
Kids []golangNode `json:"kids"`
CLWeight float64 `json:"cl_weight"`
Touches int `json:"touches"`
MinT uint64 `json:"min_t"`
MaxT uint64 `json:"max_t"`
MeanT uint64 `json:"mean_t"`
}
)
// stringRoot is the concrete Go type that Data associates with both
// the "StringEscaped" and "StringUnicode" testdata entries.
// Every field is a string whose JSON name is given explicitly by its tag.
type (
stringRoot struct {
Arabic string `json:"Arabic"`
ArabicPresentationFormsA string `json:"Arabic Presentation Forms-A"`
ArabicPresentationFormsB string `json:"Arabic Presentation Forms-B"`
Armenian string `json:"Armenian"`
Arrows string `json:"Arrows"`
Bengali string `json:"Bengali"`
Bopomofo string `json:"Bopomofo"`
BoxDrawing string `json:"Box Drawing"`
CJKCompatibility string `json:"CJK Compatibility"`
CJKCompatibilityForms string `json:"CJK Compatibility Forms"`
CJKCompatibilityIdeographs string `json:"CJK Compatibility Ideographs"`
CJKSymbolsAndPunctuation string `json:"CJK Symbols and Punctuation"`
CJKUnifiedIdeographs string `json:"CJK Unified Ideographs"`
CJKUnifiedIdeographsExtensionA string `json:"CJK Unified Ideographs Extension A"`
CJKUnifiedIdeographsExtensionB string `json:"CJK Unified Ideographs Extension B"`
Cherokee string `json:"Cherokee"`
CurrencySymbols string `json:"Currency Symbols"`
Cyrillic string `json:"Cyrillic"`
CyrillicSupplementary string `json:"Cyrillic Supplementary"`
Devanagari string `json:"Devanagari"`
EnclosedAlphanumerics string `json:"Enclosed Alphanumerics"`
EnclosedCJKLettersAndMonths string `json:"Enclosed CJK Letters and Months"`
Ethiopic string `json:"Ethiopic"`
GeometricShapes string `json:"Geometric Shapes"`
Georgian string `json:"Georgian"`
GreekAndCoptic string `json:"Greek and Coptic"`
Gujarati string `json:"Gujarati"`
Gurmukhi string `json:"Gurmukhi"`
HangulCompatibilityJamo string `json:"Hangul Compatibility Jamo"`
HangulJamo string `json:"Hangul Jamo"`
HangulSyllables string `json:"Hangul Syllables"`
Hebrew string `json:"Hebrew"`
Hiragana string `json:"Hiragana"`
// NOTE(review): "Extentions" is spelled this way in both the field name
// and the JSON tag; presumably it mirrors the key in the testdata,
// so verify against the data before correcting it.
IPAExtentions string `json:"IPA Extentions"`
KangxiRadicals string `json:"Kangxi Radicals"`
Katakana string `json:"Katakana"`
Khmer string `json:"Khmer"`
KhmerSymbols string `json:"Khmer Symbols"`
Latin string `json:"Latin"`
LatinExtendedAdditional string `json:"Latin Extended Additional"`
Latin1Supplement string `json:"Latin-1 Supplement"`
LatinExtendedA string `json:"Latin-Extended A"`
LatinExtendedB string `json:"Latin-Extended B"`
LetterlikeSymbols string `json:"Letterlike Symbols"`
Malayalam string `json:"Malayalam"`
MathematicalAlphanumericSymbols string `json:"Mathematical Alphanumeric Symbols"`
MathematicalOperators string `json:"Mathematical Operators"`
MiscellaneousSymbols string `json:"Miscellaneous Symbols"`
Mongolian string `json:"Mongolian"`
NumberForms string `json:"Number Forms"`
Oriya string `json:"Oriya"`
PhoneticExtensions string `json:"Phonetic Extensions"`
SupplementalArrowsB string `json:"Supplemental Arrows-B"`
Syriac string `json:"Syriac"`
Tamil string `json:"Tamil"`
Thaana string `json:"Thaana"`
Thai string `json:"Thai"`
UnifiedCanadianAboriginalSyllabics string `json:"Unified Canadian Aboriginal Syllabics"`
YiRadicals string `json:"Yi Radicals"`
YiSyllables string `json:"Yi Syllables"`
}
)
// syntheaRoot and the other synthea* types are the concrete Go types that
// Data associates with the "SyntheaFhir" testdata entry.
type (
// syntheaRoot is the top-level value; it holds a list of entries,
// each optionally carrying a request and a resource.
syntheaRoot struct {
Entry []struct {
FullURL string `json:"fullUrl"`
Request *struct {
Method string `json:"method"`
URL string `json:"url"`
} `json:"request"`
// Resource is a single struct type declaring many differently named members.
Resource *struct {
AbatementDateTime time.Time `json:"abatementDateTime"`
AchievementStatus syntheaCode `json:"achievementStatus"`
Active bool `json:"active"`
Activity []struct {
Detail *struct {
Code syntheaCode `json:"code"`
Location syntheaReference `json:"location"`
Status string `json:"status"`
} `json:"detail"`
} `json:"activity"`
Address []syntheaAddress `json:"address"`
Addresses []syntheaReference `json:"addresses"`
AuthoredOn time.Time `json:"authoredOn"`
BillablePeriod syntheaRange `json:"billablePeriod"`
BirthDate string `json:"birthDate"`
CareTeam []struct {
Provider syntheaReference `json:"provider"`
Reference string `json:"reference"`
Role syntheaCode `json:"role"`
Sequence int64 `json:"sequence"`
} `json:"careTeam"`
Category []syntheaCode `json:"category"`
Claim syntheaReference `json:"claim"`
Class syntheaCoding `json:"class"`
ClinicalStatus syntheaCode `json:"clinicalStatus"`
Code syntheaCode `json:"code"`
Communication []struct {
Language syntheaCode `json:"language"`
} `json:"communication"`
Component []struct {
Code syntheaCode `json:"code"`
ValueQuantity syntheaCoding `json:"valueQuantity"`
} `json:"component"`
Contained []struct {
Beneficiary syntheaReference `json:"beneficiary"`
ID string `json:"id"`
Intent string `json:"intent"`
Payor []syntheaReference `json:"payor"`
Performer []syntheaReference `json:"performer"`
Requester syntheaReference `json:"requester"`
ResourceType string `json:"resourceType"`
Status string `json:"status"`
Subject syntheaReference `json:"subject"`
Type syntheaCode `json:"type"`
} `json:"contained"`
Created time.Time `json:"created"`
DeceasedDateTime time.Time `json:"deceasedDateTime"`
Description syntheaCode `json:"description"`
Diagnosis []struct {
DiagnosisReference syntheaReference `json:"diagnosisReference"`
Sequence int64 `json:"sequence"`
Type []syntheaCode `json:"type"`
} `json:"diagnosis"`
DosageInstruction []struct {
AsNeededBoolean bool `json:"asNeededBoolean"`
DoseAndRate []struct {
DoseQuantity *struct {
Value float64 `json:"value"`
} `json:"doseQuantity"`
Type syntheaCode `json:"type"`
} `json:"doseAndRate"`
Sequence int64 `json:"sequence"`
Timing *struct {
Repeat *struct {
Frequency int64 `json:"frequency"`
Period float64 `json:"period"`
PeriodUnit string `json:"periodUnit"`
} `json:"repeat"`
} `json:"timing"`
} `json:"dosageInstruction"`
EffectiveDateTime time.Time `json:"effectiveDateTime"`
Encounter syntheaReference `json:"encounter"`
Extension []syntheaExtension `json:"extension"`
Gender string `json:"gender"`
Goal []syntheaReference `json:"goal"`
ID string `json:"id"`
Identifier []struct {
System string `json:"system"`
Type syntheaCode `json:"type"`
Use string `json:"use"`
Value string `json:"value"`
} `json:"identifier"`
Insurance []struct {
Coverage syntheaReference `json:"coverage"`
Focal bool `json:"focal"`
Sequence int64 `json:"sequence"`
} `json:"insurance"`
Insurer syntheaReference `json:"insurer"`
Intent string `json:"intent"`
Issued time.Time `json:"issued"`
Item []struct {
Adjudication []struct {
Amount syntheaCurrency `json:"amount"`
Category syntheaCode `json:"category"`
} `json:"adjudication"`
Category syntheaCode `json:"category"`
DiagnosisSequence []int64 `json:"diagnosisSequence"`
Encounter []syntheaReference `json:"encounter"`
InformationSequence []int64 `json:"informationSequence"`
LocationCodeableConcept syntheaCode `json:"locationCodeableConcept"`
Net syntheaCurrency `json:"net"`
ProcedureSequence []int64 `json:"procedureSequence"`
ProductOrService syntheaCode `json:"productOrService"`
Sequence int64 `json:"sequence"`
ServicedPeriod syntheaRange `json:"servicedPeriod"`
} `json:"item"`
LifecycleStatus string `json:"lifecycleStatus"`
ManagingOrganization []syntheaReference `json:"managingOrganization"`
MaritalStatus syntheaCode `json:"maritalStatus"`
MedicationCodeableConcept syntheaCode `json:"medicationCodeableConcept"`
MultipleBirthBoolean bool `json:"multipleBirthBoolean"`
// Name is kept as raw JSON rather than decoded into a concrete type.
Name rawValue `json:"name"`
NumberOfInstances int64 `json:"numberOfInstances"`
NumberOfSeries int64 `json:"numberOfSeries"`
OccurrenceDateTime time.Time `json:"occurrenceDateTime"`
OnsetDateTime time.Time `json:"onsetDateTime"`
Outcome string `json:"outcome"`
Participant []struct {
Individual syntheaReference `json:"individual"`
Member syntheaReference `json:"member"`
Role []syntheaCode `json:"role"`
} `json:"participant"`
Patient syntheaReference `json:"patient"`
Payment *struct {
Amount syntheaCurrency `json:"amount"`
} `json:"payment"`
PerformedPeriod syntheaRange `json:"performedPeriod"`
Period syntheaRange `json:"period"`
Prescription syntheaReference `json:"prescription"`
PrimarySource bool `json:"primarySource"`
Priority syntheaCode `json:"priority"`
Procedure []struct {
ProcedureReference syntheaReference `json:"procedureReference"`
Sequence int64 `json:"sequence"`
} `json:"procedure"`
Provider syntheaReference `json:"provider"`
ReasonCode []syntheaCode `json:"reasonCode"`
ReasonReference []syntheaReference `json:"reasonReference"`
RecordedDate time.Time `json:"recordedDate"`
Referral syntheaReference `json:"referral"`
Requester syntheaReference `json:"requester"`
ResourceType string `json:"resourceType"`
Result []syntheaReference `json:"result"`
Series []struct {
BodySite syntheaCoding `json:"bodySite"`
Instance []struct {
Number int64 `json:"number"`
SopClass syntheaCoding `json:"sopClass"`
Title string `json:"title"`
UID string `json:"uid"`
} `json:"instance"`
Modality syntheaCoding `json:"modality"`
Number int64 `json:"number"`
NumberOfInstances int64 `json:"numberOfInstances"`
Started string `json:"started"`
UID string `json:"uid"`
} `json:"series"`
ServiceProvider syntheaReference `json:"serviceProvider"`
Started time.Time `json:"started"`
Status string `json:"status"`
Subject syntheaReference `json:"subject"`
SupportingInfo []struct {
Category syntheaCode `json:"category"`
Sequence int64 `json:"sequence"`
ValueReference syntheaReference `json:"valueReference"`
} `json:"supportingInfo"`
Telecom []map[string]string `json:"telecom"`
Text map[string]string `json:"text"`
// Total and Type are kept as raw JSON rather than decoded into a concrete type.
Total rawValue `json:"total"`
Type rawValue `json:"type"`
Use string `json:"use"`
VaccineCode syntheaCode `json:"vaccineCode"`
ValueCodeableConcept syntheaCode `json:"valueCodeableConcept"`
ValueQuantity syntheaCoding `json:"valueQuantity"`
VerificationStatus syntheaCode `json:"verificationStatus"`
} `json:"resource"`
} `json:"entry"`
ResourceType string `json:"resourceType"`
Type string `json:"type"`
}
// syntheaCode is a list of codings accompanied by a text string.
syntheaCode struct {
Coding []syntheaCoding `json:"coding"`
Text string `json:"text"`
}
// syntheaCoding is a single coding; it is also used for quantity-like
// members since it carries Unit and Value fields.
syntheaCoding struct {
Code string `json:"code"`
Display string `json:"display"`
System string `json:"system"`
Unit string `json:"unit"`
Value float64 `json:"value"`
}
// syntheaReference is a reference string with a display string.
syntheaReference struct {
Display string `json:"display"`
Reference string `json:"reference"`
}
// syntheaAddress is an address; it may carry nested extensions.
syntheaAddress struct {
City string `json:"city"`
Country string `json:"country"`
Extension []syntheaExtension `json:"extension"`
Line []string `json:"line"`
PostalCode string `json:"postalCode"`
State string `json:"state"`
}
// syntheaExtension is a URL-identified extension holding one of several
// value members; it is recursive through its own Extension field and
// through syntheaAddress.
syntheaExtension struct {
URL string `json:"url"`
ValueAddress syntheaAddress `json:"valueAddress"`
ValueCode string `json:"valueCode"`
ValueDecimal float64 `json:"valueDecimal"`
ValueString string `json:"valueString"`
Extension []syntheaExtension `json:"extension"`
}
// syntheaRange is a pair of start and end timestamps.
syntheaRange struct {
End time.Time `json:"end"`
Start time.Time `json:"start"`
}
// syntheaCurrency is a numeric value with a currency string.
syntheaCurrency struct {
Currency string `json:"currency"`
Value float64 `json:"value"`
}
)
// twitterRoot and the other twitter* types are the concrete Go types that
// Data associates with the "TwitterStatus" testdata entry.
// Fields tagged with the ",string" option hold integers that are
// represented as JSON strings.
type (
// twitterRoot is the top-level value: a list of statuses plus search metadata.
twitterRoot struct {
Statuses []twitterStatus `json:"statuses"`
SearchMetadata struct {
CompletedIn float64 `json:"completed_in"`
MaxID int64 `json:"max_id"`
MaxIDStr int64 `json:"max_id_str,string"`
NextResults string `json:"next_results"`
Query string `json:"query"`
RefreshURL string `json:"refresh_url"`
Count int `json:"count"`
SinceID int `json:"since_id"`
SinceIDStr int `json:"since_id_str,string"`
} `json:"search_metadata"`
}
// twitterStatus is a single status; it is recursive through
// the pointer to another twitterStatus below.
twitterStatus struct {
Metadata struct {
ResultType string `json:"result_type"`
IsoLanguageCode string `json:"iso_language_code"`
} `json:"metadata"`
CreatedAt string `json:"created_at"`
ID int64 `json:"id"`
IDStr int64 `json:"id_str,string"`
Text string `json:"text"`
Source string `json:"source"`
Truncated bool `json:"truncated"`
InReplyToStatusID int64 `json:"in_reply_to_status_id"`
InReplyToStatusIDStr int64 `json:"in_reply_to_status_id_str,string"`
InReplyToUserID int64 `json:"in_reply_to_user_id"`
InReplyToUserIDStr int64 `json:"in_reply_to_user_id_str,string"`
InReplyToScreenName string `json:"in_reply_to_screen_name"`
User twitterUser `json:"user,omitempty"`
Geo any `json:"geo"`
Coordinates any `json:"coordinates"`
Place any `json:"place"`
Contributors any `json:"contributors"`
// NOTE(review): the Go field name is spelled with three e's while the
// JSON tag is "retweeted_status"; check for other users of the field
// name before renaming it.
RetweeetedStatus *twitterStatus `json:"retweeted_status"`
RetweetCount int `json:"retweet_count"`
FavoriteCount int `json:"favorite_count"`
Entities twitterEntities `json:"entities,omitempty"`
Favorited bool `json:"favorited"`
Retweeted bool `json:"retweeted"`
PossiblySensitive bool `json:"possibly_sensitive"`
Lang string `json:"lang"`
}
// twitterUser describes the user attached to a status.
// Unlike the status types, its IDStr is declared as a plain string.
twitterUser struct {
ID int64 `json:"id"`
IDStr string `json:"id_str"`
Name string `json:"name"`
ScreenName string `json:"screen_name"`
Location string `json:"location"`
Description string `json:"description"`
URL any `json:"url"`
Entities twitterEntities `json:"entities"`
Protected bool `json:"protected"`
FollowersCount int `json:"followers_count"`
FriendsCount int `json:"friends_count"`
ListedCount int `json:"listed_count"`
CreatedAt string `json:"created_at"`
FavouritesCount int `json:"favourites_count"`
UtcOffset int `json:"utc_offset"`
TimeZone string `json:"time_zone"`
GeoEnabled bool `json:"geo_enabled"`
Verified bool `json:"verified"`
StatusesCount int `json:"statuses_count"`
Lang string `json:"lang"`
ContributorsEnabled bool `json:"contributors_enabled"`
IsTranslator bool `json:"is_translator"`
IsTranslationEnabled bool `json:"is_translation_enabled"`
ProfileBackgroundColor string `json:"profile_background_color"`
ProfileBackgroundImageURL string `json:"profile_background_image_url"`
ProfileBackgroundImageURLHTTPS string `json:"profile_background_image_url_https"`
ProfileBackgroundTile bool `json:"profile_background_tile"`
ProfileImageURL string `json:"profile_image_url"`
ProfileImageURLHTTPS string `json:"profile_image_url_https"`
ProfileBannerURL string `json:"profile_banner_url"`
ProfileLinkColor string `json:"profile_link_color"`
ProfileSidebarBorderColor string `json:"profile_sidebar_border_color"`
ProfileSidebarFillColor string `json:"profile_sidebar_fill_color"`
ProfileTextColor string `json:"profile_text_color"`
ProfileUseBackgroundImage bool `json:"profile_use_background_image"`
DefaultProfile bool `json:"default_profile"`
DefaultProfileImage bool `json:"default_profile_image"`
Following bool `json:"following"`
FollowRequestSent bool `json:"follow_request_sent"`
Notifications bool `json:"notifications"`
}
// twitterEntities is the set of entities shared by statuses and users.
twitterEntities struct {
Hashtags []any `json:"hashtags"`
Symbols []any `json:"symbols"`
URL *twitterURL `json:"url"`
URLs []twitterURL `json:"urls"`
UserMentions []struct {
ScreenName string `json:"screen_name"`
Name string `json:"name"`
ID int64 `json:"id"`
IDStr int64 `json:"id_str,string"`
Indices []int `json:"indices"`
} `json:"user_mentions"`
Description struct {
URLs []twitterURL `json:"urls"`
} `json:"description"`
Media []struct {
ID int64 `json:"id"`
IDStr string `json:"id_str"`
Indices []int `json:"indices"`
MediaURL string `json:"media_url"`
MediaURLHTTPS string `json:"media_url_https"`
URL string `json:"url"`
DisplayURL string `json:"display_url"`
ExpandedURL string `json:"expanded_url"`
Type string `json:"type"`
Sizes map[string]struct {
W int `json:"w"`
H int `json:"h"`
Resize string `json:"resize"`
} `json:"sizes"`
SourceStatusID int64 `json:"source_status_id"`
SourceStatusIDStr int64 `json:"source_status_id_str,string"`
} `json:"media"`
}
// twitterURL is a URL entity; it is recursive through its own URLs field.
twitterURL struct {
URL string `json:"url"`
URLs []twitterURL `json:"urls"`
ExpandedURL string `json:"expanded_url"`
DisplayURL string `json:"display_url"`
Indices []int `json:"indices"`
}
)
// rawValue is the raw encoded JSON value.
// It is used for members whose content is stored verbatim
// instead of being decoded into a concrete Go type.
type rawValue []byte

// MarshalJSON returns v unchanged, or the JSON null literal if v is nil.
func (v rawValue) MarshalJSON() ([]byte, error) {
	if v != nil {
		return v, nil
	}
	return []byte("null"), nil
}

// UnmarshalJSON stores a copy of b in *v, reusing the existing backing
// array of *v where capacity allows.
// It reports an error if the receiver is a nil pointer.
func (v *rawValue) UnmarshalJSON(b []byte) error {
	if v != nil {
		*v = append((*v)[:0], b...)
		return nil
	}
	return errors.New("jsontest.rawValue: UnmarshalJSON on nil pointer")
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,629 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsonwire
import (
"io"
"math"
"slices"
"strconv"
"unicode/utf16"
"unicode/utf8"
)
// ValueFlags is a bit set recording properties observed while
// consuming a JSON value. The zero value means that the value is
// both verbatim and canonical.
type ValueFlags uint

const (
	stringNonVerbatim  ValueFlags = 1 << iota // string cannot be naively treated as valid UTF-8
	stringNonCanonical                        // string not formatted according to RFC 8785, section 3.2.2.2.

	// TODO: Track whether a number is a non-integer?
)

// Join sets in f every flag that is set in f2.
func (f *ValueFlags) Join(f2 ValueFlags) {
	*f = *f | f2
}

// IsVerbatim reports whether the non-verbatim flag is unset.
func (f ValueFlags) IsVerbatim() bool {
	return f&stringNonVerbatim != stringNonVerbatim
}

// IsCanonical reports whether the non-canonical flag is unset.
func (f ValueFlags) IsCanonical() bool {
	return f&stringNonCanonical != stringNonCanonical
}
// ConsumeWhitespace consumes leading JSON whitespace per RFC 7159, section 2.
// It returns the number of leading bytes of b that are a space, horizontal
// tab, carriage return, or line feed.
func ConsumeWhitespace(b []byte) (n int) {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	for ; n < len(b); n++ {
		if c := b[n]; c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			break
		}
	}
	return n
}
// ConsumeNull consumes the next JSON null literal per RFC 7159, section 3.
// It returns the length of the literal if b starts with it and 0 otherwise,
// in which case ConsumeLiteral should be used to obtain a detailed error.
func ConsumeNull(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const want = "null"
	if len(b) < len(want) || string(b[:len(want)]) != want {
		return 0
	}
	return len(want)
}
// ConsumeFalse consumes the next JSON false literal per RFC 7159, section 3.
// It returns the length of the literal if b starts with it and 0 otherwise,
// in which case ConsumeLiteral should be used to obtain a detailed error.
func ConsumeFalse(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const want = "false"
	if len(b) < len(want) || string(b[:len(want)]) != want {
		return 0
	}
	return len(want)
}
// ConsumeTrue consumes the next JSON true literal per RFC 7159, section 3.
// It returns the length of the literal if b starts with it and 0 otherwise,
// in which case ConsumeLiteral should be used to obtain a detailed error.
func ConsumeTrue(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const want = "true"
	if len(b) < len(want) || string(b[:len(want)]) != want {
		return 0
	}
	return len(want)
}
// ConsumeLiteral consumes the next JSON literal per RFC 7159, section 3.
// It compares b against lit byte by byte. On the first mismatch it returns
// the offset of the offending byte along with an invalid character error.
// If b is a proper prefix of lit, the input appears truncated and it returns
// len(b) and io.ErrUnexpectedEOF. Otherwise it returns len(lit).
func ConsumeLiteral(b []byte, lit string) (n int, err error) {
	common := min(len(b), len(lit))
	for n = 0; n < common; n++ {
		if want := lit[n]; b[n] != want {
			return n, NewInvalidCharacterError(b[n:], "in literal "+lit+" (expecting "+strconv.QuoteRune(rune(want))+")")
		}
	}
	if common < len(lit) {
		// Everything available matched, but the literal is incomplete.
		return common, io.ErrUnexpectedEOF
	}
	return len(lit), nil
}
// ConsumeSimpleString consumes the next JSON string per RFC 7159, section 7
// but is limited to the grammar for an ASCII string without escape sequences.
// It returns 0 if it is invalid or more complicated than a simple string,
// in which case ConsumeString should be called.
//
// It rejects '<', '>', and '&' for compatibility reasons since these were
// always escaped in the v1 implementation. Thus, if this function reports
// non-zero then we know that the string would be encoded the same way
// under both v1 or v2 escape semantics.
func ConsumeSimpleString(b []byte) (n int) {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if len(b) == 0 || b[0] != '"' {
		return 0
	}
	// Skip over ASCII bytes that never need escaping.
	for n = 1; n < len(b); n++ {
		if c := b[n]; c >= utf8.RuneSelf || escapeASCII[c] != 0 {
			break
		}
	}
	// Only a closing quote at the stopping point makes this a simple string.
	if n < len(b) && b[n] == '"' {
		return n + 1
	}
	return 0
}
// ConsumeString consumes the next JSON string per RFC 7159, section 7.
// If validateUTF8 is false, then this allows the presence of invalid UTF-8
// characters within the string itself.
// It reports the number of bytes consumed and whether an error was encountered.
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
//
// It is ConsumeStringResumable started from the beginning of b.
func ConsumeString(flags *ValueFlags, b []byte, validateUTF8 bool) (n int, err error) {
	const fromStart = 0 // nothing has been consumed by an earlier call
	n, err = ConsumeStringResumable(flags, b, fromStart, validateUTF8)
	return n, err
}
// ConsumeStringResumable is identical to ConsumeString but supports resuming
// from a previous call that returned io.ErrUnexpectedEOF.
//
// If resumeOffset is greater than zero, scanning continues at b[resumeOffset]
// and the leading double quote is treated as already consumed.
// While scanning, flags (which must be non-nil) is updated to record whether
// the string contains escape sequences or invalid UTF-8 (non-verbatim) and
// whether it deviates from the canonical form of RFC 8785, section 3.2.2.2.
//
// When an escape sequence is truncated, the returned offset is the start of
// that escape sequence rather than the end of the input, so that a later
// call can resume from there with the sequence intact.
func ConsumeStringResumable(flags *ValueFlags, b []byte, resumeOffset int, validateUTF8 bool) (n int, err error) {
// Consume the leading double quote.
switch {
case resumeOffset > 0:
n = resumeOffset // already handled the leading quote
case uint(len(b)) == 0:
return n, io.ErrUnexpectedEOF
case b[0] == '"':
n++
default:
return n, NewInvalidCharacterError(b[n:], `at start of string (expecting '"')`)
}
// Consume every character in the string.
for uint(len(b)) > uint(n) {
// Optimize for long sequences of unescaped characters.
noEscape := func(c byte) bool {
return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
}
for uint(len(b)) > uint(n) && noEscape(b[n]) {
n++
}
if uint(len(b)) <= uint(n) {
return n, io.ErrUnexpectedEOF
}
// Check for terminating double quote.
if b[n] == '"' {
n++
return n, nil
}
switch r, rn := utf8.DecodeRune(b[n:]); {
// Handle UTF-8 encoded byte sequence.
// Due to specialized handling of ASCII above, we know that
// all normal sequences at this point must be 2 bytes or larger.
case rn > 1:
n += rn
// Handle escape sequence.
case r == '\\':
flags.Join(stringNonVerbatim)
// Remember where this escape sequence starts so that truncation
// can be reported at a point from which scanning may resume.
resumeOffset = n
if uint(len(b)) < uint(n+2) {
return resumeOffset, io.ErrUnexpectedEOF
}
switch r := b[n+1]; r {
case '/':
// Forward slash is the only character with 3 representations.
// Per RFC 8785, section 3.2.2.2., this must not be escaped.
flags.Join(stringNonCanonical)
n += 2
case '"', '\\', 'b', 'f', 'n', 'r', 't':
n += 2
case 'u':
if uint(len(b)) < uint(n+6) {
// Too short for a full \uXXXX: it is either a truncated
// but plausible prefix, or already invalid.
if hasEscapedUTF16Prefix(b[n:], false) {
return resumeOffset, io.ErrUnexpectedEOF
}
flags.Join(stringNonCanonical)
return n, NewInvalidEscapeSequenceError(b[n:])
}
v1, ok := parseHexUint16(b[n+2 : n+6])
if !ok {
flags.Join(stringNonCanonical)
return n, NewInvalidEscapeSequenceError(b[n : n+6])
}
// Only certain control characters can use the \uFFFF notation
// for canonical formatting (per RFC 8785, section 3.2.2.2.).
switch v1 {
// \uFFFF notation not permitted for these characters.
case '\b', '\f', '\n', '\r', '\t':
flags.Join(stringNonCanonical)
default:
// \uFFFF notation only permitted for control characters.
if v1 >= ' ' {
flags.Join(stringNonCanonical)
} else {
// \uFFFF notation must be lower case.
for _, c := range b[n+2 : n+6] {
if 'A' <= c && c <= 'F' {
flags.Join(stringNonCanonical)
}
}
}
}
n += 6
r := rune(v1)
// Surrogate halves are only checked for a valid pairing
// when UTF-8 validation is requested.
if validateUTF8 && utf16.IsSurrogate(r) {
if uint(len(b)) < uint(n+6) {
if hasEscapedUTF16Prefix(b[n:], true) {
return resumeOffset, io.ErrUnexpectedEOF
}
flags.Join(stringNonCanonical)
return n - 6, NewInvalidEscapeSequenceError(b[n-6:])
} else if v2, ok := parseHexUint16(b[n+2 : n+6]); b[n] != '\\' || b[n+1] != 'u' || !ok {
flags.Join(stringNonCanonical)
return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
flags.Join(stringNonCanonical)
return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6])
} else {
n += 6
}
}
default:
flags.Join(stringNonCanonical)
return n, NewInvalidEscapeSequenceError(b[n : n+2])
}
// Handle invalid UTF-8.
case r == utf8.RuneError:
// An incomplete trailing sequence may simply be truncated input.
if !utf8.FullRune(b[n:]) {
return n, io.ErrUnexpectedEOF
}
flags.Join(stringNonVerbatim | stringNonCanonical)
if validateUTF8 {
return n, ErrInvalidUTF8
}
n++
// Handle invalid control characters.
case r < ' ':
flags.Join(stringNonVerbatim | stringNonCanonical)
return n, NewInvalidCharacterError(b[n:], "in string (expecting non-control character)")
default:
panic("BUG: unhandled character " + QuoteRune(b[n:]))
}
}
return n, io.ErrUnexpectedEOF
}
// AppendUnquote appends the unescaped form of a JSON string in src to dst.
// Any invalid UTF-8 within the string will be replaced with utf8.RuneError,
// and ErrInvalidUTF8 is recorded as the error while processing continues.
// The input must be an entire JSON string with no surrounding whitespace;
// any data after the closing double quote is reported as an error.
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
// The bytes unescaped so far are returned even when an error is reported.
func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) (v []byte, err error) {
dst = slices.Grow(dst, len(src))
// Consume the leading double quote.
// Throughout, n is the read offset into src and i is the start of the
// pending run src[i:n] that has not yet been copied to dst.
var i, n int
switch {
case uint(len(src)) == 0:
return dst, io.ErrUnexpectedEOF
case src[0] == '"':
i, n = 1, 1
default:
return dst, NewInvalidCharacterError(src, `at start of string (expecting '"')`)
}
// Consume every character in the string.
for uint(len(src)) > uint(n) {
// Optimize for long sequences of unescaped characters.
noEscape := func(c byte) bool {
return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
}
for uint(len(src)) > uint(n) && noEscape(src[n]) {
n++
}
if uint(len(src)) <= uint(n) {
dst = append(dst, src[i:n]...)
return dst, io.ErrUnexpectedEOF
}
// Check for terminating double quote.
if src[n] == '"' {
dst = append(dst, src[i:n]...)
n++
if n < len(src) {
err = NewInvalidCharacterError(src[n:], "after string value")
}
return dst, err
}
// NOTE(review): truncateMaxUTF8 is defined elsewhere; presumably it bounds
// the input so the string conversion stays small — confirm.
switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); {
// Handle UTF-8 encoded byte sequence.
// Due to specialized handling of ASCII above, we know that
// all normal sequences at this point must be 2 bytes or larger.
case rn > 1:
n += rn
// Handle escape sequence.
case r == '\\':
// Flush the pending run before emitting the unescaped character.
dst = append(dst, src[i:n]...)
// Handle escape sequence.
if uint(len(src)) < uint(n+2) {
return dst, io.ErrUnexpectedEOF
}
switch r := src[n+1]; r {
case '"', '\\', '/':
dst = append(dst, r)
n += 2
case 'b':
dst = append(dst, '\b')
n += 2
case 'f':
dst = append(dst, '\f')
n += 2
case 'n':
dst = append(dst, '\n')
n += 2
case 'r':
dst = append(dst, '\r')
n += 2
case 't':
dst = append(dst, '\t')
n += 2
case 'u':
if uint(len(src)) < uint(n+6) {
if hasEscapedUTF16Prefix(src[n:], false) {
return dst, io.ErrUnexpectedEOF
}
return dst, NewInvalidEscapeSequenceError(src[n:])
}
v1, ok := parseHexUint16(src[n+2 : n+6])
if !ok {
return dst, NewInvalidEscapeSequenceError(src[n : n+6])
}
n += 6
// Check whether this is a surrogate half.
r := rune(v1)
if utf16.IsSurrogate(r) {
r = utf8.RuneError // assume failure unless the following succeeds
if uint(len(src)) < uint(n+6) {
if hasEscapedUTF16Prefix(src[n:], true) {
return utf8.AppendRune(dst, r), io.ErrUnexpectedEOF
}
err = NewInvalidEscapeSequenceError(src[n-6:])
} else if v2, ok := parseHexUint16(src[n+2 : n+6]); src[n] != '\\' || src[n+1] != 'u' || !ok {
err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
} else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError {
err = NewInvalidEscapeSequenceError(src[n-6 : n+6])
} else {
n += 6
}
}
// On a failed surrogate pairing, r is still utf8.RuneError here and
// err has been recorded; processing continues after the first half.
dst = utf8.AppendRune(dst, r)
default:
return dst, NewInvalidEscapeSequenceError(src[n : n+2])
}
// The escape has been fully emitted; start a new pending run.
i = n
// Handle invalid UTF-8.
case r == utf8.RuneError:
dst = append(dst, src[i:n]...)
if !utf8.FullRuneInString(string(truncateMaxUTF8(src[n:]))) {
return dst, io.ErrUnexpectedEOF
}
// NOTE: An unescaped string may be longer than the escaped string
// because invalid UTF-8 bytes are being replaced.
dst = append(dst, "\uFFFD"...)
n += rn
i = n
err = ErrInvalidUTF8
// Handle invalid control characters.
case r < ' ':
dst = append(dst, src[i:n]...)
return dst, NewInvalidCharacterError(src[n:], "in string (expecting non-control character)")
default:
panic("BUG: unhandled character " + QuoteRune(src[n:]))
}
}
dst = append(dst, src[i:n]...)
return dst, io.ErrUnexpectedEOF
}
// hasEscapedUTF16Prefix reports whether b is possibly
// the truncated prefix of a \uFFFF escape sequence.
// If lowerSurrogateHalf is set, the escaped value must additionally be
// able to fall within ['\uDC00':'\uDFFF'].
// Only the first six bytes of b are examined; an empty b is a valid prefix.
func hasEscapedUTF16Prefix[Bytes ~[]byte | ~string](b Bytes, lowerSurrogateHalf bool) bool {
	isHex := func(c byte) bool {
		return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
	}
	for i := 0; i < min(len(b), 6); i++ {
		c := b[i]
		var ok bool
		switch {
		case i == 0:
			ok = c == '\\'
		case i == 1:
			ok = c == 'u'
		case i == 2 && lowerSurrogateHalf:
			ok = c == 'd' || c == 'D' // first hex digit of ['\uDC00':'\uDFFF']
		case i == 3 && lowerSurrogateHalf:
			ok = ('c' <= c && c <= 'f') || ('C' <= c && c <= 'F') // second hex digit of ['\uDC00':'\uDFFF']
		default:
			ok = isHex(c)
		}
		if !ok {
			return false
		}
	}
	return true
}
// UnquoteMayCopy returns the unescaped form of b.
// If isVerbatim is set, the output is simply a subslice of the input
// with the surrounding quotes removed, so it aliases b.
// Otherwise, the content is unescaped into a newly allocated buffer.
// It assumes the input is valid.
func UnquoteMayCopy(b []byte, isVerbatim bool) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if !isVerbatim {
		// The error is deliberately dropped: the input is assumed valid.
		unquoted, _ := AppendUnquote(nil, b)
		return unquoted
	}
	const quoteLen = len(`"`)
	return b[quoteLen : len(b)-quoteLen]
}
// ConsumeSimpleNumber consumes the next JSON number per RFC 7159, section 6
// but is limited to the grammar for a non-negative integer:
// either a lone "0" or a non-zero digit followed by any number of digits.
// It returns 0 if it is invalid or more complicated than a simple integer
// (that is, followed by a fraction or an exponent),
// in which case ConsumeNumber should be called.
func ConsumeSimpleNumber(b []byte) (n int) {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if len(b) == 0 {
		return 0
	}
	switch c := b[0]; {
	case c == '0':
		n = 1
	case '1' <= c && c <= '9':
		for n = 1; n < len(b) && '0' <= b[n] && b[n] <= '9'; n++ {
		}
	default:
		return 0
	}
	// A following fraction or exponent makes this more than a simple integer.
	if n < len(b) {
		switch b[n] {
		case '.', 'e', 'E':
			return 0
		}
	}
	return n
}
// ConsumeNumberState records how far ConsumeNumberResumable progressed
// through a JSON number so that a later call can resume from that point.
type ConsumeNumberState uint
// The declaration order below is significant: ConsumeNumberResumable
// increments a "withinX" state to reach the "beforeY" state that follows it.
const (
// consumeNumberInit is the zero state: nothing has been consumed yet.
consumeNumberInit ConsumeNumberState = iota
beforeIntegerDigits
withinIntegerDigits
beforeFractionalDigits
withinFractionalDigits
beforeExponentDigits
withinExponentDigits
)
// ConsumeNumber consumes the next JSON number per RFC 7159, section 6.
// It reports the number of bytes consumed and whether an error was encountered.
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
//
// Note that JSON numbers are not self-terminating.
// If the entire input is consumed, then the caller needs to consider whether
// there may be subsequent unread data that may still be part of this number.
//
// It is ConsumeNumberResumable started from the beginning of b,
// with the resulting state discarded.
func ConsumeNumber(b []byte) (n int, err error) {
	consumed, _, consumeErr := ConsumeNumberResumable(b, 0, consumeNumberInit)
	return consumed, consumeErr
}
// ConsumeNumberResumable is identical to ConsumeNumber but supports resuming
// from a previous call that returned io.ErrUnexpectedEOF.
//
// The resumeOffset and state arguments are the offset and state returned by
// the earlier call; pass 0 and consumeNumberInit to start from the beginning.
// It returns the number of bytes consumed along with the state reached.
// When the input is truncated in the middle of a component (after a sign,
// a decimal point, or an exponent marker), the returned offset is the start
// of that component and the state is the matching "before" state.
func ConsumeNumberResumable(b []byte, resumeOffset int, state ConsumeNumberState) (n int, _ ConsumeNumberState, err error) {
// Jump to the right state when resuming from a partial consumption.
n = resumeOffset
if state > consumeNumberInit {
switch state {
case withinIntegerDigits, withinFractionalDigits, withinExponentDigits:
// Consume leading digits.
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
n++
}
if uint(len(b)) <= uint(n) {
return n, state, nil // still within the same state
}
state++ // switches "withinX" to "beforeY" where Y is the state after X
}
switch state {
case beforeIntegerDigits:
goto beforeInteger
case beforeFractionalDigits:
goto beforeFractional
case beforeExponentDigits:
goto beforeExponent
default:
// Reached after the exponent digits have ended: nothing more to consume.
return n, state, nil
}
}
// Consume required integer component (with optional minus sign).
beforeInteger:
resumeOffset = n
if uint(len(b)) > 0 && b[0] == '-' {
n++
}
switch {
case uint(len(b)) <= uint(n):
return resumeOffset, beforeIntegerDigits, io.ErrUnexpectedEOF
case b[n] == '0':
// A leading zero cannot be followed by further integer digits.
n++
state = beforeFractionalDigits
case '1' <= b[n] && b[n] <= '9':
n++
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
n++
}
state = withinIntegerDigits
default:
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
}
// Consume optional fractional component.
beforeFractional:
if uint(len(b)) > uint(n) && b[n] == '.' {
resumeOffset = n
n++
switch {
case uint(len(b)) <= uint(n):
return resumeOffset, beforeFractionalDigits, io.ErrUnexpectedEOF
case '0' <= b[n] && b[n] <= '9':
n++
default:
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
}
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
n++
}
state = withinFractionalDigits
}
// Consume optional exponent component.
beforeExponent:
if uint(len(b)) > uint(n) && (b[n] == 'e' || b[n] == 'E') {
resumeOffset = n
n++
if uint(len(b)) > uint(n) && (b[n] == '-' || b[n] == '+') {
n++
}
switch {
case uint(len(b)) <= uint(n):
return resumeOffset, beforeExponentDigits, io.ErrUnexpectedEOF
case '0' <= b[n] && b[n] <= '9':
n++
default:
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
}
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
n++
}
state = withinExponentDigits
}
return n, state, nil
}
// parseHexUint16 decodes exactly four hexadecimal digits (either case)
// into a uint16. It is akin to strconv.ParseUint, but works on both
// []byte and string inputs and is specialized for base-16.
// It reports false if b is not four bytes long or holds a non-hex character.
// See https://go.dev/issue/42429.
func parseHexUint16[Bytes ~[]byte | ~string](b Bytes) (v uint16, ok bool) {
	if len(b) != 4 {
		return 0, false
	}
	for i := 0; i < len(b); i++ {
		var nibble byte
		switch c := b[i]; {
		case '0' <= c && c <= '9':
			nibble = c - '0'
		case 'a' <= c && c <= 'f':
			nibble = c - 'a' + 10
		case 'A' <= c && c <= 'F':
			nibble = c - 'A' + 10
		default:
			return 0, false
		}
		// Each digit contributes four bits; four digits exactly fill a uint16.
		v = v<<4 | uint16(nibble)
	}
	return v, true
}
// ParseUint parses b as a decimal unsigned integer according to
// a strict subset of the JSON number grammar: only digits are allowed and
// a leading zero is permitted solely for the number "0" itself.
// It returns the value and true if b is valid and fits in a uint64.
// It returns (0, false) on a syntax error and
// (math.MaxUint64, false) on overflow.
func ParseUint(b []byte) (v uint64, ok bool) {
	const unsafeWidth = 20 // len(fmt.Sprint(uint64(math.MaxUint64)))

	// Accumulate the leading run of digits. The value may silently wrap
	// around here; the overflow checks below account for that.
	digits := 0
	for digits < len(b) {
		c := b[digits]
		if c < '0' || '9' < c {
			break
		}
		v = v*10 + uint64(c-'0')
		digits++
	}

	// Syntax errors: no digits, trailing garbage, or a superfluous leading zero.
	if digits == 0 || digits != len(b) {
		return 0, false
	}
	if b[0] == '0' && digits > 1 {
		return 0, false
	}

	// Overflow: anything wider than 20 digits cannot fit. A 20-digit number
	// fits only if it starts with '1' and did not wrap around
	// (a wrapped 20-digit value always ends up below 1e19).
	if digits > unsafeWidth {
		return math.MaxUint64, false
	}
	if digits == unsafeWidth && (b[0] != '1' || v < 1e19) {
		return math.MaxUint64, false
	}
	return v, true
}
// ParseFloat parses a floating point number according to the Go float grammar,
// of which the JSON number grammar is a strict subset.
// The bits argument is forwarded to strconv.ParseFloat, and
// ok reports whether strconv.ParseFloat returned no error.
//
// If the number overflows the finite range of a 32-bit or 64-bit float,
// the result is clamped to ±MaxFloat32 or ±MaxFloat64 respectively
// (with ok still false), since any finite value is always infinitely
// more accurate at representing another finite value than an infinite value.
func ParseFloat(b []byte, bits int) (v float64, ok bool) {
	parsed, err := strconv.ParseFloat(string(b), bits)
	ok = err == nil
	if !math.IsInf(parsed, 0) {
		return parsed, ok
	}

	// Replace the infinity with the largest finite magnitude of the same sign.
	var limit float64
	switch bits {
	case 32:
		limit = math.MaxFloat32
	case 64:
		limit = math.MaxFloat64
	default:
		return parsed, ok // only the 32-bit and 64-bit sizes are clamped
	}
	if math.IsInf(parsed, -1) {
		limit = -limit
	}
	return limit, ok
}

View File

@ -0,0 +1,443 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsonwire
import (
"errors"
"io"
"math"
"reflect"
"strings"
"testing"
)
// TestConsumeWhitespace verifies that ConsumeWhitespace reports the length
// of the leading run of JSON whitespace for a handful of inputs.
func TestConsumeWhitespace(t *testing.T) {
	type testCase struct {
		in   string
		want int
	}
	cases := []testCase{
		{"", 0},
		{"a", 0},
		{" a", 1},
		{" a ", 1},
		{" \n\r\ta", 4},
		{" \n\r\t \n\r\t \n\r\t \n\r\t", 16},
		{"\u00a0", 0}, // non-breaking space is not JSON whitespace
	}
	for _, tc := range cases {
		t.Run("", func(t *testing.T) {
			got := ConsumeWhitespace([]byte(tc.in))
			if got == tc.want {
				return
			}
			t.Errorf("ConsumeWhitespace(%q) = %v, want %v", tc.in, got, tc.want)
		})
	}
}
// TestConsumeLiteral checks both the literal-specific fast paths
// (ConsumeNull, ConsumeFalse, ConsumeTrue) and the general ConsumeLiteral
// against a shared table.
//
// The fast paths return only a length, so they are expected to return
// want on success and 0 whenever ConsumeLiteral would report an error.
func TestConsumeLiteral(t *testing.T) {
	tests := []struct {
		literal string
		in      string
		want    int
		wantErr error
	}{
		{"null", "", 0, io.ErrUnexpectedEOF},
		{"null", "n", 1, io.ErrUnexpectedEOF},
		{"null", "nu", 2, io.ErrUnexpectedEOF},
		{"null", "nul", 3, io.ErrUnexpectedEOF},
		{"null", "null", 4, nil},
		{"null", "nullx", 4, nil},
		{"null", "x", 0, NewInvalidCharacterError("x", "in literal null (expecting 'n')")},
		{"null", "nuxx", 2, NewInvalidCharacterError("x", "in literal null (expecting 'l')")},
		{"false", "", 0, io.ErrUnexpectedEOF},
		{"false", "f", 1, io.ErrUnexpectedEOF},
		{"false", "fa", 2, io.ErrUnexpectedEOF},
		{"false", "fal", 3, io.ErrUnexpectedEOF},
		{"false", "fals", 4, io.ErrUnexpectedEOF},
		{"false", "false", 5, nil},
		{"false", "falsex", 5, nil},
		{"false", "x", 0, NewInvalidCharacterError("x", "in literal false (expecting 'f')")},
		{"false", "falsx", 4, NewInvalidCharacterError("x", "in literal false (expecting 'e')")},
		{"true", "", 0, io.ErrUnexpectedEOF},
		{"true", "t", 1, io.ErrUnexpectedEOF},
		{"true", "tr", 2, io.ErrUnexpectedEOF},
		{"true", "tru", 3, io.ErrUnexpectedEOF},
		{"true", "true", 4, nil},
		{"true", "truex", 4, nil},
		{"true", "x", 0, NewInvalidCharacterError("x", "in literal true (expecting 't')")},
		{"true", "trux", 3, NewInvalidCharacterError("x", "in literal true (expecting 'e')")},
	}
	for _, tt := range tests {
		t.Run("", func(t *testing.T) {
			var got int
			switch tt.literal {
			case "null":
				got = ConsumeNull([]byte(tt.in))
			case "false":
				got = ConsumeFalse([]byte(tt.in))
			case "true":
				got = ConsumeTrue([]byte(tt.in))
			default:
				// A malformed table entry makes every later check meaningless.
				t.Fatalf("invalid literal: %v", tt.literal)
			}

			// Name of the fast path under test, e.g. "ConsumeNull".
			// The literals are plain ASCII, so upper-casing the first byte
			// is enough; this avoids the deprecated strings.Title.
			name := "Consume" + strings.ToUpper(tt.literal[:1]) + tt.literal[1:]
			switch {
			case tt.wantErr == nil && got != tt.want:
				t.Errorf("%v(%q) = %v, want %v", name, tt.in, got, tt.want)
			case tt.wantErr != nil && got != 0:
				t.Errorf("%v(%q) = %v, want %v", name, tt.in, got, 0)
			}

			got, gotErr := ConsumeLiteral([]byte(tt.in), tt.literal)
			if got != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) {
				t.Errorf("ConsumeLiteral(%q, %q) = (%v, %v), want (%v, %v)", tt.in, tt.literal, got, gotErr, tt.want, tt.wantErr)
			}
		})
	}
}
// TestConsumeString checks ConsumeSimpleString, ConsumeString (with and
// without UTF-8 validation), and AppendUnquote against a shared table.
//
// The errPrev sentinel in the table means "same as the previous error column":
// wantErrUTF8 falls back to wantErr, and wantErrUnquote falls back to
// the (resolved) wantErrUTF8.
func TestConsumeString(t *testing.T) {
	var errPrev = errors.New("same as previous error")
	tests := []struct {
		in             string
		simple         bool
		want           int
		wantUTF8       int // consumed bytes if validateUTF8 is specified
		wantFlags      ValueFlags
		wantUnquote    string
		wantErr        error
		wantErrUTF8    error // error if validateUTF8 is specified
		wantErrUnquote error
	}{
		{``, false, 0, 0, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"`, false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`""`, true, 2, 2, 0, "", nil, nil, nil},
		{`""x`, true, 2, 2, 0, "", nil, nil, NewInvalidCharacterError("x", "after string value")},
		{` ""x`, false, 0, 0, 0, "", NewInvalidCharacterError(" ", "at start of string (expecting '\"')"), errPrev, errPrev},
		{`"hello`, false, 6, 6, 0, "hello", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"hello"`, true, 7, 7, 0, "hello", nil, nil, nil},
		{"\"\x00\"", false, 1, 1, stringNonVerbatim | stringNonCanonical, "", NewInvalidCharacterError("\x00", "in string (expecting non-control character)"), errPrev, errPrev},
		{`"\u0000"`, false, 8, 8, stringNonVerbatim, "\x00", nil, nil, nil},
		{"\"\x1f\"", false, 1, 1, stringNonVerbatim | stringNonCanonical, "", NewInvalidCharacterError("\x1f", "in string (expecting non-control character)"), errPrev, errPrev},
		{`"\u001f"`, false, 8, 8, stringNonVerbatim, "\x1f", nil, nil, nil},
		{`"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"`, true, 54, 54, 0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", nil, nil, nil},
		{"\" !#$%'()*+,-./0123456789:;=?@[]^_`{|}~\x7f\"", true, 41, 41, 0, " !#$%'()*+,-./0123456789:;=?@[]^_`{|}~\x7f", nil, nil, nil},
		{`"&"`, false, 3, 3, 0, "&", nil, nil, nil},
		{`"<"`, false, 3, 3, 0, "<", nil, nil, nil},
		{`">"`, false, 3, 3, 0, ">", nil, nil, nil},
		{"\"x\x80\"", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd", nil, ErrInvalidUTF8, errPrev},
		{"\"x\xff\"", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd", nil, ErrInvalidUTF8, errPrev},
		{"\"x\xc0", false, 3, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd", io.ErrUnexpectedEOF, ErrInvalidUTF8, io.ErrUnexpectedEOF},
		{"\"x\xc0\x80\"", false, 5, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd", nil, ErrInvalidUTF8, errPrev},
		{"\"x\xe0", false, 2, 2, 0, "x", io.ErrUnexpectedEOF, errPrev, errPrev},
		{"\"x\xe0\x80", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd", io.ErrUnexpectedEOF, ErrInvalidUTF8, io.ErrUnexpectedEOF},
		{"\"x\xe0\x80\x80\"", false, 6, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd\ufffd", nil, ErrInvalidUTF8, errPrev},
		{"\"x\xf0", false, 2, 2, 0, "x", io.ErrUnexpectedEOF, errPrev, errPrev},
		{"\"x\xf0\x80", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd", io.ErrUnexpectedEOF, ErrInvalidUTF8, io.ErrUnexpectedEOF},
		{"\"x\xf0\x80\x80", false, 5, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd\ufffd", io.ErrUnexpectedEOF, ErrInvalidUTF8, io.ErrUnexpectedEOF},
		{"\"x\xf0\x80\x80\x80\"", false, 7, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd\ufffd\ufffd", nil, ErrInvalidUTF8, errPrev},
		{"\"x\xed\xba\xad\"", false, 6, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd\ufffd", nil, ErrInvalidUTF8, errPrev},
		{"\"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602\"", false, 25, 25, 0, "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", nil, nil, nil},
		{`"¢"`[:2], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"¢"`[:3], false, 3, 3, 0, "¢", io.ErrUnexpectedEOF, errPrev, errPrev}, // missing terminating quote
		{`"¢"`[:4], false, 4, 4, 0, "¢", nil, nil, nil},
		{`"€"`[:2], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"€"`[:3], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"€"`[:4], false, 4, 4, 0, "€", io.ErrUnexpectedEOF, errPrev, errPrev}, // missing terminating quote
		{`"€"`[:5], false, 5, 5, 0, "€", nil, nil, nil},
		{`"𐍈"`[:2], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"𐍈"`[:3], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"𐍈"`[:4], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"𐍈"`[:5], false, 5, 5, 0, "𐍈", io.ErrUnexpectedEOF, errPrev, errPrev}, // missing terminating quote
		{`"𐍈"`[:6], false, 6, 6, 0, "𐍈", nil, nil, nil},
		{`"x\`, false, 2, 2, stringNonVerbatim, "x", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"x\"`, false, 4, 4, stringNonVerbatim, "x\"", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"x\x"`, false, 2, 2, stringNonVerbatim | stringNonCanonical, "x", NewInvalidEscapeSequenceError(`\x`), errPrev, errPrev},
		{`"\"\\\b\f\n\r\t"`, false, 16, 16, stringNonVerbatim, "\"\\\b\f\n\r\t", nil, nil, nil},
		{`"/"`, true, 3, 3, 0, "/", nil, nil, nil},
		{`"\/"`, false, 4, 4, stringNonVerbatim | stringNonCanonical, "/", nil, nil, nil},
		{`"\u002f"`, false, 8, 8, stringNonVerbatim | stringNonCanonical, "/", nil, nil, nil},
		{`"\u`, false, 1, 1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"\uf`, false, 1, 1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"\uff`, false, 1, 1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"\ufff`, false, 1, 1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"\ufffd`, false, 7, 7, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"\ufffd"`, false, 8, 8, stringNonVerbatim | stringNonCanonical, "\ufffd", nil, nil, nil},
		{`"\uABCD"`, false, 8, 8, stringNonVerbatim | stringNonCanonical, "\uabcd", nil, nil, nil},
		{`"\uefX0"`, false, 1, 1, stringNonVerbatim | stringNonCanonical, "", NewInvalidEscapeSequenceError(`\uefX0`), errPrev, errPrev},
		{`"\uDEAD`, false, 7, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"\uDEAD"`, false, 8, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", nil, NewInvalidEscapeSequenceError(`\uDEAD"`), errPrev},
		{`"\uDEAD______"`, false, 14, 1, stringNonVerbatim | stringNonCanonical, "\ufffd______", nil, NewInvalidEscapeSequenceError(`\uDEAD______`), errPrev},
		{`"\uDEAD\uXXXX"`, false, 7, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", NewInvalidEscapeSequenceError(`\uXXXX`), NewInvalidEscapeSequenceError(`\uDEAD\uXXXX`), NewInvalidEscapeSequenceError(`\uXXXX`)},
		{`"\uDEAD\uBEEF"`, false, 14, 1, stringNonVerbatim | stringNonCanonical, "\ufffd\ubeef", nil, NewInvalidEscapeSequenceError(`\uDEAD\uBEEF`), errPrev},
		{`"\uD800\udea`, false, 7, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, errPrev, errPrev},
		{`"\uD800\udb`, false, 7, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, NewInvalidEscapeSequenceError(`\uD800\udb`), io.ErrUnexpectedEOF},
		{`"\uD800\udead"`, false, 14, 14, stringNonVerbatim | stringNonCanonical, "\U000102ad", nil, nil, nil},
		{`"\u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009"`, false, 50, 50, stringNonVerbatim | stringNonCanonical, "\"\\/\b\f\n\r\t", nil, nil, nil},
		{`"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\ud83d\ude02"`, false, 56, 56, stringNonVerbatim | stringNonCanonical, "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", nil, nil, nil},
	}
	for _, tt := range tests {
		t.Run("", func(t *testing.T) {
			// Resolve the "same as previous" placeholders, left to right.
			if tt.wantErrUTF8 == errPrev {
				tt.wantErrUTF8 = tt.wantErr
			}
			if tt.wantErrUnquote == errPrev {
				tt.wantErrUnquote = tt.wantErrUTF8
			}

			// The fast path must either match the full parser or give up with 0.
			switch got := ConsumeSimpleString([]byte(tt.in)); {
			case tt.simple && got != tt.want:
				t.Errorf("ConsumeSimpleString(%q) = %v, want %v", tt.in, got, tt.want)
			case !tt.simple && got != 0:
				t.Errorf("ConsumeSimpleString(%q) = %v, want %v", tt.in, got, 0)
			}

			// Without UTF-8 validation. Flags are only checked for this call.
			var gotFlags ValueFlags
			got, gotErr := ConsumeString(&gotFlags, []byte(tt.in), false)
			if gotFlags != tt.wantFlags {
				t.Errorf("ConsumeString(%q, false) flags = %v, want %v", tt.in, gotFlags, tt.wantFlags)
			}
			if got != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) {
				t.Errorf("ConsumeString(%q, false) = (%v, %v), want (%v, %v)", tt.in, got, gotErr, tt.want, tt.wantErr)
			}

			// With UTF-8 validation.
			got, gotErr = ConsumeString(&gotFlags, []byte(tt.in), true)
			if got != tt.wantUTF8 || !reflect.DeepEqual(gotErr, tt.wantErrUTF8) {
				t.Errorf("ConsumeString(%q, true) = (%v, %v), want (%v, %v)", tt.in, got, gotErr, tt.wantUTF8, tt.wantErrUTF8)
			}

			// AppendUnquote is given the whole input, so report the whole input.
			gotUnquote, gotErr := AppendUnquote(nil, tt.in)
			if string(gotUnquote) != tt.wantUnquote || !reflect.DeepEqual(gotErr, tt.wantErrUnquote) {
				t.Errorf("AppendUnquote(nil, %q) = (%q, %v), want (%q, %v)", tt.in, gotUnquote, gotErr, tt.wantUnquote, tt.wantErrUnquote)
			}
		})
	}
}
// TestConsumeNumber checks ConsumeSimpleNumber and ConsumeNumber against
// a shared table.
//
// The simple field marks inputs for which ConsumeSimpleNumber is expected
// to return want; for every other input it must return 0.
// ConsumeNumber is always expected to return (want, wantErr).
func TestConsumeNumber(t *testing.T) {
tests := []struct {
in string
simple bool
want int
wantErr error
}{
{"", false, 0, io.ErrUnexpectedEOF},
{`"NaN"`, false, 0, NewInvalidCharacterError("\"", "in number (expecting digit)")},
{`"Infinity"`, false, 0, NewInvalidCharacterError("\"", "in number (expecting digit)")},
{`"-Infinity"`, false, 0, NewInvalidCharacterError("\"", "in number (expecting digit)")},
{".0", false, 0, NewInvalidCharacterError(".", "in number (expecting digit)")},
{"0", true, 1, nil},
{"-0", false, 2, nil},
{"+0", false, 0, NewInvalidCharacterError("+", "in number (expecting digit)")},
{"1", true, 1, nil},
{"-1", false, 2, nil},
{"00", true, 1, nil},
{"-00", false, 2, nil},
{"01", true, 1, nil},
{"-01", false, 2, nil},
{"0i", true, 1, nil},
{"-0i", false, 2, nil},
{"0f", true, 1, nil},
{"-0f", false, 2, nil},
{"9876543210", true, 10, nil},
{"-9876543210", false, 11, nil},
{"9876543210x", true, 10, nil},
{"-9876543210x", false, 11, nil},
{" 9876543210", true, 0, NewInvalidCharacterError(" ", "in number (expecting digit)")},
{"- 9876543210", false, 1, NewInvalidCharacterError(" ", "in number (expecting digit)")},
{strings.Repeat("9876543210", 1000), true, 10000, nil},
{"-" + strings.Repeat("9876543210", 1000), false, 1 + 10000, nil},
{"0.", false, 1, io.ErrUnexpectedEOF},
{"-0.", false, 2, io.ErrUnexpectedEOF},
{"0e", false, 1, io.ErrUnexpectedEOF},
{"-0e", false, 2, io.ErrUnexpectedEOF},
{"0E", false, 1, io.ErrUnexpectedEOF},
{"-0E", false, 2, io.ErrUnexpectedEOF},
{"0.0", false, 3, nil},
{"-0.0", false, 4, nil},
{"0e0", false, 3, nil},
{"-0e0", false, 4, nil},
{"0E0", false, 3, nil},
{"-0E0", false, 4, nil},
{"0.0123456789", false, 12, nil},
{"-0.0123456789", false, 13, nil},
{"1.f", false, 2, NewInvalidCharacterError("f", "in number (expecting digit)")},
{"-1.f", false, 3, NewInvalidCharacterError("f", "in number (expecting digit)")},
{"1.e", false, 2, NewInvalidCharacterError("e", "in number (expecting digit)")},
{"-1.e", false, 3, NewInvalidCharacterError("e", "in number (expecting digit)")},
{"1e0", false, 3, nil},
{"-1e0", false, 4, nil},
{"1E0", false, 3, nil},
{"-1E0", false, 4, nil},
{"1Ex", false, 2, NewInvalidCharacterError("x", "in number (expecting digit)")},
{"-1Ex", false, 3, NewInvalidCharacterError("x", "in number (expecting digit)")},
{"1e-0", false, 4, nil},
{"-1e-0", false, 5, nil},
{"1e+0", false, 4, nil},
{"-1e+0", false, 5, nil},
{"1E-0", false, 4, nil},
{"-1E-0", false, 5, nil},
{"1E+0", false, 4, nil},
{"-1E+0", false, 5, nil},
{"1E+00500", false, 8, nil},
{"-1E+00500", false, 9, nil},
{"1E+00500x", false, 8, nil},
{"-1E+00500x", false, 9, nil},
{"9876543210.0123456789e+01234589x", false, 31, nil},
{"-9876543210.0123456789e+01234589x", false, 32, nil},
{"1_000_000", true, 1, nil},
{"0x12ef", true, 1, nil},
{"0x1p-2", true, 1, nil},
}
for _, tt := range tests {
t.Run("", func(t *testing.T) {
// The fast path either agrees with the expected length or bails out with 0.
switch got := ConsumeSimpleNumber([]byte(tt.in)); {
case tt.simple && got != tt.want:
t.Errorf("ConsumeSimpleNumber(%q) = %v, want %v", tt.in, got, tt.want)
case !tt.simple && got != 0:
t.Errorf("ConsumeSimpleNumber(%q) = %v, want %v", tt.in, got, 0)
}
// The full parser must report both the consumed length and the exact error.
got, gotErr := ConsumeNumber([]byte(tt.in))
if got != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) {
t.Errorf("ConsumeNumber(%q) = (%v, %v), want (%v, %v)", tt.in, got, gotErr, tt.want, tt.wantErr)
}
})
}
}
// TestParseHexUint16 verifies that parseHexUint16 accepts exactly four
// hexadecimal digits (in either case) and rejects everything else.
func TestParseHexUint16(t *testing.T) {
	type testCase struct {
		in     string
		want   uint16
		wantOk bool
	}
	cases := []testCase{
		{"", 0, false},
		{"a", 0, false},
		{"ab", 0, false},
		{"abc", 0, false},
		{"abcd", 0xabcd, true},
		{"abcde", 0, false},
		{"9eA1", 0x9ea1, true},
		{"gggg", 0, false},
		{"0000", 0x0000, true},
		{"1234", 0x1234, true},
	}
	for _, tc := range cases {
		t.Run("", func(t *testing.T) {
			val, valid := parseHexUint16([]byte(tc.in))
			if val == tc.want && valid == tc.wantOk {
				return
			}
			t.Errorf("parseHexUint16(%q) = (0x%04x, %v), want (0x%04x, %v)", tc.in, val, valid, tc.want, tc.wantOk)
		})
	}
}
// TestParseUint checks ParseUint against a table covering syntax errors
// (which yield 0), the exact uint64 boundary, and a sweep of 20-digit and
// longer overflowing inputs (which yield math.MaxUint64 with ok == false).
func TestParseUint(t *testing.T) {
tests := []struct {
in string
want uint64
wantOk bool
}{
{"", 0, false},
{"0", 0, true},
{"1", 1, true},
{"-1", 0, false},
{"1f", 0, false},
{"00", 0, false},
{"01", 0, false},
{"10", 10, true},
{"10.9", 0, false},
{" 10", 0, false},
{"10 ", 0, false},
{"123456789", 123456789, true},
{"123456789d", 0, false},
{"18446744073709551614", math.MaxUint64 - 1, true},
{"18446744073709551615", math.MaxUint64, true},
{"18446744073709551616", math.MaxUint64, false},
{"18446744073709551620", math.MaxUint64, false},
{"18446744073709551700", math.MaxUint64, false},
{"18446744073709552000", math.MaxUint64, false},
{"18446744073709560000", math.MaxUint64, false},
{"18446744073709600000", math.MaxUint64, false},
{"18446744073710000000", math.MaxUint64, false},
{"18446744073800000000", math.MaxUint64, false},
{"18446744074000000000", math.MaxUint64, false},
{"18446744080000000000", math.MaxUint64, false},
{"18446744100000000000", math.MaxUint64, false},
{"18446745000000000000", math.MaxUint64, false},
{"18446750000000000000", math.MaxUint64, false},
{"18446800000000000000", math.MaxUint64, false},
{"18447000000000000000", math.MaxUint64, false},
{"18450000000000000000", math.MaxUint64, false},
{"18500000000000000000", math.MaxUint64, false},
{"19000000000000000000", math.MaxUint64, false},
{"19999999999999999999", math.MaxUint64, false},
{"20000000000000000000", math.MaxUint64, false},
{"100000000000000000000", math.MaxUint64, false},
{"99999999999999999999999999999999", math.MaxUint64, false},
{"99999999999999999999999999999999f", 0, false},
}
for _, tt := range tests {
t.Run("", func(t *testing.T) {
got, gotOk := ParseUint([]byte(tt.in))
if got != tt.want || gotOk != tt.wantOk {
t.Errorf("ParseUint(%q) = (%v, %v), want (%v, %v)", tt.in, got, gotOk, tt.want, tt.wantOk)
}
})
}
}
// TestParseFloat verifies ParseFloat at both the 32-bit and 64-bit sizes:
// rounding around the exact-integer limits of each size, and clamping of
// overflowing inputs to the largest finite value (reported with ok == false).
func TestParseFloat(t *testing.T) {
	type testCase struct {
		in     string
		want32 float64
		want64 float64
		wantOk bool
	}
	cases := []testCase{
		{"0", 0, 0, true},
		{"-1", -1, -1, true},
		{"1", 1, 1, true},
		{"-16777215", -16777215, -16777215, true},                         // -(1<<24 - 1)
		{"16777215", 16777215, 16777215, true},                            // +(1<<24 - 1)
		{"-16777216", -16777216, -16777216, true},                         // -(1<<24)
		{"16777216", 16777216, 16777216, true},                            // +(1<<24)
		{"-16777217", -16777216, -16777217, true},                         // -(1<<24 + 1)
		{"16777217", 16777216, 16777217, true},                            // +(1<<24 + 1)
		{"-9007199254740991", -9007199254740992, -9007199254740991, true}, // -(1<<53 - 1)
		{"9007199254740991", 9007199254740992, 9007199254740991, true},    // +(1<<53 - 1)
		{"-9007199254740992", -9007199254740992, -9007199254740992, true}, // -(1<<53)
		{"9007199254740992", 9007199254740992, 9007199254740992, true},    // +(1<<53)
		{"-9007199254740993", -9007199254740992, -9007199254740992, true}, // -(1<<53 + 1)
		{"9007199254740993", 9007199254740992, 9007199254740992, true},    // +(1<<53 + 1)
		{"-1e1000", -math.MaxFloat32, -math.MaxFloat64, false},
		{"1e1000", +math.MaxFloat32, +math.MaxFloat64, false},
	}
	for _, tc := range cases {
		t.Run("", func(t *testing.T) {
			input := []byte(tc.in)

			narrow, narrowOk := ParseFloat(input, 32)
			if narrow != tc.want32 || narrowOk != tc.wantOk {
				t.Errorf("ParseFloat(%q, 32) = (%v, %v), want (%v, %v)", tc.in, narrow, narrowOk, tc.want32, tc.wantOk)
			}

			wide, wideOk := ParseFloat(input, 64)
			if wide != tc.want64 || wideOk != tc.wantOk {
				t.Errorf("ParseFloat(%q, 64) = (%v, %v), want (%v, %v)", tc.in, wide, wideOk, tc.want64, tc.wantOk)
			}
		})
	}
}

View File

@ -0,0 +1,294 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsonwire
import (
"math"
"slices"
"strconv"
"unicode/utf16"
"unicode/utf8"
"encoding/json/internal/jsonflags"
)
// escapeASCII is a lookup table indexed by ASCII byte value (0x00 through
// 0x7f, laid out as 8 rows of 16 entries) where a non-zero entry means
// the character may need to be escaped in a JSON string.
// It conservatively assumes EscapeForHTML, so '<', '>', and '&' are marked
// even though they are only escaped when that flag is set.
var escapeASCII = [...]uint8{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // escape control characters
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // escape control characters
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // escape '"' and '&'
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, // escape '<' and '>'
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // escape '\\'
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}
// NeedEscape reports whether any character of src might need escaping
// when written as a JSON string.
// It is conservative in that it assumes both EscapeForHTML and EscapeForJS.
// It also reports true for any input that decodes to utf8.RuneError,
// which covers invalid UTF-8 (and a literal U+FFFD as well).
func NeedEscape[Bytes ~[]byte | ~string](src Bytes) bool {
	for pos := 0; uint(pos) < uint(len(src)); {
		c := src[pos]
		if c < utf8.RuneSelf {
			// Single-byte ASCII: consult the lookup table.
			if escapeASCII[c] != 0 {
				return true
			}
			pos++
			continue
		}
		// Multi-byte sequence: decode one rune from a bounded prefix.
		r, size := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[pos:])))
		switch r {
		case utf8.RuneError, '\u2028', '\u2029':
			return true
		}
		pos += size
	}
	return false
}
// AppendQuote appends src to dst as a JSON string per RFC 7159, section 7.
//
// It takes in flags and respects the following:
//   - EscapeForHTML escapes '<', '>', and '&'.
//   - EscapeForJS escapes '\u2028' and '\u2029'.
//   - EscapeInvalidUTF8 writes each invalid byte as the six-character
//     escape sequence `\ufffd` instead of the literal replacement character.
//   - AllowInvalidUTF8 avoids reporting an error for invalid UTF-8.
//
// Regardless of whether AllowInvalidUTF8 is specified,
// invalid bytes are replaced with the Unicode replacement character ('\ufffd').
// If no escape flags are set, then the shortest representable form is used,
// which is also the canonical form for strings (RFC 8785, section 3.2.2.2).
//
// If src contains invalid UTF-8 and AllowInvalidUTF8 is not set,
// it returns ErrInvalidUTF8 together with the fully appended output.
func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, flags *jsonflags.Flags) ([]byte, error) {
// i is the start of the pending run of src that can be copied verbatim;
// n is the current read position. src[i:n] is flushed to dst
// whenever something needs to be written in escaped form.
var i, n int
var hasInvalidUTF8 bool
// Reserve room for the common case where nothing needs escaping.
dst = slices.Grow(dst, len(`"`)+len(src)+len(`"`))
dst = append(dst, '"')
for uint(len(src)) > uint(n) {
if c := src[n]; c < utf8.RuneSelf {
// Handle single-byte ASCII.
n++
if escapeASCII[c] == 0 {
continue // no escaping possibly needed
}
// Handle escaping of single-byte ASCII.
// The table marks '<', '>', and '&' unconditionally,
// but they are only escaped under EscapeForHTML.
if !(c == '<' || c == '>' || c == '&') || flags.Get(jsonflags.EscapeForHTML) {
dst = append(dst, src[i:n-1]...)
dst = appendEscapedASCII(dst, c)
i = n
}
} else {
// Handle multi-byte Unicode.
r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:])))
n += rn
if r != utf8.RuneError && r != '\u2028' && r != '\u2029' {
continue // no escaping possibly needed
}
// Handle escaping of multi-byte Unicode.
switch {
case isInvalidUTF8(r, rn):
hasInvalidUTF8 = true
dst = append(dst, src[i:n-rn]...)
if flags.Get(jsonflags.EscapeInvalidUTF8) {
dst = append(dst, `\ufffd`...)
} else {
dst = append(dst, "\ufffd"...)
}
i = n
case (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS):
dst = append(dst, src[i:n-rn]...)
dst = appendEscapedUnicode(dst, r)
i = n
}
}
}
// Flush whatever remains of the verbatim run.
dst = append(dst, src[i:n]...)
dst = append(dst, '"')
if hasInvalidUTF8 && !flags.Get(jsonflags.AllowInvalidUTF8) {
return dst, ErrInvalidUTF8
}
return dst, nil
}
// appendEscapedASCII appends the escaped form of the ASCII character c to dst.
// The quote, the backslash, and the control characters '\b', '\f', '\n',
// '\r', and '\t' use their two-character escape; every other character is
// written as a \uXXXX sequence.
func appendEscapedASCII(dst []byte, c byte) []byte {
	var short byte // character that follows the backslash in the short form
	switch c {
	case '"', '\\':
		short = c
	case '\b':
		short = 'b'
	case '\f':
		short = 'f'
	case '\n':
		short = 'n'
	case '\r':
		short = 'r'
	case '\t':
		short = 't'
	default:
		return appendEscapedUTF16(dst, uint16(c))
	}
	return append(dst, '\\', short)
}
// appendEscapedUnicode appends r to dst as one or two \uXXXX sequences.
// When utf16.EncodeRune yields a surrogate pair for r, both halves are
// written; otherwise r is written as a single sequence
// (truncated to its low 16 bits).
func appendEscapedUnicode(dst []byte, r rune) []byte {
	const replacement = '\ufffd' // what utf16.EncodeRune returns when no pair applies
	hi, lo := utf16.EncodeRune(r)
	if hi == replacement || lo == replacement {
		return appendEscapedUTF16(dst, uint16(r))
	}
	dst = appendEscapedUTF16(dst, uint16(hi))
	return appendEscapedUTF16(dst, uint16(lo))
}
// appendEscapedUTF16 appends the six-byte sequence \uXXXX to dst,
// where XXXX is x written as four lowercase hexadecimal digits.
func appendEscapedUTF16(dst []byte, x uint16) []byte {
	const digits = "0123456789abcdef"
	return append(dst,
		'\\', 'u',
		digits[x>>12], // a uint16 shifted by 12 is already in [0, 15]
		digits[(x>>8)&0xf],
		digits[(x>>4)&0xf],
		digits[x&0xf],
	)
}
// ReformatString consumes a JSON string from src and appends it to dst,
// reformatting it if necessary according to the specified flags.
// It returns the appended output and the number of consumed input bytes.
//
// UTF-8 is validated while consuming unless AllowInvalidUTF8 is set.
// If consuming the string fails, dst is returned unmodified
// along with the offset and error reported by ConsumeString.
func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
// TODO: Should this update ValueFlags as input?
var valFlags ValueFlags
n, err := ConsumeString(&valFlags, src, !flags.Get(jsonflags.AllowInvalidUTF8))
if err != nil {
return dst, n, err
}
// If the output requires no special escapes, and the input
// is already in canonical form or should be preserved verbatim,
// then directly copy the input to the output.
if !flags.Get(jsonflags.AnyEscape) &&
(valFlags.IsCanonical() || flags.Get(jsonflags.PreserveRawStrings)) {
dst = append(dst, src[:n]...) // copy the string verbatim
return dst, n, nil
}
// Under [jsonflags.PreserveRawStrings], any pre-escaped sequences
// remain escaped, however we still need to respect the
// [jsonflags.EscapeForHTML] and [jsonflags.EscapeForJS] options.
if flags.Get(jsonflags.PreserveRawStrings) {
// Scan the raw (still quoted and escaped) input, copying it through
// in runs and substituting escapes only for the affected characters.
var i, lastAppendIndex int
for i < n {
if c := src[i]; c < utf8.RuneSelf {
if (c == '<' || c == '>' || c == '&') && flags.Get(jsonflags.EscapeForHTML) {
dst = append(dst, src[lastAppendIndex:i]...)
dst = appendEscapedASCII(dst, c)
lastAppendIndex = i + 1
}
i++
} else {
r, rn := utf8.DecodeRune(truncateMaxUTF8(src[i:]))
if (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS) {
dst = append(dst, src[lastAppendIndex:i]...)
dst = appendEscapedUnicode(dst, r)
lastAppendIndex = i + rn
}
i += rn
}
}
return append(dst, src[lastAppendIndex:n]...), n, nil
}
// The input contains characters that might need escaping,
// unnecessary escape sequences, or invalid UTF-8.
// Perform a round-trip unquote and quote to properly reformat
// these sequences according to the current flags.
// Errors from both steps are deliberately discarded here
// (presumably because ConsumeString above already accepted the input).
b, _ := AppendUnquote(nil, src[:n])
dst, _ = AppendQuote(dst, b, flags)
return dst, n, nil
}
// AppendFloat appends src to dst as a JSON number per RFC 7159, section 6,
// formatted similar to the ES6 number-to-string conversion.
// See https://go.dev/issue/14135.
//
// For 64-bit floating-point numbers, the output is identical to
// ECMA-262, 6th edition, section 7.1.12.1 and RFC 8785, section 3.2.2.3,
// except that -0 is formatted as -0 instead of just 0.
//
// For 32-bit floating-point numbers, src is first rounded to float32 and
// a 32-bit equivalent of the algorithm is used.
// Note that ECMA-262 specifies no algorithm for 32-bit numbers.
func AppendFloat(dst []byte, src float64, bits int) []byte {
	if bits == 32 {
		src = float64(float32(src))
	}

	// Use exponent notation for non-zero magnitudes below 1e-6 or
	// at least 1e21, compared at the precision selected by bits.
	format := byte('f')
	if mag := math.Abs(src); mag != 0 {
		var exponential bool
		switch bits {
		case 64:
			exponential = mag < 1e-6 || mag >= 1e21
		case 32:
			exponential = float32(mag) < 1e-6 || float32(mag) >= 1e21
		}
		if exponential {
			format = 'e'
		}
	}

	dst = strconv.AppendFloat(dst, src, format, -1, bits)
	if format != 'e' {
		return dst
	}

	// Clean up e-09 to e-9.
	end := len(dst)
	if end >= 4 && dst[end-4] == 'e' && dst[end-3] == '-' && dst[end-2] == '0' {
		dst[end-2] = dst[end-1]
		dst = dst[:end-1]
	}
	return dst
}
// ReformatNumber consumes a JSON number from src and appends it to dst,
// canonicalizing it if specified.
// It returns the appended output and the number of consumed input bytes.
//
// Unless CanonicalizeNumbers is set, the number is copied verbatim.
// Otherwise, numbers with a fraction or exponent are canonicalized only
// under CanonicalizeRawFloats and plain integers only under
// CanonicalizeRawInts, except that "-0" is always rewritten as "0".
// If consuming the number fails, dst is returned unmodified
// along with the offset and error reported by ConsumeNumber.
func ReformatNumber(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
n, err := ConsumeNumber(src)
if err != nil {
return dst, n, err
}
if !flags.Get(jsonflags.CanonicalizeNumbers) {
dst = append(dst, src[:n]...) // copy the number verbatim
return dst, n, nil
}
// Identify the kind of number.
var isFloat bool
for _, c := range src[:n] {
if c == '.' || c == 'e' || c == 'E' {
isFloat = true // has fraction or exponent
break
}
}
// Check if need to canonicalize this kind of number.
switch {
case string(src[:n]) == "-0":
break // canonicalize -0 as 0 regardless of kind
case isFloat:
if !flags.Get(jsonflags.CanonicalizeRawFloats) {
dst = append(dst, src[:n]...) // copy the number verbatim
return dst, n, nil
}
default:
// As an optimization, we can copy integer numbers below 2⁵³ verbatim
// since the canonical form is always identical.
const maxExactIntegerDigits = 16 // len(strconv.AppendUint(nil, 1<<53, 10))
if !flags.Get(jsonflags.CanonicalizeRawInts) || n < maxExactIntegerDigits {
dst = append(dst, src[:n]...) // copy the number verbatim
return dst, n, nil
}
}
// Parse and reformat the number (which uses a canonical format).
// The parse error is discarded; an out-of-range value comes back as
// an infinity, which is clamped to the largest finite float64 below.
fv, _ := strconv.ParseFloat(string(src[:n]), 64)
switch {
case fv == 0:
fv = 0 // normalize negative zero as just zero
case math.IsInf(fv, +1):
fv = +math.MaxFloat64
case math.IsInf(fv, -1):
fv = -math.MaxFloat64
}
return AppendFloat(dst, fv, 64), n, nil
}

View File

@ -0,0 +1,332 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsonwire
import (
"bufio"
"bytes"
"compress/gzip"
"crypto/sha256"
"encoding/binary"
"encoding/hex"
"flag"
"math"
"net/http"
"reflect"
"strconv"
"strings"
"testing"
"time"
"encoding/json/internal/jsonflags"
)
// TestAppendQuote runs AppendQuote over a table of inputs, once with
// jsonflags.AllowInvalidUTF8 set with the low bit 1 and once with the low bit 0.
// Judging by the expectations below, the former tolerates invalid UTF-8
// (replacing it with U+FFFD) and the latter reports ErrInvalidUTF8.
func TestAppendQuote(t *testing.T) {
// Fields of each entry:
//   in          - the Go string handed to AppendQuote
//   flags       - additional flags to set for this entry (0 for none)
//   want        - expected output of the first (AllowInvalidUTF8 | 1) call
//   wantErr     - expected error of the first call
//   wantErrUTF8 - expected error of the second (AllowInvalidUTF8 | 0) call;
//                 nil means the second call must match want/wantErr exactly
tests := []struct {
in string
flags jsonflags.Bools
want string
wantErr error
wantErrUTF8 error
}{
{"", 0, `""`, nil, nil},
{"hello", 0, `"hello"`, nil, nil},
{"\x00", 0, `"\u0000"`, nil, nil},
{"\x1f", 0, `"\u001f"`, nil, nil},
{"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", 0, `"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"`, nil, nil},
// Printable ASCII punctuation under no flags, EscapeForHTML, and EscapeForJS.
{" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f", 0, "\" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f\"", nil, nil},
{" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f", jsonflags.EscapeForHTML, "\" !#$%\\u0026'()*+,-./0123456789:;\\u003c=\\u003e?@[]^_`{|}~\x7f\"", nil, nil},
{" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f", jsonflags.EscapeForJS, "\" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f\"", nil, nil},
// U+2028 and U+2029 are expected to be escaped only under EscapeForJS.
{"\u2027\u2028\u2029\u2030", 0, "\"\u2027\u2028\u2029\u2030\"", nil, nil},
{"\u2027\u2028\u2029\u2030", jsonflags.EscapeForHTML, "\"\u2027\u2028\u2029\u2030\"", nil, nil},
{"\u2027\u2028\u2029\u2030", jsonflags.EscapeForJS, "\"\u2027\\u2028\\u2029\u2030\"", nil, nil},
// Invalid UTF-8: each offending byte is expected to become U+FFFD.
{"x\x80\ufffd", 0, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8},
{"x\xff\ufffd", 0, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8},
{"x\xc0", 0, "\"x\ufffd\"", nil, ErrInvalidUTF8},
{"x\xc0\x80", 0, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8},
{"x\xe0", 0, "\"x\ufffd\"", nil, ErrInvalidUTF8},
{"x\xe0\x80", 0, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8},
{"x\xe0\x80\x80", 0, "\"x\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8},
{"x\xf0", 0, "\"x\ufffd\"", nil, ErrInvalidUTF8},
{"x\xf0\x80", 0, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8},
{"x\xf0\x80\x80", 0, "\"x\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8},
{"x\xf0\x80\x80\x80", 0, "\"x\ufffd\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8},
{"x\xed\xba\xad", 0, "\"x\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8},
{"\"\\/\b\f\n\r\t", 0, `"\"\\/\b\f\n\r\t"`, nil, nil},
{"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃).", 0, `"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃)."`, nil, nil},
{"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", 0, "\"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602\"", nil, nil},
{"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", 0, "\"\\u0000\\u001f\u0020\\\"\u0026\u003c\u003e\\\\\u007f\u0080\u2028\u2029\ufffd\U0001f602\"", nil, nil},
}
for _, tt := range tests {
t.Run("", func(t *testing.T) {
var flags jsonflags.Flags
// NOTE(review): OR-ing a flag with 1 presumably means "set to true"
// and with 0 "set to false" — confirm against the jsonflags package.
flags.Set(tt.flags | 1)
flags.Set(jsonflags.AllowInvalidUTF8 | 1)
// First pass: output and error must match want/wantErr exactly.
got, gotErr := AppendQuote(nil, tt.in, &flags)
if string(got) != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) {
t.Errorf("AppendQuote(nil, %q, ...) = (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErr)
}
// Second pass with the AllowInvalidUTF8 bit given as 0.
flags.Set(jsonflags.AllowInvalidUTF8 | 0)
switch got, gotErr := AppendQuote(nil, tt.in, &flags); {
// No UTF-8 error expected: results must be the same as in the first pass.
case tt.wantErrUTF8 == nil && (string(got) != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr)):
t.Errorf("AppendQuote(nil, %q, ...) = (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErr)
// UTF-8 error expected: the output only has to be a prefix of want,
// and the error must equal wantErrUTF8.
case tt.wantErrUTF8 != nil && (!strings.HasPrefix(tt.want, string(got)) || !reflect.DeepEqual(gotErr, tt.wantErrUTF8)):
t.Errorf("AppendQuote(nil, %q, ...) = (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErrUTF8)
}
})
}
}
// TestAppendNumber checks the text produced by AppendFloat for a table of
// float64 inputs, formatted with a bit size of 32 and of 64.
func TestAppendNumber(t *testing.T) {
// Fields of each entry:
//   in     - the value to format
//   want32 - expected output of AppendFloat(nil, in, 32); "" skips the check
//   want64 - expected output of AppendFloat(nil, in, 64); "" skips the check
tests := []struct {
in float64
want32 string
want64 string
}{
{math.E, "2.7182817", "2.718281828459045"},
{math.Pi, "3.1415927", "3.141592653589793"},
{math.SmallestNonzeroFloat32, "1e-45", "1.401298464324817e-45"},
{math.SmallestNonzeroFloat64, "0", "5e-324"},
{math.MaxFloat32, "3.4028235e+38", "3.4028234663852886e+38"},
{math.MaxFloat64, "", "1.7976931348623157e+308"},
{0.1111111111111111, "0.11111111", "0.1111111111111111"},
{0.2222222222222222, "0.22222222", "0.2222222222222222"},
{0.3333333333333333, "0.33333334", "0.3333333333333333"},
{0.4444444444444444, "0.44444445", "0.4444444444444444"},
{0.5555555555555555, "0.5555556", "0.5555555555555555"},
{0.6666666666666666, "0.6666667", "0.6666666666666666"},
{0.7777777777777777, "0.7777778", "0.7777777777777777"},
{0.8888888888888888, "0.8888889", "0.8888888888888888"},
{0.9999999999999999, "1", "0.9999999999999999"},
// The following entries are from RFC 8785, appendix B
// which are designed to ensure repeatable formatting of 64-bit floats.
{math.Float64frombits(0x0000000000000000), "0", "0"},
{math.Float64frombits(0x8000000000000000), "-0", "-0"}, // differs from RFC 8785
{math.Float64frombits(0x0000000000000001), "0", "5e-324"},
{math.Float64frombits(0x8000000000000001), "-0", "-5e-324"},
{math.Float64frombits(0x7fefffffffffffff), "", "1.7976931348623157e+308"},
{math.Float64frombits(0xffefffffffffffff), "", "-1.7976931348623157e+308"},
{math.Float64frombits(0x4340000000000000), "9007199000000000", "9007199254740992"},
{math.Float64frombits(0xc340000000000000), "-9007199000000000", "-9007199254740992"},
{math.Float64frombits(0x4430000000000000), "295147900000000000000", "295147905179352830000"},
{math.Float64frombits(0x44b52d02c7e14af5), "1e+23", "9.999999999999997e+22"},
{math.Float64frombits(0x44b52d02c7e14af6), "1e+23", "1e+23"},
{math.Float64frombits(0x44b52d02c7e14af7), "1e+23", "1.0000000000000001e+23"},
{math.Float64frombits(0x444b1ae4d6e2ef4e), "1e+21", "999999999999999700000"},
{math.Float64frombits(0x444b1ae4d6e2ef4f), "1e+21", "999999999999999900000"},
{math.Float64frombits(0x444b1ae4d6e2ef50), "1e+21", "1e+21"},
{math.Float64frombits(0x3eb0c6f7a0b5ed8c), "0.000001", "9.999999999999997e-7"},
{math.Float64frombits(0x3eb0c6f7a0b5ed8d), "0.000001", "0.000001"},
{math.Float64frombits(0x41b3de4355555553), "333333340", "333333333.3333332"},
{math.Float64frombits(0x41b3de4355555554), "333333340", "333333333.33333325"},
{math.Float64frombits(0x41b3de4355555555), "333333340", "333333333.3333333"},
{math.Float64frombits(0x41b3de4355555556), "333333340", "333333333.3333334"},
{math.Float64frombits(0x41b3de4355555557), "333333340", "333333333.33333343"},
{math.Float64frombits(0xbecbf647612f3696), "-0.0000033333333", "-0.0000033333333333333333"},
{math.Float64frombits(0x43143ff3c1cb0959), "1424953900000000", "1424953923781206.2"},
// The following are select entries from RFC 8785, appendix B,
// but modified for equivalent 32-bit behavior.
{float64(math.Float32frombits(0x65a96815)), "9.999999e+22", "9.999998877476383e+22"},
{float64(math.Float32frombits(0x65a96816)), "1e+23", "9.999999778196308e+22"},
{float64(math.Float32frombits(0x65a96817)), "1.0000001e+23", "1.0000000678916234e+23"},
{float64(math.Float32frombits(0x6258d725)), "999999900000000000000", "999999879303389000000"},
{float64(math.Float32frombits(0x6258d726)), "999999950000000000000", "999999949672133200000"},
{float64(math.Float32frombits(0x6258d727)), "1e+21", "1.0000000200408773e+21"},
{float64(math.Float32frombits(0x6258d728)), "1.0000001e+21", "1.0000000904096215e+21"},
{float64(math.Float32frombits(0x358637bc)), "9.999999e-7", "9.99999883788405e-7"},
{float64(math.Float32frombits(0x358637bd)), "0.000001", "9.999999974752427e-7"},
{float64(math.Float32frombits(0x358637be)), "0.0000010000001", "0.0000010000001111620804"},
}
for _, tt := range tests {
t.Run("", func(t *testing.T) {
// An empty expectation disables the corresponding check.
if got32 := string(AppendFloat(nil, tt.in, 32)); got32 != tt.want32 && tt.want32 != "" {
t.Errorf("AppendFloat(nil, %v, 32) = %v, want %v", tt.in, got32, tt.want32)
}
if got64 := string(AppendFloat(nil, tt.in, 64)); got64 != tt.want64 && tt.want64 != "" {
t.Errorf("AppendFloat(nil, %v, 64) = %v, want %v", tt.in, got64, tt.want64)
}
})
}
}
// testCanonicalNumberLines is the -canonical-number-lines test flag: the
// number of lines of the canonical numbers testdata that TestCanonicalNumber
// checks.
//
// The default of 1e4 lines was chosen since it is sufficiently large to include
// test numbers from all three categories (i.e., static, series, and random).
// Yet, it is sufficiently low to execute quickly relative to other tests.
//
// Processing 1e8 lines takes a minute and processes about 4GiB worth of text.
var testCanonicalNumberLines = flag.Float64("canonical-number-lines", 1e4, "specify the number of lines to check from the canonical numbers testdata")
// TestCanonicalNumber verifies that AppendFloat complies with RFC 8785
// according to the testdata provided by the reference implementation.
// See https://github.com/cyberphone/json-canonicalization/tree/master/testdata#es6-numbers.
//
// The generated lines are first hashed and compared against a known SHA-256
// digest. Only if that digest does not match is the golden file downloaded
// (which needs network access) and compared line by line.
func TestCanonicalNumber(t *testing.T) {
	const testfileURL = "https://github.com/cyberphone/json-canonicalization/releases/download/es6testfile/es6testfile100m.txt.gz"
	// Expected SHA-256 of the first N generated lines, keyed by N.
	hashes := map[float64]string{
		1e3: "be18b62b6f69cdab33a7e0dae0d9cfa869fda80ddc712221570f9f40a5878687",
		1e4: "b9f7a8e75ef22a835685a52ccba7f7d6bdc99e34b010992cbc5864cd12be6892",
		1e5: "22776e6d4b49fa294a0d0f349268e5c28808fe7e0cb2bcbe28f63894e494d4c7",
		1e6: "49415fee2c56c77864931bd3624faad425c3c577d6d74e89a83bc725506dad16",
		1e7: "b9f8a44a91d46813b21b9602e72f112613c91408db0b8341fb94603d9db135e0",
		1e8: "0f7dda6b0837dde083c5d6b896f7d62340c8a2415b0c7121d83145e08a755272",
	}
	wantHash := hashes[*testCanonicalNumberLines]
	if wantHash == "" {
		t.Fatalf("canonical-number-lines must be one of the following values: 1e3, 1e4, 1e5, 1e6, 1e7, 1e8")
	}
	numLines := int(*testCanonicalNumberLines)

	// generator returns a function that generates the next float64 to format.
	// This implements the algorithm specified in the reference implementation.
	generator := func() func() float64 {
		static := [...]uint64{
			0x0000000000000000, 0x8000000000000000, 0x0000000000000001, 0x8000000000000001,
			0xc46696695dbd1cc3, 0xc43211ede4974a35, 0xc3fce97ca0f21056, 0xc3c7213080c1a6ac,
			0xc39280f39a348556, 0xc35d9b1f5d20d557, 0xc327af4c4a80aaac, 0xc2f2f2a36ecd5556,
			0xc2be51057e155558, 0xc28840d131aaaaac, 0xc253670dc1555557, 0xc21f0b4935555557,
			0xc1e8d5d42aaaaaac, 0xc1b3de4355555556, 0xc17fca0555555556, 0xc1496e6aaaaaaaab,
			0xc114585555555555, 0xc0e046aaaaaaaaab, 0xc0aa0aaaaaaaaaaa, 0xc074d55555555555,
			0xc040aaaaaaaaaaab, 0xc00aaaaaaaaaaaab, 0xbfd5555555555555, 0xbfa1111111111111,
			0xbf6b4e81b4e81b4f, 0xbf35d867c3ece2a5, 0xbf0179ec9cbd821e, 0xbecbf647612f3696,
			0xbe965e9f80f29212, 0xbe61e54c672874db, 0xbe2ca213d840baf8, 0xbdf6e80fe033c8c6,
			0xbdc2533fe68fd3d2, 0xbd8d51ffd74c861c, 0xbd5774ccac3d3817, 0xbd22c3d6f030f9ac,
			0xbcee0624b3818f79, 0xbcb804ea293472c7, 0xbc833721ba905bd3, 0xbc4ebe9c5db3c61e,
			0xbc18987d17c304e5, 0xbbe3ad30dfcf371d, 0xbbaf7b816618582f, 0xbb792f9ab81379bf,
			0xbb442615600f9499, 0xbb101e77800c76e1, 0xbad9ca58cce0be35, 0xbaa4a1e0a3e6fe90,
			0xba708180831f320d, 0xba3a68cd9e985016, 0x446696695dbd1cc3, 0x443211ede4974a35,
			0x43fce97ca0f21056, 0x43c7213080c1a6ac, 0x439280f39a348556, 0x435d9b1f5d20d557,
			0x4327af4c4a80aaac, 0x42f2f2a36ecd5556, 0x42be51057e155558, 0x428840d131aaaaac,
			0x4253670dc1555557, 0x421f0b4935555557, 0x41e8d5d42aaaaaac, 0x41b3de4355555556,
			0x417fca0555555556, 0x41496e6aaaaaaaab, 0x4114585555555555, 0x40e046aaaaaaaaab,
			0x40aa0aaaaaaaaaaa, 0x4074d55555555555, 0x4040aaaaaaaaaaab, 0x400aaaaaaaaaaaab,
			0x3fd5555555555555, 0x3fa1111111111111, 0x3f6b4e81b4e81b4f, 0x3f35d867c3ece2a5,
			0x3f0179ec9cbd821e, 0x3ecbf647612f3696, 0x3e965e9f80f29212, 0x3e61e54c672874db,
			0x3e2ca213d840baf8, 0x3df6e80fe033c8c6, 0x3dc2533fe68fd3d2, 0x3d8d51ffd74c861c,
			0x3d5774ccac3d3817, 0x3d22c3d6f030f9ac, 0x3cee0624b3818f79, 0x3cb804ea293472c7,
			0x3c833721ba905bd3, 0x3c4ebe9c5db3c61e, 0x3c18987d17c304e5, 0x3be3ad30dfcf371d,
			0x3baf7b816618582f, 0x3b792f9ab81379bf, 0x3b442615600f9499, 0x3b101e77800c76e1,
			0x3ad9ca58cce0be35, 0x3aa4a1e0a3e6fe90, 0x3a708180831f320d, 0x3a3a68cd9e985016,
			0x4024000000000000, 0x4014000000000000, 0x3fe0000000000000, 0x3fa999999999999a,
			0x3f747ae147ae147b, 0x3f40624dd2f1a9fc, 0x3f0a36e2eb1c432d, 0x3ed4f8b588e368f1,
			0x3ea0c6f7a0b5ed8d, 0x3e6ad7f29abcaf48, 0x3e35798ee2308c3a, 0x3ed539223589fa95,
			0x3ed4ff26cd5a7781, 0x3ed4f95a762283ff, 0x3ed4f8c60703520c, 0x3ed4f8b72f19cd0d,
			0x3ed4f8b5b31c0c8d, 0x3ed4f8b58d1c461a, 0x3ed4f8b5894f7f0e, 0x3ed4f8b588ee37f3,
			0x3ed4f8b588e47da4, 0x3ed4f8b588e3849c, 0x3ed4f8b588e36bb5, 0x3ed4f8b588e36937,
			0x3ed4f8b588e368f8, 0x3ed4f8b588e368f1, 0x3ff0000000000000, 0xbff0000000000000,
			0xbfeffffffffffffa, 0xbfeffffffffffffb, 0x3feffffffffffffa, 0x3feffffffffffffb,
			0x3feffffffffffffc, 0x3feffffffffffffe, 0xbfefffffffffffff, 0xbfefffffffffffff,
			0x3fefffffffffffff, 0x3fefffffffffffff, 0x3fd3333333333332, 0x3fd3333333333333,
			0x3fd3333333333334, 0x0010000000000000, 0x000ffffffffffffd, 0x000fffffffffffff,
			0x7fefffffffffffff, 0xffefffffffffffff, 0x4340000000000000, 0xc340000000000000,
			0x4430000000000000, 0x44b52d02c7e14af5, 0x44b52d02c7e14af6, 0x44b52d02c7e14af7,
			0x444b1ae4d6e2ef4e, 0x444b1ae4d6e2ef4f, 0x444b1ae4d6e2ef50, 0x3eb0c6f7a0b5ed8c,
			0x3eb0c6f7a0b5ed8d, 0x41b3de4355555553, 0x41b3de4355555554, 0x41b3de4355555555,
			0x41b3de4355555556, 0x41b3de4355555557, 0xbecbf647612f3696, 0x43143ff3c1cb0959,
		}
		var state struct {
			idx   int               // number of values produced so far
			data  []byte            // unread remainder of block
			block [sha256.Size]byte // current pseudo-random block
		}
		return func() float64 {
			const numSerial = 2000
			var f float64
			switch {
			case state.idx < len(static):
				// Category 1: the fixed table above.
				f = math.Float64frombits(static[state.idx])
			case state.idx < len(static)+numSerial:
				// Category 2: consecutive bit patterns starting at 0x0010000000000000.
				f = math.Float64frombits(0x0010000000000000 + uint64(state.idx-len(static)))
			default:
				// Category 3: pseudo-random values drawn 8 bytes at a time
				// from a chain of SHA-256 digests. f starts as 0, so the
				// loop body runs at least once; zero, NaN and infinite
				// values are rejected and redrawn.
				for f == 0 || math.IsNaN(f) || math.IsInf(f, 0) {
					if len(state.data) == 0 {
						state.block = sha256.Sum256(state.block[:])
						state.data = state.block[:]
					}
					f = math.Float64frombits(binary.LittleEndian.Uint64(state.data))
					state.data = state.data[8:]
				}
			}
			state.idx++
			return f
		}
	}

	// Pass through the test twice. In the first pass we only hash the output,
	// while in the second pass we check every line against the golden testdata.
	// If the hashes match in the first pass, then we skip the second pass.
	for _, checkGolden := range []bool{false, true} {
		var br *bufio.Reader // for line-by-line reading of es6testfile100m.txt
		if checkGolden {
			resp, err := http.Get(testfileURL)
			if err != nil {
				t.Fatalf("http.Get error: %v", err)
			}
			// checkGolden is true for a single iteration only, so this
			// defer is registered at most once.
			defer resp.Body.Close()
			// http.Get does not report non-2xx responses as errors; without
			// this check an error page would surface as a gzip failure.
			if resp.StatusCode != http.StatusOK {
				t.Fatalf("http.Get error: unexpected status %q", resp.Status)
			}
			zr, err := gzip.NewReader(resp.Body)
			if err != nil {
				t.Fatalf("gzip.NewReader error: %v", err)
			}
			br = bufio.NewReader(zr)
		}

		// appendNumberJCS differs from AppendFloat only for -0.
		appendNumberJCS := func(b []byte, f float64) []byte {
			if math.Signbit(f) && f == 0 {
				return append(b, '0')
			}
			return AppendFloat(b, f, 64)
		}

		var gotLine []byte
		next := generator()
		hash := sha256.New()
		start := time.Now()
		lastPrint := start
		for n := 1; n <= numLines; n++ {
			// Generate the formatted line for this number:
			// the hexadecimal bit pattern, a comma, the number, a newline.
			f := next()
			gotLine = gotLine[:0] // reset from previous usage
			gotLine = strconv.AppendUint(gotLine, math.Float64bits(f), 16)
			gotLine = append(gotLine, ',')
			gotLine = appendNumberJCS(gotLine, f)
			gotLine = append(gotLine, '\n')
			hash.Write(gotLine)

			// Check that the formatted line matches.
			if checkGolden {
				wantLine, err := br.ReadBytes('\n')
				if err != nil {
					t.Fatalf("bufio.Reader.ReadBytes error: %v", err)
				}
				if !bytes.Equal(gotLine, wantLine) {
					t.Errorf("mismatch on line %d:\n\tgot %v\n\twant %v",
						n, strings.TrimSpace(string(gotLine)), strings.TrimSpace(string(wantLine)))
				}
			}

			// Print progress.
			if now := time.Now(); now.Sub(lastPrint) > time.Second || n == numLines {
				remaining := float64(now.Sub(start)) * float64(numLines-n) / float64(n)
				t.Logf("%0.3f%% (%v remaining)",
					100.0*float64(n)/float64(numLines),
					time.Duration(remaining).Round(time.Second))
				lastPrint = now
			}
		}

		gotHash := hex.EncodeToString(hash.Sum(nil))
		if gotHash == wantHash {
			return // hashes match, no need to check golden testdata
		}
	}
}

View File

@ -0,0 +1,217 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
// Package jsonwire implements stateless functionality for handling JSON text.
package jsonwire
import (
"cmp"
"errors"
"strconv"
"strings"
"unicode"
"unicode/utf16"
"unicode/utf8"
)
// TrimSuffixWhitespace trims JSON whitespace (space, tab, carriage return,
// and newline) from the end of b.
func TrimSuffixWhitespace(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	for end := len(b); end > 0; end-- {
		if c := b[end-1]; c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			return b[:end]
		}
	}
	return b[:0]
}
// TrimSuffixString removes a valid JSON string from the end of b,
// including both of its surrounding double quotes.
// The behavior is undefined if there is not a valid JSON string present.
func TrimSuffixString(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	n := len(b)
	// Drop the closing quote.
	if n > 0 && b[n-1] == '"' {
		n--
	}
	// Walk backwards until the byte before n is a quote that is not
	// preceded by a backslash, i.e. the opening quote.
	for n >= 2 && (b[n-1] != '"' || b[n-2] == '\\') {
		n--
	}
	// Drop the opening quote.
	if n > 0 && b[n-1] == '"' {
		n--
	}
	return b[:n]
}
// HasSuffixByte reports whether the last byte of b is c.
// It reports false for an empty b.
func HasSuffixByte(b []byte, c byte) bool {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	last := len(b) - 1
	return last >= 0 && b[last] == c
}
// TrimSuffixByte returns b without its final byte if that byte is c;
// otherwise it returns b unchanged. At most one byte is removed.
func TrimSuffixByte(b []byte, c byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	last := len(b) - 1
	if last < 0 || b[last] != c {
		return b
	}
	return b[:last]
}
// QuoteRune returns a single-quoted Go-style literal for the first rune of b.
// If b starts with a byte that is not valid UTF-8, that byte is rendered
// as a '\x..' hexadecimal escape instead.
func QuoteRune[Bytes ~[]byte | ~string](b Bytes) string {
	first, size := utf8.DecodeRuneInString(string(truncateMaxUTF8(b)))
	if first != utf8.RuneError || size != 1 {
		return strconv.QuoteRune(first)
	}
	// A width of 1 together with RuneError means b[0] is an invalid byte.
	return `'\x` + strconv.FormatUint(uint64(b[0]), 16) + `'`
}
// CompareUTF16 lexicographically compares x to y according
// to the UTF-16 codepoints of the UTF-8 encoded input strings.
// This implements the ordering specified in RFC 8785, section 3.2.3.
//
// Every return value comes from cmp.Compare, so the result is
// -1 if x sorts before y, 0 if they compare equal, and +1 otherwise.
func CompareUTF16[Bytes ~[]byte | ~string](x, y Bytes) int {
// NOTE: This is an optimized, mostly allocation-free implementation
// of CompareUTF16Simple in wire_test.go. FuzzCompareUTF16 verifies that the
// two implementations agree on the result of comparing any two strings.
// isUTF16Self reports whether r lies in U+0000..U+D7FF or U+E000..U+FFFF,
// i.e. whether it is representable as one UTF-16 code unit rather than
// as a surrogate pair.
isUTF16Self := func(r rune) bool {
return ('\u0000' <= r && r <= '\uD7FF') || ('\uE000' <= r && r <= '\uFFFF')
}
for {
// Once either input is exhausted, the remaining lengths decide:
// the shorter input sorts first and two empty inputs are equal.
if len(x) == 0 || len(y) == 0 {
return cmp.Compare(len(x), len(y))
}
// ASCII fast-path.
// If either leading byte is ASCII, the leading bytes are compared
// directly without decoding a rune.
if x[0] < utf8.RuneSelf || y[0] < utf8.RuneSelf {
if x[0] != y[0] {
return cmp.Compare(x[0], y[0])
}
x, y = x[1:], y[1:]
continue
}
// Decode next pair of runes as UTF-8.
rx, nx := utf8.DecodeRuneInString(string(truncateMaxUTF8(x)))
ry, ny := utf8.DecodeRuneInString(string(truncateMaxUTF8(y)))
selfx := isUTF16Self(rx)
selfy := isUTF16Self(ry)
// When exactly one of the runes needs a surrogate pair, it is replaced
// by the first value returned from utf16.EncodeRune so that it is
// compared against the other rune as a UTF-16 code unit.
// When both or neither need a pair, the runes are compared as-is.
switch {
// The x rune is a single UTF-16 codepoint, while
// the y rune is a surrogate pair of UTF-16 codepoints.
case selfx && !selfy:
ry, _ = utf16.EncodeRune(ry)
// The y rune is a single UTF-16 codepoint, while
// the x rune is a surrogate pair of UTF-16 codepoints.
case selfy && !selfx:
rx, _ = utf16.EncodeRune(rx)
}
if rx != ry {
return cmp.Compare(rx, ry)
}
// Check for invalid UTF-8, in which case,
// we just perform a byte-for-byte comparison.
// (Both runes are equal here, so an invalid byte on either side
// means both decoded to utf8.RuneError.)
if isInvalidUTF8(rx, nx) || isInvalidUTF8(ry, ny) {
if x[0] != y[0] {
return cmp.Compare(x[0], y[0])
}
}
// Equal so far: skip past the bytes consumed on each side.
x, y = x[nx:], y[ny:]
}
}
// truncateMaxUTF8 truncates b to at most utf8.UTFMax bytes,
// which is still long enough to hold the first rune of b in full.
//
// The utf8 package currently lacks generic variants, which complicates
// generic functions that operate on either []byte or string.
// As a hack, we always call the utf8 function operating on strings,
// but always truncate the input such that the result is identical.
//
// Example usage:
//
//	utf8.DecodeRuneInString(string(truncateMaxUTF8(b)))
//
// Converting a []byte to a string is stack allocated since
// truncateMaxUTF8 guarantees that the []byte is short.
func truncateMaxUTF8[Bytes ~[]byte | ~string](b Bytes) Bytes {
	// TODO(https://go.dev/issue/56948): Remove this function and
	// instead directly call generic utf8 functions wherever used.
	if len(b) <= utf8.UTFMax {
		return b // already short enough
	}
	return b[:utf8.UTFMax]
}
// ErrInvalidUTF8 is the sentinel error used to report invalid UTF-8.
//
// TODO(https://go.dev/issue/70547): Use utf8.ErrInvalid instead.
var ErrInvalidUTF8 = errors.New("invalid UTF-8")
// NewInvalidCharacterError returns an error of the form
// "invalid character <rune> <where>", in which <rune> is the first rune
// of prefix as quoted by QuoteRune and where is appended verbatim.
func NewInvalidCharacterError[Bytes ~[]byte | ~string](prefix Bytes, where string) error {
	return errors.New("invalid character " + QuoteRune(prefix) + " " + where)
}
// NewInvalidEscapeSequenceError returns an error describing what as an
// invalid escape sequence, or as an invalid surrogate pair when what is
// longer than 6 bytes.
//
// The offending text is shown between backticks when that is unambiguous.
// If it contains a backtick, whitespace, a non-printable rune, or
// utf8.RuneError, it is rendered with strconv.Quote instead.
func NewInvalidEscapeSequenceError[Bytes ~[]byte | ~string](what Bytes) error {
	seq := string(what)
	label := "escape sequence"
	if len(seq) > 6 {
		label = "surrogate pair"
	}
	unsafeInBackticks := func(r rune) bool {
		return r == '`' || r == utf8.RuneError || unicode.IsSpace(r) || !unicode.IsPrint(r)
	}
	if strings.ContainsFunc(seq, unsafeInBackticks) {
		return errors.New("invalid " + label + " " + strconv.Quote(seq) + " in string")
	}
	return errors.New("invalid " + label + " `" + seq + "` in string")
}
// TruncatePointer shortens the JSON pointer s when it is longer than n bytes,
// keeping roughly n/2 bytes from each end and replacing what lies between
// with an ellipsis marker. The result only roughly respects the limit n.
// A pointer of at most n bytes is returned unchanged.
func TruncatePointer(s string, n int) string {
	if len(s) <= n {
		return s
	}
	// s[:head] and s[tail:] are the parts that are kept.
	half := n / 2
	head, tail := half, len(s)-half

	// Avoid truncating a name if there are multiple names present.
	if slash := strings.LastIndexByte(s[:head], '/'); slash > 0 {
		head = slash
	}
	if slash := strings.IndexByte(s[tail:], '/'); slash >= 0 {
		tail += slash + len("/")
	}

	// Avoid truncation in the middle of a UTF-8 rune.
	for ; head > 0 && isInvalidUTF8(utf8.DecodeLastRuneInString(s[:head])); head-- {
	}
	for ; tail < len(s) && isInvalidUTF8(utf8.DecodeRuneInString(s[tail:])); tail++ {
	}

	// Pick the middle fragment from the number of separators being elided.
	elided := s[head:tail]
	middle := "…/…/…"
	switch strings.Count(elided, "/") {
	case 0:
		middle = "…"
	case 1:
		middle = "…/…"
	}
	// When the elided part starts or ends on a separator, no partial name
	// is hidden on that side, so the ellipsis on that side is dropped.
	if middle != "…" {
		if strings.HasPrefix(elided, "/") {
			middle = strings.TrimPrefix(middle, "…")
		}
		if strings.HasSuffix(elided, "/") {
			middle = strings.TrimSuffix(middle, "…")
		}
	}
	return s[:head] + middle + s[tail:]
}
// isInvalidUTF8 reports whether the pair (r, rn) is utf8.RuneError with a
// width of exactly 1. A RuneError with any other width is not reported.
func isInvalidUTF8(r rune, rn int) bool {
	if rn != 1 {
		return false
	}
	return r == utf8.RuneError
}

View File

@ -0,0 +1,98 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsonwire
import (
"cmp"
"slices"
"testing"
"unicode/utf16"
"unicode/utf8"
)
// TestQuoteRune checks the quoted form that QuoteRune produces for the
// first rune of each input, including inputs starting with invalid UTF-8.
func TestQuoteRune(t *testing.T) {
	tests := []struct{ in, want string }{
		{"x", `'x'`},
		{"\n", `'\n'`},
		{"'", `'\''`},
		{"\xff", `'\xff'`},
		{"💩", `'💩'`},
		{"💩"[:1], `'\xf0'`},
		{"\uffff", `'\uffff'`},
		{"\U00101234", `'\U00101234'`},
	}
	for _, tt := range tests {
		got := QuoteRune([]byte(tt.in))
		if got != tt.want {
			// The message names the exported function that is actually called.
			t.Errorf("QuoteRune(%q) = %s, want %s", tt.in, got, tt.want)
		}
	}
}
// compareUTF16Testdata lists strings in the order CompareUTF16 is expected
// to sort them: TestCompareUTF16 requires that comparing two entries gives
// the same result as comparing their indexes. Keep it in that order.
// The entries also seed the corpus of FuzzCompareUTF16.
var compareUTF16Testdata = []string{"", "\r", "1", "f\xfe", "f\xfe\xff", "f\xff", "\u0080", "\u00f6", "\u20ac", "\U0001f600", "\ufb33"}
// TestCompareUTF16 compares every ordered pair of entries from
// compareUTF16Testdata and requires CompareUTF16 to order them
// exactly as their positions in that slice are ordered.
func TestCompareUTF16(t *testing.T) {
	for i := range compareUTF16Testdata {
		for j := range compareUTF16Testdata {
			lhs, rhs := compareUTF16Testdata[i], compareUTF16Testdata[j]
			want := cmp.Compare(i, j)
			if got := CompareUTF16([]byte(lhs), []byte(rhs)); got != want {
				t.Errorf("CompareUTF16(%q, %q) = %v, want %v", lhs, rhs, got, want)
			}
		}
	}
}
// FuzzCompareUTF16 checks that CompareUTF16 agrees with a naive reference
// implementation for every pair of valid UTF-8 inputs.
func FuzzCompareUTF16(f *testing.F) {
	// Seed the corpus with every ordered pair of the static testdata.
	for _, td1 := range compareUTF16Testdata {
		for _, td2 := range compareUTF16Testdata {
			f.Add([]byte(td1), []byte(td2))
		}
	}

	// CompareUTF16Simple is identical to CompareUTF16,
	// but relies on naively converting a string to a []uint16 codepoints.
	// It is easy to verify as correct, but is slow.
	CompareUTF16Simple := func(x, y []byte) int {
		return slices.Compare(
			utf16.Encode([]rune(string(x))),
			utf16.Encode([]rune(string(y))),
		)
	}

	f.Fuzz(func(t *testing.T, s1, s2 []byte) {
		// Compare the optimized and simplified implementations.
		got, want := CompareUTF16(s1, s2), CompareUTF16Simple(s1, s2)
		if got == want {
			return
		}
		// A disagreement only counts when both inputs are valid UTF-8.
		if !utf8.Valid(s1) || !utf8.Valid(s2) {
			return
		}
		t.Errorf("CompareUTF16(%q, %q) = %v, want %v", s1, s2, got, want)
	})
}
// TestTruncatePointer checks the result of TruncatePointer with a limit
// of 10 for a table of JSON pointers.
func TestTruncatePointer(t *testing.T) {
// Each entry pairs an input pointer with the expected truncated form.
tests := []struct{ in, want string }{
// Inputs no longer than the limit come back unchanged.
{"hello", "hello"},
{"/a/b/c", "/a/b/c"},
{"/a/b/c/d/e/f/g", "/a/b/…/f/g"},
{"supercalifragilisticexpialidocious", "super…cious"},
{"/supercalifragilisticexpialidocious/supercalifragilisticexpialidocious", "/supe…/…cious"},
{"/supercalifragilisticexpialidocious/supercalifragilisticexpialidocious/supercalifragilisticexpialidocious", "/supe…/…/…cious"},
{"/a/supercalifragilisticexpialidocious/supercalifragilisticexpialidocious", "/a/…/…cious"},
{"/supercalifragilisticexpialidocious/supercalifragilisticexpialidocious/b", "/supe…/…/b"},
{"/fizz/buzz/bazz", "/fizz/…/bazz"},
{"/fizz/buzz/bazz/razz", "/fizz/…/razz"},
{"/////////////////////////////", "/////…/////"},
// Multi-byte runes must not be split by the truncation.
{"/🎄❤️✨/🎁✅😊/🎅🔥⭐", "/🎄…/…/…⭐"},
}
for _, tt := range tests {
got := TruncatePointer(tt.in, 10)
if got != tt.want {
t.Errorf("TruncatePointer(%q) = %q, want %q", tt.in, got, tt.want)
}
}
}

View File

@ -0,0 +1,856 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"errors"
"io"
"math"
"math/rand"
"path"
"reflect"
"strings"
"testing"
"encoding/json/internal/jsontest"
"encoding/json/internal/jsonwire"
)
// E wraps err in a new SyntacticError, setting only its Err field.
func E(err error) *SyntacticError {
	se := SyntacticError{Err: err}
	return &se
}
// newInvalidCharacterError returns the error built by
// jsonwire.NewInvalidCharacterError for prefix and where,
// wrapped in a SyntacticError via E.
func newInvalidCharacterError(prefix, where string) *SyntacticError {
	cause := jsonwire.NewInvalidCharacterError(prefix, where)
	return E(cause)
}
// newInvalidEscapeSequenceError returns the error built by
// jsonwire.NewInvalidEscapeSequenceError for what,
// wrapped in a SyntacticError via E.
func newInvalidEscapeSequenceError(what string) *SyntacticError {
	cause := jsonwire.NewInvalidEscapeSequenceError(what)
	return E(cause)
}
// withPos records a position on e and returns e itself, so calls can be
// chained. ByteOffset is set to the length of prefix in bytes and
// JSONPointer is set to pointer. The receiver is modified in place.
func (e *SyntacticError) withPos(prefix string, pointer Pointer) *SyntacticError {
	e.JSONPointer, e.ByteOffset = pointer, int64(len(prefix))
	return e
}
// equalError reports whether x and y are deeply equal according to
// reflect.DeepEqual, which compares both dynamic type and contents.
func equalError(x, y error) bool {
	same := reflect.DeepEqual(x, y)
	return same
}
// zeroToken and zeroValue are the zero values of Token and Value.
// NOTE(review): presumably used by tests as the expected result where no
// token or value is produced — confirm at the use sites.
var (
zeroToken Token
zeroValue Value
)
// tokOrVal is either a Token or a Value.
// The interface only requires a Kind method.
type tokOrVal interface{ Kind() Kind }
// coderTestdataEntry is one case of the coderTestdata table:
// a JSON input (in) together with its expected renderings (the out fields),
// its tokens, and its pointers.
// NOTE(review): tokens and pointers presumably line up one-to-one, with each
// pointer being the JSON pointer at the matching token — confirm at the use
// sites.
type coderTestdataEntry struct {
name jsontest.CaseName
in string
outCompacted string
outEscaped string // outCompacted if empty; escapes all runes in a string
outIndented string // outCompacted if empty; uses " " for indent prefix and "\t" for indent
outCanonicalized string // outCompacted if empty
tokens []Token
pointers []Pointer
}
// coderTestdata is the table of JSON inputs together with their expected
// outputs, tokens, and pointers. It is iterated by TestCoderInterleaved;
// fields left empty in an entry fall back as documented on coderTestdataEntry.
var coderTestdata = []coderTestdataEntry{{
	name: jsontest.Name("Null"),
	in: ` null `,
	outCompacted: `null`,
	tokens: []Token{Null},
	pointers: []Pointer{""},
}, {
	name: jsontest.Name("False"),
	in: ` false `,
	outCompacted: `false`,
	tokens: []Token{False},
}, {
	name: jsontest.Name("True"),
	in: ` true `,
	outCompacted: `true`,
	tokens: []Token{True},
}, {
	name: jsontest.Name("EmptyString"),
	in: ` "" `,
	outCompacted: `""`,
	tokens: []Token{String("")},
}, {
	name: jsontest.Name("SimpleString"),
	in: ` "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" `,
	outCompacted: `"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"`,
	outEscaped: `"\u0061\u0062\u0063\u0064\u0065\u0066\u0067\u0068\u0069\u006a\u006b\u006c\u006d\u006e\u006f\u0070\u0071\u0072\u0073\u0074\u0075\u0076\u0077\u0078\u0079\u007a\u0041\u0042\u0043\u0044\u0045\u0046\u0047\u0048\u0049\u004a\u004b\u004c\u004d\u004e\u004f\u0050\u0051\u0052\u0053\u0054\u0055\u0056\u0057\u0058\u0059\u005a"`,
	tokens: []Token{String("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")},
}, {
	name: jsontest.Name("ComplicatedString"),
	in: " \"Hello, 世界 🌟★☆✩🌠 " + "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602" + ` \ud800\udead \"\\\/\b\f\n\r\t \u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009" `,
	outCompacted: "\"Hello, 世界 🌟★☆✩🌠 " + "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602" + " 𐊭 \\\"\\\\/\\b\\f\\n\\r\\t \\\"\\\\/\\b\\f\\n\\r\\t\"",
	outEscaped: `"\u0048\u0065\u006c\u006c\u006f\u002c\u0020\u4e16\u754c\u0020\ud83c\udf1f\u2605\u2606\u2729\ud83c\udf20\u0020\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\ud83d\ude02\u0020\ud800\udead\u0020\u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009\u0020\u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009"`,
	outCanonicalized: `"Hello, 世界 🌟★☆✩🌠 €ö€힙דּ<EE8080>😂 𐊭 \"\\/\b\f\n\r\t \"\\/\b\f\n\r\t"`,
	tokens: []Token{rawToken("\"Hello, 世界 🌟★☆✩🌠 " + "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602" + " 𐊭 \\\"\\\\/\\b\\f\\n\\r\\t \\\"\\\\/\\b\\f\\n\\r\\t\"")},
}, {
	name: jsontest.Name("ZeroNumber"),
	in: ` 0 `,
	outCompacted: `0`,
	tokens: []Token{Uint(0)},
}, {
	name: jsontest.Name("SimpleNumber"),
	in: ` 123456789 `,
	outCompacted: `123456789`,
	tokens: []Token{Uint(123456789)},
}, {
	name: jsontest.Name("NegativeNumber"),
	in: ` -123456789 `,
	outCompacted: `-123456789`,
	tokens: []Token{Int(-123456789)},
}, {
	name: jsontest.Name("FractionalNumber"),
	in: " 0.123456789 ",
	outCompacted: `0.123456789`,
	tokens: []Token{Float(0.123456789)},
}, {
	name: jsontest.Name("ExponentNumber"),
	in: " 0e12456789 ",
	outCompacted: `0e12456789`,
	outCanonicalized: `0`,
	tokens: []Token{rawToken(`0e12456789`)},
}, {
	name: jsontest.Name("ExponentNumberP"),
	in: " 0e+12456789 ",
	outCompacted: `0e+12456789`,
	outCanonicalized: `0`,
	tokens: []Token{rawToken(`0e+12456789`)},
}, {
	name: jsontest.Name("ExponentNumberN"),
	in: " 0e-12456789 ",
	outCompacted: `0e-12456789`,
	outCanonicalized: `0`,
	tokens: []Token{rawToken(`0e-12456789`)},
}, {
	name: jsontest.Name("ComplicatedNumber"),
	in: ` -123456789.987654321E+0123456789 `,
	outCompacted: `-123456789.987654321E+0123456789`,
	outCanonicalized: `-1.7976931348623157e+308`,
	tokens: []Token{rawToken(`-123456789.987654321E+0123456789`)},
}, {
	name: jsontest.Name("Numbers"),
	in: ` [
0, -0, 0.0, -0.0, 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001, 1e1000,
-5e-324, 1e+100, 1.7976931348623157e+308,
9007199254740990, 9007199254740991, 9007199254740992, 9007199254740993, 9007199254740994,
-9223372036854775808, 9223372036854775807, 0, 18446744073709551615
] `,
	outCompacted: "[0,-0,0.0,-0.0,1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,1e1000,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740993,9007199254740994,-9223372036854775808,9223372036854775807,0,18446744073709551615]",
	outIndented: `[
0,
-0,
0.0,
-0.0,
1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,
1e1000,
-5e-324,
1e+100,
1.7976931348623157e+308,
9007199254740990,
9007199254740991,
9007199254740992,
9007199254740993,
9007199254740994,
-9223372036854775808,
9223372036854775807,
0,
18446744073709551615
]`,
	outCanonicalized: `[0,0,0,0,1,1.7976931348623157e+308,-5e-324,1e+100,1.7976931348623157e+308,9007199254740990,9007199254740991,9007199254740992,9007199254740992,9007199254740994,-9223372036854776000,9223372036854776000,0,18446744073709552000]`,
	tokens: []Token{
		BeginArray,
		Float(0), Float(math.Copysign(0, -1)), rawToken(`0.0`), rawToken(`-0.0`), rawToken(`1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001`), rawToken(`1e1000`),
		Float(-5e-324), Float(1e100), Float(1.7976931348623157e+308),
		Float(9007199254740990), Float(9007199254740991), Float(9007199254740992), rawToken(`9007199254740993`), rawToken(`9007199254740994`),
		Int(minInt64), Int(maxInt64), Uint(minUint64), Uint(maxUint64),
		EndArray,
	},
	pointers: []Pointer{
		"", "/0", "/1", "/2", "/3", "/4", "/5", "/6", "/7", "/8", "/9", "/10", "/11", "/12", "/13", "/14", "/15", "/16", "/17", "",
	},
}, {
	name: jsontest.Name("ObjectN0"),
	in: ` { } `,
	outCompacted: `{}`,
	tokens: []Token{BeginObject, EndObject},
	pointers: []Pointer{"", ""},
}, {
	name: jsontest.Name("ObjectN1"),
	in: ` { "0" : 0 } `,
	outCompacted: `{"0":0}`,
	outEscaped: `{"\u0030":0}`,
	outIndented: `{
"0": 0
}`,
	tokens: []Token{BeginObject, String("0"), Uint(0), EndObject},
	pointers: []Pointer{"", "/0", "/0", ""},
}, {
	name: jsontest.Name("ObjectN2"),
	in: ` { "0" : 0 , "1" : 1 } `,
	outCompacted: `{"0":0,"1":1}`,
	outEscaped: `{"\u0030":0,"\u0031":1}`,
	outIndented: `{
"0": 0,
"1": 1
}`,
	tokens: []Token{BeginObject, String("0"), Uint(0), String("1"), Uint(1), EndObject},
	pointers: []Pointer{"", "/0", "/0", "/1", "/1", ""},
}, {
	name: jsontest.Name("ObjectNested"),
	in: ` { "0" : { "1" : { "2" : { "3" : { "4" : { } } } } } } `,
	outCompacted: `{"0":{"1":{"2":{"3":{"4":{}}}}}}`,
	outEscaped: `{"\u0030":{"\u0031":{"\u0032":{"\u0033":{"\u0034":{}}}}}}`,
	outIndented: `{
"0": {
"1": {
"2": {
"3": {
"4": {}
}
}
}
}
}`,
	tokens: []Token{BeginObject, String("0"), BeginObject, String("1"), BeginObject, String("2"), BeginObject, String("3"), BeginObject, String("4"), BeginObject, EndObject, EndObject, EndObject, EndObject, EndObject, EndObject},
	pointers: []Pointer{
		"",
		"/0", "/0",
		"/0/1", "/0/1",
		"/0/1/2", "/0/1/2",
		"/0/1/2/3", "/0/1/2/3",
		"/0/1/2/3/4", "/0/1/2/3/4",
		"/0/1/2/3/4",
		"/0/1/2/3",
		"/0/1/2",
		"/0/1",
		"/0",
		"",
	},
}, {
	name: jsontest.Name("ObjectSuperNested"),
	in: `{"": {
"44444": {
"6666666": "ccccccc",
"77777777": "bb",
"555555": "aaaa"
},
"0": {
"3333": "bbb",
"11": "",
"222": "aaaaa"
}
}}`,
	outCompacted: `{"":{"44444":{"6666666":"ccccccc","77777777":"bb","555555":"aaaa"},"0":{"3333":"bbb","11":"","222":"aaaaa"}}}`,
	outEscaped: `{"":{"\u0034\u0034\u0034\u0034\u0034":{"\u0036\u0036\u0036\u0036\u0036\u0036\u0036":"\u0063\u0063\u0063\u0063\u0063\u0063\u0063","\u0037\u0037\u0037\u0037\u0037\u0037\u0037\u0037":"\u0062\u0062","\u0035\u0035\u0035\u0035\u0035\u0035":"\u0061\u0061\u0061\u0061"},"\u0030":{"\u0033\u0033\u0033\u0033":"\u0062\u0062\u0062","\u0031\u0031":"","\u0032\u0032\u0032":"\u0061\u0061\u0061\u0061\u0061"}}}`,
	outIndented: `{
"": {
"44444": {
"6666666": "ccccccc",
"77777777": "bb",
"555555": "aaaa"
},
"0": {
"3333": "bbb",
"11": "",
"222": "aaaaa"
}
}
}`,
	outCanonicalized: `{"":{"0":{"11":"","222":"aaaaa","3333":"bbb"},"44444":{"555555":"aaaa","6666666":"ccccccc","77777777":"bb"}}}`,
	tokens: []Token{
		BeginObject,
		String(""),
		BeginObject,
		String("44444"),
		BeginObject,
		String("6666666"), String("ccccccc"),
		String("77777777"), String("bb"),
		String("555555"), String("aaaa"),
		EndObject,
		String("0"),
		BeginObject,
		String("3333"), String("bbb"),
		String("11"), String(""),
		String("222"), String("aaaaa"),
		EndObject,
		EndObject,
		EndObject,
	},
	pointers: []Pointer{
		"",
		"/", "/",
		"//44444", "//44444",
		"//44444/6666666", "//44444/6666666",
		"//44444/77777777", "//44444/77777777",
		"//44444/555555", "//44444/555555",
		"//44444",
		"//0", "//0",
		"//0/3333", "//0/3333",
		"//0/11", "//0/11",
		"//0/222", "//0/222",
		"//0",
		"/",
		"",
	},
}, {
	name: jsontest.Name("ArrayN0"),
	in: ` [ ] `,
	outCompacted: `[]`,
	tokens: []Token{BeginArray, EndArray},
	pointers: []Pointer{"", ""},
}, {
	name: jsontest.Name("ArrayN1"),
	in: ` [ 0 ] `,
	outCompacted: `[0]`,
	outIndented: `[
0
]`,
	tokens: []Token{BeginArray, Uint(0), EndArray},
	pointers: []Pointer{"", "/0", ""},
}, {
	name: jsontest.Name("ArrayN2"),
	in: ` [ 0 , 1 ] `,
	outCompacted: `[0,1]`,
	outIndented: `[
0,
1
]`,
	tokens: []Token{BeginArray, Uint(0), Uint(1), EndArray},
}, {
	name: jsontest.Name("ArrayNested"),
	in: ` [ [ [ [ [ ] ] ] ] ] `,
	outCompacted: `[[[[[]]]]]`,
	outIndented: `[
[
[
[
[]
]
]
]
]`,
	tokens: []Token{BeginArray, BeginArray, BeginArray, BeginArray, BeginArray, EndArray, EndArray, EndArray, EndArray, EndArray},
	pointers: []Pointer{
		"",
		"/0",
		"/0/0",
		"/0/0/0",
		"/0/0/0/0",
		"/0/0/0/0",
		"/0/0/0",
		"/0/0",
		"/0",
		"",
	},
}, {
	name: jsontest.Name("Everything"),
	in: ` {
"literals" : [ null , false , true ],
"string" : "Hello, 世界" ,
"number" : 3.14159 ,
"arrayN0" : [ ] ,
"arrayN1" : [ 0 ] ,
"arrayN2" : [ 0 , 1 ] ,
"objectN0" : { } ,
"objectN1" : { "0" : 0 } ,
"objectN2" : { "0" : 0 , "1" : 1 }
} `,
	outCompacted: `{"literals":[null,false,true],"string":"Hello, 世界","number":3.14159,"arrayN0":[],"arrayN1":[0],"arrayN2":[0,1],"objectN0":{},"objectN1":{"0":0},"objectN2":{"0":0,"1":1}}`,
	outEscaped: `{"\u006c\u0069\u0074\u0065\u0072\u0061\u006c\u0073":[null,false,true],"\u0073\u0074\u0072\u0069\u006e\u0067":"\u0048\u0065\u006c\u006c\u006f\u002c\u0020\u4e16\u754c","\u006e\u0075\u006d\u0062\u0065\u0072":3.14159,"\u0061\u0072\u0072\u0061\u0079\u004e\u0030":[],"\u0061\u0072\u0072\u0061\u0079\u004e\u0031":[0],"\u0061\u0072\u0072\u0061\u0079\u004e\u0032":[0,1],"\u006f\u0062\u006a\u0065\u0063\u0074\u004e\u0030":{},"\u006f\u0062\u006a\u0065\u0063\u0074\u004e\u0031":{"\u0030":0},"\u006f\u0062\u006a\u0065\u0063\u0074\u004e\u0032":{"\u0030":0,"\u0031":1}}`,
	outIndented: `{
"literals": [
null,
false,
true
],
"string": "Hello, 世界",
"number": 3.14159,
"arrayN0": [],
"arrayN1": [
0
],
"arrayN2": [
0,
1
],
"objectN0": {},
"objectN1": {
"0": 0
},
"objectN2": {
"0": 0,
"1": 1
}
}`,
	outCanonicalized: `{"arrayN0":[],"arrayN1":[0],"arrayN2":[0,1],"literals":[null,false,true],"number":3.14159,"objectN0":{},"objectN1":{"0":0},"objectN2":{"0":0,"1":1},"string":"Hello, 世界"}`,
	tokens: []Token{
		BeginObject,
		String("literals"), BeginArray, Null, False, True, EndArray,
		String("string"), String("Hello, 世界"),
		String("number"), Float(3.14159),
		String("arrayN0"), BeginArray, EndArray,
		String("arrayN1"), BeginArray, Uint(0), EndArray,
		String("arrayN2"), BeginArray, Uint(0), Uint(1), EndArray,
		String("objectN0"), BeginObject, EndObject,
		String("objectN1"), BeginObject, String("0"), Uint(0), EndObject,
		String("objectN2"), BeginObject, String("0"), Uint(0), String("1"), Uint(1), EndObject,
		EndObject,
	},
	pointers: []Pointer{
		"",
		"/literals", "/literals",
		"/literals/0",
		"/literals/1",
		"/literals/2",
		"/literals",
		"/string", "/string",
		"/number", "/number",
		"/arrayN0", "/arrayN0", "/arrayN0",
		"/arrayN1", "/arrayN1",
		"/arrayN1/0",
		"/arrayN1",
		"/arrayN2", "/arrayN2",
		"/arrayN2/0",
		"/arrayN2/1",
		"/arrayN2",
		"/objectN0", "/objectN0", "/objectN0",
		"/objectN1", "/objectN1",
		"/objectN1/0", "/objectN1/0",
		"/objectN1",
		"/objectN2", "/objectN2",
		"/objectN2/0", "/objectN2/0",
		"/objectN2/1", "/objectN2/1",
		"/objectN2",
		"",
	},
}}
// TestCoderInterleaved checks that token-based and value-based calls can be
// mixed freely on the same Decoder and Encoder. The only error condition is
// trying to operate on a raw value when the next token is an end of object
// or array.
//
// Every entry of coderTestdata is run once per mode:
//   - TokenFirst and ValueFirst alternate between tokens and values,
//     differing only in which comes first;
//   - TokenDelims uses tokens only for object and array delimiters.
func TestCoderInterleaved(t *testing.T) {
	modes := []string{"TokenFirst", "ValueFirst", "TokenDelims"}
	for _, td := range coderTestdata {
		for _, modeName := range modes {
			subtest := path.Join(td.name.Name, modeName)
			t.Run(subtest, func(t *testing.T) {
				testCoderInterleaved(t, td.name.Where, modeName, td)
			})
		}
	}
}
// testCoderInterleaved decodes td.in and re-encodes it, switching between the
// token API (ReadToken/WriteToken) and the value API (ReadValue/WriteValue)
// according to modeName, and then checks that the encoder produced
// td.outCompacted followed by a newline.
//
// where is the position of the test case and prefixes every failure message.
// modeName is one of "TokenFirst", "ValueFirst", or "TokenDelims".
func testCoderInterleaved(t *testing.T, where jsontest.CasePos, modeName string, td coderTestdataEntry) {
	src := strings.NewReader(td.in)
	dst := new(bytes.Buffer)
	dec := NewDecoder(src)
	enc := NewEncoder(dst)

	// tickTock selects the API for the next step: true for tokens,
	// false for values. It flips after every successful step.
	tickTock := modeName == "TokenFirst"
	for {
		if modeName == "TokenDelims" {
			// In this mode the choice depends solely on the next kind.
			switch dec.PeekKind() {
			case '{', '}', '[', ']':
				tickTock = true // as token
			default:
				tickTock = false // as value
			}
		}
		if tickTock {
			tok, err := dec.ReadToken()
			if err != nil {
				if err == io.EOF {
					break
				}
				t.Fatalf("%s: Decoder.ReadToken error: %v", where, err)
			}
			if err := enc.WriteToken(tok); err != nil {
				t.Fatalf("%s: Encoder.WriteToken error: %v", where, err)
			}
		} else {
			val, err := dec.ReadValue()
			if err != nil {
				// It is a syntactic error to call ReadValue
				// at the end of an object or array.
				// Retry as a ReadToken call.
				expectError := dec.PeekKind() == '}' || dec.PeekKind() == ']'
				if expectError {
					// The error under inspection came from ReadValue,
					// so report it as such.
					if !errors.As(err, new(*SyntacticError)) {
						t.Fatalf("%s: Decoder.ReadValue error is %T, want %T", where, err, new(SyntacticError))
					}
					tickTock = !tickTock
					continue
				}
				if err == io.EOF {
					break
				}
				t.Fatalf("%s: Decoder.ReadValue error: %v", where, err)
			}
			if err := enc.WriteValue(val); err != nil {
				t.Fatalf("%s: Encoder.WriteValue error: %v", where, err)
			}
		}
		tickTock = !tickTock
	}

	got := dst.String()
	want := td.outCompacted + "\n"
	if got != want {
		t.Fatalf("%s: output mismatch:\ngot %q\nwant %q", where, got, want)
	}
}
// TestCoderStackPointer writes a fixed sequence of tokens through an Encoder
// and then reads the produced bytes back through a Decoder, checking after
// every token that StackPointer reports the expected JSON pointer.
// The whole sequence runs twice: once with AllowDuplicateNames(false)
// and once with AllowDuplicateNames(true).
func TestCoderStackPointer(t *testing.T) {
	type step struct {
		token Token
		want  Pointer
	}
	steps := []step{
		{Null, ""},

		{BeginArray, ""},
		{EndArray, ""},

		{BeginArray, ""},
		{Bool(true), "/0"},
		{EndArray, ""},

		{BeginArray, ""},
		{String("hello"), "/0"},
		{String("goodbye"), "/1"},
		{EndArray, ""},

		{BeginObject, ""},
		{EndObject, ""},

		{BeginObject, ""},
		{String("hello"), "/hello"},
		{String("goodbye"), "/hello"},
		{EndObject, ""},

		{BeginObject, ""},
		{String(""), "/"},
		{Null, "/"},
		{String("0"), "/0"},
		{Null, "/0"},
		{String("~"), "/~0"},
		{Null, "/~0"},
		{String("/"), "/~1"},
		{Null, "/~1"},
		{String("a//b~/c/~d~~e"), "/a~1~1b~0~1c~1~0d~0~0e"},
		{Null, "/a~1~1b~0~1c~1~0d~0~0e"},
		{String(" \r\n\t"), "/ \r\n\t"},
		{Null, "/ \r\n\t"},
		{EndObject, ""},

		{BeginArray, ""},
		{BeginObject, "/0"},
		{String(""), "/0/"},
		{BeginArray, "/0/"},
		{BeginObject, "/0//0"},
		{String("#"), "/0//0/#"},
		{Null, "/0//0/#"},
		{EndObject, "/0//0"},
		{EndArray, "/0/"},
		{EndObject, "/0"},
		{EndArray, ""},
	}

	for _, allowDupes := range []bool{false, true} {
		name := "RejectDuplicateNames"
		if allowDupes {
			name = "AllowDuplicateNames"
		}
		t.Run(name, func(t *testing.T) {
			buf := new(bytes.Buffer)

			// Encode every token, checking the pointer after each write.
			enc := NewEncoder(buf, AllowDuplicateNames(allowDupes))
			for i, s := range steps {
				if err := enc.WriteToken(s.token); err != nil {
					t.Fatalf("%d: Encoder.WriteToken error: %v", i, err)
				}
				if got := enc.StackPointer(); got != s.want {
					t.Fatalf("%d: Encoder.StackPointer = %v, want %v", i, got, s.want)
				}
			}

			// Decode what was just written; the pointers must match again.
			dec := NewDecoder(buf, AllowDuplicateNames(allowDupes))
			for i, s := range steps {
				if _, err := dec.ReadToken(); err != nil {
					t.Fatalf("%d: Decoder.ReadToken error: %v", i, err)
				}
				if got := dec.StackPointer(); got != s.want {
					t.Fatalf("%d: Decoder.StackPointer = %v, want %v", i, got, s.want)
				}
			}
		})
	}
}
// TestCoderMaxDepth checks how the Decoder and Encoder treat nesting depth.
// Inputs nested exactly maxNestingDepth levels deep are expected to succeed,
// while inputs nested one level deeper are expected to fail with a
// SyntacticError wrapping errMaxDepth. Each scenario is exercised through
// the value API, the token API, and a mix of both.
func TestCoderMaxDepth(t *testing.T) {
	// trimArray removes the outermost '[' and ']' from b.
	trimArray := func(b []byte) []byte { return b[len(`[`) : len(b)-len(`]`)] }
	// maxArrays is maxNestingDepth+1 nested arrays; trimming it once
	// yields maxNestingDepth nested arrays.
	maxArrays := []byte(strings.Repeat(`[`, maxNestingDepth+1) + strings.Repeat(`]`, maxNestingDepth+1))
	// trimObject removes the outermost `{"":` prefix and `}` suffix from b.
	trimObject := func(b []byte) []byte { return b[len(`{"":`) : len(b)-len(`}`)] }
	// maxObjects is maxNestingDepth+1 nested objects, each with a single
	// empty-named member, and an empty string as the innermost value.
	maxObjects := []byte(strings.Repeat(`{"":`, maxNestingDepth+1) + `""` + strings.Repeat(`}`, maxNestingDepth+1))
	t.Run("Decoder", func(t *testing.T) {
		// dec is shared by all subtests below; each one resets it first.
		var dec Decoder
		// checkReadToken reads one token and verifies its kind and error.
		checkReadToken := func(t *testing.T, wantKind Kind, wantErr error) {
			t.Helper()
			if tok, err := dec.ReadToken(); tok.Kind() != wantKind || !equalError(err, wantErr) {
				t.Fatalf("Decoder.ReadToken = (%q, %v), want (%q, %v)", byte(tok.Kind()), err, byte(wantKind), wantErr)
			}
		}
		// checkReadValue reads one value and verifies its length and error.
		checkReadValue := func(t *testing.T, wantLen int, wantErr error) {
			t.Helper()
			if val, err := dec.ReadValue(); len(val) != wantLen || !equalError(err, wantErr) {
				t.Fatalf("Decoder.ReadValue = (%d, %v), want (%d, %v)", len(val), err, wantLen, wantErr)
			}
		}
		// Arrays nested exactly maxNestingDepth deep must decode cleanly.
		t.Run("ArraysValid/SingleValue", func(t *testing.T) {
			dec.s.reset(trimArray(maxArrays), nil)
			checkReadValue(t, maxNestingDepth*len(`[]`), nil)
		})
		t.Run("ArraysValid/TokenThenValue", func(t *testing.T) {
			dec.s.reset(trimArray(maxArrays), nil)
			checkReadToken(t, '[', nil)
			checkReadValue(t, (maxNestingDepth-1)*len(`[]`), nil)
			checkReadToken(t, ']', nil)
		})
		t.Run("ArraysValid/AllTokens", func(t *testing.T) {
			dec.s.reset(trimArray(maxArrays), nil)
			for range maxNestingDepth {
				checkReadToken(t, '[', nil)
			}
			for range maxNestingDepth {
				checkReadToken(t, ']', nil)
			}
		})
		// One more array level is expected to fail at the offset of the
		// '[' that would exceed the limit, i.e. after maxNestingDepth bytes.
		wantErr := &SyntacticError{
			ByteOffset: maxNestingDepth,
			JSONPointer: Pointer(strings.Repeat("/0", maxNestingDepth)),
			Err: errMaxDepth,
		}
		t.Run("ArraysInvalid/SingleValue", func(t *testing.T) {
			dec.s.reset(maxArrays, nil)
			checkReadValue(t, 0, wantErr)
		})
		t.Run("ArraysInvalid/TokenThenValue", func(t *testing.T) {
			dec.s.reset(maxArrays, nil)
			checkReadToken(t, '[', nil)
			checkReadValue(t, 0, wantErr)
		})
		t.Run("ArraysInvalid/AllTokens", func(t *testing.T) {
			dec.s.reset(maxArrays, nil)
			for range maxNestingDepth {
				checkReadToken(t, '[', nil)
			}
			checkReadValue(t, 0, wantErr)
		})
		// Objects nested exactly maxNestingDepth deep must decode cleanly.
		t.Run("ObjectsValid/SingleValue", func(t *testing.T) {
			dec.s.reset(trimObject(maxObjects), nil)
			checkReadValue(t, maxNestingDepth*len(`{"":}`)+len(`""`), nil)
		})
		t.Run("ObjectsValid/TokenThenValue", func(t *testing.T) {
			dec.s.reset(trimObject(maxObjects), nil)
			checkReadToken(t, '{', nil)
			checkReadToken(t, '"', nil)
			checkReadValue(t, (maxNestingDepth-1)*len(`{"":}`)+len(`""`), nil)
			checkReadToken(t, '}', nil)
		})
		t.Run("ObjectsValid/AllTokens", func(t *testing.T) {
			dec.s.reset(trimObject(maxObjects), nil)
			for range maxNestingDepth {
				checkReadToken(t, '{', nil)
				checkReadToken(t, '"', nil)
			}
			// The innermost member value is the empty string.
			checkReadToken(t, '"', nil)
			for range maxNestingDepth {
				checkReadToken(t, '}', nil)
			}
		})
		// One more object level is expected to fail at the offset of the
		// '{' that would exceed the limit.
		wantErr = &SyntacticError{
			ByteOffset: maxNestingDepth * int64(len(`{"":`)),
			JSONPointer: Pointer(strings.Repeat("/", maxNestingDepth)),
			Err: errMaxDepth,
		}
		t.Run("ObjectsInvalid/SingleValue", func(t *testing.T) {
			dec.s.reset(maxObjects, nil)
			checkReadValue(t, 0, wantErr)
		})
		t.Run("ObjectsInvalid/TokenThenValue", func(t *testing.T) {
			dec.s.reset(maxObjects, nil)
			checkReadToken(t, '{', nil)
			checkReadToken(t, '"', nil)
			checkReadValue(t, 0, wantErr)
		})
		t.Run("ObjectsInvalid/AllTokens", func(t *testing.T) {
			dec.s.reset(maxObjects, nil)
			for range maxNestingDepth {
				checkReadToken(t, '{', nil)
				checkReadToken(t, '"', nil)
			}
			// The failed read is expected to yield a token of kind 0.
			checkReadToken(t, 0, wantErr)
		})
	})
	t.Run("Encoder", func(t *testing.T) {
		// enc is shared by all subtests below; each one resets it first,
		// reusing its existing buffer.
		var enc Encoder
		// checkWriteToken writes tok and verifies the returned error.
		checkWriteToken := func(t *testing.T, tok Token, wantErr error) {
			t.Helper()
			if err := enc.WriteToken(tok); !equalError(err, wantErr) {
				t.Fatalf("Encoder.WriteToken = %v, want %v", err, wantErr)
			}
		}
		// checkWriteValue writes val and verifies the returned error.
		checkWriteValue := func(t *testing.T, val Value, wantErr error) {
			t.Helper()
			if err := enc.WriteValue(val); !equalError(err, wantErr) {
				t.Fatalf("Encoder.WriteValue = %v, want %v", err, wantErr)
			}
		}
		wantErr := &SyntacticError{
			ByteOffset: maxNestingDepth,
			JSONPointer: Pointer(strings.Repeat("/0", maxNestingDepth)),
			Err: errMaxDepth,
		}
		// In each subtest the too-deep write is expected to fail and the
		// following write at the maximum depth is expected to succeed on
		// the same encoder.
		t.Run("Arrays/SingleValue", func(t *testing.T) {
			enc.s.reset(enc.s.Buf[:0], nil)
			checkWriteValue(t, maxArrays, wantErr)
			checkWriteValue(t, trimArray(maxArrays), nil)
		})
		t.Run("Arrays/TokenThenValue", func(t *testing.T) {
			enc.s.reset(enc.s.Buf[:0], nil)
			checkWriteToken(t, BeginArray, nil)
			checkWriteValue(t, trimArray(maxArrays), wantErr)
			checkWriteValue(t, trimArray(trimArray(maxArrays)), nil)
			checkWriteToken(t, EndArray, nil)
		})
		t.Run("Arrays/AllTokens", func(t *testing.T) {
			enc.s.reset(enc.s.Buf[:0], nil)
			for range maxNestingDepth {
				checkWriteToken(t, BeginArray, nil)
			}
			checkWriteToken(t, BeginArray, wantErr)
			for range maxNestingDepth {
				checkWriteToken(t, EndArray, nil)
			}
		})
		wantErr = &SyntacticError{
			ByteOffset: maxNestingDepth * int64(len(`{"":`)),
			JSONPointer: Pointer(strings.Repeat("/", maxNestingDepth)),
			Err: errMaxDepth,
		}
		t.Run("Objects/SingleValue", func(t *testing.T) {
			enc.s.reset(enc.s.Buf[:0], nil)
			checkWriteValue(t, maxObjects, wantErr)
			checkWriteValue(t, trimObject(maxObjects), nil)
		})
		t.Run("Objects/TokenThenValue", func(t *testing.T) {
			enc.s.reset(enc.s.Buf[:0], nil)
			checkWriteToken(t, BeginObject, nil)
			checkWriteToken(t, String(""), nil)
			checkWriteValue(t, trimObject(maxObjects), wantErr)
			checkWriteValue(t, trimObject(trimObject(maxObjects)), nil)
			checkWriteToken(t, EndObject, nil)
		})
		t.Run("Objects/AllTokens", func(t *testing.T) {
			enc.s.reset(enc.s.Buf[:0], nil)
			for range maxNestingDepth - 1 {
				checkWriteToken(t, BeginObject, nil)
				checkWriteToken(t, String(""), nil)
			}
			checkWriteToken(t, BeginObject, nil)
			checkWriteToken(t, String(""), nil)
			// The object at depth maxNestingDepth+1 is rejected; an empty
			// string is then written as the member value in its place.
			checkWriteToken(t, BeginObject, wantErr)
			checkWriteToken(t, String(""), nil)
			for range maxNestingDepth {
				checkWriteToken(t, EndObject, nil)
			}
		})
	})
}
// FaultyBuffer is an in-memory byte queue that implements io.Reader and
// io.Writer while misbehaving on purpose: a call may process fewer bytes
// than the caller supplied and may randomly report an error.
type FaultyBuffer struct {
	// B holds the bytes that have been written and not yet read.
	B []byte

	// MaxBytes is the maximum number of bytes read/written per call.
	// A random number of bytes within [0, MaxBytes] are processed.
	// A non-positive value is treated as infinity.
	MaxBytes int

	// MayError specifies whether to randomly provide this error.
	// Even if an error is returned, no bytes are dropped.
	MayError error

	// Rand to use for pseudo-random behavior.
	// If nil, it will be initialized with rand.NewSource(0).
	Rand rand.Source
}

// Read copies a possibly truncated amount of buffered data into b and
// removes it from the buffer. Once the buffer is drained it reports io.EOF:
// always when nothing was copied, otherwise at random. In every other case
// the error is whatever mayError decides.
func (p *FaultyBuffer) Read(b []byte) (int, error) {
	limit := p.mayTruncate(len(b))
	n := copy(b[:limit], p.B)
	p.B = p.B[n:]
	if len(p.B) == 0 {
		if n == 0 || p.randN(2) == 0 {
			return n, io.EOF
		}
	}
	return n, p.mayError()
}

// Write appends a possibly truncated prefix of b to the buffer.
// A truncated write reports io.ErrShortWrite; a complete write reports
// whatever mayError decides.
func (p *FaultyBuffer) Write(b []byte) (int, error) {
	n := p.mayTruncate(len(b))
	p.B = append(p.B, b[:n]...)
	if n < len(b) {
		return n, io.ErrShortWrite
	}
	return n, p.mayError()
}

// mayTruncate may return a value between [0, n].
// With a non-positive MaxBytes it returns n unchanged; otherwise it returns
// a random value no larger than both n and MaxBytes.
func (p *FaultyBuffer) mayTruncate(n int) int {
	if p.MaxBytes <= 0 {
		return n
	}
	limit := p.MaxBytes
	if n < limit {
		limit = n
	}
	return p.randN(limit + 1)
}

// mayError may return a non-nil error.
// It returns MayError at random when MayError is set, and nil otherwise.
func (p *FaultyBuffer) mayError() error {
	if p.MayError == nil {
		return nil
	}
	if p.randN(2) != 0 {
		return nil
	}
	return p.MayError
}

// randN returns the next value of Rand reduced modulo n,
// lazily seeding Rand with rand.NewSource(0) on first use.
func (p *FaultyBuffer) randN(n int) int {
	src := p.Rand
	if src == nil {
		src = rand.NewSource(0)
		p.Rand = src
	}
	return int(src.Int63() % int64(n))
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,107 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
// Package jsontext implements syntactic processing of JSON
// as specified in RFC 4627, RFC 7159, RFC 7493, RFC 8259, and RFC 8785.
// JSON is a simple data interchange format that can represent
// primitive data types such as booleans, strings, and numbers,
// in addition to structured data types such as objects and arrays.
//
// The [Encoder] and [Decoder] types are used to encode or decode
// a stream of JSON tokens or values.
//
// # Tokens and Values
//
// A JSON token refers to the basic structural elements of JSON:
//
// - a JSON literal (i.e., null, true, or false)
// - a JSON string (e.g., "hello, world!")
// - a JSON number (e.g., 123.456)
// - a start or end delimiter for a JSON object (i.e., '{' or '}')
// - a start or end delimiter for a JSON array (i.e., '[' or ']')
//
// A JSON token is represented by the [Token] type in Go. Technically,
// there are two additional structural characters (i.e., ':' and ','),
// but there is no [Token] representation for them since their presence
// can be inferred by the structure of the JSON grammar itself.
// For example, there must always be an implicit colon between
// the name and value of a JSON object member.
//
// A JSON value refers to a complete unit of JSON data:
//
// - a JSON literal, string, or number
// - a JSON object (e.g., `{"name":"value"}`)
// - a JSON array (e.g., `[1,2,3]`)
//
// A JSON value is represented by the [Value] type in Go and is a []byte
// containing the raw textual representation of the value. There is some overlap
// between tokens and values as both contain literals, strings, and numbers.
// However, only a value can represent the entirety of a JSON object or array.
//
// The [Encoder] and [Decoder] types contain methods to read or write the next
// [Token] or [Value] in a sequence. They maintain a state machine to validate
// whether the sequence of JSON tokens and/or values produces valid JSON.
// [Options] may be passed to the [NewEncoder] or [NewDecoder] constructors
// to configure the syntactic behavior of encoding and decoding.
//
// # Terminology
//
// The terms "encode" and "decode" are used for syntactic functionality
// that is concerned with processing JSON based on its grammar, and
// the terms "marshal" and "unmarshal" are used for semantic functionality
// that determines the meaning of JSON values as Go values and vice-versa.
// This package (i.e., [jsontext]) deals with JSON at a syntactic layer,
// while [encoding/json/v2] deals with JSON at a semantic layer.
// The goal is to provide a clear distinction between functionality that
// is purely concerned with encoding versus that of marshaling.
// For example, one can directly encode a stream of JSON tokens without
// needing to marshal a concrete Go value representing them.
// Similarly, one can decode a stream of JSON tokens without
// needing to unmarshal them into a concrete Go value.
//
// This package uses JSON terminology when discussing JSON, which may differ
// from related concepts in Go or elsewhere in computing literature.
//
// - a JSON "object" refers to an unordered collection of name/value members.
// - a JSON "array" refers to an ordered sequence of elements.
// - a JSON "value" refers to either a literal (i.e., null, false, or true),
// string, number, object, or array.
//
// See RFC 8259 for more information.
//
// # Specifications
//
// Relevant specifications include RFC 4627, RFC 7159, RFC 7493, RFC 8259,
// and RFC 8785. Each RFC is generally a stricter subset of another RFC.
// In increasing order of strictness:
//
// - RFC 4627 and RFC 7159 do not require (but recommend) the use of UTF-8
// and also do not require (but recommend) that object names be unique.
// - RFC 8259 requires the use of UTF-8,
// but does not require (but recommends) that object names be unique.
// - RFC 7493 requires the use of UTF-8
// and also requires that object names be unique.
// - RFC 8785 defines a canonical representation. It requires the use of UTF-8
// and also requires that object names be unique and in a specific ordering.
// It specifies exactly how strings and numbers must be formatted.
//
// The primary difference between RFC 4627 and RFC 7159 is that the former
// restricted top-level values to only JSON objects and arrays, while
// RFC 7159 and subsequent RFCs permit top-level values to additionally be
// JSON nulls, booleans, strings, or numbers.
//
// By default, this package operates on RFC 7493, but can be configured
// to operate according to the other RFC specifications.
// RFC 7493 is a stricter subset of RFC 8259 and fully compliant with it.
// In particular, it makes specific choices about behavior that RFC 8259
// leaves as undefined in order to ensure greater interoperability.
package jsontext
// requireKeyedLiterals can be embedded in a struct to require keyed literals.
// It is an empty struct type and therefore declares no fields of its own.
type requireKeyedLiterals struct{}
// nonComparable can be embedded in a struct to prevent comparability.
// Function values are not comparable in Go, so neither is an array of them
// nor any struct containing such an array; the array has length zero and
// therefore holds no elements.
type nonComparable [0]func()

View File

@ -0,0 +1,972 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"io"
"math/bits"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonopts"
"encoding/json/internal/jsonwire"
)
// Encoder is a streaming encoder from raw JSON tokens and values.
// It is used to write a stream of top-level JSON values,
// each terminated with a newline character.
//
// [Encoder.WriteToken] and [Encoder.WriteValue] calls may be interleaved.
// For example, the following JSON value:
//
//	{"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}}
//
// can be composed with the following calls (ignoring errors for brevity):
//
//	e.WriteToken(BeginObject)        // {
//	e.WriteToken(String("name"))     // "name"
//	e.WriteToken(String("value"))    // "value"
//	e.WriteValue(Value(`"array"`))   // "array"
//	e.WriteToken(BeginArray)         // [
//	e.WriteToken(Null)               // null
//	e.WriteToken(False)              // false
//	e.WriteValue(Value("true"))      // true
//	e.WriteToken(Float(3.14159))     // 3.14159
//	e.WriteToken(EndArray)           // ]
//	e.WriteValue(Value(`"object"`))  // "object"
//	e.WriteValue(Value(`{"k":"v"}`)) // {"k":"v"}
//	e.WriteToken(EndObject)          // }
//
// The above is one of many possible sequences of calls and
// may not represent the most sensible method to call for any given token/value.
// For example, it is probably more common to call [Encoder.WriteToken] with a string
// for object names.
type Encoder struct {
	s encoderState // low-level encoder state; see encoderState
}
// encoderState is the low-level state of Encoder.
// It has exported fields and methods for use by the "json" package.
type encoderState struct {
	state
	encodeBuffer // output buffer and destination writer
	jsonopts.Struct // configured options; exposed through Encoder.Options
	SeenPointers map[any]struct{} // only used when marshaling; identical to json.seenPointers
}
// encodeBuffer is a buffer split into 2 segments:
//
// - Buf[0:len(Buf)] // written (but unflushed) portion of the buffer
// - Buf[len(Buf):cap(Buf)] // unused portion of the buffer
type encodeBuffer struct {
	Buf []byte // may alias wr if it is a bytes.Buffer
	// baseOffset is added to len(Buf) to obtain the absolute offset
	// relative to the start of io.Writer stream.
	baseOffset int64
	// wr is the destination writer; several methods treat a nil wr
	// as "no underlying writer".
	wr io.Writer
	// maxValue is the approximate maximum Value size passed to WriteValue.
	maxValue int
	// unusedCache is the buffer returned by the UnusedBuffer method.
	unusedCache []byte
	// bufStats is statistics about buffer utilization.
	// It is only used with pooled encoders in pools.go.
	bufStats bufferStatistics
}
// NewEncoder returns a new streaming encoder that writes to w and is
// configured with the provided options.
// The encoder flushes its internal buffer when the buffer is sufficiently
// full or when a top-level value has been written.
//
// If w is a [bytes.Buffer], then the encoder appends directly into the buffer
// without copying the contents from an intermediate buffer.
//
// NewEncoder panics if w is nil, since it delegates to [Encoder.Reset].
func NewEncoder(w io.Writer, opts ...Options) *Encoder {
	var enc Encoder
	enc.Reset(w, opts...)
	return &enc
}
// Reset resets an encoder such that it is writing afresh to w and
// configured with the provided options. Reset must not be called on
// an Encoder passed to the [encoding/json/v2.MarshalerTo.MarshalJSONTo] method
// or the [encoding/json/v2.MarshalToFunc] function.
//
// Reset panics if e is nil, if w is nil, or if e is currently being used
// within such a marshal call.
func (e *Encoder) Reset(w io.Writer, opts ...Options) {
	if e == nil {
		panic("jsontext: invalid nil Encoder")
	}
	if w == nil {
		panic("jsontext: invalid nil io.Writer")
	}
	if e.s.Flags.Get(jsonflags.WithinArshalCall) {
		panic("jsontext: cannot reset Encoder passed to json.MarshalerTo")
	}
	e.s.reset(nil, w, opts...)
}
// reset reinitializes the encoder state to write into b and flush to w,
// then applies opts. When multiline output is requested, any formatting
// flag that the caller left unspecified receives its multiline default.
func (e *encoderState) reset(b []byte, w io.Writer, opts ...Options) {
	e.state.reset()

	// Replace the buffer bookkeeping wholesale, keeping only the statistics.
	stats := e.bufStats
	e.encodeBuffer = encodeBuffer{Buf: b, wr: w, bufStats: stats}
	if bb, isBuffer := w.(*bytes.Buffer); isBuffer && bb != nil {
		e.Buf = bb.Bytes()[bb.Len():] // alias the unused buffer of bb
	}

	// Join into a scratch struct first to avoid mutating e.Struct
	// in case it is itself part of opts.
	var joined jsonopts.Struct
	joined.Join(opts...)
	e.Struct = joined

	if !e.Flags.Get(jsonflags.Multiline) {
		return
	}
	if !e.Flags.Has(jsonflags.SpaceAfterColon) {
		e.Flags.Set(jsonflags.SpaceAfterColon | 1)
	}
	if !e.Flags.Has(jsonflags.SpaceAfterComma) {
		e.Flags.Set(jsonflags.SpaceAfterComma | 0)
	}
	if !e.Flags.Has(jsonflags.Indent) {
		e.Flags.Set(jsonflags.Indent | 1)
		e.Indent = "\t"
	}
}
// Options returns the options used to construct the encoder and
// may additionally contain semantic options passed to a
// [encoding/json/v2.MarshalEncode] call.
//
// If operating within
// a [encoding/json/v2.MarshalerTo.MarshalJSONTo] method call or
// a [encoding/json/v2.MarshalToFunc] function call,
// then the returned options are only valid within the call.
func (e *Encoder) Options() Options {
	// The returned value points at the encoder's own option struct
	// rather than at a copy.
	return &e.s.Struct
}
// NeedFlush determines whether to flush at this point.
// It reports true only when there is an underlying writer and either
// the encoder is at the top level (depth of 1) or
// more than three quarters of the buffer capacity is in use.
func (e *encoderState) NeedFlush() bool {
	// NOTE: This function is carefully written to be inlinable.

	// Avoid flushing if e.wr is nil since there is no underlying writer.
	// Flush if less than 25% of the capacity remains.
	// Flushing at some constant fraction ensures that the buffer stops growing
	// so long as the largest Token or Value fits within that unused capacity.
	return e.wr != nil && (e.Tokens.Depth() == 1 || len(e.Buf) > 3*cap(e.Buf)/4)
}
// Flush flushes the buffer to the underlying io.Writer.
// It may append a trailing newline after the top-level value.
//
// Flush does nothing and returns nil when there is no underlying writer or
// when avoidFlush reports that the buffered tail may still need unwriting.
// A failed write is returned as an *ioError, in which case the bytes
// that the writer did not accept stay in the buffer.
func (e *encoderState) Flush() error {
	if e.wr == nil || e.avoidFlush() {
		return nil
	}

	// In streaming mode, always emit a newline after the top-level value.
	if e.Tokens.Depth() == 1 && !e.Flags.Get(jsonflags.OmitTopLevelNewline) {
		e.Buf = append(e.Buf, '\n')
	}

	// Inform objectNameStack that we are about to flush the buffer content.
	e.Names.copyQuotedBuffer(e.Buf)

	// Specialize bytes.Buffer for better performance.
	if bb, ok := e.wr.(*bytes.Buffer); ok {
		// If e.buf already aliases the internal buffer of bb,
		// then the Write call simply increments the internal offset,
		// otherwise Write operates as expected.
		// See https://go.dev/issue/42986.
		n, _ := bb.Write(e.Buf) // never fails unless bb is nil
		e.baseOffset += int64(n)

		// If the internal buffer of bytes.Buffer is too small,
		// append operations elsewhere in the Encoder may grow the buffer.
		// This would be semantically correct, but hurts performance.
		// As such, ensure 25% of the current length is always available
		// to reduce the probability that other appends must allocate.
		if avail := bb.Available(); avail < bb.Len()/4 {
			bb.Grow(avail + 1)
		}

		// Re-alias the (possibly reallocated) unused tail of bb.
		e.Buf = bb.AvailableBuffer()
		return nil
	}

	// Flush the internal buffer to the underlying io.Writer.
	n, err := e.wr.Write(e.Buf)
	e.baseOffset += int64(n)
	if err != nil {
		// In the event of an error, preserve the unflushed portion.
		// Thus, write errors aren't fatal so long as the io.Writer
		// maintains consistent state after errors.
		if n > 0 {
			// Shift the unwritten bytes to the front of the buffer.
			e.Buf = e.Buf[:copy(e.Buf, e.Buf[n:])]
		}
		return &ioError{action: "write", err: err}
	}
	e.Buf = e.Buf[:0]

	// Check whether to grow the buffer.
	// Note that cap(e.buf) may already exceed maxBufferSize since
	// an append elsewhere already grew it to store a large token.
	const maxBufferSize = 4 << 10
	const growthSizeFactor = 2 // higher value is faster
	const growthRateFactor = 2 // higher value is slower
	// By default, grow if below the maximum buffer size.
	grow := cap(e.Buf) <= maxBufferSize/growthSizeFactor
	// Growing can be expensive, so only grow
	// if a sufficient number of bytes have been processed.
	grow = grow && int64(cap(e.Buf)) < e.previousOffsetEnd()/growthRateFactor
	if grow {
		e.Buf = make([]byte, 0, cap(e.Buf)*growthSizeFactor)
	}

	return nil
}
// offsetAt converts pos, an index into the unflushed buffer,
// into an absolute offset relative to the start of the output stream.
func (e *encodeBuffer) offsetAt(pos int) int64 { return e.baseOffset + int64(pos) }
// previousOffsetEnd returns the absolute offset just past the last
// buffered byte: the already-flushed byte count plus len(e.Buf).
func (e *encodeBuffer) previousOffsetEnd() int64 { return e.baseOffset + int64(len(e.Buf)) }
// unflushedBuffer returns the written but not yet flushed bytes.
// The result is e.Buf itself, not a copy.
func (e *encodeBuffer) unflushedBuffer() []byte { return e.Buf }
// avoidFlush reports whether flushing must be postponed so that the buffer
// always retains enough bytes to unwrite the last object member
// should that member turn out to be empty.
func (e *encoderState) avoidFlush() bool {
	// Right after BeginObject or BeginArray it is not yet known
	// whether the object or array will end up being empty.
	if e.Tokens.Last.Length() == 0 {
		return true
	}
	// Right before an object value it is not yet known
	// whether that value will end up being empty.
	if e.Tokens.Last.needObjectValue() {
		return true
	}
	// Otherwise only a just-written object value can matter.
	if !e.Tokens.Last.NeedObjectName() || len(e.Buf) < 2 {
		return false
	}
	// Hold back if the buffer ends with the last two bytes of an empty value.
	switch string(e.Buf[len(e.Buf)-2:]) {
	case `ll`, `""`, `{}`, `[]`:
		return true
	}
	return false
}
// UnwriteEmptyObjectMember unwrites the last object member if it is empty
// and reports whether it performed an unwrite operation.
//
// A value counts as empty when the buffer ends with `null`, `""`, `{}`,
// or `[]`. If prevName is non-nil, it is installed as the last unquoted name
// after the member has been removed.
//
// It panics unless the encoder is inside an object and has just written
// a member value.
func (e *encoderState) UnwriteEmptyObjectMember(prevName *string) bool {
	if last := e.Tokens.Last; !last.isObject() || !last.NeedObjectName() || last.Length() == 0 {
		panic("BUG: must be called on an object after writing a value")
	}

	// The flushing logic is modified to never flush a trailing empty value.
	// The encoder never writes trailing whitespace eagerly.
	b := e.unflushedBuffer()

	// Detect whether the last value was empty.
	// Three bytes are required so that the byte before a `""` suffix
	// can be inspected below.
	var n int
	if len(b) >= 3 {
		switch string(b[len(b)-2:]) {
		case "ll": // last two bytes of `null`
			n = len(`null`)
		case `""`:
			// It is possible for a non-empty string to have `""` as a suffix
			// if the second to the last quote was escaped.
			if b[len(b)-3] == '\\' {
				return false // e.g., `"\""` is not empty
			}
			n = len(`""`)
		case `{}`:
			n = len(`{}`)
		case `[]`:
			n = len(`[]`)
		}
	}
	if n == 0 {
		return false
	}

	// Unwrite the value, whitespace, colon, name, whitespace, and comma.
	b = b[:len(b)-n]
	b = jsonwire.TrimSuffixWhitespace(b)
	b = jsonwire.TrimSuffixByte(b, ':')
	b = jsonwire.TrimSuffixString(b)
	b = jsonwire.TrimSuffixWhitespace(b)
	b = jsonwire.TrimSuffixByte(b, ',')
	e.Buf = b // store back truncated unflushed buffer

	// Undo state changes.
	e.Tokens.Last.decrement() // for object member value
	e.Tokens.Last.decrement() // for object member name
	if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
		if e.Tokens.Last.isActiveNamespace() {
			e.Namespaces.Last().removeLast()
		}
	}
	e.Names.clearLast()
	if prevName != nil {
		e.Names.copyQuotedBuffer(e.Buf) // required by objectNameStack.replaceLastUnquotedName
		e.Names.replaceLastUnquotedName(*prevName)
	}
	return true
}
// UnwriteOnlyObjectMemberName removes the sole object member name from
// the buffer and returns that name in unquoted form.
//
// It panics unless the encoder is inside an object holding exactly one
// entry, i.e. just after the first name was written.
func (e *encoderState) UnwriteOnlyObjectMemberName() string {
	if top := e.Tokens.Last; !top.isObject() || top.Length() != 1 {
		panic("BUG: must be called on an object after writing first name")
	}

	// Split the buffer into what precedes the name and the quoted name,
	// then drop the name along with any whitespace before it.
	head := jsonwire.TrimSuffixString(e.Buf)
	quoted := e.Buf[len(head):]
	hasNoEscapes := bytes.IndexByte(quoted, '\\') < 0
	name := string(jsonwire.UnquoteMayCopy(quoted, hasNoEscapes))
	e.Buf = jsonwire.TrimSuffixWhitespace(head)

	// Roll back the state that writing the name had advanced.
	e.Tokens.Last.decrement()
	if !e.Flags.Get(jsonflags.AllowDuplicateNames) && e.Tokens.Last.isActiveNamespace() {
		e.Namespaces.Last().removeLast()
	}
	e.Names.clearLast()
	return name
}
// WriteToken writes the next token and advances the internal write offset.
//
// The provided token kind must be consistent with the JSON grammar.
// For example, it is an error to provide a number when the encoder
// is expecting an object name (which is always a string), or
// to provide an end object delimiter when the encoder is finishing an array.
// If the provided token is invalid, then it reports a [SyntacticError] and
// the internal state remains unchanged. The offset reported
// in [SyntacticError] will be relative to the [Encoder.OutputOffset].
func (e *Encoder) WriteToken(t Token) error {
	// All of the work happens on the underlying encoderState.
	return e.s.WriteToken(t)
}
// WriteToken is the implementation of Encoder.WriteToken.
//
// The output is assembled in a local copy of the buffer slice and
// only stored back into e.Buf once the token has been accepted,
// so e.Buf is left as it was when an error is returned.
// After a successful write, the buffer is flushed if NeedFlush reports true.
func (e *encoderState) WriteToken(t Token) error {
	k := t.Kind()
	b := e.Buf // use local variable to avoid mutating e in case of error

	// Append any delimiters or optional whitespace.
	b = e.Tokens.MayAppendDelim(b, k)
	if e.Flags.Get(jsonflags.AnyWhitespace) {
		b = e.appendWhitespace(b, k)
	}
	pos := len(b) // offset before the token

	// Append the token to the output and to the state machine.
	var err error
	switch k {
	case 'n':
		b = append(b, "null"...)
		err = e.Tokens.appendLiteral()
	case 'f':
		b = append(b, "false"...)
		err = e.Tokens.appendLiteral()
	case 't':
		b = append(b, "true"...)
		err = e.Tokens.appendLiteral()
	case '"':
		if b, err = t.appendString(b, &e.Flags); err != nil {
			break
		}
		// A string written where a name is expected is an object name,
		// which is subject to the duplicate-name checks.
		if e.Tokens.Last.NeedObjectName() {
			if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
				if !e.Tokens.Last.isValidNamespace() {
					err = errInvalidNamespace
					break
				}
				if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) {
					err = wrapWithObjectName(ErrDuplicateName, b[pos:])
					break
				}
			}
			e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
		}
		err = e.Tokens.appendString()
	case '0':
		if b, err = t.appendNumber(b, &e.Flags); err != nil {
			break
		}
		err = e.Tokens.appendNumber()
	case '{':
		b = append(b, '{')
		if err = e.Tokens.pushObject(); err != nil {
			break
		}
		e.Names.push()
		if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
			e.Namespaces.push()
		}
	case '}':
		b = append(b, '}')
		if err = e.Tokens.popObject(); err != nil {
			break
		}
		e.Names.pop()
		if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
			e.Namespaces.pop()
		}
	case '[':
		b = append(b, '[')
		err = e.Tokens.pushArray()
	case ']':
		b = append(b, ']')
		err = e.Tokens.popArray()
	default:
		err = errInvalidToken
	}
	if err != nil {
		return wrapSyntacticError(e, err, pos, +1)
	}

	// Finish off the buffer and store it back into e.
	e.Buf = b
	if e.NeedFlush() {
		return e.Flush()
	}
	return nil
}
// AppendRaw appends either a raw string (without double quotes) or number.
// Specify safeASCII if the string output is guaranteed to be ASCII
// without any characters (including '<', '>', and '&') that need escaping,
// otherwise this will validate whether the string needs escaping.
// The appended bytes for a JSON number must be valid.
//
// The kind k must be '"' or '0'; any other kind panics.
// An error returned by appendFn is passed through to the caller unwrapped,
// whereas errors detected by the encoder itself are wrapped
// with wrapSyntacticError.
//
// This is a specialized implementation of Encoder.WriteValue
// that allows appending directly into the buffer.
// It is only called from marshal logic in the "json" package.
func (e *encoderState) AppendRaw(k Kind, safeASCII bool, appendFn func([]byte) ([]byte, error)) error {
	b := e.Buf // use local variable to avoid mutating e in case of error

	// Append any delimiters or optional whitespace.
	b = e.Tokens.MayAppendDelim(b, k)
	if e.Flags.Get(jsonflags.AnyWhitespace) {
		b = e.appendWhitespace(b, k)
	}
	pos := len(b) // offset before the token

	var err error
	switch k {
	case '"':
		// Append directly into the encoder buffer by assuming that
		// most of the time none of the characters need escaping.
		b = append(b, '"')
		if b, err = appendFn(b); err != nil {
			return err
		}
		b = append(b, '"')

		// Check whether we need to escape the string and if necessary
		// copy it to a scratch buffer and then escape it back.
		isVerbatim := safeASCII || !jsonwire.NeedEscape(b[pos+len(`"`):len(b)-len(`"`)])
		if !isVerbatim {
			var err error
			b2 := append(e.unusedCache, b[pos+len(`"`):len(b)-len(`"`)]...)
			b, err = jsonwire.AppendQuote(b[:pos], string(b2), &e.Flags)
			e.unusedCache = b2[:0]
			if err != nil {
				return wrapSyntacticError(e, err, pos, +1)
			}
		}

		// Update the state machine.
		if e.Tokens.Last.NeedObjectName() {
			if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
				if !e.Tokens.Last.isValidNamespace() {
					// err is still nil here, so name the failure explicitly,
					// just as WriteToken and WriteValue do.
					return wrapSyntacticError(e, errInvalidNamespace, pos, +1)
				}
				if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], isVerbatim) {
					err = wrapWithObjectName(ErrDuplicateName, b[pos:])
					return wrapSyntacticError(e, err, pos, +1)
				}
			}
			e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
		}
		if err := e.Tokens.appendString(); err != nil {
			return wrapSyntacticError(e, err, pos, +1)
		}
	case '0':
		if b, err = appendFn(b); err != nil {
			return err
		}
		if err := e.Tokens.appendNumber(); err != nil {
			return wrapSyntacticError(e, err, pos, +1)
		}
	default:
		panic("BUG: invalid kind")
	}

	// Finish off the buffer and store it back into e.
	e.Buf = b
	if e.NeedFlush() {
		return e.Flush()
	}
	return nil
}
// WriteValue writes the next raw value and advances the internal write offset.
// The Encoder does not simply copy the provided value verbatim, but
// parses it to ensure that it is syntactically valid and reformats it
// according to how the Encoder is configured to format whitespace and strings.
// If [AllowInvalidUTF8] is specified, then any invalid UTF-8 is mangled
// as the Unicode replacement character, U+FFFD.
//
// The provided value kind must be consistent with the JSON grammar
// (see examples on [Encoder.WriteToken]). If the provided value is invalid,
// then it reports a [SyntacticError] and the internal state remains unchanged.
// The offset reported in [SyntacticError] will be relative to the
// [Encoder.OutputOffset] plus the offset into v of any encountered syntax error.
func (e *Encoder) WriteValue(v Value) error {
	// All of the work happens on the underlying encoderState.
	return e.s.WriteValue(v)
}
// WriteValue is the implementation of Encoder.WriteValue.
//
// The value is reformatted into a local copy of the buffer slice, which is
// only stored back into e.Buf once the whole value has been validated and
// accepted by the state machine. Leading and trailing whitespace in v is
// skipped; anything else following the value is an error.
// After a successful write, the buffer is flushed if NeedFlush reports true.
func (e *encoderState) WriteValue(v Value) error {
	e.maxValue |= len(v) // bitwise OR is a fast approximation of max

	k := v.Kind()
	b := e.Buf // use local variable to avoid mutating e in case of error

	// Append any delimiters or optional whitespace.
	b = e.Tokens.MayAppendDelim(b, k)
	if e.Flags.Get(jsonflags.AnyWhitespace) {
		b = e.appendWhitespace(b, k)
	}
	pos := len(b) // offset before the value

	// Append the value to the output.
	// n counts the bytes of v consumed so far.
	var n int
	n += jsonwire.ConsumeWhitespace(v[n:])
	b, m, err := e.reformatValue(b, v[n:], e.Tokens.Depth())
	if err != nil {
		return wrapSyntacticError(e, err, pos+n+m, +1)
	}
	n += m
	n += jsonwire.ConsumeWhitespace(v[n:])
	if len(v) > n {
		err = jsonwire.NewInvalidCharacterError(v[n:], "after top-level value")
		return wrapSyntacticError(e, err, pos+n, 0)
	}

	// Append the kind to the state machine.
	switch k {
	case 'n', 'f', 't':
		err = e.Tokens.appendLiteral()
	case '"':
		// A string written where a name is expected is an object name,
		// which is subject to the duplicate-name checks.
		if e.Tokens.Last.NeedObjectName() {
			if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
				if !e.Tokens.Last.isValidNamespace() {
					err = errInvalidNamespace
					break
				}
				if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) {
					err = wrapWithObjectName(ErrDuplicateName, b[pos:])
					break
				}
			}
			e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
		}
		err = e.Tokens.appendString()
	case '0':
		err = e.Tokens.appendNumber()
	case '{':
		// The whole object was written at once, so the state machine
		// sees a push that is immediately followed by a pop.
		if err = e.Tokens.pushObject(); err != nil {
			break
		}
		if err = e.Tokens.popObject(); err != nil {
			panic("BUG: popObject should never fail immediately after pushObject: " + err.Error())
		}
		if e.Flags.Get(jsonflags.ReorderRawObjects) {
			mustReorderObjects(b[pos:])
		}
	case '[':
		// The whole array was written at once; see the object case above.
		if err = e.Tokens.pushArray(); err != nil {
			break
		}
		if err = e.Tokens.popArray(); err != nil {
			panic("BUG: popArray should never fail immediately after pushArray: " + err.Error())
		}
		if e.Flags.Get(jsonflags.ReorderRawObjects) {
			mustReorderObjects(b[pos:])
		}
	}
	if err != nil {
		return wrapSyntacticError(e, err, pos, +1)
	}

	// Finish off the buffer and store it back into e.
	e.Buf = b
	if e.NeedFlush() {
		return e.Flush()
	}
	return nil
}
// CountNextDelimWhitespace returns how many delimiter and whitespace bytes
// would be emitted before the upcoming token, assuming it is a JSON value.
// This method is used for error reporting at the semantic layer.
func (e *encoderState) CountNextDelimWhitespace() (n int) {
	const upcoming = Kind('"') // arbitrary kind as next JSON value

	delim := e.Tokens.needDelim(upcoming)
	if delim > 0 {
		n += len(",") | len(":") // either delimiter is a single byte
	}

	// After a colon only an optional space can follow.
	if delim == ':' {
		if e.Flags.Get(jsonflags.SpaceAfterColon) {
			n += len(" ")
		}
		return n
	}

	if delim == ',' && e.Flags.Get(jsonflags.SpaceAfterComma) {
		n += len(" ")
	}
	if !e.Flags.Get(jsonflags.Multiline) {
		return n
	}
	// Account for the newline, the prefix, and one indent per extra level.
	if levels := e.Tokens.NeedIndent(upcoming); levels > 0 {
		n += len("\n") + len(e.IndentPrefix) + (levels-1)*len(e.Indent)
	}
	return n
}
// appendWhitespace appends to b the whitespace that goes directly
// in front of the next token and returns the extended slice.
func (e *encoderState) appendWhitespace(b []byte, next Kind) []byte {
	delim := e.Tokens.needDelim(next)

	// After a colon only an optional space can follow.
	if delim == ':' {
		if e.Flags.Get(jsonflags.SpaceAfterColon) {
			b = append(b, ' ')
		}
		return b
	}

	if delim == ',' && e.Flags.Get(jsonflags.SpaceAfterComma) {
		b = append(b, ' ')
	}
	if e.Flags.Get(jsonflags.Multiline) {
		b = e.AppendIndent(b, e.Tokens.NeedIndent(next))
	}
	return b
}
// AppendIndent appends a newline followed by the indentation
// for nesting level n and returns the extended slice.
// The indentation is the prefix followed by one indent string
// per level beyond the first. Nothing is appended when n is zero.
func (e *encoderState) AppendIndent(b []byte, n int) []byte {
	if n == 0 {
		return b
	}
	b = append(b, '\n')
	b = append(b, e.IndentPrefix...)
	for level := 1; level < n; level++ {
		b = append(b, e.Indent...)
	}
	return b
}
// reformatValue parses one JSON value at the start of src and appends it
// to dst, reformatting whitespace and strings as needed.
// It returns the extended dst buffer and the number of consumed input bytes.
// The depth is passed along to the object and array reformatters.
func (e *encoderState) reformatValue(dst []byte, src Value, depth int) ([]byte, int, error) {
	// TODO: Should this update ValueFlags as input?
	if len(src) == 0 {
		return dst, 0, io.ErrUnexpectedEOF
	}

	switch Kind(src[0]).normalize() {
	case 'n':
		if jsonwire.ConsumeNull(src) != 0 {
			return append(dst, "null"...), len("null"), nil
		}
		// Not a complete literal; re-scan to obtain the precise error.
		consumed, err := jsonwire.ConsumeLiteral(src, "null")
		return dst, consumed, err
	case 'f':
		if jsonwire.ConsumeFalse(src) != 0 {
			return append(dst, "false"...), len("false"), nil
		}
		consumed, err := jsonwire.ConsumeLiteral(src, "false")
		return dst, consumed, err
	case 't':
		if jsonwire.ConsumeTrue(src) != 0 {
			return append(dst, "true"...), len("true"), nil
		}
		consumed, err := jsonwire.ConsumeLiteral(src, "true")
		return dst, consumed, err
	case '"':
		simple := jsonwire.ConsumeSimpleString(src)
		if simple > 0 {
			// Simple strings are copied verbatim.
			return append(dst, src[:simple]...), simple, nil
		}
		return jsonwire.ReformatString(dst, src, &e.Flags)
	case '0':
		simple := jsonwire.ConsumeSimpleNumber(src)
		if simple > 0 && !e.Flags.Get(jsonflags.CanonicalizeNumbers) {
			// Simple numbers are copied verbatim.
			return append(dst, src[:simple]...), simple, nil
		}
		return jsonwire.ReformatNumber(dst, src, &e.Flags)
	case '{':
		return e.reformatObject(dst, src, depth)
	case '[':
		return e.reformatArray(dst, src, depth)
	}
	return dst, 0, jsonwire.NewInvalidCharacterError(src, "at start of value")
}
// reformatObject parses a JSON object from the start of src and
// appends it to the end of dst, reformatting whitespace and strings as needed.
// It returns the extended dst buffer and the number of consumed input bytes.
//
// src must start with '{', otherwise the function panics.
// It reports errMaxDepth when depth equals maxNestingDepth+1.
// Unless duplicate names are allowed, a repeated member name is reported
// as ErrDuplicateName. Errors that occur after a name has been parsed
// are wrapped with that name.
func (e *encoderState) reformatObject(dst []byte, src Value, depth int) ([]byte, int, error) {
	// Append object start.
	if len(src) == 0 || src[0] != '{' {
		panic("BUG: reformatObject must be called with a buffer that starts with '{'")
	} else if depth == maxNestingDepth+1 {
		return dst, 0, errMaxDepth
	}
	dst = append(dst, '{')
	n := len("{")

	// Append (possible) object end.
	n += jsonwire.ConsumeWhitespace(src[n:])
	if uint(len(src)) <= uint(n) {
		return dst, n, io.ErrUnexpectedEOF
	}
	if src[n] == '}' {
		dst = append(dst, '}')
		n += len("}")
		return dst, n, nil
	}

	var err error
	var names *objectNamespace
	if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
		// Track the names of this object for the duration of the call.
		e.Namespaces.push()
		defer e.Namespaces.pop()
		names = e.Namespaces.Last()
	}
	depth++
	for {
		// Append optional newline and indentation.
		if e.Flags.Get(jsonflags.Multiline) {
			dst = e.AppendIndent(dst, depth)
		}

		// Append object name.
		n += jsonwire.ConsumeWhitespace(src[n:])
		if uint(len(src)) <= uint(n) {
			return dst, n, io.ErrUnexpectedEOF
		}
		m := jsonwire.ConsumeSimpleString(src[n:])
		isVerbatim := m > 0
		if isVerbatim {
			dst = append(dst, src[n:n+m]...)
		} else {
			dst, m, err = jsonwire.ReformatString(dst, src[n:], &e.Flags)
			if err != nil {
				return dst, n + m, err
			}
		}
		quotedName := src[n : n+m]
		if !e.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(quotedName, isVerbatim) {
			// The reported offset is that of the start of the duplicate name.
			return dst, n, wrapWithObjectName(ErrDuplicateName, quotedName)
		}
		n += m

		// Append colon.
		n += jsonwire.ConsumeWhitespace(src[n:])
		if uint(len(src)) <= uint(n) {
			return dst, n, wrapWithObjectName(io.ErrUnexpectedEOF, quotedName)
		}
		if src[n] != ':' {
			err = jsonwire.NewInvalidCharacterError(src[n:], "after object name (expecting ':')")
			return dst, n, wrapWithObjectName(err, quotedName)
		}
		dst = append(dst, ':')
		n += len(":")
		if e.Flags.Get(jsonflags.SpaceAfterColon) {
			dst = append(dst, ' ')
		}

		// Append object value.
		n += jsonwire.ConsumeWhitespace(src[n:])
		if uint(len(src)) <= uint(n) {
			return dst, n, wrapWithObjectName(io.ErrUnexpectedEOF, quotedName)
		}
		dst, m, err = e.reformatValue(dst, src[n:], depth)
		if err != nil {
			return dst, n + m, wrapWithObjectName(err, quotedName)
		}
		n += m

		// Append comma or object end.
		n += jsonwire.ConsumeWhitespace(src[n:])
		if uint(len(src)) <= uint(n) {
			return dst, n, io.ErrUnexpectedEOF
		}
		switch src[n] {
		case ',':
			dst = append(dst, ',')
			if e.Flags.Get(jsonflags.SpaceAfterComma) {
				dst = append(dst, ' ')
			}
			n += len(",")
			continue
		case '}':
			// The closing brace is indented one level less than the members.
			if e.Flags.Get(jsonflags.Multiline) {
				dst = e.AppendIndent(dst, depth-1)
			}
			dst = append(dst, '}')
			n += len("}")
			return dst, n, nil
		default:
			return dst, n, jsonwire.NewInvalidCharacterError(src[n:], "after object value (expecting ',' or '}')")
		}
	}
}
// reformatArray parses a JSON array from the start of src and
// appends it to the end of dst, reformatting whitespace and strings as needed.
// It returns the extended dst buffer and the number of consumed input bytes.
//
// src must start with '[', otherwise the function panics.
// It reports errMaxDepth when depth equals maxNestingDepth+1.
// An error from reformatting an element is wrapped with
// the zero-based index of that element.
func (e *encoderState) reformatArray(dst []byte, src Value, depth int) ([]byte, int, error) {
	// Append array start.
	if len(src) == 0 || src[0] != '[' {
		panic("BUG: reformatArray must be called with a buffer that starts with '['")
	} else if depth == maxNestingDepth+1 {
		return dst, 0, errMaxDepth
	}
	dst = append(dst, '[')
	n := len("[")

	// Append (possible) array end.
	n += jsonwire.ConsumeWhitespace(src[n:])
	if uint(len(src)) <= uint(n) {
		return dst, n, io.ErrUnexpectedEOF
	}
	if src[n] == ']' {
		dst = append(dst, ']')
		n += len("]")
		return dst, n, nil
	}

	var idx int64 // index of the element currently being reformatted
	var err error
	depth++
	for {
		// Append optional newline and indentation.
		if e.Flags.Get(jsonflags.Multiline) {
			dst = e.AppendIndent(dst, depth)
		}

		// Append array value.
		n += jsonwire.ConsumeWhitespace(src[n:])
		if uint(len(src)) <= uint(n) {
			return dst, n, io.ErrUnexpectedEOF
		}
		var m int
		dst, m, err = e.reformatValue(dst, src[n:], depth)
		if err != nil {
			return dst, n + m, wrapWithArrayIndex(err, idx)
		}
		n += m

		// Append comma or array end.
		n += jsonwire.ConsumeWhitespace(src[n:])
		if uint(len(src)) <= uint(n) {
			return dst, n, io.ErrUnexpectedEOF
		}
		switch src[n] {
		case ',':
			dst = append(dst, ',')
			if e.Flags.Get(jsonflags.SpaceAfterComma) {
				dst = append(dst, ' ')
			}
			n += len(",")
			idx++
			continue
		case ']':
			// The closing bracket is indented one level less than the elements.
			if e.Flags.Get(jsonflags.Multiline) {
				dst = e.AppendIndent(dst, depth-1)
			}
			dst = append(dst, ']')
			n += len("]")
			return dst, n, nil
		default:
			return dst, n, jsonwire.NewInvalidCharacterError(src[n:], "after array value (expecting ',' or ']')")
		}
	}
}
// OutputOffset returns the current output byte offset. It gives the location
// of the next byte immediately after the most recently written token or value.
// The number of bytes actually written to the underlying [io.Writer] may be less
// than this offset due to internal buffering effects.
func (e *Encoder) OutputOffset() int64 {
	// This counts the bytes already flushed plus those still buffered.
	return e.s.previousOffsetEnd()
}
// UnusedBuffer returns a zero-length buffer with a possible non-zero capacity.
// The buffer is meant for building a [Value] that is then handed to
// an immediately succeeding [Encoder.WriteValue] call.
//
// Example usage:
//
//	b := e.UnusedBuffer()
//	b = append(b, '"')
//	b = appendString(b, v) // append the string formatting of v
//	b = append(b, '"')
//	... := e.WriteValue(b)
//
// It is the user's responsibility to ensure that the value is valid JSON.
func (e *Encoder) UnusedBuffer() []byte {
	// NOTE: We don't return e.buf[len(e.buf):cap(e.buf)] since WriteValue would
	// need to take special care to avoid mangling the data while reformatting.
	// WriteValue can't easily identify whether the input Value aliases e.buf
	// without using unsafe.Pointer. Thus, we just return a different buffer.
	// Should this ever alias e.buf, we need to consider how it operates with
	// the specialized performance optimization for bytes.Buffer.

	// Round the approximate maximum value size up to a power of two,
	// with a floor of 64 bytes.
	want := 1 << bits.Len(uint(e.s.maxValue|63))
	if cap(e.s.unusedCache) >= want {
		return e.s.unusedCache
	}
	e.s.unusedCache = make([]byte, 0, want)
	return e.s.unusedCache
}
// StackDepth returns the depth of the state machine for written JSON data.
// Each level on the stack represents a nested JSON object or array.
// It is incremented whenever a [BeginObject] or [BeginArray] token is encountered
// and decremented whenever an [EndObject] or [EndArray] token is encountered.
// The depth is zero-indexed, where zero represents the top-level JSON value.
func (e *Encoder) StackDepth() int {
	// NOTE: Keep in sync with Decoder.StackDepth.
	// The internal depth is one at the top level, hence the subtraction.
	return e.s.Tokens.Depth() - 1
}
// StackIndex reports the kind and length of the given stack level.
// The level must be a number between 0 and [Encoder.StackDepth], inclusive.
// The reported kind is:
//
//   - 0 for a level of zero,
//   - '{' for a level representing a JSON object, and
//   - '[' for a level representing a JSON array.
//
// The reported length is that of the JSON object or array.
// Each name and value in a JSON object is counted separately,
// so the effective number of members would be half the length.
// A complete JSON object must have an even length.
func (e *Encoder) StackIndex(i int) (Kind, int64) {
	// NOTE: Keep in sync with Decoder.StackIndex.
	entry := e.s.Tokens.index(i)
	if i > 0 {
		if entry.isObject() {
			return '{', entry.Length()
		}
		if entry.isArray() {
			return '[', entry.Length()
		}
	}
	return 0, entry.Length()
}
// StackPointer returns a JSON Pointer (RFC 6901) to the most recently written value.
func (e *Encoder) StackPointer() Pointer {
	// The pointer is built into a fresh slice (nil destination).
	// NOTE(review): -1 presumably selects the most recently written value;
	// confirm against state.appendStackPointer.
	return Pointer(e.s.AppendStackPointer(nil, -1))
}
// AppendStackPointer appends a JSON Pointer for the current stack to b
// and returns the extended slice.
// The where argument is passed through unchanged to state.appendStackPointer.
func (e *encoderState) AppendStackPointer(b []byte, where int) []byte {
	// Hand the current buffer to the name stack before building the pointer;
	// presumably names still held only as offsets into e.Buf are resolved here.
	e.Names.copyQuotedBuffer(e.Buf)
	return e.state.appendStackPointer(b, where)
}

View File

@ -0,0 +1,737 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"errors"
"io"
"path"
"slices"
"testing"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsontest"
"encoding/json/internal/jsonwire"
)
// TestEncoder checks that the expected JSON is produced for every test
// case when writing tokens, raw values, or a mix of both,
// in compact as well as indented form.
func TestEncoder(t *testing.T) {
	formatNames := []string{"Compact", "Indented"}
	typeNames := []string{"Token", "Value", "TokenDelims"}
	for _, td := range coderTestdata {
		for _, formatName := range formatNames {
			for _, typeName := range typeNames {
				name := path.Join(td.name.Name, typeName, formatName)
				t.Run(name, func(t *testing.T) {
					testEncoder(t, td.name.Where, formatName, typeName, td)
				})
			}
		}
	}
}
// testEncoder encodes a single test case into a bytes.Buffer using
// the write strategy named by typeName and the formatting named by
// formatName, then compares the output against the expected text.
func testEncoder(t *testing.T, where jsontest.CasePos, formatName, typeName string, td coderTestdataEntry) {
	opts := []Options{jsonflags.OmitTopLevelNewline | 1}
	want := td.outCompacted
	if formatName == "Indented" {
		opts = append(opts, Multiline(true), WithIndentPrefix("\t"), WithIndent(" "))
		// Fall back to the compact form when no indented form is given.
		if td.outIndented != "" {
			want = td.outIndented
		}
	}

	dst := new(bytes.Buffer)
	enc := NewEncoder(dst, opts...)

	switch typeName {
	case "Token":
		var pointers []Pointer
		for _, tok := range td.tokens {
			if err := enc.WriteToken(tok); err != nil {
				t.Fatalf("%s: Encoder.WriteToken error: %v", where, err)
			}
			if td.pointers != nil {
				pointers = append(pointers, enc.StackPointer())
			}
		}
		if !slices.Equal(pointers, td.pointers) {
			t.Fatalf("%s: pointers mismatch:\ngot %q\nwant %q", where, pointers, td.pointers)
		}
	case "Value":
		if err := enc.WriteValue(Value(td.in)); err != nil {
			t.Fatalf("%s: Encoder.WriteValue error: %v", where, err)
		}
	case "TokenDelims":
		// Use WriteToken for object/array delimiters, WriteValue otherwise.
		for _, tok := range td.tokens {
			kind := tok.Kind()
			if kind == '{' || kind == '}' || kind == '[' || kind == ']' {
				if err := enc.WriteToken(tok); err != nil {
					t.Fatalf("%s: Encoder.WriteToken error: %v", where, err)
				}
				continue
			}
			val := Value(tok.String())
			if kind == '"' {
				// Strings must be quoted to form a valid raw value.
				val, _ = jsonwire.AppendQuote(nil, tok.String(), &jsonflags.Flags{})
			}
			if err := enc.WriteValue(val); err != nil {
				t.Fatalf("%s: Encoder.WriteValue error: %v", where, err)
			}
		}
	}

	if got := dst.String(); got != want {
		t.Errorf("%s: output mismatch:\ngot %q\nwant %q", where, got, want)
	}
}
// TestFaultyEncoder verifies that temporary I/O errors are not fatal
// by running every test case against a writer that fails intermittently.
func TestFaultyEncoder(t *testing.T) {
	typeNames := []string{"Token", "Value"}
	for _, td := range coderTestdata {
		for _, typeName := range typeNames {
			name := path.Join(td.name.Name, typeName)
			t.Run(name, func(t *testing.T) {
				testFaultyEncoder(t, td.name.Where, typeName, td)
			})
		}
	}
}
// testFaultyEncoder encodes one test case into a FaultyBuffer and checks
// that what the buffer received plus what the encoder still holds
// unflushed adds up to the expected compact output.
func testFaultyEncoder(t *testing.T, where jsontest.CasePos, typeName string, td coderTestdataEntry) {
	sink := &FaultyBuffer{
		MaxBytes: 1,
		MayError: io.ErrShortWrite,
	}

	// Short writes from the sink are expected; anything else is a failure.
	unexpected := func(err error) bool {
		return err != nil && !errors.Is(err, io.ErrShortWrite)
	}

	// Write all the tokens.
	// Even if the underlying io.Writer may be faulty,
	// writing a valid token or value is guaranteed to at least
	// be appended to the internal buffer.
	// In other words, syntactic errors occur before I/O errors.
	enc := NewEncoder(sink)
	switch typeName {
	case "Token":
		for i, tok := range td.tokens {
			if err := enc.WriteToken(tok); unexpected(err) {
				t.Fatalf("%s: %d: Encoder.WriteToken error: %v", where, i, err)
			}
		}
	case "Value":
		if err := enc.WriteValue(Value(td.in)); unexpected(err) {
			t.Fatalf("%s: Encoder.WriteValue error: %v", where, err)
		}
	}

	gotOutput := string(append(sink.B, enc.s.unflushedBuffer()...))
	if wantOutput := td.outCompacted + "\n"; gotOutput != wantOutput {
		t.Fatalf("%s: output mismatch:\ngot %s\nwant %s", where, gotOutput, wantOutput)
	}
}
// encoderMethodCall describes one call made against an Encoder
// in the table-driven error tests, along with what that call should yield.
type encoderMethodCall struct {
	in          tokOrVal // input for the call; tokOrVal is declared elsewhere (presumably a Token or a Value)
	wantErr     error    // expected error for the call
	wantPointer Pointer  // expected pointer; presumably compared with Encoder.StackPointer after the call
}
// encoderErrorTestdata is the table of cases executed by TestEncoderErrors.
// Each case runs a sequence of writes against one Encoder and
// checks the error of every write as well as the final output.
var encoderErrorTestdata = []struct {
// name identifies the case; name.Name is used as the subtest name and
// name.Where is included in failure messages.
name jsontest.CaseName
// opts are the options passed to NewEncoder for this case.
opts []Options
// calls are performed in order against the same Encoder.
calls []encoderMethodCall
// wantOut is the expected output after all calls, made up of the bytes
// already written to the destination followed by the bytes still buffered
// in the Encoder. Its length must also equal Encoder.OutputOffset.
wantOut string
}{{
name: jsontest.Name("InvalidToken"),
calls: []encoderMethodCall{
{zeroToken, E(errInvalidToken), ""},
},
}, {
name: jsontest.Name("InvalidValue"),
calls: []encoderMethodCall{
{Value(`#`), newInvalidCharacterError("#", "at start of value"), ""},
},
}, {
name: jsontest.Name("InvalidValue/DoubleZero"),
calls: []encoderMethodCall{
{Value(`00`), newInvalidCharacterError("0", "after top-level value").withPos(`0`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedValue"),
calls: []encoderMethodCall{
{zeroValue, E(io.ErrUnexpectedEOF).withPos("", ""), ""},
},
}, {
name: jsontest.Name("TruncatedNull"),
calls: []encoderMethodCall{
{Value(`nul`), E(io.ErrUnexpectedEOF).withPos("nul", ""), ""},
},
}, {
name: jsontest.Name("InvalidNull"),
calls: []encoderMethodCall{
{Value(`nulL`), newInvalidCharacterError("L", "in literal null (expecting 'l')").withPos(`nul`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedFalse"),
calls: []encoderMethodCall{
{Value(`fals`), E(io.ErrUnexpectedEOF).withPos("fals", ""), ""},
},
}, {
name: jsontest.Name("InvalidFalse"),
calls: []encoderMethodCall{
{Value(`falsE`), newInvalidCharacterError("E", "in literal false (expecting 'e')").withPos(`fals`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedTrue"),
calls: []encoderMethodCall{
{Value(`tru`), E(io.ErrUnexpectedEOF).withPos(`tru`, ""), ""},
},
}, {
name: jsontest.Name("InvalidTrue"),
calls: []encoderMethodCall{
{Value(`truE`), newInvalidCharacterError("E", "in literal true (expecting 'e')").withPos(`tru`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedString"),
calls: []encoderMethodCall{
{Value(`"star`), E(io.ErrUnexpectedEOF).withPos(`"star`, ""), ""},
},
}, {
name: jsontest.Name("InvalidString"),
calls: []encoderMethodCall{
{Value(`"ok` + "\x00"), newInvalidCharacterError("\x00", `in string (expecting non-control character)`).withPos(`"ok`, ""), ""},
},
}, {
name: jsontest.Name("ValidString/AllowInvalidUTF8/Token"),
opts: []Options{AllowInvalidUTF8(true)},
calls: []encoderMethodCall{
{String("living\xde\xad\xbe\xef"), nil, ""},
},
wantOut: "\"living\xde\xad\ufffd\ufffd\"\n",
}, {
name: jsontest.Name("ValidString/AllowInvalidUTF8/Value"),
opts: []Options{AllowInvalidUTF8(true)},
calls: []encoderMethodCall{
{Value("\"living\xde\xad\xbe\xef\""), nil, ""},
},
wantOut: "\"living\xde\xad\ufffd\ufffd\"\n",
}, {
name: jsontest.Name("InvalidString/RejectInvalidUTF8"),
opts: []Options{AllowInvalidUTF8(false)},
calls: []encoderMethodCall{
{String("living\xde\xad\xbe\xef"), E(jsonwire.ErrInvalidUTF8), ""},
{Value("\"living\xde\xad\xbe\xef\""), E(jsonwire.ErrInvalidUTF8).withPos("\"living\xde\xad", ""), ""},
{BeginObject, nil, ""},
{String("name"), nil, ""},
{BeginArray, nil, ""},
{String("living\xde\xad\xbe\xef"), E(jsonwire.ErrInvalidUTF8).withPos(`{"name":[`, "/name/0"), ""},
{Value("\"living\xde\xad\xbe\xef\""), E(jsonwire.ErrInvalidUTF8).withPos("{\"name\":[\"living\xde\xad", "/name/0"), ""},
},
wantOut: `{"name":[`,
}, {
name: jsontest.Name("TruncatedNumber"),
calls: []encoderMethodCall{
{Value(`0.`), E(io.ErrUnexpectedEOF).withPos("0", ""), ""},
},
}, {
name: jsontest.Name("InvalidNumber"),
calls: []encoderMethodCall{
{Value(`0.e`), newInvalidCharacterError("e", "in number (expecting digit)").withPos(`0.`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedObject/AfterStart"),
calls: []encoderMethodCall{
{Value(`{`), E(io.ErrUnexpectedEOF).withPos("{", ""), ""},
},
}, {
name: jsontest.Name("TruncatedObject/AfterName"),
calls: []encoderMethodCall{
{Value(`{"X"`), E(io.ErrUnexpectedEOF).withPos(`{"X"`, "/X"), ""},
},
}, {
name: jsontest.Name("TruncatedObject/AfterColon"),
calls: []encoderMethodCall{
{Value(`{"X":`), E(io.ErrUnexpectedEOF).withPos(`{"X":`, "/X"), ""},
},
}, {
name: jsontest.Name("TruncatedObject/AfterValue"),
calls: []encoderMethodCall{
{Value(`{"0":0`), E(io.ErrUnexpectedEOF).withPos(`{"0":0`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedObject/AfterComma"),
calls: []encoderMethodCall{
{Value(`{"0":0,`), E(io.ErrUnexpectedEOF).withPos(`{"0":0,`, ""), ""},
},
}, {
name: jsontest.Name("InvalidObject/MissingColon"),
calls: []encoderMethodCall{
{Value(` { "fizz" "buzz" } `), newInvalidCharacterError("\"", "after object name (expecting ':')").withPos(` { "fizz" `, "/fizz"), ""},
{Value(` { "fizz" , "buzz" } `), newInvalidCharacterError(",", "after object name (expecting ':')").withPos(` { "fizz" `, "/fizz"), ""},
},
}, {
name: jsontest.Name("InvalidObject/MissingComma"),
calls: []encoderMethodCall{
{Value(` { "fizz" : "buzz" "gazz" } `), newInvalidCharacterError("\"", "after object value (expecting ',' or '}')").withPos(` { "fizz" : "buzz" `, ""), ""},
{Value(` { "fizz" : "buzz" : "gazz" } `), newInvalidCharacterError(":", "after object value (expecting ',' or '}')").withPos(` { "fizz" : "buzz" `, ""), ""},
},
}, {
name: jsontest.Name("InvalidObject/ExtraComma"),
calls: []encoderMethodCall{
{Value(` { , } `), newInvalidCharacterError(",", `at start of string (expecting '"')`).withPos(` { `, ""), ""},
{Value(` { "fizz" : "buzz" , } `), newInvalidCharacterError("}", `at start of string (expecting '"')`).withPos(` { "fizz" : "buzz" , `, ""), ""},
},
}, {
name: jsontest.Name("InvalidObject/InvalidName"),
calls: []encoderMethodCall{
{Value(`{ null }`), newInvalidCharacterError("n", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
{Value(`{ false }`), newInvalidCharacterError("f", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
{Value(`{ true }`), newInvalidCharacterError("t", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
{Value(`{ 0 }`), newInvalidCharacterError("0", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
{Value(`{ {} }`), newInvalidCharacterError("{", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
{Value(`{ [] }`), newInvalidCharacterError("[", `at start of string (expecting '"')`).withPos(`{ `, ""), ""},
{BeginObject, nil, ""},
{Null, E(ErrNonStringName).withPos(`{`, ""), ""},
{Value(`null`), E(ErrNonStringName).withPos(`{`, ""), ""},
{False, E(ErrNonStringName).withPos(`{`, ""), ""},
{Value(`false`), E(ErrNonStringName).withPos(`{`, ""), ""},
{True, E(ErrNonStringName).withPos(`{`, ""), ""},
{Value(`true`), E(ErrNonStringName).withPos(`{`, ""), ""},
{Uint(0), E(ErrNonStringName).withPos(`{`, ""), ""},
{Value(`0`), E(ErrNonStringName).withPos(`{`, ""), ""},
{BeginObject, E(ErrNonStringName).withPos(`{`, ""), ""},
{Value(`{}`), E(ErrNonStringName).withPos(`{`, ""), ""},
{BeginArray, E(ErrNonStringName).withPos(`{`, ""), ""},
{Value(`[]`), E(ErrNonStringName).withPos(`{`, ""), ""},
{EndObject, nil, ""},
},
wantOut: "{}\n",
}, {
name: jsontest.Name("InvalidObject/InvalidValue"),
calls: []encoderMethodCall{
{Value(`{ "0": x }`), newInvalidCharacterError("x", `at start of value`).withPos(`{ "0": `, "/0"), ""},
},
}, {
name: jsontest.Name("InvalidObject/MismatchingDelim"),
calls: []encoderMethodCall{
{Value(` { ] `), newInvalidCharacterError("]", `at start of string (expecting '"')`).withPos(` { `, ""), ""},
{Value(` { "0":0 ] `), newInvalidCharacterError("]", `after object value (expecting ',' or '}')`).withPos(` { "0":0 `, ""), ""},
{BeginObject, nil, ""},
{EndArray, E(errMismatchDelim).withPos(`{`, ""), ""},
{Value(`]`), newInvalidCharacterError("]", "at start of value").withPos(`{`, ""), ""},
{EndObject, nil, ""},
},
wantOut: "{}\n",
}, {
name: jsontest.Name("ValidObject/UniqueNames"),
calls: []encoderMethodCall{
{BeginObject, nil, ""},
{String("0"), nil, ""},
{Uint(0), nil, ""},
{String("1"), nil, ""},
{Uint(1), nil, ""},
{EndObject, nil, ""},
{Value(` { "0" : 0 , "1" : 1 } `), nil, ""},
},
wantOut: `{"0":0,"1":1}` + "\n" + `{"0":0,"1":1}` + "\n",
}, {
name: jsontest.Name("ValidObject/DuplicateNames"),
opts: []Options{AllowDuplicateNames(true)},
calls: []encoderMethodCall{
{BeginObject, nil, ""},
{String("0"), nil, ""},
{Uint(0), nil, ""},
{String("0"), nil, ""},
{Uint(0), nil, ""},
{EndObject, nil, ""},
{Value(` { "0" : 0 , "0" : 0 } `), nil, ""},
},
wantOut: `{"0":0,"0":0}` + "\n" + `{"0":0,"0":0}` + "\n",
}, {
name: jsontest.Name("InvalidObject/DuplicateNames"),
calls: []encoderMethodCall{
{BeginObject, nil, ""},
{String("X"), nil, ""},
{BeginObject, nil, ""},
{EndObject, nil, ""},
{String("X"), E(ErrDuplicateName).withPos(`{"X":{},`, "/X"), "/X"},
{Value(`"X"`), E(ErrDuplicateName).withPos(`{"X":{},`, "/X"), "/X"},
{String("Y"), nil, ""},
{BeginObject, nil, ""},
{EndObject, nil, ""},
{String("X"), E(ErrDuplicateName).withPos(`{"X":{},"Y":{},`, "/X"), "/Y"},
{Value(`"X"`), E(ErrDuplicateName).withPos(`{"X":{},"Y":{},`, "/X"), "/Y"},
{String("Y"), E(ErrDuplicateName).withPos(`{"X":{},"Y":{},`, "/Y"), "/Y"},
{Value(`"Y"`), E(ErrDuplicateName).withPos(`{"X":{},"Y":{},`, "/Y"), "/Y"},
{EndObject, nil, ""},
{Value(` { "X" : 0 , "Y" : 1 , "X" : 0 } `), E(ErrDuplicateName).withPos(`{"X":{},"Y":{}}`+"\n"+` { "X" : 0 , "Y" : 1 , `, "/X"), ""},
},
wantOut: `{"X":{},"Y":{}}` + "\n",
}, {
name: jsontest.Name("TruncatedArray/AfterStart"),
calls: []encoderMethodCall{
{Value(`[`), E(io.ErrUnexpectedEOF).withPos(`[`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedArray/AfterValue"),
calls: []encoderMethodCall{
{Value(`[0`), E(io.ErrUnexpectedEOF).withPos(`[0`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedArray/AfterComma"),
calls: []encoderMethodCall{
{Value(`[0,`), E(io.ErrUnexpectedEOF).withPos(`[0,`, ""), ""},
},
}, {
name: jsontest.Name("TruncatedArray/MissingComma"),
calls: []encoderMethodCall{
{Value(` [ "fizz" "buzz" ] `), newInvalidCharacterError("\"", "after array value (expecting ',' or ']')").withPos(` [ "fizz" `, ""), ""},
},
}, {
name: jsontest.Name("InvalidArray/MismatchingDelim"),
calls: []encoderMethodCall{
{Value(` [ } `), newInvalidCharacterError("}", `at start of value`).withPos(` [ `, "/0"), ""},
{BeginArray, nil, ""},
{EndObject, E(errMismatchDelim).withPos(`[`, "/0"), ""},
{Value(`}`), newInvalidCharacterError("}", "at start of value").withPos(`[`, "/0"), ""},
{EndArray, nil, ""},
},
wantOut: "[]\n",
}, {
name: jsontest.Name("Format/Object/SpaceAfterColon"),
opts: []Options{SpaceAfterColon(true)},
calls: []encoderMethodCall{{Value(`{"fizz":"buzz","wizz":"wuzz"}`), nil, ""}},
wantOut: "{\"fizz\": \"buzz\",\"wizz\": \"wuzz\"}\n",
}, {
name: jsontest.Name("Format/Object/SpaceAfterComma"),
opts: []Options{SpaceAfterComma(true)},
calls: []encoderMethodCall{{Value(`{"fizz":"buzz","wizz":"wuzz"}`), nil, ""}},
wantOut: "{\"fizz\":\"buzz\", \"wizz\":\"wuzz\"}\n",
}, {
name: jsontest.Name("Format/Object/SpaceAfterColonAndComma"),
opts: []Options{SpaceAfterColon(true), SpaceAfterComma(true)},
calls: []encoderMethodCall{{Value(`{"fizz":"buzz","wizz":"wuzz"}`), nil, ""}},
wantOut: "{\"fizz\": \"buzz\", \"wizz\": \"wuzz\"}\n",
}, {
name: jsontest.Name("Format/Object/NoSpaceAfterColon+SpaceAfterComma+Multiline"),
opts: []Options{SpaceAfterColon(false), SpaceAfterComma(true), Multiline(true)},
calls: []encoderMethodCall{{Value(`{"fizz":"buzz","wizz":"wuzz"}`), nil, ""}},
wantOut: "{\n\t\"fizz\":\"buzz\", \n\t\"wizz\":\"wuzz\"\n}\n",
}, {
name: jsontest.Name("Format/Array/SpaceAfterComma"),
opts: []Options{SpaceAfterComma(true)},
calls: []encoderMethodCall{{Value(`["fizz","buzz"]`), nil, ""}},
wantOut: "[\"fizz\", \"buzz\"]\n",
}, {
name: jsontest.Name("Format/Array/NoSpaceAfterComma+Multiline"),
opts: []Options{SpaceAfterComma(false), Multiline(true)},
calls: []encoderMethodCall{{Value(`["fizz","buzz"]`), nil, ""}},
wantOut: "[\n\t\"fizz\",\n\t\"buzz\"\n]\n",
}, {
name: jsontest.Name("Format/ReorderWithWhitespace"),
opts: []Options{
AllowDuplicateNames(true),
AllowInvalidUTF8(true),
ReorderRawObjects(true),
SpaceAfterComma(true),
SpaceAfterColon(false),
Multiline(true),
WithIndentPrefix(" "),
WithIndent("\t"),
PreserveRawStrings(true),
},
calls: []encoderMethodCall{
{BeginArray, nil, ""},
{BeginArray, nil, ""},
{Value(` { "fizz" : "buzz" ,
"zip" : {
"x` + "\xfd" + `x" : 123 , "x` + "\xff" + `x" : 123, "x` + "\xfe" + `x" : 123
},
"zap" : {
"xxx" : 333, "xxx": 1, "xxx": 22
},
"alpha" : "bravo" } `), nil, ""},
{EndArray, nil, ""},
{EndArray, nil, ""},
},
wantOut: "[\n \t[\n \t\t{\n \t\t\t\"alpha\":\"bravo\", \n \t\t\t\"fizz\":\"buzz\", \n \t\t\t\"zap\":{\n \t\t\t\t\"xxx\":1, \n \t\t\t\t\"xxx\":22, \n \t\t\t\t\"xxx\":333\n \t\t\t}, \n \t\t\t\"zip\":{\n \t\t\t\t\"x\xfdx\":123, \n \t\t\t\t\"x\xfex\":123, \n \t\t\t\t\"x\xffx\":123\n \t\t\t}\n \t\t}\n \t]\n ]\n",
}, {
name: jsontest.Name("Format/CanonicalizeRawInts"),
opts: []Options{CanonicalizeRawInts(true), SpaceAfterComma(true)},
calls: []encoderMethodCall{
{Value(`[0.100,5.0,1E6,-9223372036854775808,-10,-1,-0,0,1,10,9223372036854775807]`), nil, ""},
},
wantOut: "[0.100, 5.0, 1E6, -9223372036854776000, -10, -1, 0, 0, 1, 10, 9223372036854776000]\n",
}, {
name: jsontest.Name("Format/CanonicalizeRawFloats"),
opts: []Options{CanonicalizeRawFloats(true), SpaceAfterComma(true)},
calls: []encoderMethodCall{
{Value(`[0.100,5.0,1E6,-9223372036854775808,-10,-1,-0,0,1,10,9223372036854775807]`), nil, ""},
},
wantOut: "[0.1, 5, 1000000, -9223372036854775808, -10, -1, 0, 0, 1, 10, 9223372036854775807]\n",
}, {
name: jsontest.Name("ErrorPosition"),
calls: []encoderMethodCall{
{Value(` "a` + "\xff" + `0" `), E(jsonwire.ErrInvalidUTF8).withPos(` "a`, ""), ""},
{String(`a` + "\xff" + `0`), E(jsonwire.ErrInvalidUTF8).withPos(``, ""), ""},
},
}, {
name: jsontest.Name("ErrorPosition/0"),
calls: []encoderMethodCall{
{Value(` [ "a` + "\xff" + `1" ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ "a`, "/0"), ""},
{BeginArray, nil, ""},
{Value(` "a` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`[ "a`, "/0"), ""},
{String(`a` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`[`, "/0"), ""},
},
wantOut: `[`,
}, {
name: jsontest.Name("ErrorPosition/1"),
calls: []encoderMethodCall{
{Value(` [ "a1" , "b` + "\xff" + `1" ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ "a1" , "b`, "/1"), ""},
{BeginArray, nil, ""},
{String("a1"), nil, ""},
{Value(` "b` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1", "b`, "/1"), ""},
{String(`b` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",`, "/1"), ""},
},
wantOut: `["a1"`,
}, {
name: jsontest.Name("ErrorPosition/0/0"),
calls: []encoderMethodCall{
{Value(` [ [ "a` + "\xff" + `2" ] ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ [ "a`, "/0/0"), ""},
{BeginArray, nil, ""},
{Value(` [ "a` + "\xff" + `2" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`[ [ "a`, "/0/0"), ""},
{BeginArray, nil, "/0"},
{Value(` "a` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`[[ "a`, "/0/0"), "/0"},
{String(`a` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`[[`, "/0/0"), "/0"},
},
wantOut: `[[`,
}, {
name: jsontest.Name("ErrorPosition/1/0"),
calls: []encoderMethodCall{
{Value(` [ "a1" , [ "a` + "\xff" + `2" ] ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ "a1" , [ "a`, "/1/0"), ""},
{BeginArray, nil, ""},
{String("a1"), nil, "/0"},
{Value(` [ "a` + "\xff" + `2" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1", [ "a`, "/1/0"), ""},
{BeginArray, nil, "/1"},
{Value(` "a` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",[ "a`, "/1/0"), "/1"},
{String(`a` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",[`, "/1/0"), "/1"},
},
wantOut: `["a1",[`,
}, {
name: jsontest.Name("ErrorPosition/0/1"),
calls: []encoderMethodCall{
{Value(` [ [ "a2" , "b` + "\xff" + `2" ] ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ [ "a2" , "b`, "/0/1"), ""},
{BeginArray, nil, ""},
{Value(` [ "a2" , "b` + "\xff" + `2" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`[ [ "a2" , "b`, "/0/1"), ""},
{BeginArray, nil, "/0"},
{String("a2"), nil, "/0/0"},
{Value(` "b` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`[["a2", "b`, "/0/1"), "/0/0"},
{String(`b` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`[["a2",`, "/0/1"), "/0/0"},
},
wantOut: `[["a2"`,
}, {
name: jsontest.Name("ErrorPosition/1/1"),
calls: []encoderMethodCall{
{Value(` [ "a1" , [ "a2" , "b` + "\xff" + `2" ] ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ "a1" , [ "a2" , "b`, "/1/1"), ""},
{BeginArray, nil, ""},
{String("a1"), nil, "/0"},
{Value(` [ "a2" , "b` + "\xff" + `2" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1", [ "a2" , "b`, "/1/1"), ""},
{BeginArray, nil, "/1"},
{String("a2"), nil, "/1/0"},
{Value(` "b` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",["a2", "b`, "/1/1"), "/1/0"},
{String(`b` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",["a2",`, "/1/1"), "/1/0"},
},
wantOut: `["a1",["a2"`,
}, {
name: jsontest.Name("ErrorPosition/a1-"),
calls: []encoderMethodCall{
{Value(` { "a` + "\xff" + `1" : "b1" } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a`, ""), ""},
{BeginObject, nil, ""},
{Value(` "a` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`{ "a`, ""), ""},
{String(`a` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`{`, ""), ""},
},
wantOut: `{`,
}, {
name: jsontest.Name("ErrorPosition/a1"),
calls: []encoderMethodCall{
{Value(` { "a1" : "b` + "\xff" + `1" } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : "b`, "/a1"), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{Value(` "b` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1": "b`, "/a1"), ""},
{String(`b` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":`, "/a1"), ""},
},
wantOut: `{"a1"`,
}, {
name: jsontest.Name("ErrorPosition/c1-"),
calls: []encoderMethodCall{
{Value(` { "a1" : "b1" , "c` + "\xff" + `1" : "d1" } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : "b1" , "c`, ""), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{String("b1"), nil, "/a1"},
{Value(` "c` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1": "c`, ""), "/a1"},
{String(`c` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1":`, ""), "/a1"},
},
wantOut: `{"a1":"b1"`,
}, {
name: jsontest.Name("ErrorPosition/c1"),
calls: []encoderMethodCall{
{Value(` { "a1" : "b1" , "c1" : "d` + "\xff" + `1" } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : "b1" , "c1" : "d`, "/c1"), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{String("b1"), nil, "/a1"},
{String("c1"), nil, "/c1"},
{Value(` "d` + "\xff" + `1" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1":"c1": "d`, "/c1"), "/c1"},
{String(`d` + "\xff" + `1`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1":"c1":`, "/c1"), "/c1"},
},
wantOut: `{"a1":"b1","c1"`,
}, {
name: jsontest.Name("ErrorPosition/a1/a2-"),
calls: []encoderMethodCall{
{Value(` { "a1" : { "a` + "\xff" + `2" : "b2" } } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : { "a`, "/a1"), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{Value(` { "a` + "\xff" + `2" : "b2" } `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1": { "a`, "/a1"), ""},
{BeginObject, nil, "/a1"},
{Value(` "a` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{ "a`, "/a1"), "/a1"},
{String(`a` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{`, "/a1"), "/a1"},
},
wantOut: `{"a1":{`,
}, {
name: jsontest.Name("ErrorPosition/a1/a2"),
calls: []encoderMethodCall{
{Value(` { "a1" : { "a2" : "b` + "\xff" + `2" } } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : { "a2" : "b`, "/a1/a2"), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{Value(` { "a2" : "b` + "\xff" + `2" } `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1": { "a2" : "b`, "/a1/a2"), ""},
{BeginObject, nil, "/a1"},
{String("a2"), nil, "/a1/a2"},
{Value(` "b` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2": "b`, "/a1/a2"), "/a1/a2"},
{String(`b` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2":`, "/a1/a2"), "/a1/a2"},
},
wantOut: `{"a1":{"a2"`,
}, {
name: jsontest.Name("ErrorPosition/a1/c2-"),
calls: []encoderMethodCall{
{Value(` { "a1" : { "a2" : "b2" , "c` + "\xff" + `2" : "d2" } } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : { "a2" : "b2" , "c`, "/a1"), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{BeginObject, nil, "/a1"},
{String("a2"), nil, "/a1/a2"},
{String("b2"), nil, "/a1/a2"},
{Value(` "c` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2":"b2", "c`, "/a1"), "/a1/a2"},
{String(`c` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2":"b2",`, "/a1"), "/a1/a2"},
},
wantOut: `{"a1":{"a2":"b2"`,
}, {
name: jsontest.Name("ErrorPosition/a1/c2"),
calls: []encoderMethodCall{
{Value(` { "a1" : { "a2" : "b2" , "c2" : "d` + "\xff" + `2" } } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : { "a2" : "b2" , "c2" : "d`, "/a1/c2"), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{Value(` { "a2" : "b2" , "c2" : "d` + "\xff" + `2" } `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1": { "a2" : "b2" , "c2" : "d`, "/a1/c2"), ""},
{BeginObject, nil, ""},
{String("a2"), nil, "/a1/a2"},
{String("b2"), nil, "/a1/a2"},
{String("c2"), nil, "/a1/c2"},
{Value(` "d` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2":"b2","c2": "d`, "/a1/c2"), "/a1/c2"},
{String(`d` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":{"a2":"b2","c2":`, "/a1/c2"), "/a1/c2"},
},
wantOut: `{"a1":{"a2":"b2","c2"`,
}, {
name: jsontest.Name("ErrorPosition/1/a2"),
calls: []encoderMethodCall{
{Value(` [ "a1" , { "a2" : "b` + "\xff" + `2" } ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ "a1" , { "a2" : "b`, "/1/a2"), ""},
{BeginArray, nil, ""},
{String("a1"), nil, "/0"},
{Value(` { "a2" : "b` + "\xff" + `2" } `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1", { "a2" : "b`, "/1/a2"), ""},
{BeginObject, nil, "/1"},
{String("a2"), nil, "/1/a2"},
{Value(` "b` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",{"a2": "b`, "/1/a2"), "/1/a2"},
{String(`b` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`["a1",{"a2":`, "/1/a2"), "/1/a2"},
},
wantOut: `["a1",{"a2"`,
}, {
name: jsontest.Name("ErrorPosition/c1/1"),
calls: []encoderMethodCall{
{Value(` { "a1" : "b1" , "c1" : [ "a2" , "b` + "\xff" + `2" ] } `), E(jsonwire.ErrInvalidUTF8).withPos(` { "a1" : "b1" , "c1" : [ "a2" , "b`, "/c1/1"), ""},
{BeginObject, nil, ""},
{String("a1"), nil, "/a1"},
{String("b1"), nil, "/a1"},
{String("c1"), nil, "/c1"},
{Value(` [ "a2" , "b` + "\xff" + `2" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1","c1": [ "a2" , "b`, "/c1/1"), ""},
{BeginArray, nil, "/c1"},
{String("a2"), nil, "/c1/0"},
{Value(` "b` + "\xff" + `2" `), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1","c1":["a2", "b`, "/c1/1"), "/c1/0"},
{String(`b` + "\xff" + `2`), E(jsonwire.ErrInvalidUTF8).withPos(`{"a1":"b1","c1":["a2",`, "/c1/1"), "/c1/0"},
},
wantOut: `{"a1":"b1","c1":["a2"`,
}, {
name: jsontest.Name("ErrorPosition/0/a1/1/c3/1"),
calls: []encoderMethodCall{
{Value(` [ { "a1" : [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b` + "\xff" + `4" ] } ] } ] `), E(jsonwire.ErrInvalidUTF8).withPos(` [ { "a1" : [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b`, "/0/a1/1/c3/1"), ""},
{BeginArray, nil, ""},
{Value(` { "a1" : [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b` + "\xff" + `4" ] } ] } `), E(jsonwire.ErrInvalidUTF8).withPos(`[ { "a1" : [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b`, "/0/a1/1/c3/1"), ""},
{BeginObject, nil, "/0"},
{String("a1"), nil, "/0/a1"},
{Value(` [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b` + "\xff" + `4" ] } ] `), E(jsonwire.ErrInvalidUTF8).withPos(`[{"a1": [ "a2" , { "a3" : "b3" , "c3" : [ "a4" , "b`, "/0/a1/1/c3/1"), ""},
{BeginArray, nil, ""},
{String("a2"), nil, "/0/a1/0"},
{Value(` { "a3" : "b3" , "c3" : [ "a4" , "b` + "\xff" + `4" ] } `), E(jsonwire.ErrInvalidUTF8).withPos(`[{"a1":["a2", { "a3" : "b3" , "c3" : [ "a4" , "b`, "/0/a1/1/c3/1"), ""},
{BeginObject, nil, "/0/a1/1"},
{String("a3"), nil, "/0/a1/1/a3"},
{String("b3"), nil, "/0/a1/1/a3"},
{String("c3"), nil, "/0/a1/1/c3"},
{Value(` [ "a4" , "b` + "\xff" + `4" ] `), E(jsonwire.ErrInvalidUTF8).withPos(`[{"a1":["a2",{"a3":"b3","c3": [ "a4" , "b`, "/0/a1/1/c3/1"), ""},
{BeginArray, nil, "/0/a1/1/c3"},
{String("a4"), nil, "/0/a1/1/c3/0"},
{Value(` "b` + "\xff" + `4" `), E(jsonwire.ErrInvalidUTF8).withPos(`[{"a1":["a2",{"a3":"b3","c3":["a4", "b`, "/0/a1/1/c3/1"), "/0/a1/1/c3/0"},
{String(`b` + "\xff" + `4`), E(jsonwire.ErrInvalidUTF8).withPos(`[{"a1":["a2",{"a3":"b3","c3":["a4",`, "/0/a1/1/c3/1"), "/0/a1/1/c3/0"},
},
wantOut: `[{"a1":["a2",{"a3":"b3","c3":["a4"`,
}}
// TestEncoderErrors tests that Encoder errors occur exactly where the cases
// in encoderErrorTestdata expect them and that the Encoder is left in
// a consistent state afterwards.
func TestEncoderErrors(t *testing.T) {
	for i := range encoderErrorTestdata {
		tc := &encoderErrorTestdata[i]
		subtest := path.Join(tc.name.Name)
		t.Run(subtest, func(t *testing.T) {
			testEncoderErrors(t, tc.name.Where, tc.opts, tc.calls, tc.wantOut)
		})
	}
}
// testEncoderErrors performs calls in order against a single Encoder that
// is configured with opts and writes to an in-memory buffer.
//
// After every call the returned error is compared with the expected one
// and, when wantPointer is set, so is Encoder.StackPointer.
// After the last call the flushed plus unflushed output must equal wantOut
// and Encoder.OutputOffset must equal the length of wantOut.
func testEncoderErrors(t *testing.T, where jsontest.CasePos, opts []Options, calls []encoderMethodCall, wantOut string) {
	var sink bytes.Buffer
	enc := NewEncoder(&sink, opts...)

	for idx, step := range calls {
		// Dispatch on the dynamic type of the input;
		// any other type performs no write and leaves the error nil.
		var gotErr error
		switch arg := step.in.(type) {
		case Token:
			gotErr = enc.WriteToken(arg)
		case Value:
			gotErr = enc.WriteValue(arg)
		}
		if !equalError(gotErr, step.wantErr) {
			t.Fatalf("%s: %d: error mismatch:\ngot %v\nwant %v", where, idx, gotErr, step.wantErr)
		}
		if step.wantPointer == "" {
			continue // no pointer expectation for this call
		}
		if gotPointer := enc.StackPointer(); gotPointer != step.wantPointer {
			t.Fatalf("%s: %d: Encoder.StackPointer = %s, want %s", where, idx, gotPointer, step.wantPointer)
		}
	}

	// Output that has not been flushed yet still counts as produced output.
	if gotOut := sink.String() + string(enc.s.unflushedBuffer()); gotOut != wantOut {
		t.Fatalf("%s: output mismatch:\ngot %q\nwant %q", where, gotOut, wantOut)
	}
	if gotOffset, wantOffset := int(enc.OutputOffset()), len(wantOut); gotOffset != wantOffset {
		t.Fatalf("%s: Encoder.OutputOffset = %v, want %v", where, gotOffset, wantOffset)
	}
}

View File

@ -0,0 +1,182 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"io"
"strconv"
"encoding/json/internal/jsonwire"
)
// errorPrefix begins the messages produced by the Error methods
// of ioError and SyntacticError.
const errorPrefix = "jsontext: "

// ioError wraps an error that occurred while performing
// a read or a write operation.
type ioError struct {
	action string // either "read" or "write"
	err    error  // the underlying error
}

// Error reports the message in the form
// "jsontext: <action> error: <underlying message>".
func (e *ioError) Error() string {
	cause := e.err.Error()
	return errorPrefix + e.action + " error: " + cause
}

// Unwrap returns the underlying error so that it can be
// inspected with errors.Is and errors.As.
func (e *ioError) Unwrap() error { return e.err }
// SyntacticError is a description of a syntactic error that occurred when
// encoding or decoding JSON according to the grammar.
//
// The contents of this error as produced by this package may change over time.
//
// The Error method reports Err, followed by JSONPointer if it is non-empty
// and by ByteOffset if it is positive. The Unwrap method returns Err.
type SyntacticError struct {
// NOTE(review): requireKeyedLiterals and nonComparable are declared
// elsewhere; judging by their names they presumably force keyed composite
// literals and prevent comparison with == — confirm against their definitions.
requireKeyedLiterals
nonComparable
// ByteOffset indicates that an error occurred after this byte offset.
// Values that are not positive are omitted from the error message.
ByteOffset int64
// JSONPointer indicates that an error occurred within this JSON value
// as indicated using the JSON Pointer notation (see RFC 6901).
// An empty pointer is omitted from the error message.
JSONPointer Pointer
// Err is the underlying error.
// If nil, the error message reads "syntactic error" in its place.
Err error
}
// wrapSyntacticError wraps err in a [SyntacticError] that is annotated with
// a precise location obtained from the provided [encoderState] or
// [decoderState]. If err is an [ioError] or [io.EOF], it is returned as is.
//
// The pos argument is a relative offset that is resolved into
// an absolute byte offset using state.offsetAt.
//
// The where argument specifies how the JSON pointer is derived and is
// passed on to state.AppendStackPointer.
// If err is a [pointerSuffixError], its suffix is appended to the derived
// pointer and the error it carries is the one that gets wrapped.
//
// A mismatched delimiter reported by a decoder is replaced by
// an invalid character error for the input starting at pos.
func wrapSyntacticError(state interface {
	offsetAt(pos int) int64
	AppendStackPointer(b []byte, where int) []byte
}, err error, pos, where int) error {
	// End of input and I/O failures are never treated as syntactic errors.
	if err == io.EOF {
		return err
	}
	if _, isIO := err.(*ioError); isIO {
		return err
	}

	absOffset := state.offsetAt(pos)
	pointer := state.AppendStackPointer(nil, where)
	if suffixed, hasSuffix := err.(*pointerSuffixError); hasSuffix {
		pointer = suffixed.appendPointer(pointer)
		err = suffixed.error
	}

	dec, isDecoder := state.(*decoderState)
	if isDecoder && err == errMismatchDelim {
		expectation := "at start of value"
		if len(dec.Tokens.Stack) > 0 && dec.Tokens.Last.Length() > 0 {
			if dec.Tokens.Last.isArray() {
				expectation = "after array element (expecting ',' or ']')"
				pointer = []byte(Pointer(pointer).Parent()) // problem is with parent array
			} else if dec.Tokens.Last.isObject() {
				expectation = "after object value (expecting ',' or '}')"
				pointer = []byte(Pointer(pointer).Parent()) // problem is with parent object
			}
		}
		err = jsonwire.NewInvalidCharacterError(dec.buf[pos:], expectation)
	}

	return &SyntacticError{ByteOffset: absOffset, JSONPointer: Pointer(pointer), Err: err}
}
// Error returns the message of the underlying error prefixed with
// "jsontext: ", or "jsontext: syntactic error" if there is none.
// A non-empty JSON pointer is appended as ` within "<pointer>"` and
// a positive byte offset is appended as " after offset <n>".
//
// For [ErrDuplicateName], the last token of the pointer is reported as the
// quoted duplicate name, the parent pointer is reported as the location,
// and the offset is left out.
func (e *SyntacticError) Error() string {
	ptr, off := e.JSONPointer, e.ByteOffset
	msg := []byte(errorPrefix)

	if e.Err == nil {
		msg = append(msg, "syntactic error"...)
	} else {
		msg = append(msg, e.Err.Error()...)
		if e.Err == ErrDuplicateName {
			msg = append(msg, ' ')
			msg = strconv.AppendQuote(msg, ptr.LastToken())
			ptr = ptr.Parent()
			off = 0 // not useful to print offset for duplicate names
		}
	}

	if ptr != "" {
		msg = append(msg, " within "...)
		// Keep the message bounded for very long pointers.
		msg = strconv.AppendQuote(msg, jsonwire.TruncatePointer(string(ptr), 100))
	}
	if off > 0 {
		msg = append(msg, " after offset "...)
		msg = strconv.AppendInt(msg, off, 10)
	}
	return string(msg)
}

// Unwrap returns the underlying error so that it can be
// inspected with errors.Is and errors.As.
func (e *SyntacticError) Unwrap() error { return e.Err }
// pointerSuffixError represents a JSON pointer suffix to be appended
// to [SyntacticError.JSONPointer]. It is an internal error type
// used within this package and does not appear in the public API.
//
// This type is primarily used to annotate errors in Encoder.WriteValue
// and Decoder.ReadValue with precise positions.
// At the time WriteValue or ReadValue is called, a JSON pointer to the
// upcoming value can be constructed using the Encoder/Decoder state.
// However, tracking pointers within values during normal operation
// would incur a performance penalty in the error-free case.
//
// To provide precise error locations without this overhead,
// the error is wrapped with object names or array indices
// as the call stack is popped when an error occurs.
// Since this happens in reverse order, pointerSuffixError holds
// the pointer in reverse and is only later reversed when appending to
// the pointer prefix.
//
// For example, if the encoder is at "/alpha/bravo/charlie"
// and an error occurs in WriteValue at "/xray/yankee/zulu", then
// the final pointer should be "/alpha/bravo/charlie/xray/yankee/zulu".
//
// As pointerSuffixError is populated during the error return path,
// it first contains "/zulu", then "/zulu/yankee",
// and finally "/zulu/yankee/xray".
// These tokens are reversed and concatenated to "/alpha/bravo/charlie"
// to form the full pointer.
type pointerSuffixError struct {
// error is the underlying error being annotated.
// wrapSyntacticError extracts it again when it builds the SyntacticError.
error
// reversePointer is a JSON pointer whose tokens appear in reverse order:
// the innermost token comes first and the outermost token comes last.
// The tokens themselves are stored unmodified and
// every token, including the first one, is preceded by '/'.
reversePointer []byte
}
// wrapWithObjectName records an access of the JSON object member named by
// quotedName, which must be a valid quoted JSON string, on err.
// If err is not already a non-nil [pointerSuffixError], a new one is created
// around it; otherwise the existing one is extended in place and returned.
func wrapWithObjectName(err error, quotedName []byte) error {
	suffix, ok := err.(*pointerSuffixError)
	if !ok || suffix == nil {
		suffix = &pointerSuffixError{error: err}
	}
	unquoted := jsonwire.UnquoteMayCopy(quotedName, false)
	tail := append(suffix.reversePointer, '/')
	suffix.reversePointer = appendEscapePointerName(tail, unquoted)
	return suffix
}
// wrapWithArrayIndex records an access of the JSON array element at index
// on err. If err is not already a non-nil [pointerSuffixError], a new one is
// created around it; otherwise the existing one is extended in place and
// returned. The index is appended in decimal as an unsigned number.
func wrapWithArrayIndex(err error, index int64) error {
	suffix, ok := err.(*pointerSuffixError)
	if !ok || suffix == nil {
		suffix = &pointerSuffixError{error: err}
	}
	tail := append(suffix.reversePointer, '/')
	suffix.reversePointer = strconv.AppendUint(tail, uint64(index), 10)
	return suffix
}
// appendPointer appends the path recorded in e to the end of pointer
// and returns the extended slice.
func (e *pointerSuffixError) appendPointer(pointer []byte) []byte {
	// The tokens in reversePointer are stored innermost first.
	// Repeatedly moving the last token (including its leading '/')
	// to the end of pointer therefore emits them in forward order.
	remaining := e.reversePointer
	for len(remaining) > 0 {
		cut := bytes.LastIndexByte(remaining, '/')
		pointer = append(pointer, remaining[cut:]...)
		remaining = remaining[:cut]
	}
	return pointer
}

View File

@ -0,0 +1,130 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext_test
import (
"bytes"
"fmt"
"io"
"log"
"strings"
"encoding/json/jsontext"
"encoding/json/v2"
)
// This example demonstrates the use of the [Encoder] and [Decoder] to
// parse and modify JSON without unmarshaling it into a concrete Go type.
// The input is streamed token by token: every string token that contains
// "Golang" is rewritten, and all tokens are re-encoded in multiline form.
func Example_stringReplace() {
// Example input with non-idiomatic use of "Golang" instead of "Go".
const input = `{
"title": "Golang version 1 is released",
"author": "Andrew Gerrand",
"date": "2012-03-28",
"text": "Today marks a major milestone in the development of the Golang programming language.",
"otherArticles": [
"Twelve Years of Golang",
"The Laws of Reflection",
"Learn Golang from your browser"
]
}`
// Using a Decoder and Encoder, we can parse through every token,
// check and modify the token if necessary, and
// write the token to the output.
// The replacements slice records where each modification took place.
var replacements []jsontext.Pointer
in := strings.NewReader(input)
dec := jsontext.NewDecoder(in)
out := new(bytes.Buffer)
enc := jsontext.NewEncoder(out, jsontext.Multiline(true)) // expand for readability
for {
// Read a token from the input.
// An io.EOF error signals that the input has been fully consumed;
// any other error is fatal.
tok, err := dec.ReadToken()
if err != nil {
if err == io.EOF {
break
}
log.Fatal(err)
}
// Check whether the token contains the string "Golang" and
// replace each occurrence with "Go" instead.
// The location of the token is taken from the decoder's stack pointer.
// Object names are string tokens as well, so they are subject
// to the same check (none of the names in this input match).
if tok.Kind() == '"' && strings.Contains(tok.String(), "Golang") {
replacements = append(replacements, dec.StackPointer())
tok = jsontext.String(strings.ReplaceAll(tok.String(), "Golang", "Go"))
}
// Write the (possibly modified) token to the output.
if err := enc.WriteToken(tok); err != nil {
log.Fatal(err)
}
}
// Print the list of replacements and the adjusted JSON output.
// Each replacement is printed on its own line, indented with a tab.
if len(replacements) > 0 {
fmt.Println(`Replaced "Golang" with "Go" in:`)
for _, where := range replacements {
fmt.Println("\t" + where)
}
fmt.Println()
}
fmt.Println("Result:", out.String())
// Output:
// Replaced "Golang" with "Go" in:
//	/title
//	/text
//	/otherArticles/0
//	/otherArticles/2
//
// Result: {
//	"title": "Go version 1 is released",
//	"author": "Andrew Gerrand",
//	"date": "2012-03-28",
//	"text": "Today marks a major milestone in the development of the Go programming language.",
//	"otherArticles": [
//		"Twelve Years of Go",
//		"The Laws of Reflection",
//		"Learn Go from your browser"
//	]
// }
}
// Directly embedding JSON within HTML requires special handling for safety.
// Escape certain runes to prevent JSON directly treated as HTML
// from being able to perform <script> injection.
//
// This example shows how to obtain equivalent behavior provided by the
// v1 [encoding/json] package that is no longer directly supported by this package.
// Newly written code that intermixes JSON and HTML should instead be using the
// [github.com/google/safehtml] module for safety purposes.
func ExampleEscapeForHTML() {
// The Body field deliberately contains markup that must be escaped.
page := struct {
Title string
Body string
}{
Title: "Example Embedded Javascript",
Body: `<script> console.log("Hello, world!"); </script>`,
}
b, err := json.Marshal(&page,
// Escape certain runes within a JSON string so that
// JSON will be safe to directly embed inside HTML.
jsontext.EscapeForHTML(true),
jsontext.EscapeForJS(true),
jsontext.Multiline(true)) // expand for readability
if err != nil {
log.Fatal(err)
}
fmt.Println(string(b))
// Output:
// {
// "Title": "Example Embedded Javascript",
// "Body": "\u003cscript\u003e console.log(\"Hello, world!\"); \u003c/script\u003e"
// }
}

View File

@ -0,0 +1,77 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"io"
"encoding/json/internal"
)
// Internal is for internal use only.
// This is exempt from the Go compatibility agreement.
// Its Export method is the entry point to the internal functionality.
var Internal exporter
// exporter is the type of [Internal]; it carries no state.
type exporter struct{}
// Export hands out the internal functionality of "jsontext" to "json".
// Other packages cannot call this dynamically because they have no way of
// obtaining a reference to the internal.AllowInternalUse value.
// It panics if p is not the address of internal.AllowInternalUse.
func (exporter) Export(p *internal.NotForPublicUse) export {
	if p == &internal.AllowInternalUse {
		return export{}
	}
	panic("unauthorized call to Export")
}
// The export type exposes functionality to packages with visibility to
// the internal.AllowInternalUse variable. The "json" package uses this
// to modify low-level state in the Encoder and Decoder types.
// It mutates the state directly instead of calling ReadToken or WriteToken
// since this is more performant. The public APIs need to track state to ensure
// that users are constructing a valid JSON value, but the "json" implementation
// guarantees that it emits valid JSON by the structure of the code itself.
//
// Values of this type are obtained from [exporter.Export].
type export struct{}
// Encoder returns a pointer to the underlying encoderState.
// The pointer refers to the state stored inside e, not to a copy.
func (export) Encoder(e *Encoder) *encoderState { return &e.s }
// Decoder returns a pointer to the underlying decoderState.
// The pointer refers to the state stored inside d, not to a copy.
func (export) Decoder(d *Decoder) *decoderState { return &d.s }
// GetBufferedEncoder forwards to getBufferedEncoder, passing the options through,
// and returns the resulting Encoder.
func (export) GetBufferedEncoder(o ...Options) *Encoder {
return getBufferedEncoder(o...)
}
// PutBufferedEncoder forwards e to putBufferedEncoder.
func (export) PutBufferedEncoder(e *Encoder) {
putBufferedEncoder(e)
}
// GetStreamingEncoder forwards to getStreamingEncoder, passing the writer and
// options through, and returns the resulting Encoder.
func (export) GetStreamingEncoder(w io.Writer, o ...Options) *Encoder {
return getStreamingEncoder(w, o...)
}
// PutStreamingEncoder forwards e to putStreamingEncoder.
func (export) PutStreamingEncoder(e *Encoder) {
putStreamingEncoder(e)
}
// GetBufferedDecoder forwards to getBufferedDecoder, passing the input bytes and
// options through, and returns the resulting Decoder.
func (export) GetBufferedDecoder(b []byte, o ...Options) *Decoder {
return getBufferedDecoder(b, o...)
}
// PutBufferedDecoder forwards d to putBufferedDecoder.
func (export) PutBufferedDecoder(d *Decoder) {
putBufferedDecoder(d)
}
// GetStreamingDecoder forwards to getStreamingDecoder, passing the reader and
// options through, and returns the resulting Decoder.
func (export) GetStreamingDecoder(r io.Reader, o ...Options) *Decoder {
return getStreamingDecoder(r, o...)
}
// PutStreamingDecoder forwards d to putStreamingDecoder.
func (export) PutStreamingDecoder(d *Decoder) {
putStreamingDecoder(d)
}
// IsIOError reports whether the dynamic type of err is *ioError.
// Only err itself is inspected; wrapped errors are not unwrapped.
func (export) IsIOError(err error) bool {
	switch err.(type) {
	case *ioError:
		return true
	default:
		return false
	}
}

View File

@ -0,0 +1,236 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"errors"
"io"
"math/rand"
"slices"
"testing"
"encoding/json/internal/jsontest"
)
// FuzzCoder checks that any input accepted by the Decoder can be written back
// out through the Encoder without error, and that the re-encoded output
// decodes to the same sequence of tokens as the original input.
// The seed drives the choice between reading tokens and reading whole values.
func FuzzCoder(f *testing.F) {
// Add a number of inputs to the corpus including valid and invalid data.
for _, td := range coderTestdata {
f.Add(int64(0), []byte(td.in))
}
for _, td := range decoderErrorTestdata {
f.Add(int64(0), []byte(td.in))
}
for _, td := range encoderErrorTestdata {
f.Add(int64(0), []byte(td.wantOut))
}
for _, td := range jsontest.Data {
f.Add(int64(0), td.Data())
}
f.Fuzz(func(t *testing.T, seed int64, b []byte) {
var tokVals []tokOrVal
rn := rand.NewSource(seed)
// Read a sequence of tokens or values. Skip the test for any errors
// since we expect this with randomly generated fuzz inputs.
src := bytes.NewReader(b)
dec := NewDecoder(src)
for {
// A non-zero remainder (7 of the 8 possible) reads a single token;
// a zero remainder reads a whole value.
if rn.Int63()%8 > 0 {
tok, err := dec.ReadToken()
if err != nil {
if err == io.EOF {
break
}
t.Skipf("Decoder.ReadToken error: %v", err)
}
tokVals = append(tokVals, tok.Clone())
} else {
val, err := dec.ReadValue()
if err != nil {
// ReadValue cannot succeed when the next token closes an object or
// array; a SyntacticError in that position is tolerated and the
// loop simply draws again.
expectError := dec.PeekKind() == '}' || dec.PeekKind() == ']'
if expectError && errors.As(err, new(*SyntacticError)) {
continue
}
if err == io.EOF {
break
}
t.Skipf("Decoder.ReadValue error: %v", err)
}
tokVals = append(tokVals, append(zeroValue, val...))
}
}
// Write a sequence of tokens or values. Fail the test for any errors
// since the previous stage guarantees that the input is valid.
dst := new(bytes.Buffer)
enc := NewEncoder(dst)
for _, tokVal := range tokVals {
switch tokVal := tokVal.(type) {
case Token:
if err := enc.WriteToken(tokVal); err != nil {
t.Fatalf("Encoder.WriteToken error: %v", err)
}
case Value:
if err := enc.WriteValue(tokVal); err != nil {
t.Fatalf("Encoder.WriteValue error: %v", err)
}
}
}
// Encoded output and original input must decode to the same thing.
// NOTE(review): got is decoded from the original input b and
// want from the re-encoded output dst.
var got, want []Token
for dec := NewDecoder(bytes.NewReader(b)); dec.PeekKind() > 0; {
tok, err := dec.ReadToken()
if err != nil {
t.Fatalf("Decoder.ReadToken error: %v", err)
}
got = append(got, tok.Clone())
}
for dec := NewDecoder(dst); dec.PeekKind() > 0; {
tok, err := dec.ReadToken()
if err != nil {
t.Fatalf("Decoder.ReadToken error: %v", err)
}
want = append(want, tok.Clone())
}
if !equalTokens(got, want) {
t.Fatalf("mismatching output:\ngot %v\nwant %v", got, want)
}
})
}
// FuzzResumableDecoder checks that a Decoder reading through a FaultyBuffer
// reports the same first token, first value, and error as a Decoder reading
// the same bytes through a bytes.Reader.
// NOTE(review): FaultyBuffer is defined elsewhere; presumably it returns the
// data in randomly sized pieces of at most MaxBytes — confirm.
func FuzzResumableDecoder(f *testing.F) {
for _, td := range resumableDecoderTestdata {
f.Add(int64(0), []byte(td))
}
f.Fuzz(func(t *testing.T, seed int64, b []byte) {
// The same random source is shared by both subtests below.
rn := rand.NewSource(seed)
// Regardless of how many bytes the underlying io.Reader produces,
// the provided tokens, values, and errors should always be identical.
t.Run("ReadToken", func(t *testing.T) {
decGot := NewDecoder(&FaultyBuffer{B: b, MaxBytes: 8, Rand: rn})
decWant := NewDecoder(bytes.NewReader(b))
gotTok, gotErr := decGot.ReadToken()
wantTok, wantErr := decWant.ReadToken()
if gotTok.String() != wantTok.String() || !equalError(gotErr, wantErr) {
t.Errorf("Decoder.ReadToken = (%v, %v), want (%v, %v)", gotTok, gotErr, wantTok, wantErr)
}
})
t.Run("ReadValue", func(t *testing.T) {
decGot := NewDecoder(&FaultyBuffer{B: b, MaxBytes: 8, Rand: rn})
decWant := NewDecoder(bytes.NewReader(b))
gotVal, gotErr := decGot.ReadValue()
wantVal, wantErr := decWant.ReadValue()
if !slices.Equal(gotVal, wantVal) || !equalError(gotErr, wantErr) {
t.Errorf("Decoder.ReadValue = (%s, %v), want (%s, %v)", gotVal, gotErr, wantVal, wantErr)
}
})
})
}
// FuzzValueFormat cross-checks the Value methods IsValid, Compact, Indent,
// and Canonicalize against the validity of the input as judged by a Decoder
// under three levels of strictness, and verifies that Format does not panic
// for a random selection of options.
func FuzzValueFormat(f *testing.F) {
for _, td := range valueTestdata {
f.Add(int64(0), []byte(td.in))
}
// isValid reports whether b is valid according to the specified options.
// Valid means exactly one value followed by io.EOF.
isValid := func(b []byte, opts ...Options) bool {
d := NewDecoder(bytes.NewReader(b), opts...)
_, errVal := d.ReadValue()
_, errEOF := d.ReadToken()
return errVal == nil && errEOF == io.EOF
}
// stripWhitespace removes all JSON whitespace characters from the input.
// Note that this also removes such characters from within JSON strings.
stripWhitespace := func(in []byte) (out []byte) {
out = make([]byte, 0, len(in))
for _, c := range in {
switch c {
case ' ', '\n', '\r', '\t':
default:
out = append(out, c)
}
}
return out
}
allOptions := []Options{
AllowDuplicateNames(true),
AllowInvalidUTF8(true),
EscapeForHTML(true),
EscapeForJS(true),
PreserveRawStrings(true),
CanonicalizeRawInts(true),
CanonicalizeRawFloats(true),
ReorderRawObjects(true),
SpaceAfterColon(true),
SpaceAfterComma(true),
Multiline(true),
WithIndent("\t"),
WithIndentPrefix(" "),
}
f.Fuzz(func(t *testing.T, seed int64, b []byte) {
// Each successive check is stricter than the previous one, so an input
// rejected by a looser check must also be rejected by a stricter one.
validRFC7159 := isValid(b, AllowInvalidUTF8(true), AllowDuplicateNames(true))
validRFC8259 := isValid(b, AllowInvalidUTF8(false), AllowDuplicateNames(true))
validRFC7493 := isValid(b, AllowInvalidUTF8(false), AllowDuplicateNames(false))
switch {
case !validRFC7159 && validRFC8259:
t.Errorf("invalid input per RFC 7159 implies invalid per RFC 8259")
case !validRFC8259 && validRFC7493:
t.Errorf("invalid input per RFC 8259 implies invalid per RFC 7493")
}
gotValid := Value(b).IsValid()
wantValid := validRFC7493
if gotValid != wantValid {
t.Errorf("Value.IsValid = %v, want %v", gotValid, wantValid)
}
// Each formatting method below operates on its own copy of b.
gotCompacted := Value(string(b))
gotCompactOk := gotCompacted.Compact() == nil
wantCompactOk := validRFC7159
if !bytes.Equal(stripWhitespace(gotCompacted), stripWhitespace(b)) {
t.Errorf("stripWhitespace(Value.Compact) = %s, want %s", stripWhitespace(gotCompacted), stripWhitespace(b))
}
if gotCompactOk != wantCompactOk {
t.Errorf("Value.Compact success mismatch: got %v, want %v", gotCompactOk, wantCompactOk)
}
gotIndented := Value(string(b))
gotIndentOk := gotIndented.Indent() == nil
wantIndentOk := validRFC7159
if !bytes.Equal(stripWhitespace(gotIndented), stripWhitespace(b)) {
t.Errorf("stripWhitespace(Value.Indent) = %s, want %s", stripWhitespace(gotIndented), stripWhitespace(b))
}
if gotIndentOk != wantIndentOk {
t.Errorf("Value.Indent success mismatch: got %v, want %v", gotIndentOk, wantIndentOk)
}
gotCanonicalized := Value(string(b))
gotCanonicalizeOk := gotCanonicalized.Canonicalize() == nil
wantCanonicalizeOk := validRFC7493
if gotCanonicalizeOk != wantCanonicalizeOk {
t.Errorf("Value.Canonicalize success mismatch: got %v, want %v", gotCanonicalizeOk, wantCanonicalizeOk)
}
// Random options should not result in a panic.
// Each option is selected independently when the draw is zero.
var opts []Options
rn := rand.New(rand.NewSource(seed))
for _, opt := range allOptions {
if rn.Intn(len(allOptions)/4) == 0 {
opts = append(opts, opt)
}
}
v := Value(b)
v.Format(opts...) // should not panic
})
}

View File

@ -0,0 +1,303 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"strings"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonopts"
"encoding/json/internal/jsonwire"
)
// Options configures [NewEncoder], [Encoder.Reset], [NewDecoder],
// and [Decoder.Reset] with specific features.
// Each function takes in a variadic list of options, where properties
// set in later options override the value of previously set properties.
//
// There is a single Options type, which is used with both encoding and decoding.
// Some options affect both operations, while others only affect one operation:
//
// - [AllowDuplicateNames] affects encoding and decoding
// - [AllowInvalidUTF8] affects encoding and decoding
// - [EscapeForHTML] affects encoding only
// - [EscapeForJS] affects encoding only
// - [PreserveRawStrings] affects encoding only
// - [CanonicalizeRawInts] affects encoding only
// - [CanonicalizeRawFloats] affects encoding only
// - [ReorderRawObjects] affects encoding only
// - [SpaceAfterColon] affects encoding only
// - [SpaceAfterComma] affects encoding only
// - [Multiline] affects encoding only
// - [WithIndent] affects encoding only
// - [WithIndentPrefix] affects encoding only
//
// Options that do not affect a particular operation are ignored.
//
// The Options type is identical to [encoding/json.Options] and
// [encoding/json/v2.Options]. Options from the other packages may
// be passed to functionality in this package, but are ignored.
// Options from this package may be used with the other packages.
type Options = jsonopts.Options
// AllowDuplicateNames controls whether JSON objects are permitted to contain
// duplicate member names. Turning off the duplicate name check may improve
// performance, but it breaks compliance with RFC 7493, section 2.3.
// The input or output remains compliant with RFC 8259,
// which leaves the handling of duplicate names as unspecified behavior.
//
// This affects either encoding or decoding.
func AllowDuplicateNames(v bool) Options {
	if !v {
		return jsonflags.AllowDuplicateNames | 0
	}
	return jsonflags.AllowDuplicateNames | 1
}
// AllowInvalidUTF8 controls whether JSON strings are permitted to contain
// invalid UTF-8, which is then mangled as the Unicode replacement character,
// U+FFFD. Enabling it makes the encoder or decoder break compliance with
// RFC 7493, section 2.1, and RFC 8259, section 8.1.
//
// This affects either encoding or decoding.
func AllowInvalidUTF8(v bool) Options {
	if !v {
		return jsonflags.AllowInvalidUTF8 | 0
	}
	return jsonflags.AllowInvalidUTF8 | 1
}
// EscapeForHTML controls whether the '<', '>', and '&' characters within
// JSON strings are escaped as a hexadecimal Unicode codepoint (e.g., \u003c)
// so that the output is safe to embed within HTML.
//
// This only affects encoding and is ignored when decoding.
func EscapeForHTML(v bool) Options {
	if !v {
		return jsonflags.EscapeForHTML | 0
	}
	return jsonflags.EscapeForHTML | 1
}
// EscapeForJS controls whether the U+2028 and U+2029 characters within
// JSON strings are escaped as a hexadecimal Unicode codepoint (e.g., \u2028)
// so that the output is valid to embed within JavaScript.
// See RFC 8259, section 12.
//
// This only affects encoding and is ignored when decoding.
func EscapeForJS(v bool) Options {
	if !v {
		return jsonflags.EscapeForJS | 0
	}
	return jsonflags.EscapeForJS | 1
}
// PreserveRawStrings controls whether, when encoding a raw JSON string in a
// [Token] or [Value], pre-escaped sequences in that JSON string
// are preserved to the output.
// Raw strings nevertheless respect [EscapeForHTML] and [EscapeForJS],
// so the relevant characters are still escaped.
// If [AllowInvalidUTF8] is enabled, bytes of invalid UTF-8
// are preserved to the output.
//
// This only affects encoding and is ignored when decoding.
func PreserveRawStrings(v bool) Options {
	if !v {
		return jsonflags.PreserveRawStrings | 0
	}
	return jsonflags.PreserveRawStrings | 1
}
// CanonicalizeRawInts controls whether, when encoding a raw JSON
// integer number (i.e., a number without a fraction and exponent) in a
// [Token] or [Value], the number is canonicalized
// according to RFC 8785, section 3.2.2.3. As a special case,
// the number -0 is canonicalized as 0.
//
// JSON numbers are treated as IEEE 754 double precision numbers.
// A number with more precision than that form can represent
// loses the excess precision when canonicalized; this is the case for
// integer values beyond ±2⁵³.
// For example, 1234567890123456789 is formatted as 1234567890123456800.
//
// This only affects encoding and is ignored when decoding.
func CanonicalizeRawInts(v bool) Options {
	if !v {
		return jsonflags.CanonicalizeRawInts | 0
	}
	return jsonflags.CanonicalizeRawInts | 1
}
// CanonicalizeRawFloats controls whether, when encoding a raw JSON
// floating-point number (i.e., a number with a fraction or exponent) in a
// [Token] or [Value], the number is canonicalized
// according to RFC 8785, section 3.2.2.3. As a special case,
// the number -0 is canonicalized as 0.
//
// JSON numbers are treated as IEEE 754 double precision numbers.
// It is safe to canonicalize a serialized single precision number and
// parse it back as a single precision number and expect the same value.
// A number that exceeds ±1.7976931348623157e+308, the maximum
// finite number, is saturated at that value and formatted as such.
//
// This only affects encoding and is ignored when decoding.
func CanonicalizeRawFloats(v bool) Options {
	if !v {
		return jsonflags.CanonicalizeRawFloats | 0
	}
	return jsonflags.CanonicalizeRawFloats | 1
}
// ReorderRawObjects controls whether, when encoding a raw JSON object in a
// [Value], the object members are reordered according to
// RFC 8785, section 3.2.3.
//
// This only affects encoding and is ignored when decoding.
func ReorderRawObjects(v bool) Options {
	if !v {
		return jsonflags.ReorderRawObjects | 0
	}
	return jsonflags.ReorderRawObjects | 1
}
// SpaceAfterColon controls whether the JSON output has a space character
// after each colon separator following a JSON object name.
// If false, no space character appears after the colon separator.
//
// This only affects encoding and is ignored when decoding.
func SpaceAfterColon(v bool) Options {
	if !v {
		return jsonflags.SpaceAfterColon | 0
	}
	return jsonflags.SpaceAfterColon | 1
}
// SpaceAfterComma controls whether the JSON output has a space character
// after each comma separator following a JSON object value or array element.
// If false, no space character appears after the comma separator.
//
// This only affects encoding and is ignored when decoding.
func SpaceAfterComma(v bool) Options {
	if !v {
		return jsonflags.SpaceAfterComma | 0
	}
	return jsonflags.SpaceAfterComma | 1
}
// Multiline controls whether the JSON output is expanded to multiple lines,
// with every JSON object member or JSON array element placed on
// a new line that is indented according to the nesting depth.
//
// If [SpaceAfterColon] is not specified, then the default is true.
// If [SpaceAfterComma] is not specified, then the default is false.
// If [WithIndent] is not specified, then the default is "\t".
//
// If set to false, the output is a single line in which
// the only whitespace emitted is determined by the current
// values of [SpaceAfterColon] and [SpaceAfterComma].
//
// This only affects encoding and is ignored when decoding.
func Multiline(v bool) Options {
	if !v {
		return jsonflags.Multiline | 0
	}
	return jsonflags.Multiline | 1
}
// WithIndent specifies that the encoder should emit multiline output
// where each element in a JSON object or array begins on a new, indented line
// beginning with the indent prefix (see [WithIndentPrefix])
// followed by one or more copies of indent according to the nesting depth.
// The indent must only be composed of space or tab characters;
// WithIndent panics on any other character.
//
// If the intent is to emit indented output without a preference for
// the particular indent string, then use [Multiline] instead.
//
// This only affects encoding and is ignored when decoding.
// Use of this option implies [Multiline] being set to true.
func WithIndent(indent string) Options {
	// Fast-path: Return a constant for common indents, which avoids allocating.
	// These are derived from analyzing the Go module proxy on 2023-07-01.
	// Each case must be a distinct constant (a tab, then four, three, two,
	// and one spaces, then the empty string); duplicate cases do not compile.
	switch indent {
	case "\t":
		return jsonopts.Indent("\t") // ~14k usages
	case "    ":
		return jsonopts.Indent("    ") // ~18k usages
	case "   ":
		return jsonopts.Indent("   ") // ~1.7k usages
	case "  ":
		return jsonopts.Indent("  ") // ~52k usages
	case " ":
		return jsonopts.Indent(" ") // ~12k usages
	case "":
		return jsonopts.Indent("") // ~1.5k usages
	}

	// Otherwise, allocate for this unique value.
	// Anything left after trimming spaces and tabs is an invalid character.
	if s := strings.Trim(indent, " \t"); len(s) > 0 {
		panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent")
	}
	return jsonopts.Indent(indent)
}
// WithIndentPrefix specifies that the encoder should emit multiline output
// in which each element of a JSON object or array starts on a new, indented
// line that begins with the indent prefix, followed by one or more copies of
// indent (see [WithIndent]) according to the nesting depth.
// The prefix must only be composed of space or tab characters;
// WithIndentPrefix panics on any other character.
//
// This only affects encoding and is ignored when decoding.
// Use of this option implies [Multiline] being set to true.
func WithIndentPrefix(prefix string) Options {
	// Anything left after trimming spaces and tabs is an invalid character.
	invalid := strings.Trim(prefix, " \t")
	if invalid != "" {
		panic("json: invalid character " + jsonwire.QuoteRune(invalid) + " in indent prefix")
	}
	return jsonopts.IndentPrefix(prefix)
}
/*
// TODO(https://go.dev/issue/56733): Implement WithByteLimit and WithDepthLimit.
// WithByteLimit sets a limit on the number of input or output bytes
// that may be consumed or produced for each top-level JSON value.
// If a [Decoder] or [Encoder] method call would need to consume/produce
// more than a total of n bytes to make progress on the top-level JSON value,
// then the call will report an error.
// Whitespace before and within the top-level value is counted against the limit.
// Whitespace after a top-level value is counted against the limit
// for the next top-level value.
//
// A non-positive limit is equivalent to no limit at all.
// If unspecified, the default limit is no limit at all.
// This affects either encoding or decoding.
func WithByteLimit(n int64) Options {
return jsonopts.ByteLimit(max(n, 0))
}
// WithDepthLimit sets a limit on the maximum depth of JSON nesting
// that may be consumed or produced for each top-level JSON value.
// If a [Decoder] or [Encoder] method call would need to consume or produce
// a depth greater than n to make progress on the top-level JSON value,
// then the call will report an error.
//
// A non-positive limit is equivalent to no limit at all.
// If unspecified, the default limit is 10000.
// This affects either encoding or decoding.
func WithDepthLimit(n int) Options {
return jsonopts.DepthLimit(max(n, 0))
}
*/

View File

@ -0,0 +1,152 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"io"
"math/bits"
"sync"
)
// Pools of reusable Encoder values, one for each way
// in which the Encoder's internal buffer is owned.
//
// TODO(https://go.dev/issue/47657): Use sync.PoolOf.
var (
// This owns the internal buffer since there is no io.Writer to output to.
// Since the buffer can get arbitrarily large in normal usage,
// there is statistical tracking logic to determine whether to recycle
// the internal buffer or not based on a history of utilization.
bufferedEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
// This owns the internal buffer, but it is only used to temporarily store
// buffered JSON before flushing it to the underlying io.Writer.
// In a sufficiently efficient streaming mode, we do not expect the buffer
// to grow arbitrarily large. Thus, we avoid recycling large buffers.
streamingEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
// This does not own the internal buffer since
// it is taken directly from the provided bytes.Buffer.
bytesBufferEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
)
// bufferStatistics records statistics used to track buffer utilization.
// It is used to determine whether to recycle a buffer or not
// to avoid https://go.dev/issue/23199.
type bufferStatistics struct {
strikes int // number of times the buffer was under-utilized
prevLen int // length of previous buffer
}
// getBufferedEncoder takes an Encoder from bufferedEncoderPool, makes sure it
// has an internal buffer, and resets it with the given options and no writer.
func getBufferedEncoder(opts ...Options) *Encoder {
	enc := bufferedEncoderPool.Get().(*Encoder)
	if enc.s.Buf == nil {
		// Size the new buffer from the previously recorded length,
		// rounded up to the nearest 2ⁿ to make best use of malloc size classes.
		// See runtime/sizeclasses.go on Go1.15.
		// The logical OR with 63 ensures a minimum buffer size of 64.
		hint := uint(enc.s.bufStats.prevLen | 63)
		size := 1 << bits.Len(hint)
		enc.s.Buf = make([]byte, 0, size)
	}
	enc.s.reset(enc.s.Buf[:0], nil, opts...)
	return enc
}
// putBufferedEncoder returns e to bufferedEncoderPool, first deciding whether
// its internal buffer should be kept or dropped.
func putBufferedEncoder(e *Encoder) {
	// Large buffers are recycled only if they are sufficiently utilized.
	// A buffer that is under-utilized enough times in a row is discarded,
	// so that a single large buffer cannot be kept alive by
	// a continuous stream of small usages.
	//
	// The worst case utilization is computed as:
	//	MIN_UTILIZATION_THRESHOLD / (1 + MAX_NUM_STRIKES)
	//
	// For the constants chosen below, this is (25%)/(1+4) ⇒ 5%.
	// This may seem low, but it ensures a lower bound on
	// the absolute worst-case utilization. Without this check,
	// this would be theoretically 0%, which is infinitely worse.
	//
	// See https://go.dev/issue/27735.
	const (
		alwaysRecycleCap = 4 << 10 // buffers up to 4KiB are always recycled
		maxStrikes       = 4       // tolerated sequential under-utilizations
	)
	capacity, used := cap(e.s.Buf), len(e.s.Buf)
	if capacity <= alwaysRecycleCap || capacity/4 <= used {
		// Small enough, or at least 25% utilized: clear the strike count.
		e.s.bufStats.strikes = 0
	} else if e.s.bufStats.strikes < maxStrikes {
		e.s.bufStats.strikes++
	} else {
		// Too large and too often under-utilized: discard the buffer.
		e.s.bufStats.strikes = 0
		e.s.bufStats.prevLen = used // heuristic for size to allocate next time
		e.s.Buf = nil
	}
	bufferedEncoderPool.Put(e)
}
// getStreamingEncoder returns a pooled Encoder reset to write to w.
// A *bytes.Buffer writer is served from bytesBufferEncoderPool and
// every other writer from streamingEncoderPool.
func getStreamingEncoder(w io.Writer, opts ...Options) *Encoder {
	if _, isBuffer := w.(*bytes.Buffer); isBuffer {
		enc := bytesBufferEncoderPool.Get().(*Encoder)
		enc.s.reset(nil, w, opts...) // buffer taken from bytes.Buffer
		return enc
	}
	enc := streamingEncoderPool.Get().(*Encoder)
	enc.s.reset(enc.s.Buf[:0], w, opts...) // preserve existing buffer
	return enc
}
// putStreamingEncoder returns e to the pool that matches its writer:
// bytesBufferEncoderPool for a *bytes.Buffer, streamingEncoderPool otherwise.
func putStreamingEncoder(e *Encoder) {
	if _, isBuffer := e.s.wr.(*bytes.Buffer); isBuffer {
		bytesBufferEncoderPool.Put(e)
		return
	}
	// Drop buffers above 64KiB to avoid pinning
	// arbitrarily large amounts of memory.
	if cap(e.s.Buf) > 64<<10 {
		e.s.Buf = nil
	}
	streamingEncoderPool.Put(e)
}
// Pools of reusable Decoder values, one for each way
// in which the Decoder's internal buffer is owned.
var (
// This does not own the internal buffer since it is externally provided.
bufferedDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }}
// This owns the internal buffer, but it is only used to temporarily store
// buffered JSON fetched from the underlying io.Reader.
// In a sufficiently efficient streaming mode, we do not expect the buffer
// to grow arbitrarily large. Thus, we avoid recycling large buffers.
streamingDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }}
// This does not own the internal buffer since
// it is taken directly from the provided bytes.Buffer.
// It is the very same pool as bufferedDecoderPool.
bytesBufferDecoderPool = bufferedDecoderPool
)
// getBufferedDecoder takes a Decoder from bufferedDecoderPool and resets it
// to read from b with the given options and no io.Reader.
func getBufferedDecoder(b []byte, opts ...Options) *Decoder {
d := bufferedDecoderPool.Get().(*Decoder)
d.s.reset(b, nil, opts...)
return d
}
// putBufferedDecoder returns d to bufferedDecoderPool.
func putBufferedDecoder(d *Decoder) {
bufferedDecoderPool.Put(d)
}
// getStreamingDecoder returns a pooled Decoder reset to read from r.
// A *bytes.Buffer reader is served from bytesBufferDecoderPool and
// every other reader from streamingDecoderPool.
func getStreamingDecoder(r io.Reader, opts ...Options) *Decoder {
	switch r.(type) {
	case *bytes.Buffer:
		dec := bytesBufferDecoderPool.Get().(*Decoder)
		dec.s.reset(nil, r, opts...) // buffer taken from bytes.Buffer
		return dec
	default:
		dec := streamingDecoderPool.Get().(*Decoder)
		dec.s.reset(dec.s.buf[:0], r, opts...) // preserve existing buffer
		return dec
	}
}
// putStreamingDecoder returns d to the pool that matches its reader:
// bytesBufferDecoderPool for a *bytes.Buffer, streamingDecoderPool otherwise.
func putStreamingDecoder(d *Decoder) {
	if _, isBuffer := d.s.rd.(*bytes.Buffer); isBuffer {
		bytesBufferDecoderPool.Put(d)
		return
	}
	// Drop buffers above 64KiB to avoid pinning
	// arbitrarily large amounts of memory.
	if cap(d.s.buf) > 64<<10 {
		d.s.buf = nil
	}
	streamingDecoderPool.Put(d)
}

View File

@ -0,0 +1,41 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonwire"
)
// AppendQuote appends to dst a double-quoted JSON string literal that
// represents src, and returns the extended buffer.
// It uses the minimal string representation per RFC 8785, section 3.2.2.2.
// Invalid UTF-8 bytes are replaced with the Unicode replacement character
// and an error is returned at the end indicating the presence of invalid UTF-8.
// Any error is reported as a [SyntacticError].
// The dst must not overlap with the src.
func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) {
	out, quoteErr := jsonwire.AppendQuote(dst, src, &jsonflags.Flags{})
	if quoteErr == nil {
		return out, nil
	}
	return out, &SyntacticError{Err: quoteErr}
}
// AppendUnquote decodes src as a double-quoted JSON string literal,
// appends the decoded text to dst, and returns the extended buffer.
// The input src must be a JSON string without any surrounding whitespace.
// Invalid UTF-8 bytes are replaced with the Unicode replacement character
// and an error is returned at the end indicating the presence of invalid UTF-8.
// Any trailing bytes after the JSON string literal result in an error.
// Any error is reported as a [SyntacticError].
// The dst must not overlap with the src.
func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) {
	out, unquoteErr := jsonwire.AppendUnquote(dst, src)
	if unquoteErr == nil {
		return out, nil
	}
	return out, &SyntacticError{Err: unquoteErr}
}

View File

@ -0,0 +1,828 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"errors"
"iter"
"math"
"strconv"
"strings"
"unicode/utf8"
"encoding/json/internal/jsonwire"
)
// ErrDuplicateName indicates that a JSON token could not be
// encoded or decoded because it results in a duplicate JSON object name.
// This error is directly wrapped within a [SyntacticError] when produced.
//
// The name of a duplicate JSON object member can be extracted as:
//
//	err := ...
//	var serr *jsontext.SyntacticError
//	if errors.As(err, &serr) && serr.Err == jsontext.ErrDuplicateName {
//		ptr := serr.JSONPointer // JSON pointer to duplicate name
//		name := ptr.LastToken() // duplicate name itself
//		...
//	}
//
// The target of errors.As must be a pointer to *SyntacticError,
// since syntactic errors are produced as *SyntacticError values.
//
// This error is only returned if [AllowDuplicateNames] is false.
var ErrDuplicateName = errors.New("duplicate object member name")
// ErrNonStringName indicates that a JSON token could not be
// encoded or decoded because it is not a string,
// as required for JSON object names according to RFC 8259, section 4.
// This error is directly wrapped within a [SyntacticError] when produced.
var ErrNonStringName = errors.New("object member name must be a string")
// Unexported sentinel errors for other structural problems.
var (
// errMissingValue: an object name was not followed by a value.
errMissingValue = errors.New("missing value after object name")
// errMismatchDelim: a structural token does not match the object or array it relates to.
errMismatchDelim = errors.New("mismatching structural token for object or array")
// errMaxDepth: the nesting depth limit was exceeded.
errMaxDepth = errors.New("exceeded max depth")
// errInvalidNamespace: the object namespace is in an invalid state.
errInvalidNamespace = errors.New("object namespace is in an invalid state")
)
// maxNestingDepth is the maximum nesting depth of JSON objects and arrays.
// Per RFC 8259, section 9, implementations may enforce a maximum depth.
// Such a limit is necessary to prevent stack overflows.
const maxNestingDepth = 10000
// state groups the three pieces of bookkeeping tracked while tokens are
// processed: the token state machine, the stack of current object names,
// and the stack of per-object namespaces used for duplicate-name detection.
type state struct {
// Tokens validates whether the next token kind is valid.
Tokens stateMachine
// Names is a stack of object names.
Names objectNameStack
// Namespaces is a stack of object namespaces.
// For performance reasons, Encoder or Decoder may not update this
// if Marshal or Unmarshal is able to track names in a more efficient way.
// See makeMapArshaler and makeStructArshaler.
// Not used if AllowDuplicateNames is true.
Namespaces objectNamespaceStack
}
// needObjectValue reports whether the innermost open object has just
// received a name and is therefore waiting for its value.
// This method is used by [wrapSyntacticError].
func (s *state) needObjectValue() bool {
	last := s.Tokens.Last
	return last.needObjectValue()
}
// reset returns every component of the state (namespaces, names, and the
// token state machine) to its initial, empty condition.
// The three resets touch disjoint fields, so their order is irrelevant.
func (s *state) reset() {
	s.Namespaces.reset()
	s.Names.reset()
	s.Tokens.reset()
}
// Pointer is a JSON Pointer (RFC 6901) that references a particular JSON value
// relative to the root of the top-level JSON value.
//
// A Pointer is a slash-separated list of tokens, where each token is
// either a JSON object name or an index to a JSON array element
// encoded as a base-10 integer value.
// It is impossible to distinguish between an array index and an object name
// (that happens to be a base-10 encoded integer) without also knowing
// the structure of the top-level JSON value that the pointer refers to.
//
// There is exactly one representation of a pointer to a particular value,
// so comparability of Pointer values is equivalent to checking whether
// they both point to the exact same value.
type Pointer string
// IsValid reports whether p is a valid JSON Pointer according to RFC 6901.
// A valid pointer is either empty or starts with '/', only uses '~' as part
// of the escapes "~0" and "~1", and consists of valid UTF-8.
// Note that the concatenation of two valid pointers produces a valid pointer.
func (p Pointer) IsValid() bool {
	if len(p) > 0 && p[0] != '/' {
		return false // non-empty pointers must begin with a slash
	}
	for i, r := range p {
		if r == '~' {
			if i+1 == len(p) {
				return false // invalid escape: nothing follows '~'
			}
			if next := p[i+1]; next != '0' && next != '1' {
				return false // invalid escape: only "~0" and "~1" exist
			}
			continue
		}
		// A decoded replacement character is only legitimate if the input
		// literally contains its encoding; otherwise it signals bad UTF-8.
		if r == '\ufffd' && !strings.HasPrefix(string(p[i:]), "\ufffd") {
			return false // invalid UTF-8
		}
	}
	return true
}
// Contains reports whether the JSON value that p points to
// is equal to or contains the JSON value that pc points to.
// That is the case when p is a prefix of pc and the remainder of pc
// is either empty or begins a new token.
func (p Pointer) Contains(pc Pointer) bool {
	// Invariant: len(p) <= len(pc) if p.Contains(pc)
	if !strings.HasPrefix(string(pc), string(p)) {
		return false
	}
	rest := pc[len(p):]
	return len(rest) == 0 || rest[0] == '/'
}
// Parent strips off the last token and returns the remaining pointer.
// The parent of an empty p is an empty string.
func (p Pointer) Parent() Pointer {
	cut := strings.LastIndexByte(string(p), '/')
	if cut < 0 {
		cut = 0 // no slash at all: nothing remains
	}
	return p[:cut]
}
// LastToken returns the last token in the pointer.
// The last token of an empty p is an empty string.
func (p Pointer) LastToken() string {
	tok := string(p)
	if i := strings.LastIndexByte(tok, '/'); i >= 0 {
		tok = tok[i+1:] // keep only what follows the final slash
	}
	return unescapePointerToken(tok)
}
// AppendToken appends a token to the end of p and returns the full pointer.
// The token is escaped as needed before being appended after a '/'.
func (p Pointer) AppendToken(tok string) Pointer {
	buf := append([]byte(p), '/')
	buf = appendEscapePointerName(buf, tok)
	return Pointer(buf)
}
// TODO: Add Pointer.AppendTokens,
// but should this take in a ...string or an iter.Seq[string]?
// Tokens returns an iterator over the reference tokens in the JSON pointer,
// starting from the first token until the last token (unless stopped early).
// Each yielded token has its "~0" and "~1" escapes already resolved.
//
// The returned iterator may be ranged over any number of times;
// every iteration starts again from the first token.
func (p Pointer) Tokens() iter.Seq[string] {
	return func(yield func(string) bool) {
		// Consume a copy that is local to this invocation. Advancing the
		// captured receiver itself would leave nothing to yield the next
		// time the same iterator is used.
		for rest := string(p); len(rest) > 0; {
			rest = strings.TrimPrefix(rest, "/")
			// The token extends up to the next slash, or to the end of
			// the pointer if there is no further slash.
			end := strings.IndexByte(rest, '/')
			if end < 0 {
				end = len(rest)
			}
			if !yield(unescapePointerToken(rest[:end])) {
				return
			}
			rest = rest[end:]
		}
	}
}
// unescapePointerToken resolves the RFC 6901 escape sequences in a single
// reference token: "~1" becomes '/' and "~0" becomes '~'.
// Tokens without any '~' are returned unchanged.
func unescapePointerToken(token string) string {
	if !strings.Contains(token, "~") {
		return token
	}
	// Per RFC 6901, section 3, unescape '~' and '/' characters.
	// "~1" must be handled first so that "~01" correctly becomes "~1".
	slashed := strings.ReplaceAll(token, "~1", "/")
	return strings.ReplaceAll(slashed, "~0", "~")
}
// appendStackPointer appends a JSON Pointer (RFC 6901) to the current value.
//
// - If where is -1, then it points to the previously processed token.
//
// - If where is 0, then it points to the parent JSON object or array,
// or an object member if in-between an object member key and value.
// This is useful when the position is ambiguous whether
// we are interested in the previous or next token, or
// when we are uncertain whether the next token
// continues or terminates the current object or array.
//
// - If where is +1, then it points to the next expected value,
// assuming that it continues the current JSON object or array.
// As a special case, if the next token is a JSON object name,
// then it points to the parent JSON object.
//
// The pointer is appended to b and the extended slice is returned.
//
// Invariant: Must call s.Names.copyQuotedBuffer beforehand.
func (s state) appendStackPointer(b []byte, where int) []byte {
// objectDepth counts the objects seen so far and
// is the index of the current object's name within s.Names.
var objectDepth int
// Start at 1 to skip the virtual top-level array at index 0.
for i := 1; i < s.Tokens.Depth(); i++ {
e := s.Tokens.index(i)
arrayDelta := -1 // by default point to previous array element
// Only the innermost entry is affected by the where argument.
if isLast := i == s.Tokens.Depth()-1; isLast {
switch {
case where < 0 && e.Length() == 0 || where == 0 && !e.needObjectValue() || where > 0 && e.NeedObjectName():
// Stop at the parent: no token is emitted for the innermost entry.
return b
case where > 0 && e.isArray():
arrayDelta = 0 // point to next array element
}
}
switch {
case e.isObject():
b = appendEscapePointerName(append(b, '/'), s.Names.getUnquoted(objectDepth))
objectDepth++
case e.isArray():
// Length is a count, so Length-1 is the index of the previous element
// and Length is the index of the next one.
b = strconv.AppendUint(append(b, '/'), uint64(e.Length()+int64(arrayDelta)), 10)
}
}
return b
}
// appendEscapePointerName appends name to b as a single JSON Pointer
// reference token and returns the extended slice.
// Per RFC 6901, section 3, '~' is written as "~0" and '/' as "~1".
// The name is processed rune by rune, so any invalid UTF-8 in it
// is appended as the Unicode replacement character.
func appendEscapePointerName[Bytes ~[]byte | ~string](b []byte, name Bytes) []byte {
	for _, r := range string(name) {
		if r == '~' {
			b = append(b, '~', '0')
			continue
		}
		if r == '/' {
			b = append(b, '~', '1')
			continue
		}
		b = utf8.AppendRune(b, r)
	}
	return b
}
// stateMachine is a push-down automaton that validates whether
// a sequence of tokens is valid or not according to the JSON grammar.
// It is useful for both encoding and decoding.
//
// It is a stack where each entry represents a nested JSON object or array.
// The stack has a minimum depth of 1 where the first level is a
// virtual JSON array to handle a stream of top-level JSON values.
// The top-level virtual JSON array is special in that it doesn't require commas
// between each JSON value.
//
// For performance, most methods are carefully written to be inlinable.
// The zero value is a valid state machine ready for use.
type stateMachine struct {
// Stack holds the entries of all enclosing levels,
// i.e., every level except the innermost one.
Stack []stateEntry
// Last is the entry of the innermost (current) level.
// It is kept outside of Stack, which is why Depth is len(Stack)+1.
Last stateEntry
}
// reset resets the state machine.
// The machine always starts with a minimum depth of 1.
func (m *stateMachine) reset() {
m.Stack = m.Stack[:0]
// Release the backing array instead of reusing it once it has grown large.
if cap(m.Stack) > 1<<10 {
m.Stack = nil
}
// The only remaining level is the virtual top-level array with no elements.
m.Last = stateTypeArray
}
// Depth is the current nested depth of JSON objects and arrays.
// It is one-indexed (i.e., top-level values have a depth of 1).
func (m stateMachine) Depth() int {
return len(m.Stack) + 1
}
// index returns a reference to the ith entry.
// Index 0 is the outermost level and index Depth()-1 is the innermost level.
// It is only valid until the next push method call.
func (m *stateMachine) index(i int) *stateEntry {
// The innermost entry lives in m.Last rather than in m.Stack.
if i == len(m.Stack) {
return &m.Last
}
return &m.Stack[i]
}
// DepthLength reports the current nested depth and
// the length of the last JSON object or array.
func (m stateMachine) DepthLength() (int, int64) {
return m.Depth(), m.Last.Length()
}
// appendLiteral appends a JSON literal as the next token in the sequence.
// It reports ErrNonStringName if an object name is expected instead, and
// errInvalidNamespace if the current namespace was invalidated.
// If an error is returned, the state is not mutated.
func (m *stateMachine) appendLiteral() error {
switch {
case m.Last.NeedObjectName():
return ErrNonStringName
case !m.Last.isValidNamespace():
return errInvalidNamespace
default:
m.Last.Increment()
return nil
}
}
// appendString appends a JSON string as the next token in the sequence.
// Unlike the other append and push methods, there is no check for
// NeedObjectName since a string is acceptable as an object name.
// If an error is returned, the state is not mutated.
func (m *stateMachine) appendString() error {
switch {
case !m.Last.isValidNamespace():
return errInvalidNamespace
default:
m.Last.Increment()
return nil
}
}
// appendNumber appends a JSON number as the next token in the sequence.
// Numbers are subject to exactly the same rules as literals.
// If an error is returned, the state is not mutated.
func (m *stateMachine) appendNumber() error {
return m.appendLiteral()
}
// pushObject appends a JSON start object token as next in the sequence.
// It reports ErrNonStringName if an object name is expected instead,
// errInvalidNamespace if the current namespace was invalidated, and
// errMaxDepth if the stack already holds maxNestingDepth entries.
// If an error is returned, the state is not mutated.
func (m *stateMachine) pushObject() error {
switch {
case m.Last.NeedObjectName():
return ErrNonStringName
case !m.Last.isValidNamespace():
return errInvalidNamespace
case len(m.Stack) == maxNestingDepth:
return errMaxDepth
default:
// Count the new object as an element of its parent before descending,
// then make an empty object the innermost entry.
m.Last.Increment()
m.Stack = append(m.Stack, m.Last)
m.Last = stateTypeObject
return nil
}
}
// popObject appends a JSON end object token as next in the sequence.
// It reports errMismatchDelim if the innermost entry is not an object,
// errMissingValue if the last name has no value yet, and
// errInvalidNamespace if the current namespace was invalidated.
// If an error is returned, the state is not mutated.
func (m *stateMachine) popObject() error {
switch {
case !m.Last.isObject():
return errMismatchDelim
case m.Last.needObjectValue():
return errMissingValue
case !m.Last.isValidNamespace():
return errInvalidNamespace
default:
// Restore the parent entry as the innermost entry.
m.Last = m.Stack[len(m.Stack)-1]
m.Stack = m.Stack[:len(m.Stack)-1]
return nil
}
}
// pushArray appends a JSON start array token as next in the sequence.
// It reports ErrNonStringName if an object name is expected instead,
// errInvalidNamespace if the current namespace was invalidated, and
// errMaxDepth if the stack already holds maxNestingDepth entries.
// If an error is returned, the state is not mutated.
func (m *stateMachine) pushArray() error {
switch {
case m.Last.NeedObjectName():
return ErrNonStringName
case !m.Last.isValidNamespace():
return errInvalidNamespace
case len(m.Stack) == maxNestingDepth:
return errMaxDepth
default:
// Count the new array as an element of its parent before descending,
// then make an empty array the innermost entry.
m.Last.Increment()
m.Stack = append(m.Stack, m.Last)
m.Last = stateTypeArray
return nil
}
}
// popArray appends a JSON end array token as next in the sequence.
// It reports errMismatchDelim if the innermost entry is not an array or
// is the virtual top-level array, and errInvalidNamespace if the
// current namespace was invalidated.
// If an error is returned, the state is not mutated.
func (m *stateMachine) popArray() error {
switch {
case !m.Last.isArray() || len(m.Stack) == 0: // forbid popping top-level virtual JSON array
return errMismatchDelim
case !m.Last.isValidNamespace():
return errInvalidNamespace
default:
// Restore the parent entry as the innermost entry.
m.Last = m.Stack[len(m.Stack)-1]
m.Stack = m.Stack[:len(m.Stack)-1]
return nil
}
}
// NeedIndent reports whether indent whitespace should be injected.
// A zero value means that no whitespace should be injected.
// A positive value means '\n', indentPrefix, and (n-1) copies of indentBody
// should be appended to the output immediately before the next token.
func (m stateMachine) NeedIndent(next Kind) (n int) {
willEnd := next == '}' || next == ']'
switch {
case m.Depth() == 1:
return 0 // top-level values are never indented
case m.Last.Length() == 0 && willEnd:
return 0 // an empty object or array is never indented
case m.Last.Length() == 0 || m.Last.needImplicitComma(next):
return m.Depth() // first or subsequent element at the current depth
case willEnd:
return m.Depth() - 1 // closing token aligns with the parent depth
default:
return 0 // e.g., an object value following its name
}
}
// MayAppendDelim appends a colon or comma that may precede the next token.
// It appends to b the same delimiter that needDelim reports, if any,
// and returns the possibly extended slice.
func (m stateMachine) MayAppendDelim(b []byte, next Kind) []byte {
switch {
case m.Last.needImplicitColon():
return append(b, ':')
case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
return append(b, ',')
default:
return b
}
}
// needDelim reports whether a colon or comma token should be implicitly emitted
// before the next token of the specified kind.
// A zero value means no delimiter should be emitted.
func (m stateMachine) needDelim(next Kind) (delim byte) {
switch {
case m.Last.needImplicitColon():
return ':'
case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
return ','
default:
return 0
}
}
// InvalidateDisabledNamespaces marks all disabled namespaces as invalid.
//
// For efficiency, Marshal and Unmarshal may disable namespaces since there are
// more efficient ways to track duplicate names. However, if an error occurs,
// the namespaces in Encoder or Decoder will be left in an inconsistent state.
// Mark the namespaces as invalid so that future method calls on
// Encoder or Decoder will return an error.
func (m *stateMachine) InvalidateDisabledNamespaces() {
	depth := m.Depth()
	for i := 0; i < depth; i++ {
		entry := m.index(i)
		if entry.isActiveNamespace() {
			continue // still tracked normally; leave it alone
		}
		entry.invalidateNamespace()
	}
}
// stateEntry encodes several artifacts within a single unsigned integer:
// - whether this represents a JSON object or array,
// - whether this object should check for duplicate names, and
// - how many elements are in this JSON object or array.
type stateEntry uint64
const (
// The type mask (1 bit) records whether this is a JSON object or array.
stateTypeMask stateEntry = 0x8000_0000_0000_0000
stateTypeObject stateEntry = 0x8000_0000_0000_0000
stateTypeArray stateEntry = 0x0000_0000_0000_0000
// The name check mask (2 bits) records whether to update
// the namespaces for the current JSON object and
// whether the namespace is valid.
stateNamespaceMask stateEntry = 0x6000_0000_0000_0000
stateDisableNamespace stateEntry = 0x4000_0000_0000_0000
stateInvalidNamespace stateEntry = 0x2000_0000_0000_0000
// The count mask (61 bits) records the number of elements.
// Since names and values of an object are counted separately,
// the least-significant bit of the count tells whether an object
// expects a name next (even count) or a value next (odd count).
stateCountMask stateEntry = 0x1fff_ffff_ffff_ffff
stateCountLSBMask stateEntry = 0x0000_0000_0000_0001
stateCountOdd stateEntry = 0x0000_0000_0000_0001
stateCountEven stateEntry = 0x0000_0000_0000_0000
)
// Length reports the number of elements in the JSON object or array.
// Each name and value in an object entry is treated as a separate element.
func (e stateEntry) Length() int64 {
return int64(e & stateCountMask)
}
// isObject reports whether this is a JSON object.
func (e stateEntry) isObject() bool {
return e&stateTypeMask == stateTypeObject
}
// isArray reports whether this is a JSON array.
func (e stateEntry) isArray() bool {
return e&stateTypeMask == stateTypeArray
}
// NeedObjectName reports whether the next token must be a JSON string,
// which is necessary for JSON object names.
// This is the case for an object with an even number of elements.
func (e stateEntry) NeedObjectName() bool {
return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountEven
}
// needImplicitColon reports whether a colon should occur next,
// which always occurs after JSON object names.
func (e stateEntry) needImplicitColon() bool {
return e.needObjectValue()
}
// needObjectValue reports whether the next token must be a JSON value,
// which is necessary after every JSON object name.
// This is the case for an object with an odd number of elements.
func (e stateEntry) needObjectValue() bool {
return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountOdd
}
// needImplicitComma reports whether a comma should occur next,
// which always occurs after a value in a JSON object or array
// before the next value (or name).
func (e stateEntry) needImplicitComma(next Kind) bool {
return !e.needObjectValue() && e.Length() > 0 && next != '}' && next != ']'
}
// Increment increments the number of elements for the current object or array.
// This assumes that overflow won't practically be an issue since
// 1<<bits.OnesCount(stateCountMask) is sufficiently large.
func (e *stateEntry) Increment() {
(*e)++
}
// decrement decrements the number of elements for the current object or array.
// It is the caller's responsibility to ensure that e.Length() > 0.
func (e *stateEntry) decrement() {
(*e)--
}
// DisableNamespace disables the JSON object namespace such that the
// Encoder or Decoder no longer updates the namespace.
func (e *stateEntry) DisableNamespace() {
*e |= stateDisableNamespace
}
// isActiveNamespace reports whether the JSON object namespace is actively
// being updated and used for duplicate name checks.
func (e stateEntry) isActiveNamespace() bool {
return e&(stateDisableNamespace) == 0
}
// invalidateNamespace marks the JSON object namespace as being invalid.
func (e *stateEntry) invalidateNamespace() {
*e |= stateInvalidNamespace
}
// isValidNamespace reports whether the JSON object namespace is valid.
func (e stateEntry) isValidNamespace() bool {
return e&(stateInvalidNamespace) == 0
}
// objectNameStack is a stack of names when descending into a JSON object.
// In contrast to objectNamespaceStack, this only has to remember a single name
// per JSON object.
//
// This data structure may contain offsets to encodeBuffer or decodeBuffer.
// It violates clean abstraction of layers, but is significantly more efficient.
// This ensures that popping and pushing in the common case is a trivial
// push/pop of an offset integer.
//
// The zero value is an empty names stack ready for use.
type objectNameStack struct {
// offsets is a stack of offsets for each name.
// A non-negative offset is the ending offset into the local names buffer.
// A negative offset is the bit-wise inverse of a starting offset into
// a remote buffer (e.g., encodeBuffer or decodeBuffer).
// A math.MinInt offset (see invalidOffset) at the end implies that
// the last object is empty.
// Invariant: Non-negative offsets always occur before negative offsets.
offsets []int
// unquotedNames is a back-to-back concatenation of names.
// The name of the ith object ends at offsets[i] and
// starts where the name of the previous object ends.
unquotedNames []byte
}
// reset empties the name stack.
// Modestly sized buffers are retained for reuse, while buffers that
// have grown beyond a fixed capacity are released entirely.
func (ns *objectNameStack) reset() {
	if cap(ns.offsets) > 1<<6 {
		ns.offsets = nil // avoid pinning arbitrarily large amounts of memory
	} else {
		ns.offsets = ns.offsets[:0]
	}
	if cap(ns.unquotedNames) > 1<<10 {
		ns.unquotedNames = nil // avoid pinning arbitrarily large amounts of memory
	} else {
		ns.unquotedNames = ns.unquotedNames[:0]
	}
}
// length reports the number of JSON objects currently on the stack.
func (ns *objectNameStack) length() int {
	return len(ns.offsets)
}

// getUnquoted retrieves the ith unquoted name in the stack.
// It returns an empty string if the last object is empty.
//
// Invariant: Must call copyQuotedBuffer beforehand.
func (ns *objectNameStack) getUnquoted(i int) []byte {
	ns.ensureCopiedBuffer()
	// The first name starts at the beginning of the buffer;
	// every other name starts where its predecessor ends.
	var start int
	if i != 0 {
		start = ns.offsets[i-1]
	}
	return ns.unquotedNames[start:ns.offsets[i]]
}
// invalidOffset indicates that the last JSON object currently has no name.
// Being negative, it is treated like a remote offset by the scans in
// copyQuotedBuffer and ensureCopiedBuffer until it is replaced.
const invalidOffset = math.MinInt
// push descends into a nested JSON object.
// The new object starts out without a name.
func (ns *objectNameStack) push() {
ns.offsets = append(ns.offsets, invalidOffset)
}
// ReplaceLastQuotedOffset replaces the last name with the starting offset
// to the quoted name in some remote buffer. All offsets provided must be
// relative to the same buffer until copyQuotedBuffer is called.
func (ns *objectNameStack) ReplaceLastQuotedOffset(i int) {
	// Remote offsets are stored bit-wise inverted rather than negated.
	// Negation would map 0 onto itself, and 0 is a valid local offset.
	// Inversion is the same as -i-1 (0 becomes -1, 1 becomes -2, ...),
	// so that every remote offset is guaranteed to be negative.
	top := len(ns.offsets) - 1
	ns.offsets[top] = ^i
}
// replaceLastUnquotedName replaces the last name with the provided name.
// The name is stored in the local buffer right after the name of the
// enclosing object (or at the very start if there is none).
//
// Invariant: Must call copyQuotedBuffer beforehand.
func (ns *objectNameStack) replaceLastUnquotedName(s string) {
	ns.ensureCopiedBuffer()
	top := len(ns.offsets) - 1
	begin := 0
	if top > 0 {
		begin = ns.offsets[top-1]
	}
	// Truncating to begin discards whatever name was stored previously.
	ns.unquotedNames = append(ns.unquotedNames[:begin], s...)
	ns.offsets[top] = len(ns.unquotedNames)
}
// clearLast removes any name in the last JSON object.
// It is semantically equivalent to ns.push followed by ns.pop.
func (ns *objectNameStack) clearLast() {
	top := len(ns.offsets) - 1
	ns.offsets[top] = invalidOffset
}

// pop ascends out of a nested JSON object.
func (ns *objectNameStack) pop() {
	top := len(ns.offsets) - 1
	ns.offsets = ns.offsets[:top]
}
// copyQuotedBuffer copies names from the remote buffer into the local names
// buffer so that there are no more offset references into the remote buffer.
// This allows the remote buffer to change contents without affecting
// the names that this data structure is trying to remember.
//
// The provided b is the remote buffer that all negative offsets refer to.
// Note that b itself may be modified to undo an earlier invalidation
// of the leading quote of a name.
func (ns *objectNameStack) copyQuotedBuffer(b []byte) {
// Find the first negative offset.
// Scanning backwards stops at the last non-negative offset (or at -1),
// so that i+1 is the index of the first negative offset.
var i int
for i = len(ns.offsets) - 1; i >= 0 && ns.offsets[i] < 0; i-- {
continue
}
// Copy each name from the remote buffer into the local buffer.
for i = i + 1; i < len(ns.offsets); i++ {
if i == len(ns.offsets)-1 && ns.offsets[i] == invalidOffset {
// Record an empty name that ends where the previous name ends.
if i == 0 {
ns.offsets[i] = 0
} else {
ns.offsets[i] = ns.offsets[i-1]
}
break // last JSON object had a push without any names
}
// As a form of Hyrum proofing, we write an invalid character into the
// buffer to make misuse of Decoder.ReadToken more obvious.
// We need to undo that mutation here.
// Inverting the stored offset recovers the starting offset in b.
quotedName := b[^ns.offsets[i]:]
if quotedName[0] == invalidateBufferByte {
quotedName[0] = '"'
}
// Append the unquoted name to the local buffer.
var startOffset int
if i > 0 {
startOffset = ns.offsets[i-1]
}
// A simple string can be copied as is without the surrounding quotes;
// anything else goes through jsonwire.AppendUnquote.
if n := jsonwire.ConsumeSimpleString(quotedName); n > 0 {
ns.unquotedNames = append(ns.unquotedNames[:startOffset], quotedName[len(`"`):n-len(`"`)]...)
} else {
// NOTE(review): the error from AppendUnquote is discarded here;
// presumably the name was already validated by the caller — confirm.
ns.unquotedNames, _ = jsonwire.AppendUnquote(ns.unquotedNames[:startOffset], quotedName)
}
ns.offsets[i] = len(ns.unquotedNames)
}
}
// ensureCopiedBuffer panics if the last offset is still negative, which means
// that copyQuotedBuffer has not been called since the offset was recorded.
func (ns *objectNameStack) ensureCopiedBuffer() {
	n := len(ns.offsets)
	if n == 0 {
		return
	}
	if ns.offsets[n-1] < 0 {
		panic("BUG: copyQuotedBuffer not called beforehand")
	}
}
// objectNamespaceStack is a stack of object namespaces.
// This data structure assists in detecting duplicate names.
type objectNamespaceStack []objectNamespace

// reset resets the object namespace stack.
// A stack that has grown beyond a fixed capacity is released entirely;
// otherwise the backing array is retained for reuse.
func (nss *objectNamespaceStack) reset() {
	if cap(*nss) > 1<<10 {
		*nss = nil
		return
	}
	*nss = (*nss)[:0]
}
// push starts a new namespace for a nested JSON object.
// When spare capacity is available, the namespace previously stored in
// that slot is reset and reused instead of starting from a zero value.
func (nss *objectNamespaceStack) push() {
	n := len(*nss)
	if n >= cap(*nss) {
		*nss = append(*nss, objectNamespace{})
		return
	}
	*nss = (*nss)[:n+1]
	(*nss)[n].reset()
}
// Last returns a pointer to the last JSON object namespace.
// The stack must not be empty.
func (nss objectNamespaceStack) Last() *objectNamespace {
	top := len(nss) - 1
	return &nss[top]
}

// pop terminates the namespace for a nested JSON object.
// The stack must not be empty.
func (nss *objectNamespaceStack) pop() {
	top := len(*nss) - 1
	*nss = (*nss)[:top]
}
// objectNamespace is the namespace for a JSON object.
// In contrast to objectNameStack, this needs to remember all names
// per JSON object.
//
// The zero value is an empty namespace ready for use.
type objectNamespace struct {
// It relies on a linear search over all the names before switching
// to use a Go map for direct lookup.
// endOffsets is a list of offsets to the end of each name in buffers.
// The length of endOffsets is the number of names in the namespace.
endOffsets []uint
// allUnquotedNames is a back-to-back concatenation of every name in the namespace.
allUnquotedNames []byte
// mapNames is a Go map containing every name in the namespace.
// Only valid if non-nil.
mapNames map[string]struct{}
}
// reset resets the namespace to be empty.
// The lookup map is always dropped, while the two buffers are only
// dropped once they have grown beyond a fixed capacity.
func (ns *objectNamespace) reset() {
	ns.mapNames = nil
	if cap(ns.endOffsets) > 1<<6 {
		ns.endOffsets = nil // avoid pinning arbitrarily large amounts of memory
	} else {
		ns.endOffsets = ns.endOffsets[:0]
	}
	if cap(ns.allUnquotedNames) > 1<<10 {
		ns.allUnquotedNames = nil // avoid pinning arbitrarily large amounts of memory
	} else {
		ns.allUnquotedNames = ns.allUnquotedNames[:0]
	}
}
// length reports the number of names in the namespace.
func (ns *objectNamespace) length() int {
	return len(ns.endOffsets)
}

// getUnquoted retrieves the ith unquoted name in the namespace.
func (ns *objectNamespace) getUnquoted(i int) []byte {
	// The first name starts at the beginning of the buffer;
	// every other name starts where its predecessor ends.
	var start uint
	if i != 0 {
		start = ns.endOffsets[i-1]
	}
	return ns.allUnquotedNames[start:ns.endOffsets[i]]
}

// lastUnquoted retrieves the last name in the namespace.
func (ns *objectNamespace) lastUnquoted() []byte {
	last := ns.length() - 1
	return ns.getUnquoted(last)
}
// insertQuoted inserts a name and reports whether it was inserted,
// which only occurs if name is not already in the namespace.
// The provided name must be a valid JSON string.
// If isVerbatim is true, the surrounding quotes are stripped and the
// content is inserted as is; otherwise the name is unquoted on insertion.
func (ns *objectNamespace) insertQuoted(name []byte, isVerbatim bool) bool {
	if !isVerbatim {
		return ns.insert(name, true)
	}
	unquoted := name[len(`"`) : len(name)-len(`"`)]
	return ns.insert(unquoted, false)
}

// InsertUnquoted inserts an already unquoted name and reports whether it
// was inserted, which only occurs if name is not already in the namespace.
func (ns *objectNamespace) InsertUnquoted(name []byte) bool {
	const quoted = false
	return ns.insert(name, quoted)
}
// insert adds name to the namespace and reports whether it was added,
// which only occurs if name is not already in the namespace.
// If quoted is true, name is unquoted with jsonwire.AppendUnquote first;
// otherwise name is used as is.
// The namespace is left unchanged if the name is a duplicate.
func (ns *objectNamespace) insert(name []byte, quoted bool) bool {
// Tentatively append the unquoted name after the existing names.
// The result is only committed to ns.allUnquotedNames at the very end,
// so that a duplicate name leaves the namespace untouched.
var allNames []byte
if quoted {
// NOTE(review): the error from AppendUnquote is discarded here;
// presumably callers only pass valid JSON strings — confirm.
allNames, _ = jsonwire.AppendUnquote(ns.allUnquotedNames, name)
} else {
allNames = append(ns.allUnquotedNames, name...)
}
// From here on, name refers to the unquoted form within allNames.
name = allNames[len(ns.allUnquotedNames):]
// Switch to a map if the buffer is too large for linear search.
// This does not add the current name to the map.
if ns.mapNames == nil && (ns.length() > 64 || len(ns.allUnquotedNames) > 1024) {
ns.mapNames = make(map[string]struct{})
var startOffset uint
for _, endOffset := range ns.endOffsets {
name := ns.allUnquotedNames[startOffset:endOffset]
ns.mapNames[string(name)] = struct{}{} // allocates a new string
startOffset = endOffset
}
}
if ns.mapNames == nil {
// Perform linear search over the buffer to find matching names.
// It provides O(n) lookup, but does not require any allocations.
var startOffset uint
for _, endOffset := range ns.endOffsets {
if string(ns.allUnquotedNames[startOffset:endOffset]) == string(name) {
return false
}
startOffset = endOffset
}
} else {
// Use the map if it is populated.
// It provides O(1) lookup, but requires a string allocation per name.
if _, ok := ns.mapNames[string(name)]; ok {
return false
}
ns.mapNames[string(name)] = struct{}{} // allocates a new string
}
// The name is new: commit the extended buffer and record where it ends.
ns.allUnquotedNames = allNames
ns.endOffsets = append(ns.endOffsets, uint(len(ns.allUnquotedNames)))
return true
}
// removeLast removes the last name in the namespace.
// The namespace must not be empty.
func (ns *objectNamespace) removeLast() {
	if ns.mapNames != nil {
		delete(ns.mapNames, string(ns.lastUnquoted()))
	}
	remaining := ns.length() - 1
	// The names buffer is cut back to where the new last name ends,
	// which is the very start if no names remain.
	var end uint
	if remaining != 0 {
		end = ns.endOffsets[remaining-1]
	}
	ns.endOffsets = ns.endOffsets[:remaining]
	ns.allUnquotedNames = ns.allUnquotedNames[:end]
}

View File

@ -0,0 +1,396 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"fmt"
"slices"
"strings"
"testing"
"unicode/utf8"
)
// TestPointer checks the Pointer methods (Parent, LastToken, AppendToken,
// Contains, Tokens, and IsValid) against a table of pointers,
// including syntactically invalid ones.
func TestPointer(t *testing.T) {
	tests := []struct {
		in         Pointer
		wantParent Pointer
		wantLast   string
		wantTokens []string
		wantValid  bool
	}{
		{"", "", "", nil, true},
		{"a", "", "a", []string{"a"}, false},
		{"~", "", "~", []string{"~"}, false},
		{"/a", "", "a", []string{"a"}, true},
		{"/foo/bar", "/foo", "bar", []string{"foo", "bar"}, true},
		{"///", "//", "", []string{"", "", ""}, true},
		{"/~0~1", "", "~/", []string{"~/"}, true},
		{"/\xde\xad\xbe\xef", "", "\xde\xad\xbe\xef", []string{"\xde\xad\xbe\xef"}, false},
	}
	for _, tt := range tests {
		if got := tt.in.Parent(); got != tt.wantParent {
			t.Errorf("Pointer(%q).Parent = %q, want %q", tt.in, got, tt.wantParent)
		}
		if got := tt.in.LastToken(); got != tt.wantLast {
			t.Errorf("Pointer(%q).LastToken = %q, want %q", tt.in, got, tt.wantLast)
		}
		// The remaining checks only make sense for pointers with a leading slash.
		if strings.HasPrefix(string(tt.in), "/") {
			wantRoundtrip := tt.in
			if !utf8.ValidString(string(wantRoundtrip)) {
				// Replace bytes of invalid UTF-8 with Unicode replacement character.
				wantRoundtrip = Pointer([]rune(wantRoundtrip))
			}
			if got := tt.in.Parent().AppendToken(tt.in.LastToken()); got != wantRoundtrip {
				// Report the value that was actually compared against,
				// which differs from tt.in for invalid UTF-8.
				t.Errorf("Pointer(%q).Parent().AppendToken(LastToken()) = %q, want %q", tt.in, got, wantRoundtrip)
			}
			// Every ancestor of tt.in (and tt.in itself) must contain tt.in,
			// while appending a stray "x" to any of them must not.
			in := tt.in
			for {
				if (in + "x").Contains(tt.in) {
					t.Errorf("Pointer(%q).Contains(%q) = true, want false", in+"x", tt.in)
				}
				if !in.Contains(tt.in) {
					t.Errorf("Pointer(%q).Contains(%q) = false, want true", in, tt.in)
				}
				if in == in.Parent() {
					break
				}
				in = in.Parent()
			}
		}
		if got := slices.Collect(tt.in.Tokens()); !slices.Equal(got, tt.wantTokens) {
			t.Errorf("Pointer(%q).Tokens = %q, want %q", tt.in, got, tt.wantTokens)
		}
		if got := tt.in.IsValid(); got != tt.wantValid {
			t.Errorf("Pointer(%q).IsValid = %v, want %v", tt.in, got, tt.wantValid)
		}
	}
}
// TestStateMachine drives a stateMachine through scripted sequences of
// tokens and checks the reported errors, the element counts of every
// stack entry, and the delimiters that needDelim reports.
func TestStateMachine(t *testing.T) {
// To test a state machine, we pass an ordered sequence of operations and
// check whether the current state is as expected.
// The operation type is a union type of various possible operations,
// which either call mutating methods on the state machine or
// call accessor methods on state machine and verify the results.
type operation any
type (
// stackLengths checks the results of stateEntry.Length accessors.
stackLengths []int64
// appendTokens is sequence of token kinds to append where
// none of them are expected to fail.
//
// For example: `[n"0]` is equivalent to the following sequence:
//
// pushArray()
// appendLiteral()
// appendString()
// appendNumber()
// popArray()
//
appendTokens string
// appendToken is a single token kind to append with the expected error.
appendToken struct {
kind Kind
want error
}
// needDelim checks the result of the needDelim accessor.
needDelim struct {
next Kind
want byte
}
)
// Each entry is a sequence of tokens to pass to the state machine.
tests := []struct {
label string
ops []operation
}{{
"TopLevelValues",
[]operation{
stackLengths{0},
needDelim{'n', 0},
appendTokens(`nft`),
stackLengths{3},
needDelim{'"', 0},
appendTokens(`"0[]{}`),
stackLengths{7},
},
}, {
"ArrayValues",
[]operation{
stackLengths{0},
needDelim{'[', 0},
appendTokens(`[`),
stackLengths{1, 0},
needDelim{'n', 0},
appendTokens(`nft`),
stackLengths{1, 3},
needDelim{'"', ','},
appendTokens(`"0[]{}`),
stackLengths{1, 7},
needDelim{']', 0},
appendTokens(`]`),
stackLengths{1},
},
}, {
"ObjectValues",
[]operation{
stackLengths{0},
needDelim{'{', 0},
appendTokens(`{`),
stackLengths{1, 0},
needDelim{'"', 0},
appendTokens(`"`),
stackLengths{1, 1},
needDelim{'n', ':'},
appendTokens(`n`),
stackLengths{1, 2},
needDelim{'"', ','},
appendTokens(`"f"t`),
stackLengths{1, 6},
appendTokens(`"""0"[]"{}`),
stackLengths{1, 14},
needDelim{'}', 0},
appendTokens(`}`),
stackLengths{1},
},
}, {
"ObjectCardinality",
[]operation{
appendTokens(`{`),
// Appending any kind other than string for object name is an error.
appendToken{'n', ErrNonStringName},
appendToken{'f', ErrNonStringName},
appendToken{'t', ErrNonStringName},
appendToken{'0', ErrNonStringName},
appendToken{'{', ErrNonStringName},
appendToken{'[', ErrNonStringName},
appendTokens(`"`),
// Appending '}' without first appending any value is an error.
appendToken{'}', errMissingValue},
appendTokens(`"`),
appendTokens(`}`),
},
}, {
"MismatchingDelims",
[]operation{
appendToken{'}', errMismatchDelim}, // appending '}' without preceding '{'
appendTokens(`[[{`),
appendToken{']', errMismatchDelim}, // appending ']' that mismatches preceding '{'
appendTokens(`}]`),
appendToken{'}', errMismatchDelim}, // appending '}' that mismatches preceding '['
appendTokens(`]`),
appendToken{']', errMismatchDelim}, // appending ']' without preceding '['
},
}}
for _, tt := range tests {
t.Run(tt.label, func(t *testing.T) {
// Flatten appendTokens to sequence of appendToken entries.
var ops []operation
for _, op := range tt.ops {
if toks, ok := op.(appendTokens); ok {
for _, k := range []byte(toks) {
ops = append(ops, appendToken{Kind(k), nil})
}
continue
}
ops = append(ops, op)
}
// Append each token to the state machine and check the output.
var state stateMachine
state.reset()
// sequence records the successfully appended kinds for failure messages.
var sequence []Kind
for _, op := range ops {
switch op := op.(type) {
case stackLengths:
var got []int64
for i := range state.Depth() {
e := state.index(i)
got = append(got, e.Length())
}
want := []int64(op)
if !slices.Equal(got, want) {
t.Fatalf("%s: stack lengths mismatch:\ngot %v\nwant %v", sequence, got, want)
}
case appendToken:
got := state.append(op.kind)
if !equalError(got, op.want) {
t.Fatalf("%s: append('%c') = %v, want %v", sequence, op.kind, got, op.want)
}
if got == nil {
sequence = append(sequence, op.kind)
}
case needDelim:
if got := state.needDelim(op.next); got != op.want {
t.Fatalf("%s: needDelim('%c') = '%c', want '%c'", sequence, op.next, got, op.want)
}
default:
panic(fmt.Sprintf("unknown operation: %T", op))
}
}
})
}
}
// append dispatches to the append, push, or pop method of the state machine
// that corresponds to the provided token kind.
// It panics for a kind that is not handled.
func (m *stateMachine) append(k Kind) error {
	switch k {
	case '{':
		return m.pushObject()
	case '}':
		return m.popObject()
	case '[':
		return m.pushArray()
	case ']':
		return m.popArray()
	case '"':
		return m.appendString()
	case '0':
		return m.appendNumber()
	case 'n', 'f', 't':
		return m.appendLiteral()
	}
	panic(fmt.Sprintf("invalid token kind: '%c'", k))
}
// TestObjectNamespace drives an objectNamespace through a fixed sequence of
// insert and removeLast operations and, after every step, compares the names
// it reports against an independently maintained list of expected names.
// The sequence is run once on a zero-value namespace and once after reset.
// It also checks that mapNames stays nil for the scripted sequence and
// becomes non-nil after 64 additional names are inserted.
func TestObjectNamespace(t *testing.T) {
type operation any
type (
// insert requests insertion of a quoted JSON name and records
// whether insertQuoted is expected to report it as newly inserted.
insert struct {
name string
wantInserted bool
}
// removeLast requests removal of the most recently inserted name.
removeLast struct{}
)
// Sequence of insert operations to perform (order matters).
ops := []operation{
insert{`""`, true},
removeLast{},
insert{`""`, true},
insert{`""`, false},
// Test insertion of the same name with different formatting.
insert{`"alpha"`, true},
insert{`"ALPHA"`, true}, // case-sensitive matching
insert{`"alpha"`, false},
insert{`"\u0061\u006c\u0070\u0068\u0061"`, false}, // unescapes to "alpha"
removeLast{}, // removes "ALPHA"
insert{`"alpha"`, false},
removeLast{}, // removes "alpha"
insert{`"alpha"`, true},
removeLast{},
// Bulk insert simple names.
insert{`"alpha"`, true},
insert{`"bravo"`, true},
insert{`"charlie"`, true},
insert{`"delta"`, true},
insert{`"echo"`, true},
insert{`"foxtrot"`, true},
insert{`"golf"`, true},
insert{`"hotel"`, true},
insert{`"india"`, true},
insert{`"juliet"`, true},
insert{`"kilo"`, true},
insert{`"lima"`, true},
insert{`"mike"`, true},
insert{`"november"`, true},
insert{`"oscar"`, true},
insert{`"papa"`, true},
insert{`"quebec"`, true},
insert{`"romeo"`, true},
insert{`"sierra"`, true},
insert{`"tango"`, true},
insert{`"uniform"`, true},
insert{`"victor"`, true},
insert{`"whiskey"`, true},
insert{`"xray"`, true},
insert{`"yankee"`, true},
insert{`"zulu"`, true},
// Test insertion of invalid UTF-8.
insert{`"` + "\ufffd" + `"`, true},
insert{`"` + "\ufffd" + `"`, false},
insert{`"\ufffd"`, false}, // unescapes to Unicode replacement character
insert{`"\uFFFD"`, false}, // unescapes to Unicode replacement character
insert{`"` + "\xff" + `"`, false}, // mangles as Unicode replacement character
removeLast{},
insert{`"` + "\ufffd" + `"`, true},
// Test insertion of unicode characters.
insert{`"☺☻☹"`, true},
insert{`"☺☻☹"`, false},
removeLast{},
insert{`"☺☻☹"`, true},
}
// Execute the sequence of operations twice:
// 1) on a fresh namespace and 2) on a namespace that has been reset.
var ns objectNamespace
// wantNames mirrors the unquoted names expected to be in ns, in insertion order.
wantNames := []string{}
for _, reset := range []bool{false, true} {
if reset {
ns.reset()
wantNames = nil
}
// Execute the operations and ensure the state is consistent.
for i, op := range ops {
switch op := op.(type) {
case insert:
// NOTE(review): the second argument is presumably the "is verbatim"
// hint of insertQuoted; confirm against its definition.
gotInserted := ns.insertQuoted([]byte(op.name), false)
if gotInserted != op.wantInserted {
t.Fatalf("%d: objectNamespace{%v}.insert(%v) = %v, want %v", i, strings.Join(wantNames, " "), op.name, gotInserted, op.wantInserted)
}
if gotInserted {
// Track the unquoted form; the error result of AppendUnquote is discarded.
b, _ := AppendUnquote(nil, []byte(op.name))
wantNames = append(wantNames, string(b))
}
case removeLast:
ns.removeLast()
wantNames = wantNames[:len(wantNames)-1]
default:
panic(fmt.Sprintf("unknown operation: %T", op))
}
// Check that the namespace is consistent.
gotNames := []string{}
for i := range ns.length() {
gotNames = append(gotNames, string(ns.getUnquoted(i)))
}
if !slices.Equal(gotNames, wantNames) {
t.Fatalf("%d: objectNamespace = {%v}, want {%v}", i, strings.Join(gotNames, " "), strings.Join(wantNames, " "))
}
}
// Verify that we have not switched to using a Go map.
if ns.mapNames != nil {
t.Errorf("objectNamespace.mapNames = non-nil, want nil")
}
// Insert a large number of names.
// The return value of InsertUnquoted is not checked here.
for i := range 64 {
ns.InsertUnquoted([]byte(fmt.Sprintf(`name%d`, i)))
}
// Verify that we did switch to using a Go map.
if ns.mapNames == nil {
t.Errorf("objectNamespace.mapNames = nil, want non-nil")
}
}
}

View File

@ -0,0 +1,527 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"errors"
"math"
"strconv"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonwire"
)
// NOTE: Token is analogous to v1 json.Token.
// Untyped integer limits used by the saturating Token accessors,
// plus the message used when panicking on a stale raw Token.
const (
// maxInt64 and minInt64 are the bounds of the int64 range.
maxInt64 = math.MaxInt64
minInt64 = math.MinInt64
// maxUint64 and minUint64 are the bounds of the uint64 range.
maxUint64 = math.MaxUint64
minUint64 = 0 // for consistency and readability purposes
// invalidTokenPanic is the panic value used when a raw Token's recorded
// offset no longer matches its decode buffer.
invalidTokenPanic = "invalid jsontext.Token; it has been voided by a subsequent json.Decoder call"
)
// errInvalidToken is an error value describing an invalid Token.
// Its users are outside the portion of the file shown here.
var errInvalidToken = errors.New("invalid jsontext.Token")
// Token represents a lexical JSON token, which may be one of the following:
// - a JSON literal (i.e., null, true, or false)
// - a JSON string (e.g., "hello, world!")
// - a JSON number (e.g., 123.456)
// - a start or end delimiter for a JSON object (i.e., { or } )
// - a start or end delimiter for a JSON array (i.e., [ or ] )
//
// A Token cannot represent entire array or object values, while a [Value] can.
// There is no Token to represent commas and colons since
// these structural tokens can be inferred from the surrounding context.
type Token struct {
// NOTE(review): nonComparable is declared elsewhere; presumably it makes
// Token values unusable with the == operator. Confirm against its definition.
nonComparable
// Tokens can exist in either a "raw" or an "exact" form.
// Tokens produced by the Decoder are in the "raw" form.
// Tokens returned by constructors are usually in the "exact" form.
// The Encoder accepts Tokens in either the "raw" or "exact" form.
//
// The following chart shows the possible values for each Token type:
//	╔═════════════════╦════════════╤════════════╤════════════╗
//	║ Token type      ║ raw field  │ str field  │ num field  ║
//	╠═════════════════╬════════════╪════════════╪════════════╣
//	║ null   (raw)    ║ "null"     │ ""         │ 0          ║
//	║ false  (raw)    ║ "false"    │ ""         │ 0          ║
//	║ true   (raw)    ║ "true"     │ ""         │ 0          ║
//	║ string (raw)    ║ non-empty  │ ""         │ offset     ║
//	║ string (string) ║ nil        │ non-empty  │ 0          ║
//	║ number (raw)    ║ non-empty  │ ""         │ offset     ║
//	║ number (float)  ║ nil        │ "f"        │ non-zero   ║
//	║ number (int64)  ║ nil        │ "i"        │ non-zero   ║
//	║ number (uint64) ║ nil        │ "u"        │ non-zero   ║
//	║ object (delim)  ║ "{" or "}" │ ""         │ 0          ║
//	║ array  (delim)  ║ "[" or "]" │ ""         │ 0          ║
//	╚═════════════════╩════════════╧════════════╧════════════╝
//
// Notes:
// - For tokens stored in "raw" form, the num field contains the
// absolute offset determined by raw.previousOffsetStart().
// The buffer itself is stored in raw.previousBuffer().
// - JSON literals and structural characters are always in the "raw" form.
// - JSON strings and numbers can be in either "raw" or "exact" forms.
// - The exact zero value of JSON strings and numbers in the "exact" forms
// have ambiguous representation. Thus, they are always represented
// in the "raw" form.
// raw contains a reference to the raw decode buffer.
// If non-nil, then its value takes precedence over str and num.
// It is only valid if num == raw.previousOffsetStart().
raw *decodeBuffer
// str is the unescaped JSON string if num is zero.
// Otherwise, it is "f", "i", or "u" if num should be interpreted
// as a float64, int64, or uint64, respectively.
str string
// num is a float64, int64, or uint64 stored as a uint64 value.
// It is non-zero for any JSON number in the "exact" form.
// For a token in the "raw" form it instead holds the buffer offset
// described in the notes above.
num uint64
}
// TODO: Does representing 1-byte delimiters as *decodeBuffer cause performance issues?
var (
// Null, False, and True are the tokens for the three JSON literals.
// Each is stored in "raw" form with its own backing buffer.
Null Token = rawToken("null")
False Token = rawToken("false")
True Token = rawToken("true")
// BeginObject, EndObject, BeginArray, and EndArray are the tokens for
// the structural delimiters of JSON objects and arrays.
BeginObject Token = rawToken("{")
EndObject Token = rawToken("}")
BeginArray Token = rawToken("[")
EndArray Token = rawToken("]")
// zeroString and zeroNumber are the "raw" form tokens returned by the
// constructors for an empty string and a zero number, respectively.
zeroString Token = rawToken(`""`)
zeroNumber Token = rawToken(`0`)
// nanString, pinfString, and ninfString are the JSON string tokens that
// Float returns for NaN, +Inf, and -Inf.
nanString Token = String("NaN")
pinfString Token = String("Infinity")
ninfString Token = String("-Infinity")
)
// rawToken builds a Token in the "raw" form whose decode buffer holds
// exactly the bytes of s, with the previous-token window spanning all of it.
func rawToken(s string) Token {
	data := []byte(s)
	return Token{raw: &decodeBuffer{buf: data, prevStart: 0, prevEnd: len(data)}}
}
// Bool returns the Token for a JSON boolean:
// [True] if b is true and [False] otherwise.
func Bool(b bool) Token {
	if !b {
		return False
	}
	return True
}
// String returns a Token for the JSON string s.
// The input is expected to be valid UTF-8; invalid characters
// may be mangled as the Unicode replacement character.
func String(s string) Token {
	// The empty string has no distinct "exact" representation,
	// so it is always the shared raw token.
	if s == "" {
		return zeroString
	}
	return Token{str: s}
}
// Float returns a Token for the JSON number n.
// Since JSON numbers cannot express NaN, +Inf, or -Inf, those values
// are instead returned as a JSON string with the value
// "NaN", "Infinity", or "-Infinity".
func Float(n float64) Token {
	bits := math.Float64bits(n)
	// Only positive zero has an all-zero bit pattern; negative zero is
	// non-zero in its bits and is kept in the "exact" form below.
	if bits == 0 {
		return zeroNumber
	}
	if math.IsNaN(n) {
		return nanString
	}
	if math.IsInf(n, +1) {
		return pinfString
	}
	if math.IsInf(n, -1) {
		return ninfString
	}
	return Token{str: "f", num: bits}
}
// Int returns a Token for the JSON number with the int64 value n.
func Int(n int64) Token {
	if n != 0 {
		// The int64 is stored bit-for-bit in the uint64 num field.
		return Token{str: "i", num: uint64(n)}
	}
	return zeroNumber
}
// Uint returns a Token for the JSON number with the uint64 value n.
func Uint(n uint64) Token {
	if n != 0 {
		return Token{str: "u", num: n}
	}
	return zeroNumber
}
// Clone returns a copy of the Token whose value remains valid
// even after a subsequent [Decoder.Read] call.
//
// Tokens in the "exact" form and the predeclared literal and delimiter
// tokens are returned unchanged. Any other raw token is given its own copy
// of the token bytes. It panics if the raw token's recorded offset no
// longer matches its buffer.
func (t Token) Clone() Token {
	// TODO: Allow caller to avoid any allocations?
	src := t.raw
	if src == nil {
		return t
	}

	// Avoid copying globals.
	if src.prevStart == 0 {
		globals := [...]*Token{&Null, &False, &True, &BeginObject, &EndObject, &BeginArray, &EndArray}
		for _, g := range globals {
			if src == g.raw {
				return *g
			}
		}
	}

	if uint64(src.previousOffsetStart()) != t.num {
		panic(invalidTokenPanic)
	}
	data := bytes.Clone(src.previousBuffer())
	return Token{raw: &decodeBuffer{buf: data, prevStart: 0, prevEnd: len(data)}}
}
// Bool reports the value of a JSON boolean token.
// A token is recognized as a boolean by sharing its raw buffer with
// [True] or [False]; for any other token it panics.
func (t Token) Bool() bool {
	if t.raw == True.raw {
		return true
	}
	if t.raw == False.raw {
		return false
	}
	panic("invalid JSON token kind: " + t.Kind().String())
}
// appendString appends the JSON string encoding of t to dst
// and returns the extended buffer.
// It panics if t is not a JSON string.
func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) {
	switch {
	case t.raw != nil:
		// Handle raw string value.
		b := t.raw.previousBuffer()
		if Kind(b[0]) != '"' {
			break // not a string; panic below
		}
		// A string that is consumed in full as a "simple" string
		// is copied through unchanged; anything else is reformatted.
		if jsonwire.ConsumeSimpleString(b) == len(b) {
			return append(dst, b...), nil
		}
		out, _, err := jsonwire.ReformatString(dst, b, flags)
		return out, err
	case t.str != "" && t.num == 0:
		// Handle exact string value.
		return jsonwire.AppendQuote(dst, t.str, flags)
	}
	panic("invalid JSON token kind: " + t.Kind().String())
}
// String returns the unescaped string value for a JSON string.
// For other JSON kinds, this returns the raw JSON representation.
// For a Token that matches no known form, the unexported helper
// yields the placeholder "<invalid jsontext.Token>".
func (t Token) String() string {
// This is inlinable to take advantage of "function outlining".
// This avoids an allocation for the string(b) conversion
// if the caller does not use the string in an escaping manner.
// See https://blog.filippo.io/efficient-go-apis-with-the-inliner/
s, b := t.string()
// A non-empty byte result takes precedence; the conversion is done here,
// in the caller-visible wrapper, rather than inside the helper.
if len(b) > 0 {
return string(b)
}
return s
}
// string is the helper behind [Token.String].
// For a token in the "raw" form it returns an empty string together with
// the bytes of the value (unquoted if it is a JSON string).
// For every other token it returns the value as a string and a nil slice.
// It panics if a raw token's recorded offset no longer matches its buffer.
func (t Token) string() (string, []byte) {
	const invalid = "<invalid jsontext.Token>"

	if t.raw != nil {
		if uint64(t.raw.previousOffsetStart()) != t.num {
			panic(invalidTokenPanic)
		}
		b := t.raw.previousBuffer()
		if b[0] != '"' {
			// Handle tokens that are not JSON strings for fmt.Stringer.
			return "", b
		}
		// TODO: Preserve ValueFlags in Token?
		verbatim := jsonwire.ConsumeSimpleString(b) == len(b)
		return "", jsonwire.UnquoteMayCopy(b, verbatim)
	}

	// A zero num means either an exact string or no value at all.
	if t.num == 0 {
		if t.str == "" {
			return invalid, nil
		}
		return t.str, nil
	}

	// Handle tokens that are not JSON strings for fmt.Stringer.
	switch t.str[0] {
	case 'f':
		return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.num), 64)), nil
	case 'i':
		return strconv.FormatInt(int64(t.num), 10), nil
	case 'u':
		return strconv.FormatUint(t.num, 10), nil
	}
	return invalid, nil
}
// appendNumber appends the JSON number encoding of t to dst
// and returns the extended buffer.
// It panics if t is not a JSON number.
func (t Token) appendNumber(dst []byte, flags *jsonflags.Flags) ([]byte, error) {
	switch {
	case t.raw != nil:
		// Handle raw number value.
		b := t.raw.previousBuffer()
		if Kind(b[0]).normalize() != '0' {
			break // not a number; panic below
		}
		out, _, err := jsonwire.ReformatNumber(dst, b, flags)
		return out, err
	case t.num != 0:
		// Handle exact number value; str[0] selects how num is interpreted.
		switch t.str[0] {
		case 'f':
			return jsonwire.AppendFloat(dst, math.Float64frombits(t.num), 64), nil
		case 'i':
			return strconv.AppendInt(dst, int64(t.num), 10), nil
		case 'u':
			return strconv.AppendUint(dst, t.num, 10), nil
		}
	}
	panic("invalid JSON token kind: " + t.Kind().String())
}
// Float returns the floating-point value for a JSON number.
// A JSON string holding exactly "NaN", "Infinity", or "-Infinity"
// yields NaN, +Inf, or -Inf, respectively.
// It panics for every other token, and for a raw token whose recorded
// offset no longer matches its buffer.
func (t Token) Float() float64 {
	switch {
	case t.raw != nil:
		// Handle raw number value.
		if uint64(t.raw.previousOffsetStart()) != t.num {
			panic(invalidTokenPanic)
		}
		if b := t.raw.previousBuffer(); Kind(b[0]).normalize() == '0' {
			// The second result of ParseFloat is intentionally discarded.
			f, _ := jsonwire.ParseFloat(b, 64)
			return f
		}
	case t.num != 0:
		// Handle exact number value; str[0] selects how num is interpreted.
		switch t.str[0] {
		case 'f':
			return math.Float64frombits(t.num)
		case 'i':
			return float64(int64(t.num))
		case 'u':
			return float64(t.num)
		}
	}

	// Handle string values with "NaN", "Infinity", or "-Infinity".
	if t.Kind() == '"' {
		switch name := t.String(); name {
		case "NaN":
			return math.NaN()
		case "Infinity":
			return math.Inf(+1)
		case "-Infinity":
			return math.Inf(-1)
		}
	}
	panic("invalid JSON token kind: " + t.Kind().String())
}
// Int returns the signed integer value for a JSON number.
// The fractional component of any number is ignored (truncation toward zero).
// Any number beyond the representation of an int64 will be saturated
// to the closest representable value.
// It panics if the token kind is not a JSON number.
func (t Token) Int() int64 {
if raw := t.raw; raw != nil {
// Handle raw integer value.
// Panic if the recorded offset no longer matches the decode buffer.
if uint64(raw.previousOffsetStart()) != t.num {
panic(invalidTokenPanic)
}
neg := false
buf := raw.previousBuffer()
// Strip a leading minus sign so that the magnitude can be parsed as unsigned.
if len(buf) > 0 && buf[0] == '-' {
neg, buf = true, buf[1:]
}
// If ParseUint reports !ok, control falls through to the
// floating-point handling at the bottom of the function.
if numAbs, ok := jsonwire.ParseUint(buf); ok {
if neg {
// -minInt64 is the untyped constant 1<<63,
// the magnitude of the most negative int64.
if numAbs > -minInt64 {
return minInt64
}
return -1 * int64(numAbs)
} else {
if numAbs > +maxInt64 {
return maxInt64
}
return +1 * int64(numAbs)
}
}
} else if t.num != 0 {
// Handle exact integer value.
// An exact float ('f') is not handled here and falls through below.
switch t.str[0] {
case 'i':
return int64(t.num)
case 'u':
// Saturate unsigned values that do not fit in an int64.
if t.num > maxInt64 {
return maxInt64
}
return int64(t.num)
}
}
// Handle JSON number that is a floating-point value.
if t.Kind() == '0' {
switch fv := t.Float(); {
case fv >= maxInt64:
return maxInt64
case fv <= minInt64:
return minInt64
default:
return int64(fv) // truncation toward zero
}
}
panic("invalid JSON token kind: " + t.Kind().String())
}
// Uint returns the unsigned integer value for a JSON number.
// The fractional component of any number is ignored (truncation toward zero).
// Any number beyond the representation of a uint64 will be saturated
// to the closest representable value.
// It panics if the token kind is not a JSON number.
func (t Token) Uint() uint64 {
// NOTE: This accessor returns 0 for any negative JSON number,
// which might be surprising, but is at least consistent with the behavior
// of saturating out-of-bounds numbers to the closest representable number.
if raw := t.raw; raw != nil {
// Handle raw integer value.
// Panic if the recorded offset no longer matches the decode buffer.
if uint64(raw.previousOffsetStart()) != t.num {
panic(invalidTokenPanic)
}
neg := false
buf := raw.previousBuffer()
// Strip a leading minus sign so that the magnitude can be parsed as unsigned.
if len(buf) > 0 && buf[0] == '-' {
neg, buf = true, buf[1:]
}
// If ParseUint reports !ok, control falls through to the
// floating-point handling at the bottom of the function.
if num, ok := jsonwire.ParseUint(buf); ok {
if neg {
return minUint64
}
return num
}
} else if t.num != 0 {
// Handle exact integer value.
// An exact float ('f') is not handled here and falls through below.
switch t.str[0] {
case 'u':
return t.num
case 'i':
// Saturate negative signed values to zero.
if int64(t.num) < minUint64 {
return minUint64
}
return uint64(int64(t.num))
}
}
// Handle JSON number that is a floating-point value.
if t.Kind() == '0' {
switch fv := t.Float(); {
case fv >= maxUint64:
return maxUint64
case fv <= minUint64:
return minUint64
default:
return uint64(fv) // truncation toward zero
}
}
panic("invalid JSON token kind: " + t.Kind().String())
}
// Kind returns the token kind.
// A raw token's kind is the normalized first byte of its buffered value;
// an exact token is a number if num is non-zero and a string if only str
// is set. A Token with nothing set reports the invalid kind.
// It panics if a raw token's recorded offset no longer matches its buffer.
func (t Token) Kind() Kind {
	if raw := t.raw; raw != nil {
		if uint64(raw.previousOffsetStart()) != t.num {
			panic(invalidTokenPanic)
		}
		return Kind(raw.buf[raw.prevStart]).normalize()
	}
	if t.num != 0 {
		return '0'
	}
	if t.str != "" {
		return '"'
	}
	return invalidKind
}
// Kind represents each possible JSON token kind with a single byte,
// which is conveniently the first byte of that kind's grammar
// with the restriction that numbers always be represented with '0':
//
// - 'n': null
// - 'f': false
// - 't': true
// - '"': string
// - '0': number
// - '{': object start
// - '}': object end
// - '[': array start
// - ']': array end
//
// An invalid kind is usually represented using 0,
// but may be non-zero due to invalid JSON data.
type Kind byte
// invalidKind is the zero Kind. It is what the Kind methods in this package
// return when a token or value has no recognizable kind.
const invalidKind Kind = 0
// String returns a human-readable name for the kind.
// Literals, strings, and numbers are spelled out as words, delimiters are
// returned as the delimiter character itself, and any other value is
// rendered as an "<invalid jsontext.Kind: ...>" placeholder.
func (k Kind) String() string {
	var name string
	switch k {
	case 'n':
		name = "null"
	case 'f':
		name = "false"
	case 't':
		name = "true"
	case '"':
		name = "string"
	case '0':
		name = "number"
	case '{':
		name = "{"
	case '}':
		name = "}"
	case '[':
		name = "["
	case ']':
		name = "]"
	default:
		name = "<invalid jsontext.Kind: " + jsonwire.QuoteRune(string(k)) + ">"
	}
	return name
}
// normalize maps every byte that can start a JSON number
// (a minus sign or an ASCII digit) to '0' and leaves all other kinds as is.
func (k Kind) normalize() Kind {
	switch {
	case k == '-', '0' <= k && k <= '9':
		return '0'
	default:
		return k
	}
}

View File

@ -0,0 +1,168 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"math"
"reflect"
"testing"
)
// TestTokenStringAllocations checks that calling Token.String on a raw
// string token and using the result only as a map key does not allocate.
// It is skipped when running with coverage enabled.
func TestTokenStringAllocations(t *testing.T) {
if testing.CoverMode() != "" {
t.Skip("coverage mode breaks the compiler optimization this depends on")
}
tok := rawToken(`"hello"`)
// m is deliberately left nil: lookups on a nil map are valid and always miss.
var m map[string]bool
got := int(testing.AllocsPerRun(10, func() {
// This function uses tok.String() in a non-escaping manner
// (i.e., looking it up in a Go map). It should not allocate.
if m[tok.String()] {
panic("never executed")
}
}))
if got > 0 {
t.Errorf("Token.String allocated %d times, want 0", got)
}
}
// TestTokenAccessors calls every accessor (Bool, String, Float, Int, Uint,
// Kind) on a table of tokens and compares the results against the expected
// values. Accessors that panic for a given token are recorded as the zero
// value of their result type.
func TestTokenAccessors(t *testing.T) {
// token collects the result of each accessor for one Token.
type token struct {
Bool bool
String string
Float float64
Int int64
Uint uint64
Kind Kind
}
tests := []struct {
in Token
want token
}{
{Token{}, token{String: "<invalid jsontext.Token>"}},
{Null, token{String: "null", Kind: 'n'}},
{False, token{Bool: false, String: "false", Kind: 'f'}},
{True, token{Bool: true, String: "true", Kind: 't'}},
{Bool(false), token{Bool: false, String: "false", Kind: 'f'}},
{Bool(true), token{Bool: true, String: "true", Kind: 't'}},
{BeginObject, token{String: "{", Kind: '{'}},
{EndObject, token{String: "}", Kind: '}'}},
{BeginArray, token{String: "[", Kind: '['}},
{EndArray, token{String: "]", Kind: ']'}},
{String(""), token{String: "", Kind: '"'}},
{String("hello, world!"), token{String: "hello, world!", Kind: '"'}},
{rawToken(`"hello, world!"`), token{String: "hello, world!", Kind: '"'}},
{Float(0), token{String: "0", Float: 0, Int: 0, Uint: 0, Kind: '0'}},
{Float(math.Copysign(0, -1)), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}},
{Float(math.NaN()), token{String: "NaN", Float: math.NaN(), Int: 0, Uint: 0, Kind: '"'}},
{Float(math.Inf(+1)), token{String: "Infinity", Float: math.Inf(+1), Kind: '"'}},
{Float(math.Inf(-1)), token{String: "-Infinity", Float: math.Inf(-1), Kind: '"'}},
{Int(minInt64), token{String: "-9223372036854775808", Float: minInt64, Int: minInt64, Uint: minUint64, Kind: '0'}},
{Int(minInt64 + 1), token{String: "-9223372036854775807", Float: minInt64 + 1, Int: minInt64 + 1, Uint: minUint64, Kind: '0'}},
{Int(-1), token{String: "-1", Float: -1, Int: -1, Uint: minUint64, Kind: '0'}},
{Int(0), token{String: "0", Float: 0, Int: 0, Uint: 0, Kind: '0'}},
{Int(+1), token{String: "1", Float: +1, Int: +1, Uint: +1, Kind: '0'}},
{Int(maxInt64 - 1), token{String: "9223372036854775806", Float: maxInt64 - 1, Int: maxInt64 - 1, Uint: maxInt64 - 1, Kind: '0'}},
{Int(maxInt64), token{String: "9223372036854775807", Float: maxInt64, Int: maxInt64, Uint: maxInt64, Kind: '0'}},
{Uint(minUint64), token{String: "0", Kind: '0'}},
{Uint(minUint64 + 1), token{String: "1", Float: minUint64 + 1, Int: minUint64 + 1, Uint: minUint64 + 1, Kind: '0'}},
{Uint(maxUint64 - 1), token{String: "18446744073709551614", Float: maxUint64 - 1, Int: maxInt64, Uint: maxUint64 - 1, Kind: '0'}},
{Uint(maxUint64), token{String: "18446744073709551615", Float: maxUint64, Int: maxInt64, Uint: maxUint64, Kind: '0'}},
{rawToken(`-0`), token{String: "-0", Float: math.Copysign(0, -1), Int: 0, Uint: 0, Kind: '0'}},
{rawToken(`1e1000`), token{String: "1e1000", Float: math.MaxFloat64, Int: maxInt64, Uint: maxUint64, Kind: '0'}},
{rawToken(`-1e1000`), token{String: "-1e1000", Float: -math.MaxFloat64, Int: minInt64, Uint: minUint64, Kind: '0'}},
{rawToken(`0.1`), token{String: "0.1", Float: 0.1, Int: 0, Uint: 0, Kind: '0'}},
{rawToken(`0.5`), token{String: "0.5", Float: 0.5, Int: 0, Uint: 0, Kind: '0'}},
{rawToken(`0.9`), token{String: "0.9", Float: 0.9, Int: 0, Uint: 0, Kind: '0'}},
{rawToken(`1.1`), token{String: "1.1", Float: 1.1, Int: 1, Uint: 1, Kind: '0'}},
{rawToken(`-0.1`), token{String: "-0.1", Float: -0.1, Int: 0, Uint: 0, Kind: '0'}},
{rawToken(`-0.5`), token{String: "-0.5", Float: -0.5, Int: 0, Uint: 0, Kind: '0'}},
{rawToken(`-0.9`), token{String: "-0.9", Float: -0.9, Int: 0, Uint: 0, Kind: '0'}},
{rawToken(`-1.1`), token{String: "-1.1", Float: -1.1, Int: -1, Uint: 0, Kind: '0'}},
{rawToken(`99999999999999999999`), token{String: "99999999999999999999", Float: 1e20 - 1, Int: maxInt64, Uint: maxUint64, Kind: '0'}},
{rawToken(`-99999999999999999999`), token{String: "-99999999999999999999", Float: -1e20 - 1, Int: minInt64, Uint: minUint64, Kind: '0'}},
}
for _, tt := range tests {
t.Run("", func(t *testing.T) {
// Accessors panic for tokens of the wrong kind. Each call is wrapped
// in a closure that recovers, so a panic leaves the zero value in
// the corresponding field instead of aborting the test.
got := token{
Bool: func() bool {
defer func() { recover() }()
return tt.in.Bool()
}(),
String: tt.in.String(),
Float: func() float64 {
defer func() { recover() }()
return tt.in.Float()
}(),
Int: func() int64 {
defer func() { recover() }()
return tt.in.Int()
}(),
Uint: func() uint64 {
defer func() { recover() }()
return tt.in.Uint()
}(),
Kind: tt.in.Kind(),
}
if got.Bool != tt.want.Bool {
t.Errorf("Token(%s).Bool() = %v, want %v", tt.in, got.Bool, tt.want.Bool)
}
if got.String != tt.want.String {
t.Errorf("Token(%s).String() = %v, want %v", tt.in, got.String, tt.want.String)
}
// Compare bit patterns so that NaN matches NaN and -0 is distinguished from +0.
if math.Float64bits(got.Float) != math.Float64bits(tt.want.Float) {
t.Errorf("Token(%s).Float() = %v, want %v", tt.in, got.Float, tt.want.Float)
}
if got.Int != tt.want.Int {
t.Errorf("Token(%s).Int() = %v, want %v", tt.in, got.Int, tt.want.Int)
}
if got.Uint != tt.want.Uint {
t.Errorf("Token(%s).Uint() = %v, want %v", tt.in, got.Uint, tt.want.Uint)
}
if got.Kind != tt.want.Kind {
t.Errorf("Token(%s).Kind() = %v, want %v", tt.in, got.Kind, tt.want.Kind)
}
})
}
}
// TestTokenClone checks that Clone yields a token deeply equal to its
// input, and that the clone shares the raw buffer pointer with the input
// exactly when expected.
func TestTokenClone(t *testing.T) {
	type testCase struct {
		in           Token
		wantExactRaw bool // whether the clone should have the identical raw pointer
	}
	cases := []testCase{
		{Token{}, true},
		{Null, true},
		{False, true},
		{True, true},
		{BeginObject, true},
		{EndObject, true},
		{BeginArray, true},
		{EndArray, true},
		{String("hello, world!"), true},
		{rawToken(`"hello, world!"`), false},
		{Float(3.14159), true},
		{rawToken(`3.14159`), false},
	}

	for _, tc := range cases {
		t.Run("", func(t *testing.T) {
			clone := tc.in.Clone()
			if equal := reflect.DeepEqual(clone, tc.in); !equal {
				t.Errorf("Token(%s) == Token(%s).Clone() = false, want true", tc.in, tc.in)
			}
			if sameRaw := clone.raw == tc.in.raw; sameRaw != tc.wantExactRaw {
				t.Errorf("Token(%s).raw == Token(%s).Clone().raw = %v, want %v", tc.in, tc.in, sameRaw, tc.wantExactRaw)
			}
		})
	}
}

View File

@ -0,0 +1,395 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"bytes"
"errors"
"io"
"slices"
"sync"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonwire"
)
// NOTE: Value is analogous to v1 json.RawMessage.
// AppendFormat formats the JSON value in src according to the specified
// options and appends the result to dst.
// See [Value.Format] for more details about the formatting behavior.
//
// The dst and src may overlap.
// If an error is reported, then the entirety of src is appended to dst.
func AppendFormat(dst, src []byte, opts ...Options) ([]byte, error) {
	enc := getBufferedEncoder(opts...)
	defer putBufferedEncoder(enc)
	enc.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)

	err := enc.s.WriteValue(src)
	if err == nil {
		return append(dst, enc.s.Buf...), nil
	}
	// On failure, pass the input through unmodified alongside the error.
	return append(dst, src...), err
}
// Value holds the raw bytes of a single JSON value, which may be:
//   - a JSON literal (i.e., null, true, or false)
//   - a JSON string (e.g., "hello, world!")
//   - a JSON number (e.g., 123.456)
//   - an entire JSON object (e.g., {"fizz":"buzz"} )
//   - an entire JSON array (e.g., [1,2,3] )
//
// Unlike a [Token], a Value can represent entire array or object values.
// A Value may contain leading and/or trailing whitespace.
type Value []byte

// Clone returns a copy of v that does not share memory with v.
// Cloning a nil Value yields nil.
func (v Value) Clone() Value {
	return Value(bytes.Clone([]byte(v)))
}

// String returns v as a string, or "null" if v is nil.
func (v Value) String() string {
	if v != nil {
		return string(v)
	}
	return "null"
}
// IsValid reports whether v is a syntactically valid JSON value
// under the specified options.
//
// By default (if no options are specified), it validates according to RFC 7493.
// It verifies whether the input is properly encoded as UTF-8,
// that escape sequences within strings decode to valid Unicode codepoints, and
// that all names in each object are unique.
// It does not verify whether numbers are representable within the limits
// of any common numeric type (e.g., float64, int64, or uint64).
//
// Relevant options include:
//   - [AllowDuplicateNames]
//   - [AllowInvalidUTF8]
//
// All other options are ignored.
func (v Value) IsValid(opts ...Options) bool {
	// TODO: Document support for [WithByteLimit] and [WithDepthLimit].
	dec := getBufferedDecoder(v, opts...)
	defer putBufferedDecoder(dec)

	// The input is valid only if exactly one value can be read
	// and nothing but the end of input follows it.
	_, valueErr := dec.ReadValue()
	_, trailingErr := dec.ReadToken()
	if valueErr != nil {
		return false
	}
	return trailingErr == io.EOF
}
// Format reformats the raw JSON value in place.
//
// By default (if no options are specified), it validates according to RFC 7493
// and produces the minimal JSON representation, where
// all whitespace is elided and JSON strings use the shortest encoding.
//
// Relevant options include:
//   - [AllowDuplicateNames]
//   - [AllowInvalidUTF8]
//   - [EscapeForHTML]
//   - [EscapeForJS]
//   - [PreserveRawStrings]
//   - [CanonicalizeRawInts]
//   - [CanonicalizeRawFloats]
//   - [ReorderRawObjects]
//   - [SpaceAfterColon]
//   - [SpaceAfterComma]
//   - [Multiline]
//   - [WithIndent]
//   - [WithIndentPrefix]
//
// All other options are ignored.
//
// It is guaranteed to succeed if the value is valid according to the same options.
// If the value is already formatted, then the buffer is not mutated.
func (v *Value) Format(opts ...Options) error {
	// TODO: Document support for [WithByteLimit] and [WithDepthLimit].
	var extra []Options // a direct Format call has no second option set
	return v.format(opts, extra)
}
// format is the shared implementation behind the exported formatting methods.
// It takes the options as two separate slices so that callers do not have to
// allocate a combined one; the effect is the same as
// v.Format(append(opts1, opts2...)...).
func (v *Value) format(opts1, opts2 []Options) error {
	enc := getBufferedEncoder(opts1...)
	defer putBufferedEncoder(enc)
	enc.s.Join(opts2...)
	enc.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)

	err := enc.s.WriteValue(*v)
	if err != nil {
		return err
	}
	// Leave the caller's buffer untouched when the output is unchanged.
	if out := enc.s.Buf; !bytes.Equal(*v, out) {
		*v = append((*v)[:0], out...)
	}
	return nil
}
// Compact strips all whitespace from the raw JSON value.
//
// JSON strings and numbers keep their existing representation.
// To maximize the set of JSON values that can be formatted,
// values with duplicate names and invalid UTF-8 are permitted.
//
// Compact is equivalent to calling [Value.Format] with the following options:
//   - [AllowDuplicateNames](true)
//   - [AllowInvalidUTF8](true)
//   - [PreserveRawStrings](true)
//
// Any options specified by the caller are applied after the initial set
// and may deliberately override prior options.
func (v *Value) Compact(opts ...Options) error {
	defaults := []Options{
		AllowDuplicateNames(true),
		AllowInvalidUTF8(true),
		PreserveRawStrings(true),
	}
	return v.format(defaults, opts)
}
// Indent rewrites the whitespace in the raw JSON value so that each element
// in a JSON object or array begins on an indented line according to the nesting.
//
// JSON strings and numbers keep their existing representation.
// To maximize the set of JSON values that can be formatted,
// values with duplicate names and invalid UTF-8 are permitted.
//
// Indent is equivalent to calling [Value.Format] with the following options:
//   - [AllowDuplicateNames](true)
//   - [AllowInvalidUTF8](true)
//   - [PreserveRawStrings](true)
//   - [Multiline](true)
//
// Any options specified by the caller are applied after the initial set
// and may deliberately override prior options.
func (v *Value) Indent(opts ...Options) error {
	defaults := []Options{
		AllowDuplicateNames(true),
		AllowInvalidUTF8(true),
		PreserveRawStrings(true),
		Multiline(true),
	}
	return v.format(defaults, opts)
}
// Canonicalize rewrites the raw JSON value into the stable representation
// defined by the JSON Canonicalization Scheme (JCS), RFC 8785.
//
// JSON strings are formatted to use their minimal representation,
// JSON numbers are formatted as double precision numbers according
// to some stable serialization algorithm.
// JSON object members are sorted in ascending order by name.
// All whitespace is removed.
//
// The output stability is dependent on the stability of the application data
// (see RFC 8785, Appendix E). It cannot produce stable output from
// fundamentally unstable input. For example, if the JSON value
// contains ephemeral data (e.g., a frequently changing timestamp),
// then the value is still unstable regardless of whether this is called.
//
// Canonicalize is equivalent to calling [Value.Format] with the following options:
//   - [CanonicalizeRawInts](true)
//   - [CanonicalizeRawFloats](true)
//   - [ReorderRawObjects](true)
//
// Any options specified by the caller are applied after the initial set
// and may deliberately override prior options.
//
// Note that JCS treats all JSON numbers as IEEE 754 double precision numbers.
// Any numbers with precision beyond what is representable by that form
// will lose their precision when canonicalized. For example, integer values
// beyond ±2⁵³ will lose their precision. To preserve the original representation
// of JSON integers, additionally set [CanonicalizeRawInts] to false:
//
//	v.Canonicalize(jsontext.CanonicalizeRawInts(false))
func (v *Value) Canonicalize(opts ...Options) error {
	defaults := []Options{
		CanonicalizeRawInts(true),
		CanonicalizeRawFloats(true),
		ReorderRawObjects(true),
	}
	return v.format(defaults, opts)
}
// MarshalJSON returns v itself as its JSON encoding.
// The stored bytes are returned as is, without any validation.
// A nil v is encoded as a JSON null.
func (v Value) MarshalJSON() ([]byte, error) {
	// NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON.
	if v != nil {
		return v, nil
	}
	return []byte("null"), nil
}
// UnmarshalJSON stores a copy of the raw JSON input b in v,
// reusing the existing capacity of v where possible.
// The input is not validated.
// It reports an error if called on a nil pointer.
func (v *Value) UnmarshalJSON(b []byte) error {
	// NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON.
	if v != nil {
		*v = append((*v)[:0], b...)
		return nil
	}
	return errors.New("jsontext.Value: UnmarshalJSON on nil pointer")
}
// Kind returns the kind of the first token in v, skipping leading whitespace.
// For a valid value, this will never include '}' or ']'.
// It reports the invalid kind if nothing follows the whitespace.
func (v Value) Kind() Kind {
	rest := v[jsonwire.ConsumeWhitespace(v):]
	if len(rest) == 0 {
		return invalidKind
	}
	return Kind(rest[0]).normalize()
}
// commaAndWhitespace is the cutset of bytes trimmed from the start of an
// objectMember buffer before member buffers are compared.
const commaAndWhitespace = ", \n\r\t"
// objectMember describes one name/value member of a JSON object
// for the purpose of sorting members.
type objectMember struct {
// name is the unquoted name.
name []byte // e.g., "name"
// buffer is the entirety of the raw JSON object member
// starting from right after the previous member (or opening '{')
// until right after the member value.
buffer []byte // e.g., `, \n\r\t"name": "value"`
}
// Compare orders x relative to y, returning the result of
// jsonwire.CompareUTF16 on the member names and, when the names are
// equal, on the member buffers with any leading comma and whitespace removed.
func (x objectMember) Compare(y objectMember) int {
	order := jsonwire.CompareUTF16(x.name, y.name)
	if order == 0 {
		// With [AllowDuplicateNames] or [AllowInvalidUTF8],
		// names could be identical, so also sort using the member value.
		xBody := bytes.TrimLeft(x.buffer, commaAndWhitespace)
		yBody := bytes.TrimLeft(y.buffer, commaAndWhitespace)
		order = jsonwire.CompareUTF16(xBody, yBody)
	}
	return order
}
// objectMemberPool recycles *[]objectMember scratch slices.
// Use getObjectMembers and putObjectMembers rather than the pool directly.
var objectMemberPool = sync.Pool{New: func() any { return new([]objectMember) }}
// getObjectMembers takes a member slice from the pool and
// truncates it to zero length, keeping its capacity.
func getObjectMembers() *[]objectMember {
	members := objectMemberPool.Get().(*[]objectMember)
	*members = (*members)[:0]
	return members
}
// putObjectMembers hands ns back to objectMemberPool, but only when
// its capacity is below 1<<10; larger slices are simply not returned.
func putObjectMembers(ns *[]objectMember) {
	if cap(*ns) >= 1<<10 {
		return
	}
	clear(*ns) // avoid pinning name and buffer
	objectMemberPool.Put(ns)
}
// mustReorderObjects reorders, in place, the members of every object
// within the JSON value b. The value must be valid; otherwise it panics.
func mustReorderObjects(b []byte) {
	// Borrow a buffered encoder purely for its internal buffer,
	// which serves as scratch space while object members are moved.
	enc := getBufferedEncoder()
	defer putBufferedEncoder(enc)

	// Only a syntactic parse is needed, so switch off unnecessary checks.
	dec := getBufferedDecoder(b)
	defer putBufferedDecoder(dec)
	dec.s.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1)

	mustReorderObjectsFromDecoder(dec, &enc.s.Buf) // per RFC 8785, section 3.2.3
}
// mustReorderObjectsFromDecoder recursively reorders all object members in place
// according to the ordering specified in RFC 8785, section 3.2.3.
//
// The scratch buffer is reused across recursive calls to assemble
// the sorted members of one object before copying them back.
//
// Pre-conditions:
// - The value is valid (i.e., no decoder errors should ever occur).
// - Initial call is provided a Decoder reading from the start of v.
//
// Post-conditions:
// - Exactly one JSON value is read from the Decoder.
// - All fully-parsed JSON objects are reordered by directly moving
// the members in the value buffer.
//
// The runtime is approximately O(n·log(n)) + O(m·log(m)),
// where n is len(v) and m is the total number of object members.
func mustReorderObjectsFromDecoder(d *Decoder, scratch *[]byte) {
switch tok, err := d.ReadToken(); tok.Kind() {
case '{':
// Iterate and collect the name and offsets for every object member.
members := getObjectMembers()
defer putObjectMembers(members)
var prevMember objectMember
isSorted := true
beforeBody := d.InputOffset() // offset after '{'
for d.PeekKind() != '}' {
beforeName := d.InputOffset()
var flags jsonwire.ValueFlags
// The error is dropped: per the pre-conditions the value is valid.
name, _ := d.s.ReadValue(&flags)
name = jsonwire.UnquoteMayCopy(name, flags.IsVerbatim())
// Consume the member value, reordering any objects nested inside it.
mustReorderObjectsFromDecoder(d, scratch)
afterValue := d.InputOffset()
currMember := objectMember{name, d.s.buf[beforeName:afterValue]}
// The object counts as sorted only while every member
// compares strictly greater than the one before it.
if isSorted && len(*members) > 0 {
isSorted = objectMember.Compare(prevMember, currMember) < 0
}
*members = append(*members, currMember)
prevMember = currMember
}
afterBody := d.InputOffset() // offset before '}'
d.ReadToken() // consume the closing '}'
// Sort the members; return early if it's already sorted.
if isSorted {
return
}
firstBufferBeforeSorting := (*members)[0].buffer
slices.SortFunc(*members, objectMember.Compare)
firstBufferAfterSorting := (*members)[0].buffer
// Append the reordered members to a new buffer,
// then copy the reordered members back over the original members.
// Avoid swapping in place since each member may be a different size
// where moving a member over a smaller member may corrupt the data
// for subsequent members before they have been moved.
//
// The following invariant must hold:
// sum([len(m.buffer) for m in members]) == afterBody-beforeBody
commaAndWhitespacePrefix := func(b []byte) []byte {
return b[:len(b)-len(bytes.TrimLeft(b, commaAndWhitespace))]
}
sorted := (*scratch)[:0]
// Members are identified by the address of the first byte of their buffer,
// which tells whether a member is the one that originally came first.
for i, member := range *members {
switch {
case i == 0 && &member.buffer[0] != &firstBufferBeforeSorting[0]:
// First member after sorting is not the first member before sorting,
// so use the prefix of the first member before sorting.
sorted = append(sorted, commaAndWhitespacePrefix(firstBufferBeforeSorting)...)
sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
case i != 0 && &member.buffer[0] == &firstBufferBeforeSorting[0]:
// Later member after sorting is the first member before sorting,
// so use the prefix of the first member after sorting.
sorted = append(sorted, commaAndWhitespacePrefix(firstBufferAfterSorting)...)
sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
default:
sorted = append(sorted, member.buffer...)
}
}
// The reordered bytes must exactly fill the span between '{' and '}'.
if int(afterBody-beforeBody) != len(sorted) {
panic("BUG: length invariant violated")
}
copy(d.s.buf[beforeBody:afterBody], sorted)
// Update scratch buffer to the largest amount ever used.
if len(sorted) > len(*scratch) {
*scratch = sorted
}
case '[':
// Arrays keep their element order; only recurse into each element.
for d.PeekKind() != ']' {
mustReorderObjectsFromDecoder(d, scratch)
}
d.ReadToken() // consume the closing ']'
default:
// Any other token needs no reordering; a read error here
// means the validity pre-condition was violated.
if err != nil {
panic("BUG: " + err.Error())
}
}
}

View File

@ -0,0 +1,200 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package jsontext
import (
"io"
"strings"
"testing"
"encoding/json/internal/jsontest"
"encoding/json/internal/jsonwire"
)
// valueTestdataEntry is one table-driven case for TestValueMethods:
// a raw input plus the expected validity, output, and error
// for the Compact, Indent, and Canonicalize methods of Value.
type valueTestdataEntry struct {
name jsontest.CaseName
in string
wantValid bool
wantCompacted string
wantCompactErr error // implies wantCompacted is in
wantIndented string // wantCompacted if empty; uses "\t" for indent prefix and " " for indent
wantIndentErr error // implies wantCompacted is in
wantCanonicalized string // wantCompacted if empty
wantCanonicalizeErr error // implies wantCompacted is in
}
// valueTestdata is the case table consumed by TestValueMethods.
// It starts with one entry derived from every coderTestdata case
// and is followed by cases specific to the Value methods.
var valueTestdata = append(func() (out []valueTestdataEntry) {
	// Initialize valueTestdata from coderTestdata.
	for _, td := range coderTestdata {
		// NOTE: The Compact method preserves the raw formatting of strings,
		// while the Encoder (by default) does not.
		if td.name.Name == "ComplicatedString" {
			td.outCompacted = strings.TrimSpace(td.in)
		}
		out = append(out, valueTestdataEntry{
			name: td.name,
			in: td.in,
			wantValid: true,
			wantCompacted: td.outCompacted,
			wantIndented: td.outIndented,
			wantCanonicalized: td.outCanonicalized,
		})
	}
	return out
}(), []valueTestdataEntry{{
	name: jsontest.Name("RFC8785/Primitives"),
	in: `{
"numbers": [333333333.33333329, 1E30, 4.50,
2e-3, 0.000000000000000000000000001, -0],
"string": "\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/",
"literals": [null, true, false]
}`,
	wantValid: true,
	wantCompacted: `{"numbers":[333333333.33333329,1E30,4.50,2e-3,0.000000000000000000000000001,-0],"string":"\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/","literals":[null,true,false]}`,
	wantIndented: `{
"numbers": [
333333333.33333329,
1E30,
4.50,
2e-3,
0.000000000000000000000000001,
-0
],
"string": "\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/",
"literals": [
null,
true,
false
]
}`,
	wantCanonicalized: `{"literals":[null,true,false],"numbers":[333333333.3333333,1e+30,4.5,0.002,1e-27,0],"string":"€$\u000f\nA'B\"\\\\\"/"}`,
}, {
	name: jsontest.Name("RFC8785/ObjectOrdering"),
	in: `{
"\u20ac": "Euro Sign",
"\r": "Carriage Return",
"\ufb33": "Hebrew Letter Dalet With Dagesh",
"1": "One",
"\ud83d\ude00": "Emoji: Grinning Face",
"\u0080": "Control",
"\u00f6": "Latin Small Letter O With Diaeresis"
}`,
	wantValid: true,
	wantCompacted: `{"\u20ac":"Euro Sign","\r":"Carriage Return","\ufb33":"Hebrew Letter Dalet With Dagesh","1":"One","\ud83d\ude00":"Emoji: Grinning Face","\u0080":"Control","\u00f6":"Latin Small Letter O With Diaeresis"}`,
	wantIndented: `{
"\u20ac": "Euro Sign",
"\r": "Carriage Return",
"\ufb33": "Hebrew Letter Dalet With Dagesh",
"1": "One",
"\ud83d\ude00": "Emoji: Grinning Face",
"\u0080": "Control",
"\u00f6": "Latin Small Letter O With Diaeresis"
}`,
	// The "Control" member is named U+0080 in the input, so the canonical
	// form must carry that same code point. It is spelled with an explicit
	// escape in an interpreted string because the raw character is invisible
	// and easily mis-decoded as U+20AC, which would duplicate the
	// "Euro Sign" name.
	wantCanonicalized: `{"\r":"Carriage Return","1":"One","` + "\u0080" + `":"Control","ö":"Latin Small Letter O With Diaeresis","€":"Euro Sign","😀":"Emoji: Grinning Face","דּ":"Hebrew Letter Dalet With Dagesh"}`,
}, {
	name: jsontest.Name("LargeIntegers"),
	in: ` [ -9223372036854775808 , 9223372036854775807 ] `,
	wantValid: true,
	wantCompacted: `[-9223372036854775808,9223372036854775807]`,
	wantIndented: `[
-9223372036854775808,
9223372036854775807
]`,
	wantCanonicalized: `[-9223372036854776000,9223372036854776000]`, // NOTE: Loss of precision due to numbers being treated as floats.
}, {
	name: jsontest.Name("InvalidUTF8"),
	// NOTE(review): the "<66>" sequence below looks like a mangled
	// replacement character (U+FFFD); confirm against the upstream source.
	in: ` "living` + "\xde\xad\xbe\xef" + `\ufffd<66>" `,
	wantValid: false, // uses RFC 7493 as the definition; which validates UTF-8
	wantCompacted: `"living` + "\xde\xad\xbe\xef" + `\ufffd<66>"`,
	wantCanonicalizeErr: E(jsonwire.ErrInvalidUTF8).withPos(` "living`+"\xde\xad", ""),
}, {
	name: jsontest.Name("InvalidUTF8/SurrogateHalf"),
	in: `"\ud800"`,
	wantValid: false, // uses RFC 7493 as the definition; which validates UTF-8
	wantCompacted: `"\ud800"`,
	wantCanonicalizeErr: newInvalidEscapeSequenceError(`\ud800"`).withPos(`"`, ""),
}, {
	name: jsontest.Name("UppercaseEscaped"),
	in: `"\u000B"`,
	wantValid: true,
	wantCompacted: `"\u000B"`,
	wantCanonicalized: `"\u000b"`,
}, {
	name: jsontest.Name("DuplicateNames"),
	in: ` { "0" : 0 , "1" : 1 , "0" : 0 }`,
	wantValid: false, // uses RFC 7493 as the definition; which does check for object uniqueness
	wantCompacted: `{"0":0,"1":1,"0":0}`,
	wantIndented: `{
"0": 0,
"1": 1,
"0": 0
}`,
	wantCanonicalizeErr: E(ErrDuplicateName).withPos(` { "0" : 0 , "1" : 1 , `, "/0"),
}, {
	name: jsontest.Name("Whitespace"),
	in: " \n\r\t",
	wantValid: false,
	wantCompacted: " \n\r\t",
	wantCompactErr: E(io.ErrUnexpectedEOF).withPos(" \n\r\t", ""),
	wantIndentErr: E(io.ErrUnexpectedEOF).withPos(" \n\r\t", ""),
	wantCanonicalizeErr: E(io.ErrUnexpectedEOF).withPos(" \n\r\t", ""),
}}...)
// TestValueMethods runs IsValid, Compact, Indent, and Canonicalize on the
// input of every valueTestdata entry and compares each result and error
// against the entry's expectations.
func TestValueMethods(t *testing.T) {
	for _, td := range valueTestdata {
		t.Run(td.name.Name, func(t *testing.T) {
			// Resolve the expected outputs. Empty indented/canonicalized
			// expectations fall back to the compacted one first; only then
			// is any method with an expected error pinned to the raw input.
			wantCompacted := td.wantCompacted
			wantIndented := td.wantIndented
			wantCanonicalized := td.wantCanonicalized
			if wantIndented == "" {
				wantIndented = wantCompacted
			}
			if wantCanonicalized == "" {
				wantCanonicalized = wantCompacted
			}
			if td.wantCompactErr != nil {
				wantCompacted = td.in
			}
			if td.wantIndentErr != nil {
				wantIndented = td.in
			}
			if td.wantCanonicalizeErr != nil {
				wantCanonicalized = td.in
			}

			if valid := Value(td.in).IsValid(); valid != td.wantValid {
				t.Errorf("%s: Value.IsValid = %v, want %v", td.name.Where, valid, td.wantValid)
			}

			// Every method is exercised on its own Value built from the input.
			compacted := Value(td.in)
			compactErr := compacted.Compact()
			if string(compacted) != wantCompacted {
				t.Errorf("%s: Value.Compact = %s, want %s", td.name.Where, compacted, wantCompacted)
			}
			if !equalError(compactErr, td.wantCompactErr) {
				t.Errorf("%s: Value.Compact error mismatch:\ngot %v\nwant %v", td.name.Where, compactErr, td.wantCompactErr)
			}

			indented := Value(td.in)
			indentErr := indented.Indent(WithIndentPrefix("\t"), WithIndent(" "))
			if string(indented) != wantIndented {
				t.Errorf("%s: Value.Indent = %s, want %s", td.name.Where, indented, wantIndented)
			}
			if !equalError(indentErr, td.wantIndentErr) {
				t.Errorf("%s: Value.Indent error mismatch:\ngot %v\nwant %v", td.name.Where, indentErr, td.wantIndentErr)
			}

			canonicalized := Value(td.in)
			canonicalizeErr := canonicalized.Canonicalize()
			if string(canonicalized) != wantCanonicalized {
				t.Errorf("%s: Value.Canonicalize = %s, want %s", td.name.Where, canonicalized, wantCanonicalized)
			}
			if !equalError(canonicalizeErr, td.wantCanonicalizeErr) {
				t.Errorf("%s: Value.Canonicalize error mismatch:\ngot %v\nwant %v", td.name.Where, canonicalizeErr, td.wantCanonicalizeErr)
			}
		})
	}
}

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import (

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
// JSON value parser state machine.

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import (

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import (

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import (

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import "unicode/utf8"

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import (

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import (

View File

@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.jsonv2
package json
import "testing"

View File

@ -0,0 +1,570 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"bytes"
"encoding"
"io"
"reflect"
"slices"
"strings"
"sync"
"time"
"encoding/json/internal"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonopts"
"encoding/json/jsontext"
)
// Reference encoding and time packages to assist pkgsite
// in being able to hotlink references to those packages.
// The blank identifiers declare nothing usable; they only mention
// the types so that the imports count as used.
var (
_ encoding.TextMarshaler
_ encoding.TextAppender
_ encoding.TextUnmarshaler
_ time.Time
_ time.Duration
)
// export exposes internal functionality of the "jsontext" package.
// It is obtained by passing the address of internal.AllowInternalUse
// to jsontext.Internal.Export.
var export = jsontext.Internal.Export(&internal.AllowInternalUse)
// Marshal serializes a Go value as a []byte according to the provided
// marshal and encode options (while ignoring unmarshal or decode options).
// It does not terminate the output with a newline.
//
// Type-specific marshal functions and methods take precedence
// over the default representation of a value.
// Functions or methods that operate on *T are only called when encoding
// a value of type T (by taking its address) or a non-nil value of *T.
// Marshal ensures that a value is always addressable
// (by boxing it on the heap if necessary) so that
// these functions and methods can be consistently called. For performance,
// it is recommended that Marshal be passed a non-nil pointer to the value.
//
// The input value is encoded as JSON according to the following rules:
//
// - If any type-specific functions in a [WithMarshalers] option match
// the value type, then those functions are called to encode the value.
// If all applicable functions return [SkipFunc],
// then the value is encoded according to subsequent rules.
//
// - If the value type implements [MarshalerTo],
// then the MarshalJSONTo method is called to encode the value.
//
// - If the value type implements [Marshaler],
// then the MarshalJSON method is called to encode the value.
//
// - If the value type implements [encoding.TextAppender],
// then the AppendText method is called to encode the value and
// subsequently encode its result as a JSON string.
//
// - If the value type implements [encoding.TextMarshaler],
// then the MarshalText method is called to encode the value and
// subsequently encode its result as a JSON string.
//
// - Otherwise, the value is encoded according to the value's type
// as described in detail below.
//
// Most Go types have a default JSON representation.
// Certain types support specialized formatting according to
// a format flag optionally specified in the Go struct tag
// for the struct field that contains the current value
// (see the “JSON Representation of Go structs” section for more details).
//
// The representation of each type is as follows:
//
// - A Go boolean is encoded as a JSON boolean (e.g., true or false).
// It does not support any custom format flags.
//
// - A Go string is encoded as a JSON string.
// It does not support any custom format flags.
//
// - A Go []byte or [N]byte is encoded as a JSON string containing
// the binary value encoded using RFC 4648.
// If the format is "base64" or unspecified, then this uses RFC 4648, section 4.
// If the format is "base64url", then this uses RFC 4648, section 5.
// If the format is "base32", then this uses RFC 4648, section 6.
// If the format is "base32hex", then this uses RFC 4648, section 7.
// If the format is "base16" or "hex", then this uses RFC 4648, section 8.
// If the format is "array", then the bytes value is encoded as a JSON array
// where each byte is recursively JSON-encoded as each JSON array element.
//
// - A Go integer is encoded as a JSON number without fractions or exponents.
// If [StringifyNumbers] is specified or encoding a JSON object name,
// then the JSON number is encoded within a JSON string.
// It does not support any custom format flags.
//
// - A Go float is encoded as a JSON number.
// If [StringifyNumbers] is specified or encoding a JSON object name,
// then the JSON number is encoded within a JSON string.
// If the format is "nonfinite", then NaN, +Inf, and -Inf are encoded as
// the JSON strings "NaN", "Infinity", and "-Infinity", respectively.
// Otherwise, the presence of non-finite numbers results in a [SemanticError].
//
// - A Go map is encoded as a JSON object, where each Go map key and value
// is recursively encoded as a name and value pair in the JSON object.
// The Go map key must encode as a JSON string, otherwise this results
// in a [SemanticError]. The Go map is traversed in a non-deterministic order.
// For deterministic encoding, consider using the [Deterministic] option.
// If the format is "emitnull", then a nil map is encoded as a JSON null.
// If the format is "emitempty", then a nil map is encoded as an empty JSON object,
// regardless of whether [FormatNilMapAsNull] is specified.
// Otherwise by default, a nil map is encoded as an empty JSON object.
//
// - A Go struct is encoded as a JSON object.
// See the “JSON Representation of Go structs” section
// in the package-level documentation for more details.
//
// - A Go slice is encoded as a JSON array, where each Go slice element
// is recursively JSON-encoded as the elements of the JSON array.
// If the format is "emitnull", then a nil slice is encoded as a JSON null.
// If the format is "emitempty", then a nil slice is encoded as an empty JSON array,
// regardless of whether [FormatNilSliceAsNull] is specified.
// Otherwise by default, a nil slice is encoded as an empty JSON array.
//
// - A Go array is encoded as a JSON array, where each Go array element
// is recursively JSON-encoded as the elements of the JSON array.
// The JSON array length is always identical to the Go array length.
// It does not support any custom format flags.
//
// - A Go pointer is encoded as a JSON null if nil, otherwise it is
// the recursively JSON-encoded representation of the underlying value.
// Format flags are forwarded to the encoding of the underlying value.
//
// - A Go interface is encoded as a JSON null if nil, otherwise it is
// the recursively JSON-encoded representation of the underlying value.
// It does not support any custom format flags.
//
// - A Go [time.Time] is encoded as a JSON string containing the timestamp
// formatted in RFC 3339 with nanosecond precision.
// If the format matches one of the format constants declared
// in the time package (e.g., RFC1123), then that format is used.
// If the format is "unix", "unixmilli", "unixmicro", or "unixnano",
// then the timestamp is encoded as a JSON number of the number of seconds
// (or milliseconds, microseconds, or nanoseconds) since the Unix epoch,
// which is January 1st, 1970 at 00:00:00 UTC.
// Otherwise, the format is used as-is with [time.Time.Format] if non-empty.
//
// - A Go [time.Duration] is encoded as a JSON string containing the duration
// formatted according to [time.Duration.String].
// If the format is "sec", "milli", "micro", or "nano",
// then the duration is encoded as a JSON number of the number of seconds
// (or milliseconds, microseconds, or nanoseconds) in the duration.
// If the format is "units", it uses [time.Duration.String].
//
// - All other Go types (e.g., complex numbers, channels, and functions)
// have no default representation and result in a [SemanticError].
//
// JSON cannot represent cyclic data structures and Marshal does not handle them.
// Passing cyclic structures will result in an error.
func Marshal(in any, opts ...Options) (out []byte, err error) {
	e := export.GetBufferedEncoder(opts...)
	defer export.PutBufferedEncoder(e)

	xe := export.Encoder(e)
	xe.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
	if err = marshalEncode(e, in, &xe.Struct); err != nil && xe.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
		return nil, internal.TransformMarshalError(in, err)
	}
	// The encoder goes back to the pool on return,
	// so hand the caller a copy of its buffer.
	return bytes.Clone(xe.Buf), err
}
// MarshalWrite encodes the Go value in as JSON and writes it to out,
// honoring the provided marshal and encode options
// (unmarshal and decode options are ignored).
// No trailing newline is appended to the output.
// See [Marshal] for details about the conversion of a Go value into JSON.
func MarshalWrite(out io.Writer, in any, opts ...Options) (err error) {
	e := export.GetStreamingEncoder(out, opts...)
	defer export.PutStreamingEncoder(e)

	xe := export.Encoder(e)
	xe.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
	err = marshalEncode(e, in, &xe.Struct)
	if err == nil || !xe.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
		return err
	}
	return internal.TransformMarshalError(in, err)
}
// MarshalEncode encodes the Go value in as JSON into a [jsontext.Encoder],
// honoring the provided marshal options
// (unmarshal, encode, and decode options are ignored).
// Marshal-relevant options already set on the [jsontext.Encoder]
// rank below the options supplied by the caller.
// In contrast to [Marshal] and [MarshalWrite], encode options are ignored
// since they must already have been set on the provided [jsontext.Encoder].
//
// See [Marshal] for details about the conversion of a Go value into JSON.
func MarshalEncode(out *jsontext.Encoder, in any, opts ...Options) (err error) {
	xe := export.Encoder(out)
	if len(opts) > 0 {
		// Layer the caller's options over the encoder's for this call only;
		// the saved copy is put back when the function returns.
		saved := xe.Struct
		defer func() { xe.Struct = saved }()
		xe.Struct.JoinWithoutCoderOptions(opts...)
	}
	if err = marshalEncode(out, in, &xe.Struct); err != nil && xe.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
		return internal.TransformMarshalError(in, err)
	}
	return err
}
// marshalEncode writes in to out using the options in mo.
// An invalid (untyped nil) input or a nil pointer is written as a JSON null;
// anything else is dispatched to the marshal function found for its type.
func marshalEncode(out *jsontext.Encoder, in any, mo *jsonopts.Struct) (err error) {
	rv := reflect.ValueOf(in)
	if !rv.IsValid() || (rv.Kind() == reflect.Pointer && rv.IsNil()) {
		return out.WriteToken(jsontext.Null)
	}

	// Shallow copy non-pointer values to obtain an addressable value.
	// It is beneficial to performance to always pass pointers to avoid this.
	forceAddr := rv.Kind() != reflect.Pointer
	if forceAddr {
		boxed := reflect.New(rv.Type())
		boxed.Elem().Set(rv)
		rv = boxed
	}
	va := addressableValue{rv.Elem(), forceAddr} // dereferenced pointer is always addressable
	typ := va.Type()

	// Lookup and call the marshal function for this type,
	// letting caller-provided marshalers take part in the lookup when present.
	fn := lookupArshaler(typ).marshal
	if mo.Marshalers != nil {
		fn, _ = mo.Marshalers.(*Marshalers).lookup(fn, typ)
	}
	err = fn(out, va, mo)
	if err == nil {
		return nil
	}
	if !mo.Flags.Get(jsonflags.AllowDuplicateNames) {
		export.Encoder(out).Tokens.InvalidateDisabledNamespaces()
	}
	return err
}
// Unmarshal decodes a []byte input into a Go value according to the provided
// unmarshal and decode options (while ignoring marshal or encode options).
// The input must be a single JSON value with optional whitespace interspersed.
// The output must be a non-nil pointer.
//
// Type-specific unmarshal functions and methods take precedence
// over the default representation of a value.
// Functions or methods that operate on *T are only called when decoding
// a value of type T (by taking its address) or a non-nil value of *T.
// Unmarshal ensures that a value is always addressable
// (by boxing it on the heap if necessary) so that
// these functions and methods can be consistently called.
//
// The input is decoded into the output according to the following rules:
//
// - If any type-specific functions in a [WithUnmarshalers] option match
// the value type, then those functions are called to decode the JSON
// value. If all applicable functions return [SkipFunc],
// then the input is decoded according to subsequent rules.
//
// - If the value type implements [UnmarshalerFrom],
// then the UnmarshalJSONFrom method is called to decode the JSON value.
//
// - If the value type implements [Unmarshaler],
// then the UnmarshalJSON method is called to decode the JSON value.
//
// - If the value type implements [encoding.TextUnmarshaler],
// then the input is decoded as a JSON string and
// the UnmarshalText method is called with the decoded string value.
// This fails with a [SemanticError] if the input is not a JSON string.
//
// - Otherwise, the JSON value is decoded according to the value's type
// as described in detail below.
//
// Most Go types have a default JSON representation.
// Certain types support specialized formatting according to
// a format flag optionally specified in the Go struct tag
// for the struct field that contains the current value
// (see the “JSON Representation of Go structs” section for more details).
// A JSON null may be decoded into every supported Go value where
// it is equivalent to storing the zero value of the Go value.
// If the input JSON kind is not handled by the current Go value type,
// then this fails with a [SemanticError]. Unless otherwise specified,
// the decoded value replaces any pre-existing value.
//
// The representation of each type is as follows:
//
// - A Go boolean is decoded from a JSON boolean (e.g., true or false).
// It does not support any custom format flags.
//
// - A Go string is decoded from a JSON string.
// It does not support any custom format flags.
//
// - A Go []byte or [N]byte is decoded from a JSON string
// containing the binary value encoded using RFC 4648.
// If the format is "base64" or unspecified, then this uses RFC 4648, section 4.
// If the format is "base64url", then this uses RFC 4648, section 5.
// If the format is "base32", then this uses RFC 4648, section 6.
// If the format is "base32hex", then this uses RFC 4648, section 7.
// If the format is "base16" or "hex", then this uses RFC 4648, section 8.
// If the format is "array", then the Go slice or array is decoded from a
// JSON array where each JSON element is recursively decoded for each byte.
// When decoding into a non-nil []byte, the slice length is reset to zero
// and the decoded input is appended to it.
// When decoding into a [N]byte, the input must decode to exactly N bytes,
// otherwise it fails with a [SemanticError].
//
// - A Go integer is decoded from a JSON number.
// It must be decoded from a JSON string containing a JSON number
// if [StringifyNumbers] is specified or decoding a JSON object name.
// It fails with a [SemanticError] if the JSON number
// has a fractional or exponent component.
// It also fails if it overflows the representation of the Go integer type.
// It does not support any custom format flags.
//
// - A Go float is decoded from a JSON number.
// It must be decoded from a JSON string containing a JSON number
// if [StringifyNumbers] is specified or decoding a JSON object name.
// It fails if it overflows the representation of the Go float type.
// If the format is "nonfinite", then the JSON strings
// "NaN", "Infinity", and "-Infinity" are decoded as NaN, +Inf, and -Inf.
// Otherwise, the presence of such strings results in a [SemanticError].
//
// - A Go map is decoded from a JSON object,
// where each JSON object name and value pair is recursively decoded
// as the Go map key and value. Maps are not cleared.
// If the Go map is nil, then a new map is allocated to decode into.
// If the decoded key matches an existing Go map entry, the entry value
// is reused by decoding the JSON object value into it.
// The formats "emitnull" and "emitempty" have no effect when decoding.
//
// - A Go struct is decoded from a JSON object.
// See the “JSON Representation of Go structs” section
// in the package-level documentation for more details.
//
// - A Go slice is decoded from a JSON array, where each JSON element
// is recursively decoded and appended to the Go slice.
// Before appending into a Go slice, a new slice is allocated if it is nil,
// otherwise the slice length is reset to zero.
// The formats "emitnull" and "emitempty" have no effect when decoding.
//
// - A Go array is decoded from a JSON array, where each JSON array element
// is recursively decoded as each corresponding Go array element.
// Each Go array element is zeroed before decoding into it.
// It fails with a [SemanticError] if the JSON array does not contain
// the exact same number of elements as the Go array.
// It does not support any custom format flags.
//
// - A Go pointer is decoded based on the JSON kind and underlying Go type.
// If the input is a JSON null, then this stores a nil pointer.
// Otherwise, it allocates a new underlying value if the pointer is nil,
// and recursively JSON decodes into the underlying value.
// Format flags are forwarded to the decoding of the underlying type.
//
// - A Go interface is decoded based on the JSON kind and underlying Go type.
// If the input is a JSON null, then this stores a nil interface value.
// Otherwise, a nil interface value of an empty interface type is initialized
// with a zero Go bool, string, float64, map[string]any, or []any if the
// input is a JSON boolean, string, number, object, or array, respectively.
// If the interface value is still nil, then this fails with a [SemanticError]
// since decoding could not determine an appropriate Go type to decode into.
// For example, unmarshaling into a nil io.Reader fails since
// there is no concrete type to populate the interface value with.
// Otherwise an underlying value exists and it recursively decodes
// the JSON input into it. It does not support any custom format flags.
//
// - A Go [time.Time] is decoded from a JSON string containing the time
// formatted in RFC 3339 with nanosecond precision.
// If the format matches one of the format constants declared in
// the time package (e.g., RFC1123), then that format is used for parsing.
// If the format is "unix", "unixmilli", "unixmicro", or "unixnano",
// then the timestamp is decoded from a JSON number of the number of seconds
// (or milliseconds, microseconds, or nanoseconds) since the Unix epoch,
// which is January 1st, 1970 at 00:00:00 UTC.
// Otherwise, the format is used as-is with [time.Parse] if non-empty.
//
// - A Go [time.Duration] is decoded from a JSON string by
// passing the decoded string to [time.ParseDuration].
// If the format is "sec", "milli", "micro", or "nano",
// then the duration is decoded from a JSON number of the number of seconds
// (or milliseconds, microseconds, or nanoseconds) in the duration.
// If the format is "units", it uses [time.ParseDuration].
//
// - All other Go types (e.g., complex numbers, channels, and functions)
// have no default representation and result in a [SemanticError].
//
// In general, unmarshaling follows merge semantics (similar to RFC 7396)
// where the decoded Go value replaces the destination value
// for any JSON kind other than an object.
// For JSON objects, the input object is merged into the destination value
// where matching object members recursively apply merge semantics.
func Unmarshal(in []byte, out any, opts ...Options) (err error) {
	d := export.GetBufferedDecoder(in, opts...)
	defer export.PutBufferedDecoder(d)

	xd := export.Decoder(d)
	err = unmarshalFull(d, out, &xd.Struct)
	if err == nil || !xd.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
		return err
	}
	// Legacy error semantics were requested, so transform the error.
	return internal.TransformUnmarshalError(out, err)
}
// UnmarshalRead decodes a Go value from an [io.Reader], honoring the
// provided unmarshal and decode options (marshal and encode options are ignored).
// The input must be a single JSON value with optional whitespace interspersed.
// The [io.Reader] is consumed in its entirety until [io.EOF] is encountered,
// and that EOF is not reported as an error. The output must be a non-nil pointer.
// See [Unmarshal] for details about the conversion of JSON into a Go value.
func UnmarshalRead(in io.Reader, out any, opts ...Options) (err error) {
	d := export.GetStreamingDecoder(in, opts...)
	defer export.PutStreamingDecoder(d)

	xd := export.Decoder(d)
	if err = unmarshalFull(d, out, &xd.Struct); err != nil && xd.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
		return internal.TransformUnmarshalError(out, err)
	}
	return err
}
// unmarshalFull unmarshals the next JSON value from in into out and,
// on success, returns the result of the decoder's CheckEOF call.
// A bare [io.EOF] from unmarshaling is reported as [io.ErrUnexpectedEOF];
// any other error is returned unchanged.
func unmarshalFull(in *jsontext.Decoder, out any, uo *jsonopts.Struct) error {
	err := unmarshalDecode(in, out, uo)
	if err == nil {
		return export.Decoder(in).CheckEOF()
	}
	if err == io.EOF {
		return io.ErrUnexpectedEOF
	}
	return err
}
// UnmarshalDecode reads the next JSON value from a [jsontext.Decoder]
// and unmarshals it into out according to the provided unmarshal options
// (marshal, encode, and decode options are ignored).
// Unmarshal options already present on the [jsontext.Decoder]
// take lower precedence than the options provided by the caller.
// Unlike [Unmarshal] and [UnmarshalRead], decode options are ignored because
// they must have already been specified on the provided [jsontext.Decoder].
//
// The input may be a stream of one or more JSON values,
// where this only unmarshals the next JSON value in the stream.
// The output must be a non-nil pointer.
// See [Unmarshal] for details about the conversion of JSON into a Go value.
func UnmarshalDecode(in *jsontext.Decoder, out any, opts ...Options) (err error) {
	xd := export.Decoder(in)
	if len(opts) > 0 {
		// Layer the caller's options on top of the decoder's own for the
		// duration of this call only; the saved copy is restored on return.
		saved := xd.Struct
		defer func() { xd.Struct = saved }()
		xd.Struct.JoinWithoutCoderOptions(opts...)
	}

	if err = unmarshalDecode(in, out, &xd.Struct); err == nil {
		return nil
	}
	if xd.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
		return internal.TransformUnmarshalError(out, err)
	}
	return err
}
// unmarshalDecode unmarshals the next JSON value from in into the value
// that out points to. It returns a [SemanticError] if out is not a
// non-nil pointer.
func unmarshalDecode(in *jsontext.Decoder, out any, uo *jsonopts.Struct) (err error) {
	ptr := reflect.ValueOf(out)
	if ptr.Kind() != reflect.Pointer || ptr.IsNil() {
		return &SemanticError{action: "unmarshal", GoType: reflect.TypeOf(out), Err: internal.ErrNonNilReference}
	}
	// A dereferenced pointer is always addressable,
	// so the address does not need to be forced.
	va := addressableValue{ptr.Elem(), false}
	t := va.Type()

	// In legacy semantics, the entirety of the next JSON value
	// was validated before attempting to unmarshal it.
	if uo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
		if err := export.Decoder(in).CheckNextValue(); err != nil {
			return err
		}
	}

	// Resolve the unmarshal function for this type,
	// letting caller-provided Unmarshalers take precedence.
	unmarshal := lookupArshaler(t).unmarshal
	if uo.Unmarshalers != nil {
		unmarshal, _ = uo.Unmarshalers.(*Unmarshalers).lookup(unmarshal, t)
	}

	err = unmarshal(in, va, uo)
	if err != nil && !uo.Flags.Get(jsonflags.AllowDuplicateNames) {
		export.Decoder(in).Tokens.InvalidateDisabledNamespaces()
	}
	return err
}
// addressableValue is a reflect.Value that is guaranteed to be addressable
// such that calling the Addr and Set methods do not panic.
//
// There is no compile magic that enforces this property,
// but rather the need to construct this type makes it easier to examine each
// construction site to ensure that this property is upheld.
//
// For example, a value may be constructed from the Elem of a non-nil pointer
// or from freshly allocated storage via [newAddressableValue].
type addressableValue struct {
reflect.Value
// forcedAddr reports whether this value is addressable
// only through the use of [newAddressableValue].
// This is only used for [jsonflags.CallMethodsWithLegacySemantics].
forcedAddr bool
}
// newAddressableValue allocates a new zero value of type t and returns it
// as an addressable value whose forcedAddr field is set.
func newAddressableValue(t reflect.Type) addressableValue {
	elem := reflect.New(t).Elem()
	return addressableValue{Value: elem, forcedAddr: true}
}
// TODO: Remove *jsonopts.Struct argument from [marshaler] and [unmarshaler].
// This can be directly accessed on the encoder or decoder.
// All marshal and unmarshal behavior is implemented using these signatures.
// The *jsonopts.Struct argument is guaranteed to be identical to or at least
// a strict super-set of the options in Encoder.Struct or Decoder.Struct.
// It is identical for Marshal, Unmarshal, MarshalWrite, and UnmarshalRead.
// It is a super-set for MarshalEncode and UnmarshalDecode.
type (
// marshaler writes the given addressable Go value to the encoder.
marshaler = func(*jsontext.Encoder, addressableValue, *jsonopts.Struct) error
// unmarshaler reads from the decoder into the given addressable Go value.
unmarshaler = func(*jsontext.Decoder, addressableValue, *jsonopts.Struct) error
)
// arshaler bundles the marshal and unmarshal functions for a single Go type.
type arshaler struct {
marshal marshaler
unmarshal unmarshaler
// NOTE(review): nonDefault presumably reports that the functions above
// differ from the default arshaler for the type; no writer or reader
// of this field is visible in this part of the file — confirm.
nonDefault bool
}
// lookupArshalerCache memoizes the result of lookupArshaler per Go type.
var lookupArshalerCache sync.Map // map[reflect.Type]*arshaler

// lookupArshaler returns the arshaler for type t, building it on first use
// by layering the method and time arshalers on top of the default arshaler.
func lookupArshaler(t reflect.Type) *arshaler {
	cached, ok := lookupArshalerCache.Load(t)
	if !ok {
		fncs := makeDefaultArshaler(t)
		fncs = makeMethodArshaler(fncs, t)
		fncs = makeTimeArshaler(fncs, t)

		// LoadOrStore keeps whichever arshaler was stored first, so that a
		// duplicate built by a concurrent caller can be garbage collected.
		cached, _ = lookupArshalerCache.LoadOrStore(t, fncs)
	}
	return cached.(*arshaler)
}
// stringsPools recycles scratch string slices (used for sorting names).
var stringsPools = &sync.Pool{New: func() any { return new(stringSlice) }}

// stringSlice is a pooled scratch slice of strings.
type stringSlice []string

// getStrings returns a non-nil pointer to a slice with length n.
// The elements may hold stale strings from an earlier use of the slice;
// callers are expected to overwrite all n of them.
func getStrings(n int) *stringSlice {
	ss := stringsPools.Get().(*stringSlice)
	if n > cap(*ss) {
		*ss = make(stringSlice, n)
	} else {
		*ss = (*ss)[:n]
	}
	return ss
}

// putStrings returns s to the pool. The caller must not use s afterwards.
func putStrings(s *stringSlice) {
	const maxPooledCap = 1 << 10
	if cap(*s) > maxPooledCap {
		*s = nil // avoid pinning arbitrarily large amounts of memory
	}
	stringsPools.Put(s)
}

// Sort sorts the strings in place in ascending order per [strings.Compare].
func (ss *stringSlice) Sort() {
	slices.SortFunc(*ss, strings.Compare)
}

View File

@ -0,0 +1,283 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"cmp"
"reflect"
"strconv"
"encoding/json/internal"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonopts"
"encoding/json/internal/jsonwire"
"encoding/json/jsontext"
)
// This file contains an optimized marshal and unmarshal implementation
// for the any type. This type is often used when the Go program has
// no knowledge of the JSON schema. This is a common enough occurrence
// to justify the complexity of adding logic for this.
// marshalValueAny marshals a Go any as a JSON value.
// This assumes that there are no special formatting directives
// for any possible nested value.
//
// The Go types that represent arbitrary JSON (nil, bool, string, float64,
// map[string]any, and []any) are handled directly; every other dynamic type
// is copied into a fresh addressable value and dispatched to the regular
// marshal function for that type.
func marshalValueAny(enc *jsontext.Encoder, val any, mo *jsonopts.Struct) error {
	switch v := val.(type) {
	case nil:
		return enc.WriteToken(jsontext.Null)
	case bool:
		return enc.WriteToken(jsontext.Bool(v))
	case string:
		return enc.WriteToken(jsontext.String(v))
	case float64:
		return enc.WriteToken(jsontext.Float(v))
	case map[string]any:
		return marshalObjectAny(enc, v, mo)
	case []any:
		return marshalArrayAny(enc, v, mo)
	}

	// Fall back to the type-specific marshal function.
	rv := newAddressableValue(reflect.TypeOf(val))
	rv.Set(reflect.ValueOf(val))
	t := rv.Type()
	marshal := lookupArshaler(t).marshal
	if mo.Marshalers != nil {
		marshal, _ = mo.Marshalers.(*Marshalers).lookup(marshal, t)
	}
	return marshal(enc, rv, mo)
}
// unmarshalValueAny unmarshals a JSON value as a Go any.
// This assumes that there are no special formatting directives
// for any possible nested value.
// Duplicate names must be rejected since this does not implement merging.
//
// JSON objects and arrays are delegated to unmarshalObjectAny and
// unmarshalArrayAny. A JSON number becomes a float64, or a raw number
// if [jsonflags.UnmarshalAnyWithRawNumber] is set.
func unmarshalValueAny(dec *jsontext.Decoder, uo *jsonopts.Struct) (any, error) {
	k := dec.PeekKind()
	if k == '{' {
		return unmarshalObjectAny(dec, uo)
	}
	if k == '[' {
		return unmarshalArrayAny(dec, uo)
	}

	// Everything else is a scalar that can be read as a single value.
	xd := export.Decoder(dec)
	var flags jsonwire.ValueFlags
	val, err := xd.ReadValue(&flags)
	if err != nil {
		return nil, err
	}
	switch val.Kind() {
	case 'n':
		return nil, nil
	case 'f':
		return false, nil
	case 't':
		return true, nil
	case '"':
		val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim())
		if xd.StringCache == nil {
			xd.StringCache = new(stringCache)
		}
		return makeString(xd.StringCache, val), nil
	case '0':
		if uo.Flags.Get(jsonflags.UnmarshalAnyWithRawNumber) {
			return internal.RawNumberOf(val), nil
		}
		fv, ok := jsonwire.ParseFloat(val, 64)
		if ok {
			return fv, nil
		}
		// The parsed value is still returned alongside the range error.
		return fv, newUnmarshalErrorAfterWithValue(dec, float64Type, strconv.ErrRange)
	default:
		panic("BUG: invalid kind: " + k.String())
	}
}
// marshalObjectAny marshals a Go map[string]any as a JSON object
// (or as a JSON null if nil and [jsonflags.FormatNilMapAsNull]).
//
// If [jsonflags.Deterministic] is set and the map has more than one entry,
// the members are written in sorted name order using a pooled scratch slice;
// otherwise they are written in Go map iteration order.
func marshalObjectAny(enc *jsontext.Encoder, obj map[string]any, mo *jsonopts.Struct) error {
	// Check for cycles.
	xe := export.Encoder(enc)
	if xe.Tokens.Depth() > startDetectingCyclesAfter {
		v := reflect.ValueOf(obj)
		if err := visitPointer(&xe.SeenPointers, v); err != nil {
			return newMarshalErrorBefore(enc, anyType, err)
		}
		defer leavePointer(&xe.SeenPointers, v)
	}

	// Handle empty maps.
	if len(obj) == 0 {
		if mo.Flags.Get(jsonflags.FormatNilMapAsNull) && obj == nil {
			return enc.WriteToken(jsontext.Null)
		}
		// Optimize for marshaling an empty map without any preceding whitespace.
		if !mo.Flags.Get(jsonflags.AnyWhitespace) && !xe.Tokens.Last.NeedObjectName() {
			xe.Buf = append(xe.Tokens.MayAppendDelim(xe.Buf, '{'), "{}"...)
			xe.Tokens.Last.Increment()
			if xe.NeedFlush() {
				return xe.Flush()
			}
			return nil
		}
	}

	if err := enc.WriteToken(jsontext.BeginObject); err != nil {
		return err
	}
	// A Go map guarantees that each entry has a unique key.
	// The only possibility of duplicates is due to invalid UTF-8.
	if !mo.Flags.Get(jsonflags.AllowInvalidUTF8) {
		xe.Tokens.Last.DisableNamespace()
	}
	if !mo.Flags.Get(jsonflags.Deterministic) || len(obj) <= 1 {
		for name, val := range obj {
			if err := enc.WriteToken(jsontext.String(name)); err != nil {
				return err
			}
			if err := marshalValueAny(enc, val, mo); err != nil {
				return err
			}
		}
	} else {
		names := getStrings(len(obj))
		// Return the scratch slice to the pool on every exit path,
		// including the early error returns below.
		defer putStrings(names)
		var i int
		for name := range obj {
			(*names)[i] = name
			i++
		}
		names.Sort()
		for _, name := range *names {
			if err := enc.WriteToken(jsontext.String(name)); err != nil {
				return err
			}
			if err := marshalValueAny(enc, obj[name], mo); err != nil {
				return err
			}
		}
	}
	return enc.WriteToken(jsontext.EndObject)
}
// unmarshalObjectAny unmarshals a JSON object as a Go map[string]any.
// It panics if not decoding a JSON object.
//
// Duplicate member names are rejected with a duplicate-name error.
// A fatal error (per isFatalError) aborts immediately; for non-fatal errors
// decoding continues and the earliest such error is returned once the whole
// object has been consumed, matching unmarshalArrayAny.
// The map populated so far is returned even when an error is reported.
func unmarshalObjectAny(dec *jsontext.Decoder, uo *jsonopts.Struct) (map[string]any, error) {
	switch tok, err := dec.ReadToken(); {
	case err != nil:
		return nil, err
	case tok.Kind() != '{':
		panic("BUG: invalid kind: " + tok.Kind().String())
	}
	obj := make(map[string]any)
	// A Go map guarantees that each entry has a unique key.
	// The only possibility of duplicates is due to invalid UTF-8.
	if !uo.Flags.Get(jsonflags.AllowInvalidUTF8) {
		export.Decoder(dec).Tokens.Last.DisableNamespace()
	}
	var errUnmarshal error
	for dec.PeekKind() != '}' {
		tok, err := dec.ReadToken()
		if err != nil {
			return obj, err
		}
		name := tok.String()

		// Manually check for duplicate names.
		if _, ok := obj[name]; ok {
			// TODO: Unread the object name.
			name := export.Decoder(dec).PreviousTokenOrValue()
			err := newDuplicateNameError(dec.StackPointer(), nil, dec.InputOffset()-len64(name))
			return obj, err
		}

		val, err := unmarshalValueAny(dec, uo)
		obj[name] = val
		if err != nil {
			if isFatalError(err, uo.Flags) {
				return obj, err
			}
			// Remember only the first non-fatal error; cmp.Or returns its
			// first non-zero argument, so an earlier error is never replaced.
			errUnmarshal = cmp.Or(errUnmarshal, err)
		}
	}
	if _, err := dec.ReadToken(); err != nil {
		return obj, err
	}
	return obj, errUnmarshal
}
// marshalArrayAny marshals a Go []any as a JSON array
// (or as a JSON null if nil and [jsonflags.FormatNilSliceAsNull]).
// Each element is written with marshalValueAny.
func marshalArrayAny(enc *jsontext.Encoder, arr []any, mo *jsonopts.Struct) error {
	xe := export.Encoder(enc)

	// Past a certain nesting depth, start tracking visited slices
	// so that a cycle is reported as an error.
	if xe.Tokens.Depth() > startDetectingCyclesAfter {
		rv := reflect.ValueOf(arr)
		if err := visitPointer(&xe.SeenPointers, rv); err != nil {
			return newMarshalErrorBefore(enc, sliceAnyType, err)
		}
		defer leavePointer(&xe.SeenPointers, rv)
	}

	if len(arr) == 0 {
		if mo.Flags.Get(jsonflags.FormatNilSliceAsNull) && arr == nil {
			return enc.WriteToken(jsontext.Null)
		}
		// Fast path: append "[]" directly to the output buffer
		// when no whitespace needs to precede it.
		if !mo.Flags.Get(jsonflags.AnyWhitespace) && !xe.Tokens.Last.NeedObjectName() {
			xe.Buf = append(xe.Tokens.MayAppendDelim(xe.Buf, '['), "[]"...)
			xe.Tokens.Last.Increment()
			if xe.NeedFlush() {
				return xe.Flush()
			}
			return nil
		}
	}

	if err := enc.WriteToken(jsontext.BeginArray); err != nil {
		return err
	}
	for i := range arr {
		if err := marshalValueAny(enc, arr[i], mo); err != nil {
			return err
		}
	}
	return enc.WriteToken(jsontext.EndArray)
}
// unmarshalArrayAny unmarshals a JSON array as a Go []any.
// It panics if not decoding a JSON array.
//
// A fatal error (per isFatalError) aborts immediately; for non-fatal errors
// decoding continues and the first such error is returned at the end.
// The elements decoded so far are returned even when an error is reported.
func unmarshalArrayAny(dec *jsontext.Decoder, uo *jsonopts.Struct) ([]any, error) {
	tok, err := dec.ReadToken()
	if err != nil {
		return nil, err
	}
	if tok.Kind() != '[' {
		panic("BUG: invalid kind: " + tok.Kind().String())
	}

	elems := []any{} // non-nil even when the JSON array is empty
	var firstErr error
	for dec.PeekKind() != ']' {
		elem, err := unmarshalValueAny(dec, uo)
		elems = append(elems, elem)
		if err == nil {
			continue
		}
		if isFatalError(err, uo.Flags) {
			return elems, err
		}
		firstErr = cmp.Or(firstErr, err)
	}
	if _, err := dec.ReadToken(); err != nil {
		return elems, err
	}
	return elems, firstErr
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,432 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"errors"
"fmt"
"reflect"
"sync"
"encoding/json/internal"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonopts"
"encoding/json/jsontext"
)
// SkipFunc may be returned by [MarshalToFunc] and [UnmarshalFromFunc] functions.
//
// Any function that returns SkipFunc must not cause observable side effects
// on the provided [jsontext.Encoder] or [jsontext.Decoder].
// For example, it is permissible to call [jsontext.Decoder.PeekKind],
// but not permissible to call [jsontext.Decoder.ReadToken] or
// [jsontext.Encoder.WriteToken] since such methods mutate the state.
var SkipFunc = errors.New("json: skip function")
// errSkipMutation is reported when a function returns [SkipFunc]
// even though the coder's depth or length changed during the call.
var errSkipMutation = errors.New("must not read or write any tokens when skipping")
// errNonSingularValue is reported when a function returns nil
// without having advanced the coder by exactly one value at the same depth.
var errNonSingularValue = errors.New("must read or write exactly one value")
// Marshalers is a list of functions that may override the marshal behavior
// of specific types. Populate [WithMarshalers] to use it with
// [Marshal], [MarshalWrite], or [MarshalEncode].
// A nil *Marshalers is equivalent to an empty list.
// There are no exported fields or methods on Marshalers.
//
// Values are constructed with [MarshalFunc] or [MarshalToFunc]
// and combined with [JoinMarshalers].
type Marshalers = typedMarshalers
// JoinMarshalers flattens the provided lists of marshal functions
// into a single list.
// When several functions in the list apply to a value of a given type,
// those earlier in the list take precedence over those that come later.
// If a function returns [SkipFunc], then the next applicable function is called,
// otherwise the default marshaling behavior is used.
//
// For example:
//
//	m1 := JoinMarshalers(f1, f2)
//	m2 := JoinMarshalers(f0, m1, f3)     // equivalent to m3
//	m3 := JoinMarshalers(f0, f1, f2, f3) // equivalent to m2
func JoinMarshalers(ms ...*Marshalers) *Marshalers {
	return newTypedArshalers(ms...)
}
// Unmarshalers is a list of functions that may override the unmarshal behavior
// of specific types. Populate [WithUnmarshalers] to use it with
// [Unmarshal], [UnmarshalRead], or [UnmarshalDecode].
// A nil *Unmarshalers is equivalent to an empty list.
// There are no exported fields or methods on Unmarshalers.
//
// Values are constructed with [UnmarshalFunc] or [UnmarshalFromFunc]
// and combined with [JoinUnmarshalers].
type Unmarshalers = typedUnmarshalers
// JoinUnmarshalers flattens the provided lists of unmarshal functions
// into a single list.
// When several functions in the list apply to a value of a given type,
// those earlier in the list take precedence over those that come later.
// If a function returns [SkipFunc], then the next applicable function is called,
// otherwise the default unmarshaling behavior is used.
//
// For example:
//
//	u1 := JoinUnmarshalers(f1, f2)
//	u2 := JoinUnmarshalers(f0, u1, f3)     // equivalent to u3
//	u3 := JoinUnmarshalers(f0, f1, f2, f3) // equivalent to u2
func JoinUnmarshalers(us ...*Unmarshalers) *Unmarshalers {
	return newTypedArshalers(us...)
}
// typedMarshalers and typedUnmarshalers instantiate typedArshalers
// for the encoder and decoder respectively.
type typedMarshalers = typedArshalers[jsontext.Encoder]
type typedUnmarshalers = typedArshalers[jsontext.Decoder]
// typedArshalers is an ordered list of type-specific arshal functions
// together with a per-type cache of the resolved function.
type typedArshalers[Coder any] struct {
nonComparable
// fncVals lists the type-specific functions in order of precedence.
fncVals []typedArshaler[Coder]
// fncCache maps a reflect.Type to the combined function built by lookup,
// or to nil if lookup found no applicable function for that type.
fncCache sync.Map // map[reflect.Type]func(*Coder, addressableValue, *jsonopts.Struct) error
// fromAny reports whether any of Go types used to represent arbitrary JSON
// (i.e., any, bool, string, float64, map[string]any, or []any) matches
// any of the provided type-specific arshalers.
//
// This bit of information is needed in arshal_default.go to determine
// whether to use the specialized logic in arshal_any.go to handle
// the any interface type. The logic in arshal_any.go does not support
// type-specific arshal functions, so we must avoid using that logic
// if this is true.
fromAny bool
}
// typedMarshaler and typedUnmarshaler instantiate typedArshaler
// for the encoder and decoder respectively.
type typedMarshaler = typedArshaler[jsontext.Encoder]
type typedUnmarshaler = typedArshaler[jsontext.Decoder]
// typedArshaler is a single type-specific arshal function.
type typedArshaler[Coder any] struct {
// typ is the type the function was registered for;
// lookup matches it against a value's type using castableTo.
typ reflect.Type
// fnc is the function to call for matching values.
fnc func(*Coder, addressableValue, *jsonopts.Struct) error
// maySkip reports whether fnc may return SkipFunc, in which case
// lookup also collects the functions that follow it in the list.
maySkip bool
}
// newMarshalers joins the given marshalers into a single flattened list.
func newMarshalers(ms ...*Marshalers) *Marshalers { return newTypedArshalers(ms...) }

// newUnmarshalers joins the given unmarshalers into a single flattened list.
func newUnmarshalers(us ...*Unmarshalers) *Unmarshalers { return newTypedArshalers(us...) }

// newTypedArshalers concatenates the functions of every non-nil argument
// in order and ORs together their fromAny bits.
// It returns nil if the resulting list contains no functions.
func newTypedArshalers[Coder any](as ...*typedArshalers[Coder]) *typedArshalers[Coder] {
	joined := new(typedArshalers[Coder])
	for _, src := range as {
		if src == nil {
			continue
		}
		joined.fncVals = append(joined.fncVals, src.fncVals...)
		joined.fromAny = joined.fromAny || src.fromAny
	}
	if len(joined.fncVals) == 0 {
		return nil
	}
	return joined
}
// lookup returns the function to use for values of type t.
// If any type-specific function applies to t, it returns a function that
// tries the applicable ones in order (falling back to fnc when all of them
// return SkipFunc) and reports true. Otherwise it returns fnc unchanged
// and reports false. The outcome is cached per type.
func (a *typedArshalers[Coder]) lookup(fnc func(*Coder, addressableValue, *jsonopts.Struct) error, t reflect.Type) (func(*Coder, addressableValue, *jsonopts.Struct) error, bool) {
	if a == nil {
		return fnc, false
	}
	if cached, ok := a.fncCache.Load(t); ok {
		if cached == nil {
			return fnc, false // previously determined that nothing applies
		}
		return cached.(func(*Coder, addressableValue, *jsonopts.Struct) error), true
	}

	// Collect a list of arshalers that can be called for this type.
	// This list may be longer than 1 since some arshalers can be skipped.
	var applicable []func(*Coder, addressableValue, *jsonopts.Struct) error
	for _, candidate := range a.fncVals {
		if castableTo(t, candidate.typ) {
			applicable = append(applicable, candidate.fnc)
			if !candidate.maySkip {
				break // subsequent arshalers will never be called
			}
		}
	}
	if len(applicable) == 0 {
		a.fncCache.Store(t, nil) // nil to indicate that no funcs found
		return fnc, false
	}

	// Construct an arshaler that may call every applicable arshaler.
	fallback := fnc
	chained := func(c *Coder, v addressableValue, o *jsonopts.Struct) error {
		for _, try := range applicable {
			if err := try(c, v, o); err != SkipFunc {
				return err // may be nil or non-nil
			}
		}
		return fallback(c, v, o)
	}

	// Use the first stored so duplicate work can be garbage collected.
	stored, _ := a.fncCache.LoadOrStore(t, chained)
	return stored.(func(*Coder, addressableValue, *jsonopts.Struct) error), true
}
// MarshalFunc constructs a type-specific marshaler that
// specifies how to marshal values of type T.
// T can be any type except a named pointer.
// The function is always provided with a non-nil pointer value
// if T is an interface or pointer type.
//
// The function must marshal exactly one JSON value.
// The value of T must not be retained outside the function call.
// It may not return [SkipFunc].
func MarshalFunc[T any](fn func(T) ([]byte, error)) *Marshalers {
	t := reflect.TypeFor[T]()
	assertCastableTo(t, true)

	marshal := func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error {
		val, err := fn(va.castTo(t).Interface().(T))
		if err == nil {
			// The user function succeeded; emit the value it produced.
			err = enc.WriteValue(val)
			if err == nil {
				return nil
			}
			if mo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
				return internal.NewMarshalerError(va.Addr().Interface(), err, "MarshalFunc") // unlike unmarshal, always wrapped
			}
			if isSyntacticError(err) {
				err = newMarshalErrorBefore(enc, t, err)
			}
			return err
		}

		// The user function failed; SkipFunc is not permitted here.
		err = wrapSkipFunc(err, "marshal function of type func(T) ([]byte, error)")
		if mo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
			return internal.NewMarshalerError(va.Addr().Interface(), err, "MarshalFunc") // unlike unmarshal, always wrapped
		}
		return collapseSemanticErrors(newMarshalErrorBefore(enc, t, err))
	}

	return &Marshalers{
		fncVals: []typedMarshaler{{typ: t, fnc: marshal}},
		fromAny: castableToFromAny(t),
	}
}
// MarshalToFunc constructs a type-specific marshaler that
// specifies how to marshal values of type T.
// T can be any type except a named pointer.
// The function is always provided with a non-nil pointer value
// if T is an interface or pointer type.
//
// The function must marshal exactly one JSON value by calling write methods
// on the provided encoder. It may return [SkipFunc] such that marshaling can
// move on to the next marshal function. However, no mutable method calls may
// be called on the encoder if [SkipFunc] is returned.
// The pointer to [jsontext.Encoder] and the value of T
// must not be retained outside the function call.
func MarshalToFunc[T any](fn func(*jsontext.Encoder, T) error) *Marshalers {
t := reflect.TypeFor[T]()
assertCastableTo(t, true)
typFnc := typedMarshaler{
typ: t,
fnc: func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error {
xe := export.Encoder(enc)
// Snapshot the token position so that the amount written by fn
// can be checked afterwards.
prevDepth, prevLength := xe.Tokens.DepthLength()
// The WithinArshalCall flag is set only for the duration of fn
// (presumably "| 1" sets the flag and "| 0" clears it).
xe.Flags.Set(jsonflags.WithinArshalCall | 1)
err := fn(enc, va.castTo(t).Interface().(T))
xe.Flags.Set(jsonflags.WithinArshalCall | 0)
currDepth, currLength := xe.Tokens.DepthLength()
// A successful call must leave the depth unchanged
// and advance the length by exactly one value.
if err == nil && (prevDepth != currDepth || prevLength+1 != currLength) {
err = errNonSingularValue
}
if err != nil {
if err == SkipFunc {
// Skipping is only valid if nothing was written.
if prevDepth == currDepth && prevLength == currLength {
return SkipFunc
}
err = errSkipMutation
}
if mo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
return internal.NewMarshalerError(va.Addr().Interface(), err, "MarshalToFunc") // unlike unmarshal, always wrapped
}
// I/O errors are returned as-is; everything else is wrapped
// with the position recorded before fn was called.
if !export.IsIOError(err) {
err = newSemanticErrorWithPosition(enc, t, prevDepth, prevLength, err)
}
return err
}
return nil
},
maySkip: true,
}
return &Marshalers{fncVals: []typedMarshaler{typFnc}, fromAny: castableToFromAny(t)}
}
// UnmarshalFunc constructs a type-specific unmarshaler that
// specifies how to unmarshal values of type T.
// T must be an unnamed pointer or an interface type.
// The function is always provided with a non-nil pointer value.
//
// The function must unmarshal exactly one JSON value.
// The input []byte must not be mutated.
// The input []byte and value T must not be retained outside the function call.
// It may not return [SkipFunc].
func UnmarshalFunc[T any](fn func([]byte, T) error) *Unmarshalers {
	t := reflect.TypeFor[T]()
	assertCastableTo(t, false)

	unmarshal := func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error {
		val, err := dec.ReadValue()
		if err != nil {
			return err // must be a syntactic or I/O error
		}
		if err = fn(val, va.castTo(t).Interface().(T)); err == nil {
			return nil
		}

		// The user function failed; SkipFunc is not permitted here.
		err = wrapSkipFunc(err, "unmarshal function of type func([]byte, T) error")
		if uo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
			return err // unlike marshal, never wrapped
		}
		return collapseSemanticErrors(newUnmarshalErrorAfter(dec, t, err))
	}

	return &Unmarshalers{
		fncVals: []typedUnmarshaler{{typ: t, fnc: unmarshal}},
		fromAny: castableToFromAny(t),
	}
}
// UnmarshalFromFunc constructs a type-specific unmarshaler that
// specifies how to unmarshal values of type T.
// T must be an unnamed pointer or an interface type.
// The function is always provided with a non-nil pointer value.
//
// The function must unmarshal exactly one JSON value by calling read methods
// on the provided decoder. It may return [SkipFunc] such that unmarshaling can
// move on to the next unmarshal function. However, no mutable method calls may
// be called on the decoder if [SkipFunc] is returned.
// The pointer to [jsontext.Decoder] and the value of T
// must not be retained outside the function call.
func UnmarshalFromFunc[T any](fn func(*jsontext.Decoder, T) error) *Unmarshalers {
t := reflect.TypeFor[T]()
assertCastableTo(t, false)
typFnc := typedUnmarshaler{
typ: t,
fnc: func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error {
xd := export.Decoder(dec)
// Snapshot the token position so that the amount read by fn
// can be checked afterwards.
prevDepth, prevLength := xd.Tokens.DepthLength()
// The WithinArshalCall flag is set only for the duration of fn
// (presumably "| 1" sets the flag and "| 0" clears it).
xd.Flags.Set(jsonflags.WithinArshalCall | 1)
err := fn(dec, va.castTo(t).Interface().(T))
xd.Flags.Set(jsonflags.WithinArshalCall | 0)
currDepth, currLength := xd.Tokens.DepthLength()
// A successful call must leave the depth unchanged
// and advance the length by exactly one value.
if err == nil && (prevDepth != currDepth || prevLength+1 != currLength) {
err = errNonSingularValue
}
if err != nil {
if err == SkipFunc {
// Skipping is only valid if nothing was read.
if prevDepth == currDepth && prevLength == currLength {
return SkipFunc
}
err = errSkipMutation
}
if uo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
// SkipUntil is asked to advance to the position just past the
// value that fn was expected to read; a failure to do so
// takes precedence over the error from fn.
if err2 := xd.SkipUntil(prevDepth, prevLength+1); err2 != nil {
return err2
}
return err // unlike marshal, never wrapped
}
// Syntactic and I/O errors are returned as-is; everything else is
// wrapped with the position recorded before fn was called.
if !isSyntacticError(err) && !export.IsIOError(err) {
err = newSemanticErrorWithPosition(dec, t, prevDepth, prevLength, err)
}
return err
}
return nil
},
maySkip: true,
}
return &Unmarshalers{fncVals: []typedUnmarshaler{typFnc}, fromAny: castableToFromAny(t)}
}
// assertCastableTo panics unless "to" is a type that a type-specific
// arshaler may be declared for.
//
// Let AllTypes be the universal set of all possible Go types.
// This function generally asserts that:
//
//	len([from for from in AllTypes if castableTo(from, to)]) > 0
//
// Interface types and unnamed pointer types are always accepted.
// Named pointer types are always rejected, to be consistent with the fact
// that taking the address of a value produces an unnamed pointer type.
//
// All remaining (non-pointer, non-interface) types are accepted only if
// marshal is true. Technically they are permissible for unmarshal as well,
// but that is almost always a bug since the function would receive a value
// whose mutations the end-user caller cannot observe.
func assertCastableTo(to reflect.Type, marshal bool) {
	kind := to.Kind()
	if kind == reflect.Interface {
		return
	}
	if kind == reflect.Pointer {
		if to.Name() == "" {
			return
		}
	} else if marshal {
		return
	}

	if marshal {
		panic(fmt.Sprintf("input type %v must be an interface type, an unnamed pointer type, or a non-pointer type", to))
	}
	panic(fmt.Sprintf("input type %v must be an interface type or an unnamed pointer type", to))
}
// castableTo reports whether values of type "from" can be casted to type "to".
// Nil pointer or interface "from" values are never considered castable.
//
// The rule depends on the kind of "to":
//   - interface: the pointer to "from" must implement "to";
//   - pointer: "to" must be exactly the pointer to "from"
//     (the common case for unmarshaling);
//   - anything else: "from" must be identical to "to"
//     (the common case for marshaling).
//
// This function must be kept in sync with addressableValue.castTo.
func castableTo(from, to reflect.Type) bool {
	kind := to.Kind()
	if kind != reflect.Interface && kind != reflect.Pointer {
		// From must be a concrete type.
		return from == to
	}

	ptr := reflect.PointerTo(from)
	if kind == reflect.Interface {
		// TODO: This breaks when ordinary interfaces can have type sets
		// since interfaces now exist where only the value form of a type (T)
		// implements the interface, but not the pointer variant (*T).
		// See https://go.dev/issue/45346.
		return ptr.Implements(to)
	}
	// From must be a concrete or interface type.
	return ptr == to
}
// castTo casts va to the specified type.
// For an interface type, the result is the address of va converted to that
// interface, so the underlying value is always a non-nil pointer
// to a concrete type. For a pointer type, the result is the address of va.
// For any other type, the result is the value itself.
//
// Requirement: castableTo(va.Type(), to) must hold.
func (va addressableValue) castTo(to reflect.Type) reflect.Value {
	kind := to.Kind()
	if kind == reflect.Interface {
		return va.Addr().Convert(to)
	}
	if kind == reflect.Pointer {
		return va.Addr()
	}
	return va.Value
}
// castableToFromAny reports whether "to" can be casted to from any
// of the dynamic types used to represent arbitrary JSON
// (any, bool, string, float64, map[string]any, and []any).
func castableToFromAny(to reflect.Type) bool {
	candidates := [...]reflect.Type{anyType, boolType, stringType, float64Type, mapStringAnyType, sliceAnyType}
	for i := range candidates {
		if castableTo(candidates[i], to) {
			return true
		}
	}
	return false
}
// wrapSkipFunc replaces SkipFunc with an error stating that the kind of
// function described by what cannot be skipped.
// Any other error (including nil) is returned unchanged.
func wrapSkipFunc(err error, what string) error {
	if err != SkipFunc {
		return err
	}
	return errors.New(what + " cannot be skipped")
}

View File

@ -0,0 +1,230 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"bytes"
"errors"
"io"
"reflect"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonopts"
"encoding/json/internal/jsonwire"
"encoding/json/jsontext"
)
// This package supports "inlining" a Go struct field, where the contents
// of the serialized field (which must be a JSON object) are treated as if
// they are part of the parent Go struct (which represents a JSON object).
//
// Generally, inlined fields are of a Go struct type, where the fields of the
// nested struct are virtually hoisted up to the parent struct using rules
// similar to how Go embedding works (but operating within the JSON namespace).
//
// However, inlined fields may also be of a Go map type with a string key or
// a jsontext.Value. Such inlined fields are called "fallback" fields since they
// represent any arbitrary JSON object member. Explicitly named fields take
// precedence over the inlined fallback. Only one inlined fallback is allowed.

// errRawInlinedNotObject is reported when an inlined jsontext.Value
// holds something other than a JSON object.
var errRawInlinedNotObject = errors.New("inlined raw value must be a JSON object")
// jsontextValueType is the reflect.Type of jsontext.Value, used to tell
// a raw-value fallback apart from a map fallback.
var jsontextValueType = reflect.TypeFor[jsontext.Value]()
// marshalInlinedFallbackAll marshals all the members in an inlined fallback.
//
// The fallback field f of the struct va is either a jsontext.Value, whose
// object members are copied to enc one name/value pair at a time,
// or a map with a string-kinded key, whose entries are marshaled as members.
// A nil or empty fallback writes nothing.
//
// If insertUnquotedName is non-nil, it is called with each unquoted member
// name; a false result is reported as a duplicate name error.
func marshalInlinedFallbackAll(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct, f *structField, insertUnquotedName func([]byte) bool) error {
v := addressableValue{va.Field(f.index0), va.forcedAddr} // addressable if struct value is addressable
if len(f.index) > 0 {
v = v.fieldByIndex(f.index, false)
if !v.IsValid() {
return nil // implies a nil inlined field
}
}
v = v.indirect(false)
if !v.IsValid() {
return nil
}
if v.Type() == jsontextValueType {
// Raw-value fallback: re-parse the stored JSON object and
// forward each member to the encoder.
// TODO(https://go.dev/issue/62121): Use reflect.Value.AssertTo.
b := *v.Addr().Interface().(*jsontext.Value)
if len(b) == 0 { // TODO: Should this be nil? What if it were all whitespace?
return nil
}
dec := export.GetBufferedDecoder(b)
defer export.PutBufferedDecoder(dec)
xd := export.Decoder(dec)
// This temporary decoder is configured to allow duplicate names and
// invalid UTF-8; names are checked via insertUnquotedName instead.
xd.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1)
tok, err := dec.ReadToken()
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return newMarshalErrorBefore(enc, v.Type(), err)
}
if tok.Kind() != '{' {
return newMarshalErrorBefore(enc, v.Type(), errRawInlinedNotObject)
}
for dec.PeekKind() != '}' {
// Parse the JSON object name.
var flags jsonwire.ValueFlags
val, err := xd.ReadValue(&flags)
if err != nil {
return newMarshalErrorBefore(enc, v.Type(), err)
}
if insertUnquotedName != nil {
name := jsonwire.UnquoteMayCopy(val, flags.IsVerbatim())
if !insertUnquotedName(name) {
return newDuplicateNameError(enc.StackPointer().Parent(), val, enc.OutputOffset())
}
}
if err := enc.WriteValue(val); err != nil {
return err
}
// Parse the JSON object value.
val, err = xd.ReadValue(&flags)
if err != nil {
return newMarshalErrorBefore(enc, v.Type(), err)
}
if err := enc.WriteValue(val); err != nil {
return err
}
}
// Consume the closing '}' and check for EOF after the object.
if _, err := dec.ReadToken(); err != nil {
return newMarshalErrorBefore(enc, v.Type(), err)
}
if err := xd.CheckEOF(); err != nil {
return newMarshalErrorBefore(enc, v.Type(), err)
}
return nil
} else {
// Map fallback: marshal every entry as an object member.
m := v // must be a map[~string]V
n := m.Len()
if n == 0 {
return nil
}
// Scratch key and value that are reused for every entry.
mk := newAddressableValue(m.Type().Key())
mv := newAddressableValue(m.Type().Elem())
// marshalKey quotes the key, performs the duplicate name check,
// and writes the quoted name to the encoder.
marshalKey := func(mk addressableValue) error {
b, err := jsonwire.AppendQuote(enc.UnusedBuffer(), mk.String(), &mo.Flags)
if err != nil {
return newMarshalErrorBefore(enc, m.Type().Key(), err)
}
if insertUnquotedName != nil {
// A quoted string without any backslash contains no escapes.
isVerbatim := bytes.IndexByte(b, '\\') < 0
name := jsonwire.UnquoteMayCopy(b, isVerbatim)
if !insertUnquotedName(name) {
return newDuplicateNameError(enc.StackPointer().Parent(), b, enc.OutputOffset())
}
}
return enc.WriteValue(b)
}
marshalVal := f.fncs.marshal
if mo.Marshalers != nil {
marshalVal, _ = mo.Marshalers.(*Marshalers).lookup(marshalVal, mv.Type())
}
if !mo.Flags.Get(jsonflags.Deterministic) || n <= 1 {
// Order does not matter: emit in map iteration order.
for iter := m.MapRange(); iter.Next(); {
mk.SetIterKey(iter)
if err := marshalKey(mk); err != nil {
return err
}
mv.Set(iter.Value())
if err := marshalVal(enc, mv, mo); err != nil {
return err
}
}
} else {
// Deterministic output: collect the keys, sort them,
// and emit the entries in sorted key order.
// NOTE(review): the early error returns in the loop below skip
// putStrings, so names is not returned to the pool on failure.
names := getStrings(n)
for i, iter := 0, m.Value.MapRange(); i < n && iter.Next(); i++ {
mk.SetIterKey(iter)
(*names)[i] = mk.String()
}
names.Sort()
for _, name := range *names {
mk.SetString(name)
if err := marshalKey(mk); err != nil {
return err
}
// TODO(https://go.dev/issue/57061): Use mv.SetMapIndexOf.
mv.Set(m.MapIndex(mk.Value))
if err := marshalVal(enc, mv, mo); err != nil {
return err
}
}
putStrings(names)
}
return nil
}
}
// unmarshalInlinedFallbackNext unmarshals only the next member in an inlined fallback.
//
// The fallback field f of the struct va is either a jsontext.Value or a map.
// For a jsontext.Value, quotedName and the next value read from dec are
// spliced into the raw JSON object text held by the field.
// For a map, unquotedName is used as the map key and the next value from dec
// is unmarshaled into the corresponding map element.
func unmarshalInlinedFallbackNext(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct, f *structField, quotedName, unquotedName []byte) error {
v := addressableValue{va.Field(f.index0), va.forcedAddr} // addressable if struct value is addressable
if len(f.index) > 0 {
v = v.fieldByIndex(f.index, true)
}
v = v.indirect(true)
if v.Type() == jsontextValueType {
// Raw fallback: grow the stored JSON object text in place.
b := v.Addr().Interface().(*jsontext.Value)
if len(*b) == 0 { // TODO: Should this be nil? What if it were all whitespace?
*b = append(*b, '{')
} else {
*b = jsonwire.TrimSuffixWhitespace(*b)
if jsonwire.HasSuffixByte(*b, '}') {
// TODO: When merging into an object for the first time,
// should we verify that it is valid?
// Reopen the object by dropping the closing brace, and add a
// separating comma unless the object is empty or already ends in one.
*b = jsonwire.TrimSuffixByte(*b, '}')
*b = jsonwire.TrimSuffixWhitespace(*b)
if !jsonwire.HasSuffixByte(*b, ',') && !jsonwire.HasSuffixByte(*b, '{') {
*b = append(*b, ',')
}
} else {
return newUnmarshalErrorAfterWithSkipping(dec, uo, v.Type(), errRawInlinedNotObject)
}
}
*b = append(*b, quotedName...)
*b = append(*b, ':')
// NOTE(review): if ReadValue fails, *b has already been extended with the
// name and ':' and is returned without a closing '}' — confirm that
// callers do not rely on the field contents after an error.
val, err := dec.ReadValue()
if err != nil {
return err
}
*b = append(*b, val...)
*b = append(*b, '}')
return nil
} else {
name := string(unquotedName) // TODO: Intern this?
m := v // must be a map[~string]V
if m.IsNil() {
m.Set(reflect.MakeMap(m.Type()))
}
mk := reflect.ValueOf(name)
if mkt := m.Type().Key(); mkt != stringType {
mk = mk.Convert(mkt)
}
mv := newAddressableValue(m.Type().Elem()) // TODO: Cache across calls?
// Seed the scratch element with any existing entry for this key.
if v2 := m.MapIndex(mk); v2.IsValid() {
mv.Set(v2)
}
unmarshal := f.fncs.unmarshal
if uo.Unmarshalers != nil {
unmarshal, _ = uo.Unmarshalers.(*Unmarshalers).lookup(unmarshal, mv.Type())
}
err := unmarshal(dec, mv, uo)
// The element is stored in the map even when unmarshal reported an error.
m.SetMapIndex(mk, mv.Value)
if err != nil {
return err
}
return nil
}
}

View File

@ -0,0 +1,337 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"encoding"
"errors"
"reflect"
"encoding/json/internal"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonopts"
"encoding/json/internal/jsonwire"
"encoding/json/jsontext"
)
// errNonStringValue is reported when a value handled by a text unmarshaler
// is neither a JSON null nor a JSON string.
var errNonStringValue = errors.New("JSON value must be string type")
// Interfaces for custom serialization.
var (
jsonMarshalerType = reflect.TypeFor[Marshaler]()
jsonMarshalerToType = reflect.TypeFor[MarshalerTo]()
jsonUnmarshalerType = reflect.TypeFor[Unmarshaler]()
jsonUnmarshalerFromType = reflect.TypeFor[UnmarshalerFrom]()
textAppenderType = reflect.TypeFor[encoding.TextAppender]()
textMarshalerType = reflect.TypeFor[encoding.TextMarshaler]()
textUnmarshalerType = reflect.TypeFor[encoding.TextUnmarshaler]()
// allMarshalerTypes lists every marshaling interface type declared above.
allMarshalerTypes = []reflect.Type{jsonMarshalerToType, jsonMarshalerType, textAppenderType, textMarshalerType}
// allUnmarshalerTypes lists every unmarshaling interface type declared above.
allUnmarshalerTypes = []reflect.Type{jsonUnmarshalerFromType, jsonUnmarshalerType, textUnmarshalerType}
// allMethodTypes is allMarshalerTypes followed by allUnmarshalerTypes.
allMethodTypes = append(allMarshalerTypes, allUnmarshalerTypes...)
)
// Marshaler is implemented by types that can marshal themselves.
// It is recommended that types implement [MarshalerTo] unless the implementation
// is trying to avoid a hard dependency on the "jsontext" package.
//
// It is recommended that implementations return a buffer that is safe
// for the caller to retain and potentially mutate.
type Marshaler interface {
// MarshalJSON returns the JSON encoding of the receiver.
MarshalJSON() ([]byte, error)
}
// MarshalerTo is implemented by types that can marshal themselves.
// It is recommended that types implement MarshalerTo instead of [Marshaler]
// since this is both more performant and flexible.
// If a type implements both Marshaler and MarshalerTo,
// then MarshalerTo takes precedence. In such a case, both implementations
// should aim to have equivalent behavior for the default marshal options.
//
// The implementation must write only one JSON value to the Encoder and
// must not retain the pointer to [jsontext.Encoder].
type MarshalerTo interface {
// MarshalJSONTo writes the JSON encoding of the receiver to the Encoder.
MarshalJSONTo(*jsontext.Encoder) error
// TODO: Should users call the MarshalEncode function or
// should/can they call this method directly? Does it matter?
}
// Unmarshaler is implemented by types that can unmarshal themselves.
// It is recommended that types implement [UnmarshalerFrom] unless the implementation
// is trying to avoid a hard dependency on the "jsontext" package.
//
// The input can be assumed to be a valid encoding of a JSON value
// if called from unmarshal functionality in this package.
// UnmarshalJSON must copy the JSON data if it is retained after returning.
// It is recommended that UnmarshalJSON implement merge semantics when
// unmarshaling into a pre-populated value.
//
// Implementations must not retain or mutate the input []byte.
type Unmarshaler interface {
// UnmarshalJSON decodes the given JSON value into the receiver.
UnmarshalJSON([]byte) error
}
// UnmarshalerFrom is implemented by types that can unmarshal themselves.
// It is recommended that types implement UnmarshalerFrom instead of [Unmarshaler]
// since this is both more performant and flexible.
// If a type implements both Unmarshaler and UnmarshalerFrom,
// then UnmarshalerFrom takes precedence. In such a case, both implementations
// should aim to have equivalent behavior for the default unmarshal options.
//
// The implementation must read only one JSON value from the Decoder.
// It is recommended that UnmarshalJSONFrom implement merge semantics when
// unmarshaling into a pre-populated value.
//
// Implementations must not retain the pointer to [jsontext.Decoder].
type UnmarshalerFrom interface {
// UnmarshalJSONFrom reads one JSON value from the Decoder into the receiver.
UnmarshalJSONFrom(*jsontext.Decoder) error
// TODO: Should users call the UnmarshalDecode function or
// should/can they call this method directly? Does it matter?
}
// makeMethodArshaler replaces fncs.marshal and fncs.unmarshal with functions
// that call the serialization methods implemented by t or *t, and returns fncs.
// Pointer and interface kinds are returned unchanged.
//
// The checks below run in increasing order of precedence, and each one
// overwrites the function installed by the one before it. For marshaling,
// MarshalerTo wins over Marshaler, which wins over encoding.TextAppender,
// which wins over encoding.TextMarshaler. For unmarshaling, UnmarshalerFrom
// wins over Unmarshaler, which wins over encoding.TextUnmarshaler.
func makeMethodArshaler(fncs *arshaler, t reflect.Type) *arshaler {
// Avoid injecting method arshaler on the pointer or interface version
// to avoid ever calling the method on a nil pointer or interface receiver.
// Let it be injected on the value receiver (which is always addressable).
if t.Kind() == reflect.Pointer || t.Kind() == reflect.Interface {
return fncs
}
if needAddr, ok := implements(t, textMarshalerType); ok {
fncs.nonDefault = true
prevMarshal := fncs.marshal
fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error {
// Under legacy call semantics, a pointer-receiver method is not called
// on a value whose address was forced; use the previous marshal function.
if mo.Flags.Get(jsonflags.CallMethodsWithLegacySemantics) &&
(needAddr && va.forcedAddr) {
return prevMarshal(enc, va, mo)
}
marshaler := va.Addr().Interface().(encoding.TextMarshaler)
// The marshaled text is appended as the content of a JSON string.
if err := export.Encoder(enc).AppendRaw('"', false, func(b []byte) ([]byte, error) {
b2, err := marshaler.MarshalText()
return append(b, b2...), err
}); err != nil {
err = wrapSkipFunc(err, "marshal method")
if mo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
return internal.NewMarshalerError(va.Addr().Interface(), err, "MarshalText") // unlike unmarshal, always wrapped
}
if !isSemanticError(err) && !export.IsIOError(err) {
err = newMarshalErrorBefore(enc, t, err)
}
return err
}
return nil
}
}
if needAddr, ok := implements(t, textAppenderType); ok {
fncs.nonDefault = true
prevMarshal := fncs.marshal
fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) (err error) {
if mo.Flags.Get(jsonflags.CallMethodsWithLegacySemantics) &&
(needAddr && va.forcedAddr) {
return prevMarshal(enc, va, mo)
}
appender := va.Addr().Interface().(encoding.TextAppender)
if err := export.Encoder(enc).AppendRaw('"', false, appender.AppendText); err != nil {
err = wrapSkipFunc(err, "append method")
if mo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
return internal.NewMarshalerError(va.Addr().Interface(), err, "AppendText") // unlike unmarshal, always wrapped
}
if !isSemanticError(err) && !export.IsIOError(err) {
err = newMarshalErrorBefore(enc, t, err)
}
return err
}
return nil
}
}
if needAddr, ok := implements(t, jsonMarshalerType); ok {
fncs.nonDefault = true
prevMarshal := fncs.marshal
fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error {
// Under legacy call semantics, the JSON method is also bypassed
// when the encoder is expecting an object name.
if mo.Flags.Get(jsonflags.CallMethodsWithLegacySemantics) &&
((needAddr && va.forcedAddr) || export.Encoder(enc).Tokens.Last.NeedObjectName()) {
return prevMarshal(enc, va, mo)
}
marshaler := va.Addr().Interface().(Marshaler)
val, err := marshaler.MarshalJSON()
if err != nil {
err = wrapSkipFunc(err, "marshal method")
if mo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
return internal.NewMarshalerError(va.Addr().Interface(), err, "MarshalJSON") // unlike unmarshal, always wrapped
}
err = newMarshalErrorBefore(enc, t, err)
return collapseSemanticErrors(err)
}
if err := enc.WriteValue(val); err != nil {
if mo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
return internal.NewMarshalerError(va.Addr().Interface(), err, "MarshalJSON") // unlike unmarshal, always wrapped
}
if isSyntacticError(err) {
err = newMarshalErrorBefore(enc, t, err)
}
return err
}
return nil
}
}
if needAddr, ok := implements(t, jsonMarshalerToType); ok {
fncs.nonDefault = true
prevMarshal := fncs.marshal
fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error {
if mo.Flags.Get(jsonflags.CallMethodsWithLegacySemantics) &&
((needAddr && va.forcedAddr) || export.Encoder(enc).Tokens.Last.NeedObjectName()) {
return prevMarshal(enc, va, mo)
}
xe := export.Encoder(enc)
prevDepth, prevLength := xe.Tokens.DepthLength()
// The WithinArshalCall flag is set only for the duration of the method call.
xe.Flags.Set(jsonflags.WithinArshalCall | 1)
err := va.Addr().Interface().(MarshalerTo).MarshalJSONTo(enc)
xe.Flags.Set(jsonflags.WithinArshalCall | 0)
currDepth, currLength := xe.Tokens.DepthLength()
// A successful call must leave the depth unchanged and advance the
// length by exactly one; anything else is reported as errNonSingularValue.
if (prevDepth != currDepth || prevLength+1 != currLength) && err == nil {
err = errNonSingularValue
}
if err != nil {
err = wrapSkipFunc(err, "marshal method")
if mo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
return internal.NewMarshalerError(va.Addr().Interface(), err, "MarshalJSONTo") // unlike unmarshal, always wrapped
}
if !export.IsIOError(err) {
err = newSemanticErrorWithPosition(enc, t, prevDepth, prevLength, err)
}
return err
}
return nil
}
}
if _, ok := implements(t, textUnmarshalerType); ok {
fncs.nonDefault = true
fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error {
xd := export.Decoder(dec)
var flags jsonwire.ValueFlags
val, err := xd.ReadValue(&flags)
if err != nil {
return err // must be a syntactic or I/O error
}
// A JSON null zeroes the value (unless merging with legacy semantics)
// and never reaches UnmarshalText.
if val.Kind() == 'n' {
if !uo.Flags.Get(jsonflags.MergeWithLegacySemantics) {
va.SetZero()
}
return nil
}
if val.Kind() != '"' {
return newUnmarshalErrorAfter(dec, t, errNonStringValue)
}
s := jsonwire.UnquoteMayCopy(val, flags.IsVerbatim())
unmarshaler := va.Addr().Interface().(encoding.TextUnmarshaler)
if err := unmarshaler.UnmarshalText(s); err != nil {
err = wrapSkipFunc(err, "unmarshal method")
if uo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
return err // unlike marshal, never wrapped
}
if !isSemanticError(err) && !isSyntacticError(err) && !export.IsIOError(err) {
err = newUnmarshalErrorAfter(dec, t, err)
}
return err
}
return nil
}
}
if _, ok := implements(t, jsonUnmarshalerType); ok {
fncs.nonDefault = true
prevUnmarshal := fncs.unmarshal
fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error {
// Under legacy call semantics, the JSON method is bypassed
// when the decoder is expecting an object name.
if uo.Flags.Get(jsonflags.CallMethodsWithLegacySemantics) &&
export.Decoder(dec).Tokens.Last.NeedObjectName() {
return prevUnmarshal(dec, va, uo)
}
val, err := dec.ReadValue()
if err != nil {
return err // must be a syntactic or I/O error
}
unmarshaler := va.Addr().Interface().(Unmarshaler)
if err := unmarshaler.UnmarshalJSON(val); err != nil {
err = wrapSkipFunc(err, "unmarshal method")
if uo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
return err // unlike marshal, never wrapped
}
err = newUnmarshalErrorAfter(dec, t, err)
return collapseSemanticErrors(err)
}
return nil
}
}
if _, ok := implements(t, jsonUnmarshalerFromType); ok {
fncs.nonDefault = true
prevUnmarshal := fncs.unmarshal
fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error {
if uo.Flags.Get(jsonflags.CallMethodsWithLegacySemantics) &&
export.Decoder(dec).Tokens.Last.NeedObjectName() {
return prevUnmarshal(dec, va, uo)
}
xd := export.Decoder(dec)
prevDepth, prevLength := xd.Tokens.DepthLength()
xd.Flags.Set(jsonflags.WithinArshalCall | 1)
err := va.Addr().Interface().(UnmarshalerFrom).UnmarshalJSONFrom(dec)
xd.Flags.Set(jsonflags.WithinArshalCall | 0)
currDepth, currLength := xd.Tokens.DepthLength()
// A successful call must leave the depth unchanged and advance the
// length by exactly one; anything else is reported as errNonSingularValue.
if (prevDepth != currDepth || prevLength+1 != currLength) && err == nil {
err = errNonSingularValue
}
if err != nil {
err = wrapSkipFunc(err, "unmarshal method")
if uo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
// With legacy error reporting, first skip ahead to the position
// just past the value the method was given.
if err2 := xd.SkipUntil(prevDepth, prevLength+1); err2 != nil {
return err2
}
return err // unlike marshal, never wrapped
}
if !isSyntacticError(err) && !export.IsIOError(err) {
err = newSemanticErrorWithPosition(dec, t, prevDepth, prevLength, err)
}
return err
}
return nil
}
}
return fncs
}
// implementsAny reports whether t or reflect.PointerTo(t) implements
// at least one of the interfaces in ifaceTypes.
// It returns false when ifaceTypes is empty.
func implementsAny(t reflect.Type, ifaceTypes ...reflect.Type) bool {
	for i := range ifaceTypes {
		_, ok := implements(t, ifaceTypes[i])
		if ok {
			return true
		}
	}
	return false
}
// implements reports whether t, or failing that reflect.PointerTo(t),
// implements ifaceType.
// needAddr is true only when it is the pointer type, and not t itself,
// that satisfies the interface.
func implements(t, ifaceType reflect.Type) (needAddr, ok bool) {
	if t.Implements(ifaceType) {
		return false, true
	}
	if reflect.PointerTo(t).Implements(ifaceType) {
		return true, true
	}
	return false, false
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,600 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"bytes"
"cmp"
"errors"
"fmt"
"math"
"math/bits"
"reflect"
"strconv"
"strings"
"time"
"encoding/json/internal"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonopts"
"encoding/json/internal/jsonwire"
"encoding/json/jsontext"
)
// Reflected types of the time values that receive dedicated arshalers.
var (
timeDurationType = reflect.TypeFor[time.Duration]()
timeTimeType = reflect.TypeFor[time.Time]()
)
// makeTimeArshaler installs dedicated marshal and unmarshal functions on fncs
// when t is time.Duration or time.Time, and returns fncs.
// For any other type, fncs is returned unchanged.
func makeTimeArshaler(fncs *arshaler, t reflect.Type) *arshaler {
// Ideally, time types would implement MarshalerTo and UnmarshalerFrom,
// but that would incur a dependency on package json from package time.
// Given how widely used time is, it is more acceptable that we incur a
// dependency on time from json.
//
// Injecting the arshaling functionality like this will not be identical
// to actually declaring methods on the time types since embedding of the
// time types will not be able to forward this functionality.
switch t {
case timeDurationType:
fncs.nonDefault = true
// The previously installed functions are kept for the legacy time format.
marshalNano := fncs.marshal
fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) error {
xe := export.Encoder(enc)
var m durationArshaler
// A format only applies when it was specified for the current depth.
if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() {
if !m.initFormat(mo.Format) {
return newInvalidFormatError(enc, t, mo)
}
} else if mo.Flags.Get(jsonflags.FormatTimeWithLegacySemantics) {
return marshalNano(enc, va, mo)
}
// TODO(https://go.dev/issue/62121): Use reflect.Value.AssertTo.
m.td = *va.Addr().Interface().(*time.Duration)
// Emit a JSON string unless the format is numeric, the value is not
// an object name, and StringifyNumbers is unset.
k := stringOrNumberKind(!m.isNumeric() || xe.Tokens.Last.NeedObjectName() || mo.Flags.Get(jsonflags.StringifyNumbers))
if err := xe.AppendRaw(k, true, m.appendMarshal); err != nil {
if !isSyntacticError(err) && !export.IsIOError(err) {
err = newMarshalErrorBefore(enc, t, err)
}
return err
}
return nil
}
unmarshalNano := fncs.unmarshal
fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error {
xd := export.Decoder(dec)
var u durationArshaler
if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() {
if !u.initFormat(uo.Format) {
return newInvalidFormatError(dec, t, uo)
}
} else if uo.Flags.Get(jsonflags.FormatTimeWithLegacySemantics) {
return unmarshalNano(dec, va, uo)
}
// stringify reports whether the value is expected as a JSON string
// rather than a JSON number.
stringify := !u.isNumeric() || xd.Tokens.Last.NeedObjectName() || uo.Flags.Get(jsonflags.StringifyNumbers)
var flags jsonwire.ValueFlags
td := va.Addr().Interface().(*time.Duration)
val, err := xd.ReadValue(&flags)
if err != nil {
return err
}
switch k := val.Kind(); k {
case 'n':
if !uo.Flags.Get(jsonflags.MergeWithLegacySemantics) {
*td = time.Duration(0)
}
return nil
case '"':
if !stringify {
break
}
val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim())
if err := u.unmarshal(val); err != nil {
return newUnmarshalErrorAfter(dec, t, err)
}
*td = u.td
return nil
case '0':
if stringify {
break
}
if err := u.unmarshal(val); err != nil {
return newUnmarshalErrorAfter(dec, t, err)
}
*td = u.td
return nil
}
// Reached for any other JSON kind, or when a string or number arrives
// but stringify calls for the other one.
return newUnmarshalErrorAfter(dec, t, nil)
}
case timeTimeType:
fncs.nonDefault = true
fncs.marshal = func(enc *jsontext.Encoder, va addressableValue, mo *jsonopts.Struct) (err error) {
xe := export.Encoder(enc)
var m timeArshaler
if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() {
if !m.initFormat(mo.Format) {
return newInvalidFormatError(enc, t, mo)
}
}
// TODO(https://go.dev/issue/62121): Use reflect.Value.AssertTo.
m.tt = *va.Addr().Interface().(*time.Time)
k := stringOrNumberKind(!m.isNumeric() || xe.Tokens.Last.NeedObjectName() || mo.Flags.Get(jsonflags.StringifyNumbers))
if err := xe.AppendRaw(k, !m.hasCustomFormat(), m.appendMarshal); err != nil {
if mo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
return internal.NewMarshalerError(va.Addr().Interface(), err, "MarshalJSON") // unlike unmarshal, always wrapped
}
if !isSyntacticError(err) && !export.IsIOError(err) {
err = newMarshalErrorBefore(enc, t, err)
}
return err
}
return nil
}
fncs.unmarshal = func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) (err error) {
xd := export.Decoder(dec)
var u timeArshaler
if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() {
if !u.initFormat(uo.Format) {
return newInvalidFormatError(dec, t, uo)
}
} else if uo.Flags.Get(jsonflags.FormatTimeWithLegacySemantics) {
// Without an explicit format, the legacy flag disables the
// strict RFC 3339 checks performed by timeArshaler.unmarshal.
u.looseRFC3339 = true
}
stringify := !u.isNumeric() || xd.Tokens.Last.NeedObjectName() || uo.Flags.Get(jsonflags.StringifyNumbers)
var flags jsonwire.ValueFlags
tt := va.Addr().Interface().(*time.Time)
val, err := xd.ReadValue(&flags)
if err != nil {
return err
}
switch k := val.Kind(); k {
case 'n':
if !uo.Flags.Get(jsonflags.MergeWithLegacySemantics) {
*tt = time.Time{}
}
return nil
case '"':
if !stringify {
break
}
val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim())
if err := u.unmarshal(val); err != nil {
if uo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
return err // unlike marshal, never wrapped
}
return newUnmarshalErrorAfter(dec, t, err)
}
*tt = u.tt
return nil
case '0':
if stringify {
break
}
if err := u.unmarshal(val); err != nil {
if uo.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
return err // unlike marshal, never wrapped
}
return newUnmarshalErrorAfter(dec, t, err)
}
*tt = u.tt
return nil
}
// Reached for any other JSON kind, or when a string or number arrives
// but stringify calls for the other one.
return newUnmarshalErrorAfter(dec, t, nil)
}
}
return fncs
}
// durationArshaler holds the state for marshaling or unmarshaling a
// time.Duration in the representation selected by base.
type durationArshaler struct {
// td is the value to marshal, or the value produced by unmarshal.
td time.Duration
// base records the representation where:
// - 0 uses time.Duration.String
// - 1e0, 1e3, 1e6, or 1e9 use a decimal encoding of the duration as
// nanoseconds, microseconds, milliseconds, or seconds.
base uint64
}
// initFormat selects the duration representation named by format and
// reports whether the name was recognized.
// The receiver is left untouched when format is not recognized.
func (a *durationArshaler) initFormat(format string) (ok bool) {
	var base uint64
	switch format {
	case "units":
		base = 0 // time.Duration.String form
	case "sec":
		base = 1e9
	case "milli":
		base = 1e6
	case "micro":
		base = 1e3
	case "nano":
		base = 1e0
	default:
		return false
	}
	a.base = base
	return true
}
// isNumeric reports whether the selected representation is a decimal number,
// that is, whether base is neither 0 nor 60.
// NOTE(review): no format accepted by initFormat sets base to 60 — confirm
// whether that value is still reachable.
func (a *durationArshaler) isNumeric() bool {
	switch a.base {
	case 0, 60:
		return false
	default:
		return true
	}
}
// appendMarshal appends the encoding of a.td to b.
// With base 0 the time.Duration.String form is used; otherwise the duration
// is written as a decimal number scaled by base. The error is always nil.
func (a *durationArshaler) appendMarshal(b []byte) ([]byte, error) {
	if a.base == 0 {
		return append(b, a.td.String()...), nil
	}
	return appendDurationBase10(b, a.td, a.base), nil
}
// unmarshal parses b according to the selected representation and stores
// the result in a.td, returning any parse error.
func (a *durationArshaler) unmarshal(b []byte) (err error) {
	if a.base == 0 {
		a.td, err = time.ParseDuration(string(b))
		return err
	}
	a.td, err = parseDurationBase10(b, a.base)
	return err
}
// timeArshaler holds the state for marshaling or unmarshaling a
// time.Time in the representation selected by base and format.
type timeArshaler struct {
// tt is the value to marshal, or the value produced by unmarshal.
tt time.Time
// base records the representation where:
// - 0 uses RFC 3339 encoding of the timestamp
// - 1e0, 1e3, 1e6, or 1e9 use a decimal encoding of the timestamp as
// seconds, milliseconds, microseconds, or nanoseconds since Unix epoch.
// - math.MaxUint uses time.Time.Format to encode the timestamp
base uint64
format string // time format passed to time.Parse
// looseRFC3339 makes unmarshal skip its additional RFC 3339 strictness checks.
looseRFC3339 bool
}
// initFormat configures a for the given format and reports whether the
// format is accepted.
//
// A format that does not start with an ASCII letter is used verbatim as a
// time layout. Otherwise it must name a layout constant of the time package
// or one of "unix", "unixmilli", "unixmicro", "unixnano"; any other string
// made up solely of letters and digits is rejected, and anything else is
// used verbatim as a layout.
func (a *timeArshaler) initFormat(format string) bool {
	// We assume that an exported constant in the time package will
	// always start with an uppercase ASCII letter.
	if format == "" {
		return false
	}
	a.base = math.MaxUint // implies custom format
	first := format[0]
	isLower := 'a' <= first && first <= 'z'
	isUpper := 'A' <= first && first <= 'Z'
	if !isLower && !isUpper {
		a.format = format
		return true
	}
	switch format {
	// Layout constants from package time.
	case "ANSIC":
		a.format = time.ANSIC
	case "UnixDate":
		a.format = time.UnixDate
	case "RubyDate":
		a.format = time.RubyDate
	case "RFC822":
		a.format = time.RFC822
	case "RFC822Z":
		a.format = time.RFC822Z
	case "RFC850":
		a.format = time.RFC850
	case "RFC1123":
		a.format = time.RFC1123
	case "RFC1123Z":
		a.format = time.RFC1123Z
	case "RFC3339":
		a.base, a.format = 0, time.RFC3339
	case "RFC3339Nano":
		a.base, a.format = 0, time.RFC3339Nano
	case "Kitchen":
		a.format = time.Kitchen
	case "Stamp":
		a.format = time.Stamp
	case "StampMilli":
		a.format = time.StampMilli
	case "StampMicro":
		a.format = time.StampMicro
	case "StampNano":
		a.format = time.StampNano
	case "DateTime":
		a.format = time.DateTime
	case "DateOnly":
		a.format = time.DateOnly
	case "TimeOnly":
		a.format = time.TimeOnly
	// Numeric Unix timestamps.
	case "unix":
		a.base = 1e0
	case "unixmilli":
		a.base = 1e3
	case "unixmicro":
		a.base = 1e6
	case "unixnano":
		a.base = 1e9
	default:
		// Reject any Go identifier in case new constants are supported.
		if strings.TrimFunc(format, isLetterOrDigit) == "" {
			return false
		}
		a.format = format
	}
	return true
}
// isNumeric reports whether base, converted to int, is positive.
// Base 0 (RFC 3339) is therefore not numeric.
func (a *timeArshaler) isNumeric() bool {
	signed := int(a.base)
	return signed > 0
}
// hasCustomFormat reports whether base holds the math.MaxUint sentinel,
// which initFormat uses to mean "encode with the layout in a.format".
func (a *timeArshaler) hasCustomFormat() bool {
	const customFormatBase = math.MaxUint
	return a.base == customFormatBase
}
// appendMarshal appends the encoding of a.tt to b using the representation
// selected by a.base: RFC 3339 (base 0), a custom layout (math.MaxUint),
// or a decimal Unix timestamp (any other base).
// Only the RFC 3339 case can return an error.
func (a *timeArshaler) appendMarshal(b []byte) ([]byte, error) {
switch a.base {
case 0:
// Fall back to RFC3339Nano when no layout was chosen.
format := cmp.Or(a.format, time.RFC3339Nano)
n0 := len(b)
b = a.tt.AppendFormat(b, format)
// Not all Go timestamps can be represented as valid RFC 3339.
// Explicitly check for these edge cases.
// See https://go.dev/issue/4556 and https://go.dev/issue/54580.
// NOTE(review): b is shadowed inside this switch by the newly appended
// portion b[n0:], so the error paths return only that portion.
switch b := b[n0:]; {
case b[len("9999")] != '-': // year must be exactly 4 digits wide
return b, errors.New("year outside of range [0,9999]")
case b[len(b)-1] != 'Z':
// c is the byte where the sign of a "±hh:mm" offset is expected.
c := b[len(b)-len("Z07:00")]
if ('0' <= c && c <= '9') || parseDec2(b[len(b)-len("07:00"):]) >= 24 {
return b, errors.New("timezone hour outside of range [0,23]")
}
}
return b, nil
case math.MaxUint:
return a.tt.AppendFormat(b, a.format), nil
default:
return appendTimeUnix(b, a.tt, a.base), nil
}
}
// unmarshal parses b according to the representation selected by a.base
// and stores the result in a.tt.
// For RFC 3339 (base 0), a.tt is assigned by time.Time.UnmarshalText before
// the additional strictness checks run, so a.tt may already hold the parsed
// value when one of those checks returns an error.
func (a *timeArshaler) unmarshal(b []byte) (err error) {
switch a.base {
case 0:
// Use time.Time.UnmarshalText to avoid possible string allocation.
if err := a.tt.UnmarshalText(b); err != nil {
return err
}
// TODO(https://go.dev/issue/57912):
// RFC 3339 specifies the grammar for a valid timestamp.
// However, the parsing functionality in "time" is too loose and
// incorrectly accepts invalid timestamps as valid.
// Remove these manual checks when "time" checks it for us.
newParseError := func(layout, value, layoutElem, valueElem, message string) error {
return &time.ParseError{Layout: layout, Value: value, LayoutElem: layoutElem, ValueElem: valueElem, Message: message}
}
// The fixed offsets below presumably rely on UnmarshalText having
// accepted b, so that b is at least as long as the prefixes indexed here.
switch {
case a.looseRFC3339:
return nil
case b[len("2006-01-02T")+1] == ':': // hour must be two digits
return newParseError(time.RFC3339, string(b), "15", string(b[len("2006-01-02T"):][:1]), "")
case b[len("2006-01-02T15:04:05")] == ',': // sub-second separator must be a period
return newParseError(time.RFC3339, string(b), ".", ",", "")
case b[len(b)-1] != 'Z':
switch {
case parseDec2(b[len(b)-len("07:00"):]) >= 24: // timezone hour must be in range
return newParseError(time.RFC3339, string(b), "Z07:00", string(b[len(b)-len("Z07:00"):]), ": timezone hour out of range")
case parseDec2(b[len(b)-len("00"):]) >= 60: // timezone minute must be in range
return newParseError(time.RFC3339, string(b), "Z07:00", string(b[len(b)-len("Z07:00"):]), ": timezone minute out of range")
}
}
return nil
case math.MaxUint:
a.tt, err = time.Parse(a.format, string(b))
return err
default:
a.tt, err = parseTimeUnix(b, a.base)
return err
}
}
// appendDurationBase10 appends d to b as a decimal number with an optional
// fractional part. The duration's nanosecond count is divided by pow10,
// a power of ten: the quotient becomes the whole part and the remainder
// the fraction.
func appendDurationBase10(b []byte, d time.Duration, pow10 uint64) []byte {
	b, magnitude := mayAppendDurationSign(b, d)
	whole, frac := bits.Div64(0, magnitude, uint64(pow10))
	return appendFracBase10(strconv.AppendUint(b, whole, 10), frac, pow10)
}
// parseDurationBase10 parses d from a decimal fractional number,
// where pow10 is a power-of-10 used to scale up the number.
//
// All arithmetic is performed unconditionally first; validity and overflow
// are then decided in the final switch. The returned error wraps
// strconv.ErrSyntax for malformed input and strconv.ErrRange for overflow.
func parseDurationBase10(b []byte, pow10 uint64) (time.Duration, error) {
suffix, neg := consumeSign(b) // consume sign
wholeBytes, fracBytes := bytesCutByte(suffix, '.', true) // consume whole and frac fields
whole, okWhole := jsonwire.ParseUint(wholeBytes) // parse whole field; may overflow
frac, okFrac := parseFracBase10(fracBytes, pow10) // parse frac field
hi, lo := bits.Mul64(whole, uint64(pow10)) // overflow if hi > 0
sum, co := bits.Add64(lo, uint64(frac), 0) // overflow if co > 0
switch d := mayApplyDurationSign(sum, neg); { // overflow if neg != (d < 0)
// presumably ParseUint reports overflow as (math.MaxUint64, false), so any
// other failure of the whole field is treated as a syntax error — verify
// against jsonwire.ParseUint.
case (!okWhole && whole != math.MaxUint64) || !okFrac:
return 0, fmt.Errorf("invalid duration %q: %w", b, strconv.ErrSyntax)
case !okWhole || hi > 0 || co > 0 || neg != (d < 0):
return 0, fmt.Errorf("invalid duration %q: %w", b, strconv.ErrRange)
default:
return d, nil
}
}
// mayAppendDurationSign appends a negative sign to b if d is negative.
// It returns the extended buffer together with the magnitude of d
// as an unsigned integer.
func mayAppendDurationSign(b []byte, d time.Duration) ([]byte, uint64) {
	if d >= 0 {
		return b, uint64(d)
	}
	// Negating the minimum duration wraps back to itself, and the unsigned
	// conversion then yields its true magnitude (1<<63).
	return append(b, '-'), uint64(-d)
}
// mayApplyDurationSign converts n to a time.Duration,
// negating the result if neg is set.
func mayApplyDurationSign(n uint64, neg bool) time.Duration {
	d := time.Duration(n)
	if neg {
		d = -d
	}
	return d
}
// appendTimeUnix appends t formatted as a decimal fractional number,
// where pow10 is a power-of-10 used to scale up the number.
//
// The timestamp is split into whole seconds and nanoseconds. A negative
// timestamp is written as a '-' followed by the encoding of its negation.
func appendTimeUnix(b []byte, t time.Time, pow10 uint64) []byte {
sec, nsec := t.Unix(), int64(t.Nanosecond())
if sec < 0 {
b = append(b, '-')
sec, nsec = negateSecNano(sec, nsec)
}
switch {
case pow10 == 1e0: // fast case where units is in seconds
b = strconv.AppendUint(b, uint64(sec), 10)
return appendFracBase10(b, uint64(nsec), 1e9)
case uint64(sec) < 1e9: // intermediate case where units is not seconds, but no overflow
// The whole part is sec*pow10 plus the leading digits of nsec.
b = strconv.AppendUint(b, uint64(sec)*uint64(pow10)+uint64(uint64(nsec)/(1e9/pow10)), 10)
return appendFracBase10(b, (uint64(nsec)*pow10)%1e9, 1e9)
default: // slow case where units is not seconds and overflow would occur
// Write the seconds and the zero-padded leading digits of nsec as
// adjacent digit runs instead of multiplying them together.
b = strconv.AppendUint(b, uint64(sec), 10)
b = appendPaddedBase10(b, uint64(nsec)/(1e9/pow10), pow10)
return appendFracBase10(b, (uint64(nsec)*pow10)%1e9, 1e9)
}
}
// parseTimeUnix parses t formatted as a decimal fractional number,
// where pow10 is a power-of-10 used to scale down the number.
//
// The result is in UTC. The returned error wraps strconv.ErrSyntax for
// malformed input and strconv.ErrRange for values that overflow.
func parseTimeUnix(b []byte, pow10 uint64) (time.Time, error) {
suffix, neg := consumeSign(b) // consume sign
wholeBytes, fracBytes := bytesCutByte(suffix, '.', true) // consume whole and frac fields
whole, okWhole := jsonwire.ParseUint(wholeBytes) // parse whole field; may overflow
frac, okFrac := parseFracBase10(fracBytes, 1e9/pow10) // parse frac field
var sec, nsec int64
switch {
case pow10 == 1e0: // fast case where units is in seconds
sec = int64(whole) // check overflow later after negation
nsec = int64(frac) // cannot overflow
case okWhole: // intermediate case where units is not seconds, but no overflow
sec = int64(whole / pow10) // check overflow later after negation
nsec = int64((whole%pow10)*(1e9/pow10) + frac) // cannot overflow
case !okWhole && whole == math.MaxUint64: // slow case where units is not seconds and overflow occurred
// NOTE(review): width relies on math.Log10 yielding an exact integer
// for the power-of-ten bases used here — confirm on all target platforms.
width := int(math.Log10(float64(pow10))) // compute len(strconv.Itoa(pow10-1))
whole, okWhole = jsonwire.ParseUint(wholeBytes[:len(wholeBytes)-width]) // parse the upper whole field
mid, _ := parsePaddedBase10(wholeBytes[len(wholeBytes)-width:], pow10) // parse the lower whole field
sec = int64(whole) // check overflow later after negation
nsec = int64(mid*(1e9/pow10) + frac) // cannot overflow
}
if neg {
sec, nsec = negateSecNano(sec, nsec)
}
// Validity is decided only after the timestamp has been constructed:
// a sign mismatch between the input and the result indicates overflow.
switch t := time.Unix(sec, nsec).UTC(); {
case (!okWhole && whole != math.MaxUint64) || !okFrac:
return time.Time{}, fmt.Errorf("invalid time %q: %w", b, strconv.ErrSyntax)
case !okWhole || neg != (t.Unix() < 0):
return time.Time{}, fmt.Errorf("invalid time %q: %w", b, strconv.ErrRange)
default:
return t, nil
}
}
// negateSecNano negates a Unix timestamp expressed as whole seconds plus a
// nanosecond offset, where nsec must be within [0, 1e9).
// The returned nanosecond offset is again within [0, 1e9).
func negateSecNano(sec, nsec int64) (int64, int64) {
	const nanosPerSec = 1e9
	// ^sec equals -sec-1, i.e. one second is borrowed and handed to the
	// nanosecond field as the extra 1e9.
	flipped := nanosPerSec - nsec
	// When nsec was zero, flipped is a full second and is carried back.
	return ^sec + flipped/nanosPerSec, flipped % nanosPerSec
}
// appendFracBase10 appends the fraction n/max10 to b as a '.' followed by
// the zero-padded digits of n with trailing zeros removed.
// max10 is a power of ten that is larger than n.
// Nothing is appended when n is zero.
func appendFracBase10(b []byte, n, max10 uint64) []byte {
	if n != 0 {
		b = append(b, '.')
		b = appendPaddedBase10(b, n, max10)
		b = bytes.TrimRight(b, "0")
	}
	return b
}
// parseFracBase10 parses an optional fraction of the form ".digits" as the
// numerator n of n/max10, where max10 is a power of ten larger than n.
// Empty input yields (0, true). Input that does not start with '.' or has
// nothing after it yields (0, false).
func parseFracBase10(b []byte, max10 uint64) (n uint64, ok bool) {
	if len(b) == 0 {
		return 0, true
	}
	if len(b) < len(".0") || b[0] != '.' {
		return 0, false
	}
	return parsePaddedBase10(b[1:], max10)
}
// appendPaddedBase10 appends n to b in decimal, left-padded with zeros to
// log10(max10) digits. max10 is a power of ten that is larger than n.
func appendPaddedBase10(b []byte, n, max10 uint64) []byte {
	lead := max10 / 10
	if n >= lead {
		// n already has the full number of digits.
		return strconv.AppendUint(b, n, 10)
	}
	// Format n+lead so the output has the desired width, then turn the
	// leading '1' contributed by lead back into '0'.
	start := len(b)
	b = strconv.AppendUint(b, n+lead, 10)
	b[start]--
	return b
}
// parsePaddedBase10 parses b as a zero-padded decimal encoding of n,
// where max10 is a power of ten that is larger than n.
// Input shorter than log10(max10) digits is treated as if padded on the
// right with zeros. Extra trailing input is ignored as long as it
// consists only of digits.
// On failure it returns ok == false along with the value accumulated so far.
func parsePaddedBase10(b []byte, max10 uint64) (n uint64, ok bool) {
	for scale := uint64(1); scale < max10; scale *= 10 {
		n *= 10
		if len(b) == 0 {
			continue // truncated input counts as an implicit zero digit
		}
		digit := b[0]
		if digit < '0' || digit > '9' {
			return n, false
		}
		n += uint64(digit - '0')
		b = b[1:]
	}
	if rest := bytes.TrimRight(b, "0123456789"); len(rest) > 0 {
		return n, false // trailing characters are not digits
	}
	return n, true
}
// consumeSign strips an optional leading '-' from b.
// It returns the remaining bytes and whether a sign was present.
func consumeSign(b []byte) ([]byte, bool) {
	neg := len(b) > 0 && b[0] == '-'
	if neg {
		b = b[len("-"):]
	}
	return b, neg
}
// bytesCutByte splits b around the first occurrence of c, much like
// bytes.Cut(b, []byte{c}). When include is set, c is kept as the first
// byte of the second result instead of being dropped.
// If c does not occur in b, it returns b and nil.
func bytesCutByte(b []byte, c byte, include bool) ([]byte, []byte) {
	i := bytes.IndexByte(b, c)
	switch {
	case i < 0:
		return b, nil
	case include:
		return b[:i], b[i:]
	default:
		return b[:i], b[i+1:]
	}
}
// parseDec2 interprets the first two bytes of b as an unsigned two-digit
// decimal number. It returns 0 when b holds fewer than two bytes.
// The result is undefined if those bytes are not decimal digits.
func parseDec2(b []byte) byte {
	if len(b) >= 2 {
		tens, ones := b[0]-'0', b[1]-'0'
		return 10*tens + ones
	}
	return 0
}

View File

@ -0,0 +1,312 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"fmt"
"math"
"testing"
"time"
"encoding/json/internal/jsonwire"
)
// baseLabel renders base for use in test messages.
// When math.Log10 reports an integral exponent for base, the label is
// written as "1e<exponent>"; otherwise base is printed as a plain integer.
func baseLabel(base uint64) string {
	exp := math.Log10(float64(base))
	if exp != float64(int64(exp)) {
		return fmt.Sprint(base)
	}
	return fmt.Sprintf("1e%d", int(exp))
}
// formatDurationTestdata pairs each duration with the base-10 text that
// TestFormatDuration expects when it is marshaled in units of seconds,
// milliseconds, microseconds, and nanoseconds.
// The entries are also used as the seed corpus for FuzzFormatDuration and
// are converted into timestamps for formatTimeTestdata.
var formatDurationTestdata = []struct {
// td is the duration under test.
td time.Duration
// base10Sec is the expected encoding with a base of 1e9 (seconds).
base10Sec string
// base10Milli is the expected encoding with a base of 1e6 (milliseconds).
base10Milli string
// base10Micro is the expected encoding with a base of 1e3 (microseconds).
base10Micro string
// base10Nano is the expected encoding with a base of 1e0 (nanoseconds).
base10Nano string
}{
{math.MaxInt64, "9223372036.854775807", "9223372036854.775807", "9223372036854775.807", "9223372036854775807"},
{1e12 + 1e12, "2000", "2000000", "2000000000", "2000000000000"},
{1e12 + 1e11, "1100", "1100000", "1100000000", "1100000000000"},
{1e12 + 1e10, "1010", "1010000", "1010000000", "1010000000000"},
{1e12 + 1e9, "1001", "1001000", "1001000000", "1001000000000"},
{1e12 + 1e8, "1000.1", "1000100", "1000100000", "1000100000000"},
{1e12 + 1e7, "1000.01", "1000010", "1000010000", "1000010000000"},
{1e12 + 1e6, "1000.001", "1000001", "1000001000", "1000001000000"},
{1e12 + 1e5, "1000.0001", "1000000.1", "1000000100", "1000000100000"},
{1e12 + 1e4, "1000.00001", "1000000.01", "1000000010", "1000000010000"},
{1e12 + 1e3, "1000.000001", "1000000.001", "1000000001", "1000000001000"},
{1e12 + 1e2, "1000.0000001", "1000000.0001", "1000000000.1", "1000000000100"},
{1e12 + 1e1, "1000.00000001", "1000000.00001", "1000000000.01", "1000000000010"},
{1e12 + 1e0, "1000.000000001", "1000000.000001", "1000000000.001", "1000000000001"},
{+(1e9 + 1), "1.000000001", "1000.000001", "1000000.001", "1000000001"},
{+(1e9), "1", "1000", "1000000", "1000000000"},
{+(1e9 - 1), "0.999999999", "999.999999", "999999.999", "999999999"},
{+100000000, "0.1", "100", "100000", "100000000"},
{+120000000, "0.12", "120", "120000", "120000000"},
{+123000000, "0.123", "123", "123000", "123000000"},
{+123400000, "0.1234", "123.4", "123400", "123400000"},
{+123450000, "0.12345", "123.45", "123450", "123450000"},
{+123456000, "0.123456", "123.456", "123456", "123456000"},
{+123456700, "0.1234567", "123.4567", "123456.7", "123456700"},
{+123456780, "0.12345678", "123.45678", "123456.78", "123456780"},
{+123456789, "0.123456789", "123.456789", "123456.789", "123456789"},
{+12345678, "0.012345678", "12.345678", "12345.678", "12345678"},
{+1234567, "0.001234567", "1.234567", "1234.567", "1234567"},
{+123456, "0.000123456", "0.123456", "123.456", "123456"},
{+12345, "0.000012345", "0.012345", "12.345", "12345"},
{+1234, "0.000001234", "0.001234", "1.234", "1234"},
{+123, "0.000000123", "0.000123", "0.123", "123"},
{+12, "0.000000012", "0.000012", "0.012", "12"},
{+1, "0.000000001", "0.000001", "0.001", "1"},
{0, "0", "0", "0", "0"},
{-1, "-0.000000001", "-0.000001", "-0.001", "-1"},
{-12, "-0.000000012", "-0.000012", "-0.012", "-12"},
{-123, "-0.000000123", "-0.000123", "-0.123", "-123"},
{-1234, "-0.000001234", "-0.001234", "-1.234", "-1234"},
{-12345, "-0.000012345", "-0.012345", "-12.345", "-12345"},
{-123456, "-0.000123456", "-0.123456", "-123.456", "-123456"},
{-1234567, "-0.001234567", "-1.234567", "-1234.567", "-1234567"},
{-12345678, "-0.012345678", "-12.345678", "-12345.678", "-12345678"},
{-123456789, "-0.123456789", "-123.456789", "-123456.789", "-123456789"},
{-123456780, "-0.12345678", "-123.45678", "-123456.78", "-123456780"},
{-123456700, "-0.1234567", "-123.4567", "-123456.7", "-123456700"},
{-123456000, "-0.123456", "-123.456", "-123456", "-123456000"},
{-123450000, "-0.12345", "-123.45", "-123450", "-123450000"},
{-123400000, "-0.1234", "-123.4", "-123400", "-123400000"},
{-123000000, "-0.123", "-123", "-123000", "-123000000"},
{-120000000, "-0.12", "-120", "-120000", "-120000000"},
{-100000000, "-0.1", "-100", "-100000", "-100000000"},
{-(1e9 - 1), "-0.999999999", "-999.999999", "-999999.999", "-999999999"},
{-(1e9), "-1", "-1000", "-1000000", "-1000000000"},
{-(1e9 + 1), "-1.000000001", "-1000.000001", "-1000000.001", "-1000000001"},
{math.MinInt64, "-9223372036.854775808", "-9223372036854.775808", "-9223372036854775.808", "-9223372036854775808"},
}
// TestFormatDuration checks that every entry of formatDurationTestdata
// marshals to the expected text for each base, and that unmarshaling the
// produced text yields the original duration again.
func TestFormatDuration(t *testing.T) {
	var buf []byte // reused across all checks to avoid reallocating
	roundTrip := func(td time.Duration, want string, base uint64) {
		arshaler := durationArshaler{td, base}
		buf, _ = arshaler.appendMarshal(buf[:0])
		if got := string(buf); got != want {
			t.Errorf("formatDuration(%d, %s) = %q, want %q", td, baseLabel(base), got, want)
		}
		if err := arshaler.unmarshal(buf); err != nil {
			t.Errorf("parseDuration(%q, %s) error: %v", buf, baseLabel(base), err)
		}
		if arshaler.td != td {
			t.Errorf("parseDuration(%q, %s) = %d, want %d", buf, baseLabel(base), arshaler.td, td)
		}
	}
	for _, tt := range formatDurationTestdata {
		expectations := []struct {
			want string
			base uint64
		}{
			{tt.base10Sec, 1e9},
			{tt.base10Milli, 1e6},
			{tt.base10Micro, 1e3},
			{tt.base10Nano, 1e0},
		}
		for _, exp := range expectations {
			roundTrip(tt.td, exp.want, exp.base)
		}
	}
}
// parseDurationTestdata lists inputs for TestParseDuration together with
// the duration and error outcome expected from unmarshaling each of them.
// The inputs are also used as the seed corpus for FuzzParseDuration.
var parseDurationTestdata = []struct {
// in is the text passed to unmarshal.
in string
// base is the base assigned to the durationArshaler.
base uint64
// want is the duration expected after unmarshaling.
want time.Duration
// wantErr reports whether unmarshal is expected to return an error.
wantErr bool
}{
{"0", 1e0, 0, false},
{"0.", 1e0, 0, true},
{"0.0", 1e0, 0, false},
{"0.00", 1e0, 0, false},
{"00.0", 1e0, 0, true},
{"+0", 1e0, 0, true},
{"1e0", 1e0, 0, true},
{"1.000000000x", 1e9, 0, true},
{"1.000000x", 1e6, 0, true},
{"1.000x", 1e3, 0, true},
{"1.x", 1e0, 0, true},
{"1.0000000009", 1e9, +time.Second, false},
{"1.0000009", 1e6, +time.Millisecond, false},
{"1.0009", 1e3, +time.Microsecond, false},
{"1.9", 1e0, +time.Nanosecond, false},
{"-9223372036854775809", 1e0, 0, true},
{"9223372036854775.808", 1e3, 0, true},
{"-9223372036854.775809", 1e6, 0, true},
{"9223372036.854775808", 1e9, 0, true},
{"-1.9", 1e0, -time.Nanosecond, false},
{"-1.0009", 1e3, -time.Microsecond, false},
{"-1.0000009", 1e6, -time.Millisecond, false},
{"-1.0000000009", 1e9, -time.Second, false},
}
// TestParseDuration unmarshals every input of parseDurationTestdata and
// reports the first mismatch per entry: a wrong duration takes precedence
// over an unexpected error outcome.
func TestParseDuration(t *testing.T) {
	for _, tt := range parseDurationTestdata {
		arshaler := durationArshaler{base: tt.base}
		err := arshaler.unmarshal([]byte(tt.in))
		if arshaler.td != tt.want {
			t.Errorf("parseDuration(%q, %s) = %v, want %v", tt.in, baseLabel(tt.base), arshaler.td, tt.want)
		} else if err == nil && tt.wantErr {
			t.Errorf("parseDuration(%q, %s) error is nil, want non-nil", tt.in, baseLabel(tt.base))
		} else if err != nil && !tt.wantErr {
			t.Errorf("parseDuration(%q, %s) error is non-nil, want nil", tt.in, baseLabel(tt.base))
		}
	}
}
// FuzzFormatDuration checks that marshaling an arbitrary duration and then
// unmarshaling the result restores the same duration for every base.
// The corpus is seeded from formatDurationTestdata.
func FuzzFormatDuration(f *testing.F) {
	for _, seed := range formatDurationTestdata {
		f.Add(int64(seed.td))
	}
	bases := [...]uint64{1e0, 1e3, 1e6, 1e9}
	f.Fuzz(func(t *testing.T, want int64) {
		wantDur := time.Duration(want)
		var buf []byte
		for _, base := range bases {
			arshaler := durationArshaler{td: wantDur, base: base}
			buf, _ = arshaler.appendMarshal(buf[:0])
			if err := arshaler.unmarshal(buf); err != nil {
				t.Fatalf("parseDuration(%q, %s) error: %v", buf, baseLabel(base), err)
			}
			if arshaler.td != wantDur {
				t.Fatalf("parseDuration(%q, %s) = %v, want %v", buf, baseLabel(base), arshaler.td, wantDur)
			}
		}
	})
}
// FuzzParseDuration feeds arbitrary bytes to durationArshaler.unmarshal.
// For every base other than 60, any input that unmarshals without error
// must also be consumed in full by jsonwire.ConsumeNumber.
// For base 60 the unmarshal call is only exercised, not validated.
func FuzzParseDuration(f *testing.F) {
	for _, seed := range parseDurationTestdata {
		f.Add([]byte(seed.in))
	}
	bases := [...]uint64{1e0, 1e3, 1e6, 1e9, 60}
	f.Fuzz(func(t *testing.T, in []byte) {
		for _, base := range bases {
			arshaler := durationArshaler{base: base}
			if err := arshaler.unmarshal(in); err != nil || base == 60 {
				continue
			}
			n, err := jsonwire.ConsumeNumber(in)
			if err != nil || n != len(in) {
				t.Fatalf("parseDuration(%q) error is nil for invalid JSON number", in)
			}
		}
	})
}
// formatTimeTestdataEntry pairs a timestamp with the text that
// TestFormatTime expects from appendTimeUnix for each supported unit.
type formatTimeTestdataEntry struct {
// ts is the timestamp under test.
ts time.Time
// unixSec is the expected output with a pow10 of 1e0 (seconds).
unixSec string
// unixMilli is the expected output with a pow10 of 1e3 (milliseconds).
unixMilli string
// unixMicro is the expected output with a pow10 of 1e6 (microseconds).
unixMicro string
// unixNano is the expected output with a pow10 of 1e9 (nanoseconds).
unixNano string
}
// formatTimeTestdata holds the cases for TestFormatTime and the seed corpus
// for FuzzFormatTime. It starts with timestamps at the extremes of the
// Unix seconds range and then appends every entry of
// formatDurationTestdata, interpreting each duration as nanoseconds
// since the Unix epoch.
var formatTimeTestdata = func() []formatTimeTestdataEntry {
	entries := []formatTimeTestdataEntry{
		{time.Unix(math.MaxInt64/int64(1e0), 1e9-1).UTC(), "9223372036854775807.999999999", "9223372036854775807999.999999", "9223372036854775807999999.999", "9223372036854775807999999999"},
		{time.Unix(math.MaxInt64/int64(1e1), 1e9-1).UTC(), "922337203685477580.999999999", "922337203685477580999.999999", "922337203685477580999999.999", "922337203685477580999999999"},
		{time.Unix(math.MaxInt64/int64(1e2), 1e9-1).UTC(), "92233720368547758.999999999", "92233720368547758999.999999", "92233720368547758999999.999", "92233720368547758999999999"},
		{time.Unix(math.MinInt64, 1).UTC(), "-9223372036854775807.999999999", "-9223372036854775807999.999999", "-9223372036854775807999999.999", "-9223372036854775807999999999"},
		{time.Unix(math.MinInt64, 0).UTC(), "-9223372036854775808", "-9223372036854775808000", "-9223372036854775808000000", "-9223372036854775808000000000"},
	}
	for _, d := range formatDurationTestdata {
		entries = append(entries, formatTimeTestdataEntry{
			ts:        time.Unix(0, int64(d.td)).UTC(),
			unixSec:   d.base10Sec,
			unixMilli: d.base10Milli,
			unixMicro: d.base10Micro,
			unixNano:  d.base10Nano,
		})
	}
	return entries
}()
// TestFormatTime checks that every entry of formatTimeTestdata is formatted
// by appendTimeUnix as expected for each unit, and that parseTimeUnix turns
// the produced text back into an equal timestamp.
func TestFormatTime(t *testing.T) {
	var buf []byte // reused across all checks to avoid reallocating
	roundTrip := func(ts time.Time, want string, pow10 uint64) {
		buf = appendTimeUnix(buf[:0], ts, pow10)
		if got := string(buf); got != want {
			t.Errorf("formatTime(time.Unix(%d, %d), %s) = %q, want %q", ts.Unix(), ts.Nanosecond(), baseLabel(pow10), got, want)
		}
		parsed, err := parseTimeUnix(buf, pow10)
		if err != nil {
			t.Errorf("parseTime(%q, %s) error: %v", buf, baseLabel(pow10), err)
		}
		if !parsed.Equal(ts) {
			t.Errorf("parseTime(%q, %s) = time.Unix(%d, %d), want time.Unix(%d, %d)", buf, baseLabel(pow10), parsed.Unix(), parsed.Nanosecond(), ts.Unix(), ts.Nanosecond())
		}
	}
	for _, tt := range formatTimeTestdata {
		expectations := []struct {
			want  string
			pow10 uint64
		}{
			{tt.unixSec, 1e0},
			{tt.unixMilli, 1e3},
			{tt.unixMicro, 1e6},
			{tt.unixNano, 1e9},
		}
		for _, exp := range expectations {
			roundTrip(tt.ts, exp.want, exp.pow10)
		}
	}
}
// parseTimeTestdata lists inputs for TestParseTime together with the
// timestamp and error outcome expected from unmarshaling each of them.
// The inputs are also used as the seed corpus for FuzzParseTime.
var parseTimeTestdata = []struct {
// in is the text passed to unmarshal.
in string
// base is the base assigned to the timeArshaler.
base uint64
// want is the timestamp expected after unmarshaling.
want time.Time
// wantErr reports whether unmarshal is expected to return an error.
wantErr bool
}{
{"0", 1e0, time.Unix(0, 0).UTC(), false},
{"0.", 1e0, time.Time{}, true},
{"0.0", 1e0, time.Unix(0, 0).UTC(), false},
{"0.00", 1e0, time.Unix(0, 0).UTC(), false},
{"00.0", 1e0, time.Time{}, true},
{"+0", 1e0, time.Time{}, true},
{"1e0", 1e0, time.Time{}, true},
{"1234567890123456789012345678901234567890", 1e0, time.Time{}, true},
{"9223372036854775808000.000000", 1e3, time.Time{}, true},
{"9223372036854775807999999.9999", 1e6, time.Unix(math.MaxInt64, 1e9-1).UTC(), false},
{"9223372036854775807999999999.9", 1e9, time.Unix(math.MaxInt64, 1e9-1).UTC(), false},
{"9223372036854775807.999999999x", 1e0, time.Time{}, true},
{"9223372036854775807000000000", 1e9, time.Unix(math.MaxInt64, 0).UTC(), false},
{"-9223372036854775808", 1e0, time.Unix(math.MinInt64, 0).UTC(), false},
{"-9223372036854775808000.000001", 1e3, time.Time{}, true},
{"-9223372036854775808000000.0001", 1e6, time.Unix(math.MinInt64, 0).UTC(), false},
{"-9223372036854775808000000000.x", 1e9, time.Time{}, true},
{"-1234567890123456789012345678901234567890", 1e9, time.Time{}, true},
}
// TestParseTime unmarshals every input of parseTimeTestdata and reports
// the first mismatch per entry: a wrong timestamp takes precedence over
// an unexpected error outcome.
func TestParseTime(t *testing.T) {
	for _, tt := range parseTimeTestdata {
		arshaler := timeArshaler{base: tt.base}
		err := arshaler.unmarshal([]byte(tt.in))
		if arshaler.tt != tt.want {
			t.Errorf("parseTime(%q, %s) = time.Unix(%d, %d), want time.Unix(%d, %d)", tt.in, baseLabel(tt.base), arshaler.tt.Unix(), arshaler.tt.Nanosecond(), tt.want.Unix(), tt.want.Nanosecond())
		} else if err == nil && tt.wantErr {
			t.Errorf("parseTime(%q, %s) = (time.Unix(%d, %d), nil), want non-nil error", tt.in, baseLabel(tt.base), arshaler.tt.Unix(), arshaler.tt.Nanosecond())
		} else if err != nil && !tt.wantErr {
			t.Errorf("parseTime(%q, %s) error is non-nil, want nil", tt.in, baseLabel(tt.base))
		}
	}
}
// FuzzFormatTime checks that marshaling an arbitrary timestamp and then
// unmarshaling the result restores the same timestamp for every base.
// The fuzzed nanosecond argument is reduced modulo 1e9 (after conversion
// to uint64) before it is passed to time.Unix.
func FuzzFormatTime(f *testing.F) {
	for _, seed := range formatTimeTestdata {
		f.Add(seed.ts.Unix(), int64(seed.ts.Nanosecond()))
	}
	bases := [...]uint64{1e0, 1e3, 1e6, 1e9}
	f.Fuzz(func(t *testing.T, wantSec, wantNano int64) {
		nsec := int64(uint64(wantNano) % 1e9)
		want := time.Unix(wantSec, nsec).UTC()
		var buf []byte
		for _, base := range bases {
			arshaler := timeArshaler{tt: want, base: base}
			buf, _ = arshaler.appendMarshal(buf[:0])
			if err := arshaler.unmarshal(buf); err != nil {
				t.Fatalf("parseTime(%q, %s) error: %v", buf, baseLabel(base), err)
			}
			if arshaler.tt != want {
				t.Fatalf("parseTime(%q, %s) = time.Unix(%d, %d), want time.Unix(%d, %d)", buf, baseLabel(base), arshaler.tt.Unix(), arshaler.tt.Nanosecond(), want.Unix(), want.Nanosecond())
			}
		}
	})
}
// FuzzParseTime feeds arbitrary bytes to timeArshaler.unmarshal.
// Any input that unmarshals without error must also be consumed in full
// by jsonwire.ConsumeNumber.
func FuzzParseTime(f *testing.F) {
	for _, seed := range parseTimeTestdata {
		f.Add([]byte(seed.in))
	}
	bases := [...]uint64{1e0, 1e3, 1e6, 1e9}
	f.Fuzz(func(t *testing.T, in []byte) {
		for _, base := range bases {
			arshaler := timeArshaler{base: base}
			if err := arshaler.unmarshal(in); err != nil {
				continue
			}
			n, err := jsonwire.ConsumeNumber(in)
			if err != nil || n != len(in) {
				t.Fatalf("parseTime(%q) error is nil for invalid JSON number", in)
			}
		}
	})
}

View File

@ -0,0 +1,647 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json_test
import (
"bytes"
"cmp"
"fmt"
"io"
"os"
"path"
"reflect"
"strings"
"testing"
"testing/iotest"
"time"
jsonv1 "encoding/json"
jsonv1in2 "encoding/json"
"encoding/json/internal/jsontest"
"encoding/json/jsontext"
jsonv2 "encoding/json/v2"
)
// benchVersion is the version to benchmark (either "v1", "v1in2", or "v2").
// It is read from the BENCHMARK_VERSION environment variable and
// falls back to "v2" when that variable is unset or empty.
var benchVersion = cmp.Or(os.Getenv("BENCHMARK_VERSION"), "v2")
// jsonFuncs holds the marshal, unmarshal, encode, and decode operations of
// the implementation selected by benchVersion.
// Operations that are not assigned for the selected version (encodeTokens
// for "v1" and "v1in2") are left nil.
// An unrecognized benchVersion panics while this variable is initialized.
var jsonFuncs = func() (funcs struct {
	marshal      func(any) ([]byte, error)
	unmarshal    func([]byte, any) error
	encodeValue  func(w io.Writer, b []byte) error
	encodeTokens func(w io.Writer, toks []jsontext.Token) error
	decodeValue  func(r io.Reader) error
	decodeTokens func(r io.Reader) error
}) {
	// eofAsNil treats io.EOF as the normal end of a token stream.
	eofAsNil := func(err error) error {
		if err != io.EOF {
			return err
		}
		return nil
	}

	switch benchVersion {
	case "v1":
		funcs.marshal = jsonv1.Marshal
		funcs.unmarshal = jsonv1.Unmarshal
		funcs.encodeValue = func(w io.Writer, b []byte) error {
			enc := jsonv1.NewEncoder(w)
			return enc.Encode(jsonv1.RawMessage(b))
		}
		funcs.decodeValue = func(r io.Reader) error {
			var raw jsonv1.RawMessage
			dec := jsonv1.NewDecoder(r)
			return dec.Decode(&raw)
		}
		funcs.decodeTokens = func(r io.Reader) error {
			dec := jsonv1.NewDecoder(r)
			for {
				_, err := dec.Token()
				if err != nil {
					return eofAsNil(err)
				}
			}
		}
	case "v1in2":
		funcs.marshal = jsonv1in2.Marshal
		funcs.unmarshal = jsonv1in2.Unmarshal
		funcs.encodeValue = func(w io.Writer, b []byte) error {
			enc := jsonv1in2.NewEncoder(w)
			return enc.Encode(jsonv1in2.RawMessage(b))
		}
		funcs.decodeValue = func(r io.Reader) error {
			var raw jsonv1in2.RawMessage
			dec := jsonv1in2.NewDecoder(r)
			return dec.Decode(&raw)
		}
		funcs.decodeTokens = func(r io.Reader) error {
			dec := jsonv1in2.NewDecoder(r)
			for {
				_, err := dec.Token()
				if err != nil {
					return eofAsNil(err)
				}
			}
		}
	case "v2":
		funcs.marshal = func(v any) ([]byte, error) { return jsonv2.Marshal(v) }
		funcs.unmarshal = func(b []byte, v any) error { return jsonv2.Unmarshal(b, v) }
		funcs.encodeValue = func(w io.Writer, b []byte) error {
			enc := jsontext.NewEncoder(w)
			return enc.WriteValue(b)
		}
		funcs.encodeTokens = func(w io.Writer, toks []jsontext.Token) error {
			enc := jsontext.NewEncoder(w)
			for _, tok := range toks {
				err := enc.WriteToken(tok)
				if err != nil {
					return err
				}
			}
			return nil
		}
		funcs.decodeValue = func(r io.Reader) error {
			dec := jsontext.NewDecoder(r)
			_, err := dec.ReadValue()
			return err
		}
		funcs.decodeTokens = func(r io.Reader) error {
			dec := jsontext.NewDecoder(r)
			for {
				_, err := dec.ReadToken()
				if err != nil {
					return eofAsNil(err)
				}
			}
		}
	default:
		panic("unknown version: " + benchVersion)
	}
	return funcs
}()
// bytesBuffer is identical to bytes.Buffer,
// but a different type to avoid any optimizations for bytes.Buffer.
// It embeds a *bytes.Buffer and is used by runEncode and runDecode
// for the "Streaming" mode.
type bytesBuffer struct{ *bytes.Buffer }
// addr returns a pointer to a fresh copy of v.
func addr[T any](v T) *T {
	p := new(T)
	*p = v
	return p
}
// len64 reports the length of in, in bytes, as an int64.
func len64[Bytes ~[]byte | ~string](in Bytes) int64 {
	n := len(in)
	return int64(n)
}
// arshalTestdata lists the cases shared by runMarshal and runUnmarshal.
// Each case pairs a JSON text with the Go value it corresponds to.
var arshalTestdata = []struct {
// name is the name of the subtest or sub-benchmark.
name string
// raw is the JSON text: the input for unmarshaling and
// the expected output of marshaling val.
raw []byte
// val is the Go value: the input for marshaling and
// the expected result of unmarshaling raw.
val any
// new returns a fresh value to unmarshal raw into.
new func() any
// skipV1 marks cases that are skipped when benchVersion starts with "v1".
skipV1 bool
}{{
name: "Bool",
raw: []byte("true"),
val: addr(true),
new: func() any { return new(bool) },
}, {
name: "String",
raw: []byte(`"hello, world!"`),
val: addr("hello, world!"),
new: func() any { return new(string) },
}, {
name: "Int",
raw: []byte("-1234"),
val: addr(int64(-1234)),
new: func() any { return new(int64) },
}, {
name: "Uint",
raw: []byte("1234"),
val: addr(uint64(1234)),
new: func() any { return new(uint64) },
}, {
name: "Float",
raw: []byte("12.34"),
val: addr(float64(12.34)),
new: func() any { return new(float64) },
}, {
name: "Map/ManyEmpty",
raw: []byte(`[{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}]`),
val: addr(func() (out []map[string]string) {
for range 100 {
out = append(out, map[string]string{})
}
return out
}()),
new: func() any { return new([]map[string]string) },
}, {
name: "Map/OneLarge",
raw: []byte(`{"A":"A","B":"B","C":"C","D":"D","E":"E","F":"F","G":"G","H":"H","I":"I","J":"J","K":"K","L":"L","M":"M","N":"N","O":"O","P":"P","Q":"Q","R":"R","S":"S","T":"T","U":"U","V":"V","W":"W","X":"X","Y":"Y","Z":"Z"}`),
val: addr(map[string]string{"A": "A", "B": "B", "C": "C", "D": "D", "E": "E", "F": "F", "G": "G", "H": "H", "I": "I", "J": "J", "K": "K", "L": "L", "M": "M", "N": "N", "O": "O", "P": "P", "Q": "Q", "R": "R", "S": "S", "T": "T", "U": "U", "V": "V", "W": "W", "X": "X", "Y": "Y", "Z": "Z"}),
new: func() any { return new(map[string]string) },
}, {
name: "Map/ManySmall",
raw: []byte(`{"A":{"K":"V"},"B":{"K":"V"},"C":{"K":"V"},"D":{"K":"V"},"E":{"K":"V"},"F":{"K":"V"},"G":{"K":"V"},"H":{"K":"V"},"I":{"K":"V"},"J":{"K":"V"},"K":{"K":"V"},"L":{"K":"V"},"M":{"K":"V"},"N":{"K":"V"},"O":{"K":"V"},"P":{"K":"V"},"Q":{"K":"V"},"R":{"K":"V"},"S":{"K":"V"},"T":{"K":"V"},"U":{"K":"V"},"V":{"K":"V"},"W":{"K":"V"},"X":{"K":"V"},"Y":{"K":"V"},"Z":{"K":"V"}}`),
val: addr(map[string]map[string]string{"A": {"K": "V"}, "B": {"K": "V"}, "C": {"K": "V"}, "D": {"K": "V"}, "E": {"K": "V"}, "F": {"K": "V"}, "G": {"K": "V"}, "H": {"K": "V"}, "I": {"K": "V"}, "J": {"K": "V"}, "K": {"K": "V"}, "L": {"K": "V"}, "M": {"K": "V"}, "N": {"K": "V"}, "O": {"K": "V"}, "P": {"K": "V"}, "Q": {"K": "V"}, "R": {"K": "V"}, "S": {"K": "V"}, "T": {"K": "V"}, "U": {"K": "V"}, "V": {"K": "V"}, "W": {"K": "V"}, "X": {"K": "V"}, "Y": {"K": "V"}, "Z": {"K": "V"}}),
new: func() any { return new(map[string]map[string]string) },
}, {
name: "Struct/ManyEmpty",
raw: []byte(`[{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}]`),
val: addr(make([]struct{}, 100)),
new: func() any {
return new([]struct{})
},
}, {
name: "Struct/OneLarge",
raw: []byte(`{"A":"A","B":"B","C":"C","D":"D","E":"E","F":"F","G":"G","H":"H","I":"I","J":"J","K":"K","L":"L","M":"M","N":"N","O":"O","P":"P","Q":"Q","R":"R","S":"S","T":"T","U":"U","V":"V","W":"W","X":"X","Y":"Y","Z":"Z"}`),
val: addr(struct{ A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z string }{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"}),
new: func() any {
return new(struct{ A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z string })
},
}, {
name: "Struct/ManySmall",
raw: []byte(`{"A":{"K":"V"},"B":{"K":"V"},"C":{"K":"V"},"D":{"K":"V"},"E":{"K":"V"},"F":{"K":"V"},"G":{"K":"V"},"H":{"K":"V"},"I":{"K":"V"},"J":{"K":"V"},"K":{"K":"V"},"L":{"K":"V"},"M":{"K":"V"},"N":{"K":"V"},"O":{"K":"V"},"P":{"K":"V"},"Q":{"K":"V"},"R":{"K":"V"},"S":{"K":"V"},"T":{"K":"V"},"U":{"K":"V"},"V":{"K":"V"},"W":{"K":"V"},"X":{"K":"V"},"Y":{"K":"V"},"Z":{"K":"V"}}`),
val: func() any {
V := struct{ K string }{"V"}
return addr(struct{ A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z struct{ K string } }{
V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V,
})
}(),
new: func() any {
return new(struct{ A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z struct{ K string } })
},
}, {
name: "Slice/ManyEmpty",
raw: []byte(`[[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]`),
val: addr(func() (out [][]string) {
for range 100 {
out = append(out, []string{})
}
return out
}()),
new: func() any { return new([][]string) },
}, {
name: "Slice/OneLarge",
raw: []byte(`["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z"]`),
val: addr([]string{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"}),
new: func() any { return new([]string) },
}, {
name: "Slice/ManySmall",
raw: []byte(`[["A"],["B"],["C"],["D"],["E"],["F"],["G"],["H"],["I"],["J"],["K"],["L"],["M"],["N"],["O"],["P"],["Q"],["R"],["S"],["T"],["U"],["V"],["W"],["X"],["Y"],["Z"]]`),
val: addr([][]string{{"A"}, {"B"}, {"C"}, {"D"}, {"E"}, {"F"}, {"G"}, {"H"}, {"I"}, {"J"}, {"K"}, {"L"}, {"M"}, {"N"}, {"O"}, {"P"}, {"Q"}, {"R"}, {"S"}, {"T"}, {"U"}, {"V"}, {"W"}, {"X"}, {"Y"}, {"Z"}}),
new: func() any { return new([][]string) },
}, {
name: "Array/OneLarge",
raw: []byte(`["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z"]`),
val: addr([26]string{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"}),
new: func() any { return new([26]string) },
}, {
name: "Array/ManySmall",
raw: []byte(`[["A"],["B"],["C"],["D"],["E"],["F"],["G"],["H"],["I"],["J"],["K"],["L"],["M"],["N"],["O"],["P"],["Q"],["R"],["S"],["T"],["U"],["V"],["W"],["X"],["Y"],["Z"]]`),
val: addr([26][1]string{{"A"}, {"B"}, {"C"}, {"D"}, {"E"}, {"F"}, {"G"}, {"H"}, {"I"}, {"J"}, {"K"}, {"L"}, {"M"}, {"N"}, {"O"}, {"P"}, {"Q"}, {"R"}, {"S"}, {"T"}, {"U"}, {"V"}, {"W"}, {"X"}, {"Y"}, {"Z"}}),
new: func() any { return new([26][1]string) },
}, {
name: "Bytes/Slice",
raw: []byte(`"47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="`),
val: addr([]byte{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55}),
new: func() any { return new([]byte) },
}, {
name: "Bytes/Array",
raw: []byte(`"47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="`),
val: addr([32]byte{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55}),
new: func() any { return new([32]byte) },
skipV1: true,
}, {
name: "Pointer",
raw: []byte("true"),
val: addr(addr(addr(addr(addr(addr(addr(addr(addr(addr(addr(true))))))))))),
new: func() any { return new(**********bool) },
}, {
name: "TextArshal",
raw: []byte(`"method"`),
val: new(textArshaler),
new: func() any { return new(textArshaler) },
}, {
name: "JSONArshalV1",
raw: []byte(`"method"`),
val: new(jsonArshalerV1),
new: func() any { return new(jsonArshalerV1) },
}, {
name: "JSONArshalV2",
raw: []byte(`"method"`),
val: new(jsonArshalerV2),
new: func() any { return new(jsonArshalerV2) },
skipV1: true,
}, {
name: "Duration",
raw: []byte(`"1h1m1s"`),
val: addr(time.Hour + time.Minute + time.Second),
new: func() any { return new(time.Duration) },
skipV1: true,
}, {
name: "Time",
raw: []byte(`"2006-01-02T22:04:05Z"`),
val: addr(time.Unix(1136239445, 0).UTC()),
new: func() any { return new(time.Time) },
}}
// textArshaler is a fixture type whose text form is always "method".
type textArshaler struct{ _ [4]int }

// MarshalText always produces the text "method".
func (textArshaler) MarshalText() ([]byte, error) {
	text := []byte("method")
	return text, nil
}

// UnmarshalText accepts only the text "method" and
// reports an error for any other input.
func (*textArshaler) UnmarshalText(b []byte) error {
	const want = "method"
	if string(b) == want {
		return nil
	}
	return fmt.Errorf("UnmarshalText: got %q, want %q", b, want)
}
// jsonArshalerV1 is a fixture type with MarshalJSON and UnmarshalJSON
// methods whose JSON form is always the string "method".
type jsonArshalerV1 struct{ _ [4]int }

// MarshalJSON always produces the JSON string "method".
func (jsonArshalerV1) MarshalJSON() ([]byte, error) {
	encoded := []byte(`"method"`)
	return encoded, nil
}

// UnmarshalJSON accepts only the JSON string "method" and
// reports an error for any other input.
func (*jsonArshalerV1) UnmarshalJSON(b []byte) error {
	const want = `"method"`
	if string(b) == want {
		return nil
	}
	return fmt.Errorf("UnmarshalJSON: got %q, want %q", b, want)
}
// jsonArshalerV2 is a fixture type with MarshalJSONTo and UnmarshalJSONFrom
// methods whose JSON form is always the string "method".
type jsonArshalerV2 struct{ _ [4]int }

// MarshalJSONTo writes the JSON string "method" to enc.
func (jsonArshalerV2) MarshalJSONTo(enc *jsontext.Encoder) error {
	return enc.WriteToken(jsontext.String("method"))
}

// UnmarshalJSONFrom reads the next value from dec and accepts it only if
// it is the JSON string "method".
// An error from the decoder is returned as is, so that it is not hidden
// behind a misleading value-mismatch error.
func (*jsonArshalerV2) UnmarshalJSONFrom(dec *jsontext.Decoder) error {
	b, err := dec.ReadValue()
	if err != nil {
		return err
	}
	if string(b) != `"method"` {
		return fmt.Errorf("UnmarshalJSONFrom: got %q, want %q", b, `"method"`)
	}
	return nil
}
// TestBenchmarkUnmarshal runs the unmarshal cases once as tests,
// additionally verifying the unmarshaled values.
func TestBenchmarkUnmarshal(t *testing.T) { runUnmarshal(t) }

// BenchmarkUnmarshal runs the unmarshal cases as benchmarks.
func BenchmarkUnmarshal(b *testing.B) { runUnmarshal(b) }

// runUnmarshal unmarshals the raw JSON of every arshalTestdata entry using
// the implementation selected by benchVersion.
// When tb is a *testing.T, the result is also compared against the
// expected value of the entry.
func runUnmarshal(tb testing.TB) {
	for _, tt := range arshalTestdata {
		if tt.skipV1 && strings.HasPrefix(benchVersion, "v1") {
			runTestOrBench(tb, tt.name, 0, func(tb testing.TB) { tb.Skip("not supported in v1") })
			// Skip only this entry; the entries after it must still run.
			continue
		}

		// Setup the unmarshal operation.
		var val any
		run := func(tb testing.TB) {
			val = tt.new()
			if err := jsonFuncs.unmarshal(tt.raw, val); err != nil {
				tb.Fatalf("Unmarshal error: %v", err)
			}
		}

		// Verify the results.
		if _, ok := tb.(*testing.T); ok {
			run0 := run
			run = func(tb testing.TB) {
				run0(tb)
				if !reflect.DeepEqual(val, tt.val) {
					tb.Fatalf("Unmarshal output mismatch:\ngot %v\nwant %v", val, tt.val)
				}
			}
		}

		runTestOrBench(tb, tt.name, len64(tt.raw), run)
	}
}
// TestBenchmarkMarshal runs the marshal cases once as tests,
// additionally verifying the marshaled output.
func TestBenchmarkMarshal(t *testing.T) { runMarshal(t) }

// BenchmarkMarshal runs the marshal cases as benchmarks.
func BenchmarkMarshal(b *testing.B) { runMarshal(b) }

// runMarshal marshals the value of every arshalTestdata entry using the
// implementation selected by benchVersion.
// When tb is a *testing.T, the output is also compared against the
// expected raw JSON of the entry.
func runMarshal(tb testing.TB) {
	for _, tt := range arshalTestdata {
		if tt.skipV1 && strings.HasPrefix(benchVersion, "v1") {
			runTestOrBench(tb, tt.name, 0, func(tb testing.TB) { tb.Skip("not supported in v1") })
			// Skip only this entry; the entries after it must still run.
			continue
		}

		// Setup the marshal operation.
		var raw []byte
		run := func(tb testing.TB) {
			var err error
			raw, err = jsonFuncs.marshal(tt.val)
			if err != nil {
				tb.Fatalf("Marshal error: %v", err)
			}
		}

		// Verify the results.
		if _, ok := tb.(*testing.T); ok {
			run0 := run
			run = func(tb testing.TB) {
				run0(tb)
				if !bytes.Equal(raw, tt.raw) {
					// Map marshaling in v2 is non-deterministic,
					// so for "Map/" cases accept any output that contains
					// the same bytes as the expected output in any order.
					byteHistogram := func(b []byte) (h [256]int) {
						for _, c := range b {
							h[c]++
						}
						return h
					}
					if !(strings.HasPrefix(tt.name, "Map/") && byteHistogram(raw) == byteHistogram(tt.raw)) {
						tb.Fatalf("Marshal output mismatch:\ngot %s\nwant %s", raw, tt.raw)
					}
				}
			}
		}

		runTestOrBench(tb, tt.name, len64(tt.raw), run)
	}
}
// TestBenchmarkTestdata runs the jsontest.Data cases once as tests.
func TestBenchmarkTestdata(t *testing.T) {
	runAllTestdata(t)
}

// BenchmarkTestdata runs the jsontest.Data cases as benchmarks.
func BenchmarkTestdata(b *testing.B) {
	runAllTestdata(b)
}

// runAllTestdata exercises every entry of jsontest.Data in two ways:
// marshaling and unmarshaling it as a concrete type (when the entry
// provides one) and as an interface value, and encoding and decoding it
// as tokens and as a whole value in streaming and buffered modes.
func runAllTestdata(tb testing.TB) {
	for _, td := range jsontest.Data {
		for _, arshalName := range []string{"Marshal", "Unmarshal"} {
			for _, typeName := range []string{"Concrete", "Interface"} {
				var newValue func() any
				switch {
				case typeName != "Concrete":
					newValue = func() any { return new(any) }
				case td.New != nil:
					newValue = td.New
				default:
					// No concrete type is available for this entry.
					continue
				}
				value := mustUnmarshalValue(tb, td.Data(), newValue)
				subName := path.Join(td.Name, arshalName, typeName)
				runTestOrBench(tb, subName, int64(len(td.Data())), func(tb testing.TB) {
					runArshal(tb, arshalName, newValue, td.Data(), value)
				})
			}
		}

		tokens := mustDecodeTokens(tb, td.Data())
		buffer := make([]byte, 0, 2*len(td.Data()))
		for _, codeName := range []string{"Encode", "Decode"} {
			for _, typeName := range []string{"Token", "Value"} {
				for _, modeName := range []string{"Streaming", "Buffered"} {
					subName := path.Join(td.Name, codeName, typeName, modeName)
					runTestOrBench(tb, subName, int64(len(td.Data())), func(tb testing.TB) {
						runCode(tb, codeName, typeName, modeName, buffer, td.Data(), tokens)
					})
				}
			}
		}
	}
}
// mustUnmarshalValue unmarshals data with jsonv2.Unmarshal into a value
// obtained from newValue and returns that value.
// It fails t immediately if unmarshaling reports an error.
func mustUnmarshalValue(t testing.TB, data []byte, newValue func() any) (value any) {
	out := newValue()
	err := jsonv2.Unmarshal(data, out)
	if err != nil {
		t.Fatalf("Unmarshal error: %v", err)
	}
	return out
}
// runArshal performs a single marshal of value or a single unmarshal of
// data into a fresh value from newValue, depending on arshalName.
// Any other arshalName is a no-op.
func runArshal(t testing.TB, arshalName string, newValue func() any, data []byte, value any) {
	if arshalName == "Marshal" {
		_, err := jsonFuncs.marshal(value)
		if err != nil {
			t.Fatalf("Marshal error: %v", err)
		}
		return
	}
	if arshalName == "Unmarshal" {
		err := jsonFuncs.unmarshal(data, newValue())
		if err != nil {
			t.Fatalf("Unmarshal error: %v", err)
		}
	}
}
// mustDecodeTokens reads data token by token until io.EOF and returns
// all tokens in order. It fails t immediately on any other read error.
func mustDecodeTokens(t testing.TB, data []byte) []jsontext.Token {
	var out []jsontext.Token
	dec := jsontext.NewDecoder(bytes.NewReader(data))
	for {
		tok, err := dec.ReadToken()
		if err == io.EOF {
			return out
		}
		if err != nil {
			t.Fatalf("Decoder.ReadToken error: %v", err)
		}

		// Strings and numbers are rebuilt from their Go values rather
		// than cloned: preferring the exact representation more closely
		// matches common use cases.
		var kept jsontext.Token
		switch tok.Kind() {
		case '"':
			kept = jsontext.String(tok.String())
		case '0':
			kept = jsontext.Float(tok.Float())
		default:
			kept = tok.Clone()
		}
		out = append(out, kept)
	}
}
// runCode dispatches to runEncode or runDecode according to codeName.
// Any other codeName is a no-op.
func runCode(t testing.TB, codeName, typeName, modeName string, buffer, data []byte, tokens []jsontext.Token) {
	if codeName == "Encode" {
		runEncode(t, typeName, modeName, buffer, data, tokens)
		return
	}
	if codeName == "Decode" {
		runDecode(t, typeName, modeName, buffer, data, tokens)
	}
}
// runEncode encodes either tokens ("Token") or data ("Value") into a
// writer backed by buffer. In "Streaming" mode the writer is a bytesBuffer;
// in "Buffered" mode it is a plain *bytes.Buffer.
// Combinations that the v1 implementations do not support are skipped.
func runEncode(t testing.TB, typeName, modeName string, buffer, data []byte, tokens []jsontext.Token) {
	if strings.HasPrefix(benchVersion, "v1") {
		if modeName == "Buffered" {
			t.Skip("no support for direct buffered output in v1; see https://go.dev/issue/7872")
		}
		if typeName == "Token" {
			t.Skip("no support for encoding tokens in v1; see https://go.dev/issue/40127")
		}
	}

	var w io.Writer
	if modeName == "Streaming" {
		w = bytesBuffer{bytes.NewBuffer(buffer[:0])}
	} else if modeName == "Buffered" {
		w = bytes.NewBuffer(buffer[:0])
	}

	if typeName == "Token" {
		err := jsonFuncs.encodeTokens(w, tokens)
		if err != nil {
			t.Fatalf("Encoder.WriteToken error: %v", err)
		}
	} else if typeName == "Value" {
		err := jsonFuncs.encodeValue(w, data)
		if err != nil {
			t.Fatalf("Encoder.WriteValue error: %v", err)
		}
	}
}
// runDecode decodes data either token by token ("Token") or as a single
// value ("Value"). In "Streaming" mode the reader is a bytesBuffer;
// in "Buffered" mode it is a plain *bytes.Buffer.
// Buffered mode is skipped for the v1 implementations.
// The buffer and tokens parameters are not used.
func runDecode(t testing.TB, typeName, modeName string, buffer, data []byte, tokens []jsontext.Token) {
	isV1 := strings.HasPrefix(benchVersion, "v1")
	if isV1 && modeName == "Buffered" {
		t.Skip("no support for direct buffered input in v1; see https://go.dev/issue/11046")
	}

	var r io.Reader
	if modeName == "Streaming" {
		r = bytesBuffer{bytes.NewBuffer(data)}
	} else if modeName == "Buffered" {
		r = bytes.NewBuffer(data)
	}

	if typeName == "Token" {
		err := jsonFuncs.decodeTokens(r)
		if err != nil {
			t.Fatalf("Decoder.ReadToken error: %v", err)
		}
	} else if typeName == "Value" {
		err := jsonFuncs.decodeValue(r)
		if err != nil {
			t.Fatalf("Decoder.ReadValue error: %v", err)
		}
	}
}
// ws is a run of 4096 (4<<10) space characters, used to build the
// whitespace-heavy inputs of slowStreamingDecodeTestdata.
var ws = strings.Repeat(" ", 4<<10)
// slowStreamingDecodeTestdata lists the inputs decoded by
// runAllSlowStreamingDecode: a large string, a large number, and
// null, object, and array values padded with large runs of whitespace.
var slowStreamingDecodeTestdata = []struct {
// name is the name of the subtest or sub-benchmark.
name string
// data is the JSON input to decode.
data []byte
}{
{"LargeString", []byte(`"` + strings.Repeat(" ", 4<<10) + `"`)},
{"LargeNumber", []byte("0." + strings.Repeat("0", 4<<10))},
{"LargeWhitespace/Null", []byte(ws + "null" + ws)},
{"LargeWhitespace/Object", []byte(ws + "{" + ws + `"name1"` + ws + ":" + ws + `"value"` + ws + "," + ws + `"name2"` + ws + ":" + ws + `"value"` + ws + "}" + ws)},
{"LargeWhitespace/Array", []byte(ws + "[" + ws + `"value"` + ws + "," + ws + `"value"` + ws + "]" + ws)},
}
// TestBenchmarkSlowStreamingDecode runs the slow streaming decode cases
// once as tests.
func TestBenchmarkSlowStreamingDecode(t *testing.T) {
	runAllSlowStreamingDecode(t)
}

// BenchmarkSlowStreamingDecode runs the slow streaming decode cases
// as benchmarks.
func BenchmarkSlowStreamingDecode(b *testing.B) {
	runAllSlowStreamingDecode(b)
}

// runAllSlowStreamingDecode decodes every entry of
// slowStreamingDecodeTestdata both token by token and as a whole value.
func runAllSlowStreamingDecode(tb testing.TB) {
	typeNames := []string{"Token", "Value"}
	for _, td := range slowStreamingDecodeTestdata {
		size := len64(td.data)
		for _, typeName := range typeNames {
			subName := path.Join(td.name, typeName)
			runTestOrBench(tb, subName, size, func(tb testing.TB) {
				runSlowStreamingDecode(tb, typeName, td.data)
			})
		}
	}
}
// runSlowStreamingDecode decodes data through a slow io.Reader that
// yields only 1 byte per read, which tends to exercise pathological
// behavior in a streaming Decoder.
// typeName selects decoding token by token ("Token") or as a single
// value ("Value"); any other typeName is a no-op.
func runSlowStreamingDecode(t testing.TB, typeName string, data []byte) {
	r := iotest.OneByteReader(bytes.NewReader(data))
	if typeName == "Token" {
		err := jsonFuncs.decodeTokens(r)
		if err != nil {
			t.Fatalf("Decoder.ReadToken error: %v", err)
		}
	} else if typeName == "Value" {
		err := jsonFuncs.decodeValue(r)
		if err != nil {
			t.Fatalf("Decoder.ReadValue error: %v", err)
		}
	}
}
// TestBenchmarkTextValue runs the jsontext.Value cases once as tests.
func TestBenchmarkTextValue(t *testing.T) {
	runValue(t)
}

// BenchmarkTextValue runs the jsontext.Value cases as benchmarks.
func BenchmarkTextValue(b *testing.B) {
	runValue(b)
}

// runValue exercises the IsValid, Compact, Indent, and Canonicalize
// methods of jsontext.Value on the "CitmCatalog" entry of jsontest.Data.
// Each formatting method is run twice: once on the original input, and
// once as "<name>/Noop" on a value that was already formatted by it.
// It is skipped in short mode.
func runValue(tb testing.TB) {
	if testing.Short() {
		tb.Skip() // CitmCatalog is not loaded in short mode
	}

	var data []byte
	for _, entry := range jsontest.Data {
		if entry.Name != "CitmCatalog" {
			continue
		}
		data = entry.Data()
	}
	size := len64(data)

	runTestOrBench(tb, "IsValid", size, func(tb testing.TB) {
		jsontext.Value(data).IsValid()
	})

	type formatter struct {
		name   string
		format func(*jsontext.Value, ...jsontext.Options) error
	}
	formatters := []formatter{
		{"Compact", (*jsontext.Value).Compact},
		{"Indent", (*jsontext.Value).Indent},
		{"Canonicalize", (*jsontext.Value).Canonicalize},
	}

	var v jsontext.Value // scratch value shared by all runs below
	for _, method := range formatters {
		runTestOrBench(tb, method.name, size, func(tb testing.TB) {
			v = append(v[:0], data...) // reset with original input
			if err := method.format(&v); err != nil {
				tb.Errorf("jsontext.Value.%v error: %v", method.name, err)
			}
		})

		// Format once up front so that the runs below operate on input
		// that has already been formatted.
		v = append(v[:0], data...)
		method.format(&v)
		runTestOrBench(tb, method.name+"/Noop", size, func(tb testing.TB) {
			if err := method.format(&v); err != nil {
				tb.Errorf("jsontext.Value.%v error: %v", method.name, err)
			}
		})
	}
}
// runTestOrBench runs the function run under a sub-test or sub-benchmark
// called name, depending on the concrete type behind tb.
//
// For a *testing.T, run is called exactly once inside the subtest.
// For a *testing.B, the timer is reset, allocation reporting is enabled,
// numBytes is recorded as the bytes processed per iteration,
// and run is called b.N times.
// Any other testing.TB implementation is ignored.
func runTestOrBench(tb testing.TB, name string, numBytes int64, run func(tb testing.TB)) {
	if t, ok := tb.(*testing.T); ok {
		t.Run(name, func(sub *testing.T) {
			run(sub)
		})
		return
	}
	if b, ok := tb.(*testing.B); ok {
		b.Run(name, func(sub *testing.B) {
			sub.ResetTimer()
			sub.ReportAllocs()
			sub.SetBytes(numBytes)
			for i := 0; i < sub.N; i++ {
				run(sub)
			}
		})
	}
}

170
src/encoding/json/v2/doc.go Normal file
View File

@ -0,0 +1,170 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
// Package json implements semantic processing of JSON as specified in RFC 8259.
// JSON is a simple data interchange format that can represent
// primitive data types such as booleans, strings, and numbers,
// in addition to structured data types such as objects and arrays.
//
// [Marshal] and [Unmarshal] encode and decode Go values
// to/from JSON text contained within a []byte.
// [MarshalWrite] and [UnmarshalRead] operate on JSON text
// by writing to or reading from an [io.Writer] or [io.Reader].
// [MarshalEncode] and [UnmarshalDecode] operate on JSON text
// by encoding to or decoding from a [jsontext.Encoder] or [jsontext.Decoder].
// [Options] may be passed to each of the marshal or unmarshal functions
// to configure the semantic behavior of marshaling and unmarshaling
// (i.e., alter how JSON data is understood as Go data and vice versa).
// [jsontext.Options] may also be passed to the marshal or unmarshal functions
// to configure the syntactic behavior of encoding or decoding.
//
// The data types of JSON are mapped to/from the data types of Go based on
// the closest logical equivalent between the two type systems. For example,
// a JSON boolean corresponds with a Go bool,
// a JSON string corresponds with a Go string,
// a JSON number corresponds with a Go int, uint or float,
// a JSON array corresponds with a Go slice or array, and
// a JSON object corresponds with a Go struct or map.
// See the documentation on [Marshal] and [Unmarshal] for a comprehensive list
// of how the JSON and Go type systems correspond.
//
// Arbitrary Go types can customize their JSON representation by implementing
// [Marshaler], [MarshalerTo], [Unmarshaler], or [UnmarshalerFrom].
// This provides authors of Go types with control over how their types are
// serialized as JSON. Alternatively, users can implement functions that match
// [MarshalFunc], [MarshalToFunc], [UnmarshalFunc], or [UnmarshalFromFunc]
// to specify the JSON representation for arbitrary types.
// This provides callers of JSON functionality with control over
// how any arbitrary type is serialized as JSON.
//
// # JSON Representation of Go structs
//
// A Go struct is naturally represented as a JSON object,
// where each Go struct field corresponds with a JSON object member.
// When marshaling, all Go struct fields are recursively encoded in depth-first
// order as JSON object members except those that are ignored or omitted.
// When unmarshaling, JSON object members are recursively decoded
// into the corresponding Go struct fields.
// Object members that do not match any struct fields,
// also known as “unknown members”, are ignored by default or rejected
// if [RejectUnknownMembers] is specified.
//
// The representation of each struct field can be customized in the
// "json" struct field tag, where the tag is a comma separated list of options.
// As a special case, if the entire tag is `json:"-"`,
// then the field is ignored with regard to its JSON representation.
// Some options also have equivalent behavior controlled by a caller-specified [Options].
// Field-specified options take precedence over caller-specified options.
//
// The first option is the JSON object name override for the Go struct field.
// If the name is not specified, then the Go struct field name
// is used as the JSON object name. JSON names containing commas or quotes,
// or names identical to "" or "-", can be specified using
// a single-quoted string literal, where the syntax is identical to
// the Go grammar for a double-quoted string literal,
// but instead uses single quotes as the delimiters.
// By default, unmarshaling uses case-sensitive matching to identify
// the Go struct field associated with a JSON object name.
//
// After the name, the following tag options are supported:
//
// - omitzero: When marshaling, the "omitzero" option specifies that
// the struct field should be omitted if the field value is zero
// as determined by the "IsZero() bool" method if present,
// otherwise based on whether the field is the zero Go value.
// This option has no effect when unmarshaling.
//
// - omitempty: When marshaling, the "omitempty" option specifies that
// the struct field should be omitted if the field value would have been
// encoded as a JSON null, empty string, empty object, or empty array.
// This option has no effect when unmarshaling.
//
// - string: The "string" option specifies that [StringifyNumbers]
// be set when marshaling or unmarshaling a struct field value.
// This causes numeric types to be encoded as a JSON number
// within a JSON string, and to be decoded from a JSON string
// containing the JSON number without any surrounding whitespace.
// This extra level of encoding is often necessary since
// many JSON parsers cannot precisely represent 64-bit integers.
//
// - case: When unmarshaling, the "case" option specifies how
// JSON object names are matched with the JSON name for Go struct fields.
// The option is a key-value pair specified as "case:value" where
// the value must either be 'ignore' or 'strict'.
// The 'ignore' value specifies that matching is case-insensitive
// where dashes and underscores are also ignored. If multiple fields match,
// the first declared field in breadth-first order takes precedence.
// The 'strict' value specifies that matching is case-sensitive.
// This takes precedence over the [MatchCaseInsensitiveNames] option.
//
// - inline: The "inline" option specifies that
// the JSON representable content of this field type is to be promoted
// as if they were specified in the parent struct.
// It is the JSON equivalent of Go struct embedding.
// A Go embedded field is implicitly inlined unless an explicit JSON name
// is specified. The inlined field must be a Go struct
// (that does not implement any JSON methods), [jsontext.Value],
// map[~string]T, or an unnamed pointer to such types. When marshaling,
// inlined fields from a pointer type are omitted if it is nil.
// Inlined fields of type [jsontext.Value] and map[~string]T are called
// “inlined fallbacks” as they can represent all possible
// JSON object members not directly handled by the parent struct.
// Only one inlined fallback field may be specified in a struct,
// while many non-fallback fields may be specified. This option
// must not be specified with any other option (including the JSON name).
//
// - unknown: The "unknown" option is a specialized variant
// of the inlined fallback to indicate that this Go struct field
// contains any number of unknown JSON object members. The field type must
// be a [jsontext.Value], map[~string]T, or an unnamed pointer to such types.
// If [DiscardUnknownMembers] is specified when marshaling,
// the contents of this field are ignored.
// If [RejectUnknownMembers] is specified when unmarshaling,
// any unknown object members are rejected regardless of whether
// an inlined fallback with the "unknown" option exists. This option
// must not be specified with any other option (including the JSON name).
//
// - format: The "format" option specifies a format flag
// used to specialize the formatting of the field value.
// The option is a key-value pair specified as "format:value" where
// the value must be either a literal consisting of letters and numbers
// (e.g., "format:RFC3339") or a single-quoted string literal
// (e.g., "format:'2006-01-02'"). The interpretation of the format flag
// is determined by the struct field type.
//
// The "omitzero" and "omitempty" options are mostly semantically identical.
// The former is defined in terms of the Go type system,
// while the latter in terms of the JSON type system.
// Consequently they behave differently in some circumstances.
// For example, only a nil slice or map is omitted under "omitzero", while
// an empty slice or map is omitted under "omitempty" regardless of nilness.
// The "omitzero" option is useful for types that have a well-defined zero value
// (e.g., [net/netip.Addr]) or that have an IsZero method (e.g., [time.Time.IsZero]).
//
// Every Go struct corresponds to a list of JSON representable fields
// which is constructed by performing a breadth-first search over
// all struct fields (excluding unexported or ignored fields),
// where the search recursively descends into inlined structs.
// The set of non-inlined fields in a struct must have unique JSON names.
// If multiple fields all have the same JSON name, then the one
// at shallowest depth takes precedence and the other fields at deeper depths
// are excluded from the list of JSON representable fields.
// If multiple fields at the shallowest depth have the same JSON name,
// but exactly one is explicitly tagged with a JSON name,
// then that field takes precedence and all others are excluded from the list.
// This is analogous to Go visibility rules for struct field selection
// with embedded struct types.
//
// Marshaling or unmarshaling a non-empty struct
// without any JSON representable fields results in a [SemanticError].
// Unexported fields must not have any `json` tags except for `json:"-"`.
package json
// requireKeyedLiterals can be embedded in a struct to require keyed literals.
// Embedding it adds an unexported, zero-size field to the struct, so composite
// literals written outside this package have to name the fields they set.
type requireKeyedLiterals struct{}
// nonComparable can be embedded in a struct to prevent comparability.
// It is a zero-length array of func values; since func values are not
// comparable in Go, neither is any struct that embeds this type.
type nonComparable [0]func()

View File

@ -0,0 +1,420 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"cmp"
"errors"
"fmt"
"reflect"
"strconv"
"strings"
"sync"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonopts"
"encoding/json/internal/jsonwire"
"encoding/json/jsontext"
)
// ErrUnknownName indicates that a JSON object member could not be
// unmarshaled because the name is not known to the target Go struct.
// This error is directly wrapped within a [SemanticError] when produced.
//
// The name of an unknown JSON object member can be extracted as:
//
//	err := ...
//	var serr *json.SemanticError
//	if errors.As(err, &serr) && serr.Err == json.ErrUnknownName {
//		ptr := serr.JSONPointer // JSON pointer to unknown name
//		name := ptr.LastToken() // unknown name itself
//		...
//	}
//
// The target passed to [errors.As] must be a *SemanticError because
// the error methods are declared on the pointer type.
//
// This error is only returned if [RejectUnknownMembers] is true.
var ErrUnknownName = errors.New("unknown object member name")
// errorPrefix is written at the start of every message
// produced by [SemanticError.Error].
const errorPrefix = "json: "
// isSemanticError reports whether err itself is a *SemanticError.
// Only the dynamic type of err is inspected; wrapped errors are not unwrapped.
func isSemanticError(err error) bool {
	switch err.(type) {
	case *SemanticError:
		return true
	default:
		return false
	}
}
// isSyntacticError reports whether err itself is a *jsontext.SyntacticError.
// Only the dynamic type of err is inspected; wrapped errors are not unwrapped.
func isSyntacticError(err error) bool {
	switch err.(type) {
	case *jsontext.SyntacticError:
		return true
	default:
		return false
	}
}
// isFatalError reports whether err must terminate marshaling or unmarshaling.
// Every error is fatal unless [jsonflags.ReportErrorsWithLegacySemantics]
// is set, in which case only syntactic errors and I/O errors are fatal.
func isFatalError(err error, flags jsonflags.Flags) bool {
	if !flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
		return true
	}
	return isSyntacticError(err) || export.IsIOError(err)
}
// SemanticError describes an error determining the meaning
// of JSON data as Go data or vice-versa.
//
// The error methods (Error and Unwrap) are declared on *SemanticError,
// so values of this type are used as errors through a pointer.
//
// The contents of this error as produced by this package may change over time.
type SemanticError struct {
// requireKeyedLiterals forces composite literals to use field names.
requireKeyedLiterals
// nonComparable prevents SemanticError values from being compared.
nonComparable
action string // either "marshal" or "unmarshal"
// ByteOffset indicates that an error occurred after this byte offset.
ByteOffset int64
// JSONPointer indicates that an error occurred within this JSON value
// as indicated using the JSON Pointer notation (see RFC 6901).
JSONPointer jsontext.Pointer
// JSONKind is the JSON kind that could not be handled.
JSONKind jsontext.Kind // may be zero if unknown
// JSONValue is the JSON number or string that could not be unmarshaled.
// It is not populated during marshaling.
JSONValue jsontext.Value // may be nil if irrelevant or unknown
// GoType is the Go type that could not be handled.
GoType reflect.Type // may be nil if unknown
// Err is the underlying error.
Err error // may be nil
}
// coder is implemented by [jsontext.Encoder] or [jsontext.Decoder].
// The error constructors in this file accept a coder and type-switch on
// its concrete type to choose between marshal and unmarshal handling.
type coder interface{ StackPointer() jsontext.Pointer }
// newInvalidFormatError reports that the current type t cannot handle
// the format flag carried in o.Format, wrapped in a SemanticError.
// It must be called before producing or consuming the next value.
//
// For an encoder the error is positioned before the next value.
// For a decoder it is positioned the same way and, if
// [jsonflags.ReportErrorsWithLegacySemantics] is specified,
// the next value is skipped so that it is fully consumed.
// If c is neither an encoder nor a decoder, the bare error is returned.
func newInvalidFormatError(c coder, t reflect.Type, o *jsonopts.Struct) error {
	cause := fmt.Errorf("invalid format flag %q", o.Format)
	switch c := c.(type) {
	case *jsontext.Encoder:
		return newMarshalErrorBefore(c, t, cause)
	case *jsontext.Decoder:
		return newUnmarshalErrorBeforeWithSkipping(c, o, t, cause)
	default:
		return cause
	}
}
// newMarshalErrorBefore builds a "marshal" SemanticError around err for
// Go type t, assuming e is positioned right before the next token or value,
// which is the one that causes the error.
// The byte offset is the current output offset plus the delimiter and
// whitespace count reported by the encoder for the next token.
func newMarshalErrorBefore(e *jsontext.Encoder, t reflect.Type, err error) error {
	enc := export.Encoder(e)
	offset := e.OutputOffset() + int64(enc.CountNextDelimWhitespace())
	pointer := jsontext.Pointer(enc.AppendStackPointer(nil, +1))
	return &SemanticError{
		action:      "marshal",
		GoType:      t,
		Err:         err,
		ByteOffset:  offset,
		JSONPointer: pointer,
	}
}
// newUnmarshalErrorBefore builds an "unmarshal" SemanticError around err for
// Go type t, assuming d is positioned right before the next token or value,
// which is the one that causes the error.
// The next JSON kind is deliberately not recorded: this error indicates that
// the receiving Go value is invalid to unmarshal into, not a JSON problem.
func newUnmarshalErrorBefore(d *jsontext.Decoder, t reflect.Type, err error) error {
	dec := export.Decoder(d)
	offset := d.InputOffset() + int64(dec.CountNextDelimWhitespace())
	pointer := jsontext.Pointer(dec.AppendStackPointer(nil, +1))
	return &SemanticError{
		action:      "unmarshal",
		GoType:      t,
		Err:         err,
		ByteOffset:  offset,
		JSONPointer: pointer,
	}
}
// newUnmarshalErrorBeforeWithSkipping is like [newUnmarshalErrorBefore],
// but also skips the next value when
// [jsonflags.ReportErrorsWithLegacySemantics] is specified.
// If skipping fails, the skip error is returned in place of the semantic error.
func newUnmarshalErrorBeforeWithSkipping(d *jsontext.Decoder, o *jsonopts.Struct, t reflect.Type, err error) error {
	serr := newUnmarshalErrorBefore(d, t, err)
	if !o.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
		return serr
	}
	if skipErr := export.Decoder(d).SkipValue(); skipErr != nil {
		return skipErr
	}
	return serr
}
// newUnmarshalErrorAfter builds an "unmarshal" SemanticError around err for
// Go type t, assuming d is positioned right after the previous token or value,
// which is the one that caused the error.
// The byte offset points at the start of that token or value,
// and its JSON kind is recorded in the error.
func newUnmarshalErrorAfter(d *jsontext.Decoder, t reflect.Type, err error) error {
	dec := export.Decoder(d)
	prev := dec.PreviousTokenOrValue()
	serr := &SemanticError{action: "unmarshal", GoType: t, Err: err}
	serr.ByteOffset = d.InputOffset() - int64(len(prev))
	serr.JSONPointer = jsontext.Pointer(dec.AppendStackPointer(nil, -1))
	serr.JSONKind = jsontext.Value(prev).Kind()
	return serr
}
// newUnmarshalErrorAfterWithValue is like [newUnmarshalErrorAfter],
// but also stores a copy of the previous JSON value in the error
// when that value is a JSON string or number.
func newUnmarshalErrorAfterWithValue(d *jsontext.Decoder, t reflect.Type, err error) error {
	serr := newUnmarshalErrorAfter(d, t, err).(*SemanticError)
	switch serr.JSONKind {
	case '"', '0':
		// Clone the value so the error does not alias the decoder's buffer.
		prev := export.Decoder(d).PreviousTokenOrValue()
		serr.JSONValue = jsontext.Value(prev).Clone()
	}
	return serr
}
// newUnmarshalErrorAfterWithSkipping is like [newUnmarshalErrorAfter],
// but also skips the remainder of the current value when
// [jsonflags.ReportErrorsWithLegacySemantics] is specified.
// If skipping fails, the skip error is returned in place of the semantic error.
func newUnmarshalErrorAfterWithSkipping(d *jsontext.Decoder, o *jsonopts.Struct, t reflect.Type, err error) error {
	serr := newUnmarshalErrorAfter(d, t, err)
	if !o.Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
		return serr
	}
	if skipErr := export.Decoder(d).SkipValueRemainder(); skipErr != nil {
		return skipErr
	}
	return serr
}
// newSemanticErrorWithPosition wraps err in a SemanticError assuming that
// the error occurred at the provided depth, and length.
// If err is already a SemanticError, then position information is only
// injected if it is currently unpopulated.
//
// prevDepth and prevLength are compared against the coder's current
// Tokens.DepthLength; presumably they are the values it reported before
// the user-provided code ran (confirm against the callers).
//
// If the position is unpopulated, it is ambiguous where the error occurred
// in the user code, whether it was before or after the current position.
// For the byte offset, we assume that the error occurred before the last read
// token or value when decoding, or before the next value when encoding.
// For the JSON pointer, we point to the parent object or array unless
// we can be certain that it happened with an object member.
//
// Note that when err is already a *SemanticError it is updated in place
// and returned, rather than copied.
//
// This is used to annotate errors returned by user-provided
// v2 MarshalJSON or UnmarshalJSON methods or functions.
func newSemanticErrorWithPosition(c coder, t reflect.Type, prevDepth int, prevLength int64, err error) error {
serr, _ := err.(*SemanticError)
if serr == nil {
serr = &SemanticError{Err: err}
}
var currDepth int
var currLength int64
var coderState interface{ AppendStackPointer([]byte, int) []byte }
var offset int64
switch c := c.(type) {
case *jsontext.Encoder:
e := export.Encoder(c)
serr.action = cmp.Or(serr.action, "marshal")
currDepth, currLength = e.Tokens.DepthLength()
offset = c.OutputOffset() + int64(export.Encoder(c).CountNextDelimWhitespace())
coderState = e
case *jsontext.Decoder:
d := export.Decoder(c)
serr.action = cmp.Or(serr.action, "unmarshal")
currDepth, currLength = d.Tokens.DepthLength()
tokOrVal := d.PreviousTokenOrValue()
offset = c.InputOffset() - int64(len(tokOrVal))
if (prevDepth == currDepth && prevLength == currLength) || len(tokOrVal) == 0 {
// If no Read method was called in the user-defined method or
// if the Peek method was called, then use the offset of the next value.
offset = c.InputOffset() + int64(export.Decoder(c).CountNextDelimWhitespace())
}
coderState = d
}
// cmp.Or keeps an already populated (non-zero) field and
// only falls back to the value computed here.
serr.ByteOffset = cmp.Or(serr.ByteOffset, offset)
if serr.JSONPointer == "" {
where := 0 // default to ambiguous positioning
switch {
case prevDepth == currDepth && prevLength+0 == currLength:
// Same depth and length as before: point at the next value.
where = +1
case prevDepth == currDepth && prevLength+1 == currLength:
// Same depth and the length grew by one: point at the previous value.
where = -1
}
serr.JSONPointer = jsontext.Pointer(coderState.AppendStackPointer(nil, where))
}
serr.GoType = cmp.Or(serr.GoType, t)
return serr
}
// collapseSemanticErrors merges two directly nested SemanticErrors at the
// outermost levels into one. The inner error is preserved, with the outer
// ByteOffset added to its ByteOffset and the outer JSONPointer prepended to
// its JSONPointer. The outer error is overwritten in place with the result.
// Any other error is returned unchanged.
//
// For example:
//
//	collapseSemanticErrors(&SemanticError{
//		ByteOffset:  len64(`[0,{"alpha":[0,1,`),
//		JSONPointer: "/1/alpha/2",
//		GoType:      reflect.TypeFor[outerType](),
//		Err: &SemanticError{
//			ByteOffset:  len64(`{"foo":"bar","fizz":[0,`),
//			JSONPointer: "/fizz/1",
//			GoType:      reflect.TypeFor[innerType](),
//			Err:         ...,
//		},
//	})
//
// results in:
//
//	&SemanticError{
//		ByteOffset:  len64(`[0,{"alpha":[0,1,`) + len64(`{"foo":"bar","fizz":[0,`),
//		JSONPointer: "/1/alpha/2" + "/fizz/1",
//		GoType:      reflect.TypeFor[innerType](),
//		Err:         ...,
//	}
//
// This is used to annotate errors returned by user-provided
// v1 MarshalJSON or UnmarshalJSON methods with precise position information
// if they themselves happened to return a SemanticError.
// Since MarshalJSON and UnmarshalJSON are not operating on the root JSON value,
// their positioning must be relative to the nested JSON value
// returned by MarshalJSON or passed to UnmarshalJSON.
// Therefore, we can construct an absolute position by concatenating
// the outer with the inner positions.
//
// Note that we do not use collapseSemanticErrors with user-provided functions
// that take in a [jsontext.Encoder] or [jsontext.Decoder] since they contain
// methods to report position relative to the root JSON value.
// We assume user-constructed errors are correctly precise about position.
func collapseSemanticErrors(err error) error {
	outer, ok := err.(*SemanticError)
	if !ok {
		return err
	}
	inner, ok := outer.Err.(*SemanticError)
	if !ok {
		return err
	}
	inner.ByteOffset = outer.ByteOffset + inner.ByteOffset
	inner.JSONPointer = outer.JSONPointer + inner.JSONPointer
	*outer = *inner
	return err
}
// errorModalVerb returns a modal verb, either "cannot" or "unable to".
//
// The choice is made once per process and then reused.
// It deliberately Hyrum-proofs the error message by switching between
// equivalent renderings, relying on the randomization that Go
// already applies to map iteration order.
var errorModalVerb = sync.OnceValue(func() string {
	verb := ""
	phrases := map[string]struct{}{"cannot": {}, "unable to": {}}
	for candidate := range phrases {
		verb = candidate // keep whichever phrase is iterated first
		break
	}
	return verb
})
// Error renders the error as a single-line message built from these parts,
// in order, each of which is omitted when its field is unset:
//
//   - errorPrefix followed by the verb from errorModalVerb;
//   - the action: " marshal", " unmarshal", or " handle" otherwise;
//   - the JSON kind (e.g. " JSON number"), followed by JSONValue
//     if it is non-empty and shorter than 100 bytes;
//   - a preposition and " Go <type>" when GoType is set;
//   - the position: JSONPointer if set, otherwise ByteOffset if positive;
//   - ": " and the message of the underlying error.
//
// If Err is ErrUnknownName, the message instead ends with the unknown name
// (the last token of JSONPointer) and, if non-empty, the parent pointer.
func (e *SemanticError) Error() string {
var sb strings.Builder
sb.WriteString(errorPrefix)
sb.WriteString(errorModalVerb())
// Format action.
var preposition string
switch e.action {
case "marshal":
sb.WriteString(" marshal")
preposition = " from"
case "unmarshal":
sb.WriteString(" unmarshal")
preposition = " into"
default:
sb.WriteString(" handle")
preposition = " with"
}
// Format JSON kind.
switch e.JSONKind {
case 'n':
sb.WriteString(" JSON null")
case 'f', 't':
sb.WriteString(" JSON boolean")
case '"':
sb.WriteString(" JSON string")
case '0':
sb.WriteString(" JSON number")
case '{', '}':
sb.WriteString(" JSON object")
case '[', ']':
sb.WriteString(" JSON array")
default:
// No recognized kind was printed; with no action either,
// drop the preposition so the Go type follows the verb directly.
if e.action == "" {
preposition = ""
}
}
if len(e.JSONValue) > 0 && len(e.JSONValue) < 100 {
sb.WriteByte(' ')
sb.Write(e.JSONValue)
}
// Format Go type.
if e.GoType != nil {
typeString := e.GoType.String()
if len(typeString) > 100 {
// An excessively long type string most likely occurs for
// an anonymous struct declaration with many fields.
// Reduce the noise by just printing the kind,
// and optionally prepending it with the package name
// if the struct happens to include an unexported field.
typeString = e.GoType.Kind().String()
if e.GoType.Kind() == reflect.Struct && e.GoType.Name() == "" {
for i := range e.GoType.NumField() {
if pkgPath := e.GoType.Field(i).PkgPath; pkgPath != "" {
// Use the last element of the package path as the package name.
typeString = pkgPath[strings.LastIndexByte(pkgPath, '/')+len("/"):] + ".struct"
break
}
}
}
}
sb.WriteString(preposition)
sb.WriteString(" Go ")
sb.WriteString(typeString)
}
// Special handling for unknown names.
if e.Err == ErrUnknownName {
sb.WriteString(": ")
sb.WriteString(ErrUnknownName.Error())
sb.WriteString(" ")
sb.WriteString(strconv.Quote(e.JSONPointer.LastToken()))
if parent := e.JSONPointer.Parent(); parent != "" {
sb.WriteString(" within ")
sb.WriteString(strconv.Quote(jsonwire.TruncatePointer(string(parent), 100)))
}
return sb.String()
}
// Format where.
// Avoid printing if it overlaps with a wrapped SyntacticError.
// The pointer takes priority: the byte offset is only considered
// when JSONPointer is empty.
switch serr, _ := e.Err.(*jsontext.SyntacticError); {
case e.JSONPointer != "":
if serr == nil || !e.JSONPointer.Contains(serr.JSONPointer) {
sb.WriteString(" within ")
sb.WriteString(strconv.Quote(jsonwire.TruncatePointer(string(e.JSONPointer), 100)))
}
case e.ByteOffset > 0:
if serr == nil || !(e.ByteOffset <= serr.ByteOffset) {
sb.WriteString(" after offset ")
sb.WriteString(strconv.FormatInt(e.ByteOffset, 10))
}
}
// Format underlying error.
if e.Err != nil {
errString := e.Err.Error()
if isSyntacticError(e.Err) {
// Drop the inner package prefix; this message already starts with errorPrefix.
errString = strings.TrimPrefix(errString, "jsontext: ")
}
sb.WriteString(": ")
sb.WriteString(errString)
}
return sb.String()
}
// Unwrap returns the underlying error e.Err, which may be nil.
func (e *SemanticError) Unwrap() error {
return e.Err
}
// newDuplicateNameError returns a *jsontext.SyntacticError wrapping
// jsontext.ErrDuplicateName at the given byte offset.
// If quotedName is non-nil, it is unquoted and appended to ptr as the
// final token; otherwise ptr is used as is.
func newDuplicateNameError(ptr jsontext.Pointer, quotedName []byte, offset int64) error {
	serr := &jsontext.SyntacticError{
		ByteOffset: offset,
		Err:        jsontext.ErrDuplicateName,
	}
	if quotedName != nil {
		// NOTE(review): the unquote error is dropped here; presumably the
		// name was already validated by the caller. Confirm.
		name, _ := jsonwire.AppendUnquote(nil, quotedName)
		ptr = ptr.AppendToken(string(name))
	}
	serr.JSONPointer = ptr
	return serr
}

View File

@ -0,0 +1,115 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"archive/tar"
"bytes"
"errors"
"io"
"strings"
"testing"
"encoding/json/internal/jsonwire"
"encoding/json/jsontext"
)
// TestSemanticError checks the exact text produced by (*SemanticError).Error
// for a table of field combinations: action, JSON kind and value, Go type,
// byte offset, JSON pointer, and wrapped errors (including ErrUnknownName
// and *jsontext.SyntacticError).
//
// T is a helper defined elsewhere in the package; presumably it returns
// the reflect.Type of its type argument.
func TestSemanticError(t *testing.T) {
tests := []struct {
err error
want string
}{{
err: &SemanticError{},
want: `json: cannot handle`,
}, {
err: &SemanticError{JSONKind: 'n'},
want: `json: cannot handle JSON null`,
}, {
err: &SemanticError{action: "unmarshal", JSONKind: 't'},
want: `json: cannot unmarshal JSON boolean`,
}, {
err: &SemanticError{action: "unmarshal", JSONKind: 'x'},
want: `json: cannot unmarshal`, // invalid token kinds are ignored
}, {
err: &SemanticError{action: "marshal", JSONKind: '"'},
want: `json: cannot marshal JSON string`,
}, {
err: &SemanticError{GoType: T[bool]()},
want: `json: cannot handle Go bool`,
}, {
err: &SemanticError{action: "marshal", GoType: T[int]()},
want: `json: cannot marshal from Go int`,
}, {
err: &SemanticError{action: "unmarshal", GoType: T[uint]()},
want: `json: cannot unmarshal into Go uint`,
}, {
// A type string longer than 100 bytes is abbreviated to its kind.
err: &SemanticError{GoType: T[struct{ Alpha, Bravo, Charlie, Delta, Echo, Foxtrot, Golf, Hotel string }]()},
want: `json: cannot handle Go struct`,
}, {
// With an unexported field, the abbreviation is qualified by the package name.
err: &SemanticError{GoType: T[struct{ Alpha, Bravo, Charlie, Delta, Echo, Foxtrot, Golf, Hotel, x string }]()},
want: `json: cannot handle Go v2.struct`,
}, {
err: &SemanticError{JSONKind: '0', GoType: T[tar.Header]()},
want: `json: cannot handle JSON number with Go tar.Header`,
}, {
err: &SemanticError{action: "unmarshal", JSONKind: '0', JSONValue: jsontext.Value(`1e1000`), GoType: T[int]()},
want: `json: cannot unmarshal JSON number 1e1000 into Go int`,
}, {
err: &SemanticError{action: "marshal", JSONKind: '{', GoType: T[bytes.Buffer]()},
want: `json: cannot marshal JSON object from Go bytes.Buffer`,
}, {
err: &SemanticError{action: "unmarshal", JSONKind: ']', GoType: T[strings.Reader]()},
want: `json: cannot unmarshal JSON array into Go strings.Reader`,
}, {
err: &SemanticError{action: "unmarshal", JSONKind: '{', GoType: T[float64](), ByteOffset: 123},
want: `json: cannot unmarshal JSON object into Go float64 after offset 123`,
}, {
// When both are set, the JSON pointer is printed instead of the byte offset.
err: &SemanticError{action: "marshal", JSONKind: 'f', GoType: T[complex128](), ByteOffset: 123, JSONPointer: "/foo/2/bar/3"},
want: `json: cannot marshal JSON boolean from Go complex128 within "/foo/2/bar/3"`,
}, {
err: &SemanticError{action: "unmarshal", JSONKind: '}', GoType: T[io.Reader](), ByteOffset: 123, JSONPointer: "/foo/2/bar/3", Err: errors.New("some underlying error")},
want: `json: cannot unmarshal JSON object into Go io.Reader within "/foo/2/bar/3": some underlying error`,
}, {
err: &SemanticError{Err: errors.New("some underlying error")},
want: `json: cannot handle: some underlying error`,
}, {
err: &SemanticError{ByteOffset: 123},
want: `json: cannot handle after offset 123`,
}, {
err: &SemanticError{JSONPointer: "/foo/2/bar/3"},
want: `json: cannot handle within "/foo/2/bar/3"`,
}, {
err: &SemanticError{action: "unmarshal", JSONPointer: "/3", GoType: T[struct{ Fizz, Buzz string }](), Err: ErrUnknownName},
want: `json: cannot unmarshal into Go struct { Fizz string; Buzz string }: unknown object member name "3"`,
}, {
err: &SemanticError{action: "unmarshal", JSONPointer: "/foo/2/bar/3", GoType: T[struct{ Foo string }](), Err: ErrUnknownName},
want: `json: cannot unmarshal into Go struct { Foo string }: unknown object member name "3" within "/foo/2/bar"`,
}, {
// The remaining cases wrap a SyntacticError and check when the outer
// position is suppressed because it overlaps with the inner one.
err: &SemanticError{JSONPointer: "/foo/bar", ByteOffset: 16, GoType: T[string](), Err: &jsontext.SyntacticError{JSONPointer: "/foo/bar/baz", ByteOffset: 53, Err: jsonwire.ErrInvalidUTF8}},
want: `json: cannot handle Go string: invalid UTF-8 within "/foo/bar/baz" after offset 53`,
}, {
err: &SemanticError{JSONPointer: "/fizz/bar", ByteOffset: 16, GoType: T[string](), Err: &jsontext.SyntacticError{JSONPointer: "/foo/bar/baz", ByteOffset: 53, Err: jsonwire.ErrInvalidUTF8}},
want: `json: cannot handle Go string within "/fizz/bar": invalid UTF-8 within "/foo/bar/baz" after offset 53`,
}, {
err: &SemanticError{ByteOffset: 16, GoType: T[string](), Err: &jsontext.SyntacticError{JSONPointer: "/foo/bar/baz", ByteOffset: 53, Err: jsonwire.ErrInvalidUTF8}},
want: `json: cannot handle Go string: invalid UTF-8 within "/foo/bar/baz" after offset 53`,
}, {
err: &SemanticError{ByteOffset: 85, GoType: T[string](), Err: &jsontext.SyntacticError{JSONPointer: "/foo/bar/baz", ByteOffset: 53, Err: jsonwire.ErrInvalidUTF8}},
want: `json: cannot handle Go string after offset 85: invalid UTF-8 within "/foo/bar/baz" after offset 53`,
}}
for _, tt := range tests {
got := tt.err.Error()
// Cleanup the error of non-deterministic rendering effects.
// The modal verb is chosen at random once per process,
// so normalize "unable to" to "cannot" before comparing.
if strings.HasPrefix(got, errorPrefix+"unable to ") {
got = errorPrefix + "cannot " + strings.TrimPrefix(got, errorPrefix+"unable to ")
}
if got != tt.want {
t.Errorf("%#v.Error mismatch:\ngot %v\nwant %v", tt.err, got, tt.want)
}
}
}

View File

@ -0,0 +1,113 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json_test
import (
"fmt"
"log"
"reflect"
"encoding/json/jsontext"
"encoding/json/v2"
)
// OrderedObject is an ordered sequence of name/value members in a JSON object.
// Members are kept in slice order, and the same name may appear more than once.
//
// RFC 8259 defines an object as an "unordered collection".
// JSON implementations need not make "ordering of object members visible"
// to applications nor will they agree on the semantic meaning of an object if
// "the names within an object are not unique". For maximum compatibility,
// applications should avoid relying on ordering or duplication of object names.
type OrderedObject[V any] []ObjectMember[V]
// ObjectMember is a JSON object member.
type ObjectMember[V any] struct {
// Name is the JSON object name of the member.
Name string
// Value is the value of the member, held as the Go type V.
Value V
}
// MarshalJSONTo writes obj to enc as a single JSON object,
// encoding the name and value of each member in slice order.
// It stops and returns the first error encountered.
func (obj *OrderedObject[V]) MarshalJSONTo(enc *jsontext.Encoder) error {
	if err := enc.WriteToken(jsontext.BeginObject); err != nil {
		return err
	}
	members := *obj
	for i := range members {
		name, value := &members[i].Name, &members[i].Value
		if err := json.MarshalEncode(enc, name); err != nil {
			return err
		}
		if err := json.MarshalEncode(enc, value); err != nil {
			return err
		}
	}
	return enc.WriteToken(jsontext.EndObject)
}
// UnmarshalJSONFrom reads one JSON object from dec and appends its members
// to obj in the order they appear. It returns an error if the next value is
// not an object, or the first decoding error encountered.
// Members decoded before an error remain appended to obj.
func (obj *OrderedObject[V]) UnmarshalJSONFrom(dec *jsontext.Decoder) error {
	kind := dec.PeekKind()
	if kind != '{' {
		return fmt.Errorf("expected object start, but encountered %v", kind)
	}
	if _, err := dec.ReadToken(); err != nil {
		return err
	}
	for {
		if dec.PeekKind() == '}' {
			break
		}
		// Grow the slice first, then decode directly into the new slot.
		*obj = append(*obj, ObjectMember[V]{})
		slot := &(*obj)[len(*obj)-1]
		if err := json.UnmarshalDecode(dec, &slot.Name); err != nil {
			return err
		}
		if err := json.UnmarshalDecode(dec, &slot.Value); err != nil {
			return err
		}
	}
	// Consume the closing '}' of the object.
	_, err := dec.ReadToken()
	return err
}
// The exact order of JSON object can be preserved through the use of a
// specialized type that implements [MarshalerTo] and [UnmarshalerFrom].
func Example_orderedObject() {
// Round-trip marshal and unmarshal an ordered object.
// We expect the order and duplicity of JSON object members to be preserved.
// Specify jsontext.AllowDuplicateNames since this object contains "fizz" twice.
want := OrderedObject[string]{
{"fizz", "buzz"},
{"hello", "world"},
{"fizz", "wuzz"},
}
b, err := json.Marshal(&want, jsontext.AllowDuplicateNames(true))
if err != nil {
log.Fatal(err)
}
var got OrderedObject[string]
err = json.Unmarshal(b, &got, jsontext.AllowDuplicateNames(true))
if err != nil {
log.Fatal(err)
}
// Sanity check.
if !reflect.DeepEqual(got, want) {
log.Fatalf("roundtrip mismatch: got %v, want %v", got, want)
}
// Print the serialized JSON object.
(*jsontext.Value)(&b).Indent() // indent for readability
fmt.Println(string(b))
// Output:
// {
// "fizz": "buzz",
// "hello": "world",
// "fizz": "wuzz"
// }
}

View File

@ -0,0 +1,692 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json_test
import (
"bytes"
"errors"
"fmt"
"log"
"math"
"net/http"
"net/netip"
"os"
"reflect"
"strconv"
"strings"
"sync/atomic"
"time"
"encoding/json/jsontext"
"encoding/json/v2"
)
// If a type implements [encoding.TextMarshaler] and/or [encoding.TextUnmarshaler],
// then the MarshalText and UnmarshalText methods are used to encode/decode
// the value to/from a JSON string.
func Example_textMarshal() {
// Round-trip marshal and unmarshal a hostname map where the netip.Addr type
// implements both encoding.TextMarshaler and encoding.TextUnmarshaler.
want := map[netip.Addr]string{
netip.MustParseAddr("192.168.0.100"): "carbonite",
netip.MustParseAddr("192.168.0.101"): "obsidian",
netip.MustParseAddr("192.168.0.102"): "diamond",
}
b, err := json.Marshal(&want, json.Deterministic(true))
if err != nil {
log.Fatal(err)
}
var got map[netip.Addr]string
err = json.Unmarshal(b, &got)
if err != nil {
log.Fatal(err)
}
// Sanity check.
if !reflect.DeepEqual(got, want) {
log.Fatalf("roundtrip mismatch: got %v, want %v", got, want)
}
// Print the serialized JSON object.
(*jsontext.Value)(&b).Indent() // indent for readability
fmt.Println(string(b))
// Output:
// {
// "192.168.0.100": "carbonite",
// "192.168.0.101": "obsidian",
// "192.168.0.102": "diamond"
// }
}
// By default, JSON object names for Go struct fields are derived from
// the Go field name, but may be specified in the `json` tag.
// Due to JSON's heritage in JavaScript, the most common naming convention
// used for JSON object names is camelCase.
//
// The example marshals a zero-valued struct, so every emitted member is null;
// only the member names differ.
func Example_fieldNames() {
var value struct {
// This field is explicitly ignored with the special "-" name.
Ignored any `json:"-"`
// No JSON name is provided, so the Go field name is used.
GoName any
// A JSON name is provided without any special characters.
JSONName any `json:"jsonName"`
// No JSON name is provided (only an option), so the Go field name is used.
Option any `json:",case:ignore"`
// An empty JSON name specified using a single-quoted string literal.
Empty any `json:"''"`
// A dash JSON name specified using a single-quoted string literal.
Dash any `json:"'-'"`
// A comma JSON name specified using a single-quoted string literal.
Comma any `json:"','"`
// JSON name with quotes specified using a single-quoted string literal.
Quote any `json:"'\"\\''"`
// An unexported field is always ignored.
unexported any
}
b, err := json.Marshal(value)
if err != nil {
log.Fatal(err)
}
(*jsontext.Value)(&b).Indent() // indent for readability
fmt.Println(string(b))
// Output:
// {
// "GoName": null,
// "jsonName": null,
// "Option": null,
// "": null,
// "-": null,
// ",": null,
// "\"'": null
// }
}
// Unmarshal matches JSON object names with Go struct fields using
// a case-sensitive match, but can be configured to use a case-insensitive
// match with the "case:ignore" option. This permits unmarshaling from inputs
// that use naming conventions such as camelCase, snake_case, or kebab-case.
func Example_caseSensitivity() {
// JSON input using various naming conventions.
const input = `[
{"firstname": true},
{"firstName": true},
{"FirstName": true},
{"FIRSTNAME": true},
{"first_name": true},
{"FIRST_NAME": true},
{"first-name": true},
{"FIRST-NAME": true},
{"unknown": true}
]`
// Without "case:ignore", Unmarshal looks for an exact match.
var caseStrict []struct {
X bool `json:"firstName"`
}
if err := json.Unmarshal([]byte(input), &caseStrict); err != nil {
log.Fatal(err)
}
fmt.Println(caseStrict) // exactly 1 match found
// With "case:ignore", Unmarshal looks first for an exact match,
// then for a case-insensitive match if none found.
var caseIgnore []struct {
X bool `json:"firstName,case:ignore"`
}
if err := json.Unmarshal([]byte(input), &caseIgnore); err != nil {
log.Fatal(err)
}
fmt.Println(caseIgnore) // 8 matches found
// Output:
// [{false} {true} {false} {false} {false} {false} {false} {false} {false}]
// [{true} {true} {true} {true} {true} {true} {true} {true} {false}]
}
// Go struct fields can be omitted from the output depending on either
// the input Go value or the output JSON encoding of the value.
// The "omitzero" option omits a field if it is the zero Go value or
// implements an "IsZero() bool" method that reports true.
// The "omitempty" option omits a field if it encodes as an empty JSON value,
// which we define as a JSON null or empty JSON string, object, or array.
// In many cases, the behaviors of "omitzero" and "omitempty" are equivalent.
// If both provide the desired effect, then using "omitzero" is preferred.
//
// The example marshals two anonymous structs, one tagged entirely with
// "omitzero" and one entirely with "omitempty", and prints which
// members survive in each case.
func Example_omitFields() {
type MyStruct struct {
Foo string `json:",omitzero"`
Bar []int `json:",omitempty"`
// Both "omitzero" and "omitempty" can be specified together,
// in which case the field is omitted if either would take effect.
// This omits the Baz field either if it is a nil pointer or
// if it would have encoded as an empty JSON object.
Baz *MyStruct `json:",omitzero,omitempty"`
}
// Demonstrate behavior of "omitzero".
b, err := json.Marshal(struct {
Bool bool `json:",omitzero"`
Int int `json:",omitzero"`
String string `json:",omitzero"`
Time time.Time `json:",omitzero"`
Addr netip.Addr `json:",omitzero"`
Struct MyStruct `json:",omitzero"`
SliceNil []int `json:",omitzero"`
Slice []int `json:",omitzero"`
MapNil map[int]int `json:",omitzero"`
Map map[int]int `json:",omitzero"`
PointerNil *string `json:",omitzero"`
Pointer *string `json:",omitzero"`
InterfaceNil any `json:",omitzero"`
Interface any `json:",omitzero"`
}{
// Bool is omitted since false is the zero value for a Go bool.
Bool: false,
// Int is omitted since 0 is the zero value for a Go int.
Int: 0,
// String is omitted since "" is the zero value for a Go string.
String: "",
// Time is omitted since time.Time.IsZero reports true.
Time: time.Date(1, 1, 1, 0, 0, 0, 0, time.UTC),
// Addr is omitted since netip.Addr{} is the zero value for a Go struct.
Addr: netip.Addr{},
// Struct is NOT omitted since it is not the zero value for a Go struct.
Struct: MyStruct{Bar: []int{}, Baz: new(MyStruct)},
// SliceNil is omitted since nil is the zero value for a Go slice.
SliceNil: nil,
// Slice is NOT omitted since []int{} is not the zero value for a Go slice.
Slice: []int{},
// MapNil is omitted since nil is the zero value for a Go map.
MapNil: nil,
// Map is NOT omitted since map[int]int{} is not the zero value for a Go map.
Map: map[int]int{},
// PointerNil is omitted since nil is the zero value for a Go pointer.
PointerNil: nil,
// Pointer is NOT omitted since new(string) is not the zero value for a Go pointer.
Pointer: new(string),
// InterfaceNil is omitted since nil is the zero value for a Go interface.
InterfaceNil: nil,
// Interface is NOT omitted since (*string)(nil) is not the zero value for a Go interface.
Interface: (*string)(nil),
})
if err != nil {
log.Fatal(err)
}
(*jsontext.Value)(&b).Indent() // indent for readability
fmt.Println("OmitZero:", string(b)) // outputs "Struct", "Slice", "Map", "Pointer", and "Interface"
// Demonstrate behavior of "omitempty".
b, err = json.Marshal(struct {
Bool bool `json:",omitempty"`
Int int `json:",omitempty"`
String string `json:",omitempty"`
Time time.Time `json:",omitempty"`
Addr netip.Addr `json:",omitempty"`
Struct MyStruct `json:",omitempty"`
Slice []int `json:",omitempty"`
Map map[int]int `json:",omitempty"`
PointerNil *string `json:",omitempty"`
Pointer *string `json:",omitempty"`
InterfaceNil any `json:",omitempty"`
Interface any `json:",omitempty"`
}{
// Bool is NOT omitted since false is not an empty JSON value.
Bool: false,
// Int is NOT omitted since 0 is not an empty JSON value.
Int: 0,
// String is omitted since "" is an empty JSON string.
String: "",
// Time is NOT omitted since this encodes as a non-empty JSON string.
Time: time.Date(1, 1, 1, 0, 0, 0, 0, time.UTC),
// Addr is omitted since this encodes as an empty JSON string.
Addr: netip.Addr{},
// Struct is omitted since {} is an empty JSON object.
Struct: MyStruct{Bar: []int{}, Baz: new(MyStruct)},
// Slice is omitted since [] is an empty JSON array.
Slice: []int{},
// Map is omitted since {} is an empty JSON object.
Map: map[int]int{},
// PointerNil is omitted since null is an empty JSON value.
PointerNil: nil,
// Pointer is omitted since "" is an empty JSON string.
Pointer: new(string),
// InterfaceNil is omitted since null is an empty JSON value.
InterfaceNil: nil,
// Interface is omitted since null is an empty JSON value.
Interface: (*string)(nil),
})
if err != nil {
log.Fatal(err)
}
(*jsontext.Value)(&b).Indent() // indent for readability
fmt.Println("OmitEmpty:", string(b)) // outputs "Bool", "Int", and "Time"
// Output:
// OmitZero: {
// "Struct": {},
// "Slice": [],
// "Map": {},
// "Pointer": "",
// "Interface": null
// }
// OmitEmpty: {
// "Bool": false,
// "Int": 0,
// "Time": "0001-01-01T00:00:00Z"
// }
}
// JSON objects can be inlined within a parent object similar to
// how Go structs can be embedded within a parent struct.
// The inlining rules are similar to those of Go embedding,
// but operate upon the JSON namespace.
//
// The example marshals a zero-valued Container so that the output lists
// exactly the members that remain after inlining and conflict resolution.
func Example_inlinedFields() {
// Base is embedded within Container.
type Base struct {
// ID is promoted into the JSON object for Container.
ID string
// Type is ignored due to presence of Container.Type.
Type string
// Time cancels out with Container.Inlined.Time.
Time time.Time
}
// Other is embedded within Container.
type Other struct{ Cost float64 }
// Container embeds Base and Other.
type Container struct {
// Base is an embedded struct and is implicitly JSON inlined.
Base
// Type takes precedence over Base.Type.
Type int
// Inlined is a named Go field, but is explicitly JSON inlined.
Inlined struct {
// User is promoted into the JSON object for Container.
User string
// Time cancels out with Base.Time.
Time string
} `json:",inline"`
// ID does not conflict with Base.ID since the JSON name is different.
ID string `json:"uuid"`
// Other is not JSON inlined since it has an explicit JSON name.
Other `json:"other"`
}
// Format an empty Container to show what fields are JSON serializable.
var input Container
b, err := json.Marshal(&input)
if err != nil {
log.Fatal(err)
}
(*jsontext.Value)(&b).Indent() // indent for readability
fmt.Println(string(b))
// Output:
// {
// "ID": "",
// "Type": 0,
// "User": "",
// "uuid": "",
// "other": {
// "Cost": 0
// }
// }
}
// Due to version skew, the set of JSON object members known at compile-time
// may differ from the set of members encountered at execution-time.
// As such, it may be useful to have finer grain handling of unknown members.
// This package supports preserving, rejecting, or discarding such members.
func Example_unknownMembers() {
const input = `{
"Name": "Teal",
"Value": "#008080",
"WebSafe": false
}`
type Color struct {
Name string
Value string
// Unknown is a Go struct field that holds unknown JSON object members.
// It is marked as having this behavior with the "unknown" tag option.
//
// The type may be a jsontext.Value or map[string]T.
Unknown jsontext.Value `json:",unknown"`
}
// By default, unknown members are stored in a Go field marked as "unknown"
// or ignored if no such field exists.
var color Color
err := json.Unmarshal([]byte(input), &color)
if err != nil {
log.Fatal(err)
}
fmt.Println("Unknown members:", string(color.Unknown))
// Specifying RejectUnknownMembers causes Unmarshal
// to reject the presence of any unknown members.
err = json.Unmarshal([]byte(input), new(Color), json.RejectUnknownMembers(true))
var serr *json.SemanticError
if errors.As(err, &serr) && serr.Err == json.ErrUnknownName {
fmt.Println("Unmarshal error:", serr.Err, strconv.Quote(serr.JSONPointer.LastToken()))
}
// By default, Marshal preserves unknown members stored in
// a Go struct field marked as "unknown".
b, err := json.Marshal(color)
if err != nil {
log.Fatal(err)
}
fmt.Println("Output with unknown members: ", string(b))
// Specifying DiscardUnknownMembers causes Marshal
// to discard any unknown members.
b, err = json.Marshal(color, json.DiscardUnknownMembers(true))
if err != nil {
log.Fatal(err)
}
fmt.Println("Output without unknown members:", string(b))
// Output:
// Unknown members: {"WebSafe":false}
// Unmarshal error: unknown object member name "WebSafe"
// Output with unknown members: {"Name":"Teal","Value":"#008080","WebSafe":false}
// Output without unknown members: {"Name":"Teal","Value":"#008080"}
}
// The "format" tag option can be used to alter the formatting of certain types.
//
// Each field of the struct below selects a format through its tag;
// the Output section shows the encoding produced for each one.
func Example_formatFlags() {
value := struct {
// The same eight bytes are encoded three ways:
// as a base64 string, as a hex string, and as a JSON array of numbers.
BytesBase64 []byte `json:",format:base64"`
BytesHex [8]byte `json:",format:hex"`
BytesArray []byte `json:",format:array"`
// A NaN is encoded as the JSON string "NaN".
FloatNonFinite float64 `json:",format:nonfinite"`
// A nil map and a nil slice are both encoded as JSON null.
MapEmitNull map[string]any `json:",format:emitnull"`
SliceEmitNull []any `json:",format:emitnull"`
// The same instant is encoded as a date-only string and as Unix seconds.
TimeDateOnly time.Time `json:",format:'2006-01-02'"`
TimeUnixSec time.Time `json:",format:unix"`
// The same duration is encoded as fractional seconds and as nanoseconds.
DurationSecs time.Duration `json:",format:sec"`
DurationNanos time.Duration `json:",format:nano"`
}{
BytesBase64: []byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef},
BytesHex: [8]byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef},
BytesArray: []byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef},
FloatNonFinite: math.NaN(),
MapEmitNull: nil,
SliceEmitNull: nil,
TimeDateOnly: time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC),
TimeUnixSec: time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC),
DurationSecs: 12*time.Hour + 34*time.Minute + 56*time.Second + 7*time.Millisecond + 8*time.Microsecond + 9*time.Nanosecond,
DurationNanos: 12*time.Hour + 34*time.Minute + 56*time.Second + 7*time.Millisecond + 8*time.Microsecond + 9*time.Nanosecond,
}
b, err := json.Marshal(&value)
if err != nil {
log.Fatal(err)
}
(*jsontext.Value)(&b).Indent() // indent for readability
fmt.Println(string(b))
// Output:
// {
// "BytesBase64": "ASNFZ4mrze8=",
// "BytesHex": "0123456789abcdef",
// "BytesArray": [
// 1,
// 35,
// 69,
// 103,
// 137,
// 171,
// 205,
// 239
// ],
// "FloatNonFinite": "NaN",
// "MapEmitNull": null,
// "SliceEmitNull": null,
// "TimeDateOnly": "2000-01-01",
// "TimeUnixSec": 946684800,
// "DurationSecs": 45296.007008009,
// "DurationNanos": 45296007008009
// }
}
// When implementing HTTP endpoints, it is common to be operating with an
// [io.Reader] and an [io.Writer]. The [MarshalWrite] and [UnmarshalRead] functions
// assist in operating on such input/output types.
// [UnmarshalRead] reads the entirety of the [io.Reader] to ensure that [io.EOF]
// is encountered without any unexpected bytes after the top-level JSON value.
func Example_serveHTTP() {
// Some global state maintained by the server.
var n int64
// The "add" endpoint accepts a POST request with a JSON object
// containing a number to atomically add to the server's global counter.
// It returns the updated value of the counter.
http.HandleFunc("/api/add", func(w http.ResponseWriter, r *http.Request) {
// Unmarshal the request from the client.
var val struct{ N int64 }
if err := json.UnmarshalRead(r.Body, &val); err != nil {
// Inability to unmarshal the input suggests a client-side problem.
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
// Marshal a response from the server.
val.N = atomic.AddInt64(&n, val.N)
if err := json.MarshalWrite(w, &val); err != nil {
// Inability to marshal the output suggests a server-side problem.
// This error is not always observable by the client since
// json.MarshalWrite may have already written to the output.
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
})
}
// Some Go types have a custom JSON representation where the implementation
// is delegated to some external package. Consequently, the "json" package
// will not know how to use that external implementation.
// For example, the [google.golang.org/protobuf/encoding/protojson] package
// implements JSON for all [google.golang.org/protobuf/proto.Message] types.
// [WithMarshalers] and [WithUnmarshalers] can be used
// to configure "json" and "protojson" to cooperate together.
//
// The proto types and the protojson package are replaced below by local
// placeholders, so the example only demonstrates how the calls are wired up.
func Example_protoJSON() {
// Let protoMessage be "google.golang.org/protobuf/proto".Message.
type protoMessage interface{ ProtoReflect() }
// Let foopbMyMessage be a concrete implementation of proto.Message.
type foopbMyMessage struct{ protoMessage }
// Let protojson be an import of "google.golang.org/protobuf/encoding/protojson".
// NOTE: in this placeholder both function fields are left at their
// zero value (nil); a real program would import the actual package.
var protojson struct {
Marshal func(protoMessage) ([]byte, error)
Unmarshal func([]byte, protoMessage) error
}
// This value mixes both non-proto.Message types and proto.Message types.
// It should use the "json" package to handle non-proto.Message types and
// should use the "protojson" package to handle proto.Message types.
var value struct {
// GoStruct does not implement proto.Message and
// should use the default behavior of the "json" package.
GoStruct struct {
Name string
Age int
}
// ProtoMessage implements proto.Message and
// should be handled using protojson.Marshal.
ProtoMessage *foopbMyMessage
}
// Marshal using protojson.Marshal for proto.Message types.
b, err := json.Marshal(&value,
// Use protojson.Marshal as a type-specific marshaler.
json.WithMarshalers(json.MarshalFunc(protojson.Marshal)))
if err != nil {
log.Fatal(err)
}
// Unmarshal using protojson.Unmarshal for proto.Message types.
err = json.Unmarshal(b, &value,
// Use protojson.Unmarshal as a type-specific unmarshaler.
json.WithUnmarshalers(json.UnmarshalFunc(protojson.Unmarshal)))
if err != nil {
log.Fatal(err)
}
}
// Many error types are not serializable since they tend to be Go structs
// without any exported fields (e.g., errors constructed with [errors.New]).
// Some applications, may desire to marshal an error as a JSON string
// even if these errors cannot be unmarshaled.
func ExampleWithMarshalers_errors() {
// Response to serialize with some Go errors encountered.
response := []struct {
Result string `json:",omitzero"`
Error error `json:",omitzero"`
}{
{Result: "Oranges are a good source of Vitamin C."},
{Error: &strconv.NumError{Func: "ParseUint", Num: "-1234", Err: strconv.ErrSyntax}},
{Error: &os.PathError{Op: "ReadFile", Path: "/path/to/secret/file", Err: os.ErrPermission}},
}
b, err := json.Marshal(&response,
// Intercept every attempt to marshal an error type.
json.WithMarshalers(json.JoinMarshalers(
// Suppose we consider strconv.NumError to be a safe to serialize:
// this type-specific marshal function intercepts this type
// and encodes the error message as a JSON string.
json.MarshalToFunc(func(enc *jsontext.Encoder, err *strconv.NumError) error {
return enc.WriteToken(jsontext.String(err.Error()))
}),
// Error messages may contain sensitive information that may not
// be appropriate to serialize. For all errors not handled above,
// report some generic error message.
json.MarshalFunc(func(error) ([]byte, error) {
return []byte(`"internal server error"`), nil
}),
)),
jsontext.Multiline(true)) // expand for readability
if err != nil {
log.Fatal(err)
}
fmt.Println(string(b))
// Output:
// [
// {
// "Result": "Oranges are a good source of Vitamin C."
// },
// {
// "Error": "strconv.ParseUint: parsing \"-1234\": invalid syntax"
// },
// {
// "Error": "internal server error"
// }
// ]
}
// In some applications, the exact precision of JSON numbers needs to be
// preserved when unmarshaling. This can be accomplished using a type-specific
// unmarshal function that intercepts all any types and pre-populates the
// interface value with a [jsontext.Value], which can represent a JSON number exactly.
func ExampleWithUnmarshalers_rawNumber() {
// Input with JSON numbers beyond the representation of a float64.
const input = `[false, 1e-1000, 3.141592653589793238462643383279, 1e+1000, true]`
var value any
err := json.Unmarshal([]byte(input), &value,
// Intercept every attempt to unmarshal into the any type.
json.WithUnmarshalers(
json.UnmarshalFromFunc(func(dec *jsontext.Decoder, val *any) error {
// If the next value to be decoded is a JSON number,
// then provide a concrete Go type to unmarshal into.
if dec.PeekKind() == '0' {
*val = jsontext.Value(nil)
}
// Return SkipFunc to fallback on default unmarshal behavior.
return json.SkipFunc
}),
))
if err != nil {
log.Fatal(err)
}
fmt.Println(value)
// Sanity check.
want := []any{false, jsontext.Value("1e-1000"), jsontext.Value("3.141592653589793238462643383279"), jsontext.Value("1e+1000"), true}
if !reflect.DeepEqual(value, want) {
log.Fatalf("value mismatch:\ngot %v\nwant %v", value, want)
}
// Output:
// [false 1e-1000 3.141592653589793238462643383279 1e+1000 true]
}
// When using JSON for parsing configuration files,
// the parsing logic often needs to report an error with a line and column
// indicating where in the input an error occurred.
//
// The example records the byte offset of every Tunnel object while
// unmarshaling and later converts the offset of an invalid entry
// into a line and column for the error message.
func ExampleWithUnmarshalers_recordOffsets() {
// Hypothetical configuration file.
// The second entry lacks a "Destination" member.
const input = `[
{"Source": "192.168.0.100:1234", "Destination": "192.168.0.1:80"},
{"Source": "192.168.0.251:4004"},
{"Source": "192.168.0.165:8080", "Destination": "0.0.0.0:80"}
]`
type Tunnel struct {
Source netip.AddrPort
Destination netip.AddrPort
// ByteOffset is populated during unmarshal with the byte offset
// within the JSON input of the JSON object for this Go struct.
ByteOffset int64 `json:"-"` // metadata to be ignored for JSON serialization
}
var tunnels []Tunnel
err := json.Unmarshal([]byte(input), &tunnels,
// Intercept every attempt to unmarshal into the Tunnel type.
json.WithUnmarshalers(
json.UnmarshalFromFunc(func(dec *jsontext.Decoder, tunnel *Tunnel) error {
// Decoder.InputOffset reports the offset after the last token,
// but we want to record the offset before the next token.
//
// Call Decoder.PeekKind to buffer enough to reach the next token.
// Add the number of leading whitespace, commas, and colons
// to locate the start of the next token.
dec.PeekKind()
unread := dec.UnreadBuffer()
n := len(unread) - len(bytes.TrimLeft(unread, " \n\r\t,:"))
tunnel.ByteOffset = dec.InputOffset() + int64(n)
// Return SkipFunc to fallback on default unmarshal behavior.
return json.SkipFunc
}),
))
if err != nil {
log.Fatal(err)
}
// lineColumn converts a byte offset into a one-indexed line and column.
// The offset must be within the bounds of the input.
// The column is counted in bytes from the start of the line.
lineColumn := func(input string, offset int) (line, column int) {
line = 1 + strings.Count(input[:offset], "\n")
column = 1 + offset - (strings.LastIndex(input[:offset], "\n") + len("\n"))
return line, column
}
// Verify that the configuration file is valid.
for _, tunnel := range tunnels {
if !tunnel.Source.IsValid() || !tunnel.Destination.IsValid() {
line, column := lineColumn(input, int(tunnel.ByteOffset))
// NOTE: no trailing newline is printed, so messages for several
// invalid tunnels would run together on one line.
fmt.Printf("%d:%d: source and destination must both be specified", line, column)
}
}
// Output:
// 3:3: source and destination must both be specified
}

View File

@ -0,0 +1,646 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"cmp"
"errors"
"fmt"
"io"
"reflect"
"slices"
"strconv"
"strings"
"unicode"
"unicode/utf8"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonwire"
)
// isZeroer is implemented by types that provide an "IsZero() bool" method.
type isZeroer interface {
// IsZero reports whether the value is considered to be zero.
IsZero() bool
}
// isZeroerType is the reflect.Type of the isZeroer interface,
// used to test whether a Go type implements IsZero.
var isZeroerType = reflect.TypeFor[isZeroer]()
// structFields is the set of JSON-serializable fields of a Go struct type,
// as returned by makeStructFields.
type structFields struct {
flattened []structField // listed in depth-first ordering
// byActualName indexes fields by name.
// NOTE(review): presumably keyed by the exact JSON object name; confirm where it is populated.
byActualName map[string]*structField
// byFoldedName indexes fields by the result of foldName applied to a name;
// several fields may share one folded name (see lookupByFoldedName).
byFoldedName map[string][]*structField
// inlinedFallback is an optional inlined fallback field; it may be nil.
// NOTE(review): presumably the inlined Go map or jsontext.Value field; confirm in makeStructFields.
inlinedFallback *structField
}
// reindex recomputes index to avoid bounds check during runtime.
//
// During the construction of each [structField] in [makeStructFields],
// the index field is 0-indexed. However, before it returns,
// the 0th field is stored in index0 and index stores the remainder.
//
// Every field in flattened, and inlinedFallback if it is non-nil, is updated
// in place. Each field's index must be non-empty when reindex is called,
// and reindex must be applied only once since it consumes the first element.
func (fs *structFields) reindex() {
	// splitIndex moves the leading element of f.index into f.index0.
	splitIndex := func(f *structField) {
		f.index0 = f.index[0]
		f.index = f.index[1:]
		if len(f.index) == 0 {
			f.index = nil // avoid pinning the backing slice
		}
	}
	for i := range fs.flattened {
		splitIndex(&fs.flattened[i])
	}
	if fs.inlinedFallback != nil {
		splitIndex(fs.inlinedFallback)
	}
}
// lookupByFoldedName returns the fields whose folded name equals the folded
// form of name. Folding makes the match case-insensitive and ignores the
// presence of dashes and underscores. The result is nil if nothing matches.
func (fs *structFields) lookupByFoldedName(name []byte) []*structField {
	folded := foldName(name)
	// Convert inside the index expression, as the original does.
	return fs.byFoldedName[string(folded)]
}
// structField describes one Go struct field discovered by makeStructFields,
// together with the options parsed from its `json` tag.
type structField struct {
id int // unique numeric ID in breadth-first ordering
index0 int // 0th index into a struct according to [reflect.Type.FieldByIndex]
index []int // 1st index and remainder according to [reflect.Type.FieldByIndex]
typ reflect.Type // Go type of the struct field
// fncs is obtained from lookupArshaler: for the field's type on a normal
// field, or for the map element type on an inlined map field.
// It is nil for an inlined jsontext.Value.
fncs *arshaler
// isZero, when non-nil, reports whether the field value is zero
// by calling the IsZero method of the field's type.
isZero func(addressableValue) bool
// isEmpty, when non-nil, reports whether the field value has zero length
// (string, map, array, slice) or is nil (pointer, interface).
isEmpty func(addressableValue) bool
fieldOptions
}
// errNoExportedFields is the sentinel error for a Go struct
// that has no exported fields.
var errNoExportedFields = errors.New("Go struct has no exported fields")
func makeStructFields(root reflect.Type) (fs structFields, serr *SemanticError) {
orErrorf := func(serr *SemanticError, t reflect.Type, f string, a ...any) *SemanticError {
return cmp.Or(serr, &SemanticError{GoType: t, Err: fmt.Errorf(f, a...)})
}
// Set up a queue for a breadth-first search.
var queueIndex int
type queueEntry struct {
typ reflect.Type
index []int
visitChildren bool // whether to recursively visit inlined field in this struct
}
queue := []queueEntry{{root, nil, true}}
seen := map[reflect.Type]bool{root: true}
// Perform a breadth-first search over all reachable fields.
// This ensures that len(f.index) will be monotonically increasing.
var allFields, inlinedFallbacks []structField
for queueIndex < len(queue) {
qe := queue[queueIndex]
queueIndex++
t := qe.typ
inlinedFallbackIndex := -1 // index of last inlined fallback field in current struct
namesIndex := make(map[string]int) // index of each field with a given JSON object name in current struct
var hasAnyJSONTag bool // whether any Go struct field has a `json` tag
var hasAnyJSONField bool // whether any JSON serializable fields exist in current struct
for i := range t.NumField() {
sf := t.Field(i)
_, hasTag := sf.Tag.Lookup("json")
hasAnyJSONTag = hasAnyJSONTag || hasTag
options, ignored, err := parseFieldOptions(sf)
if err != nil {
serr = cmp.Or(serr, &SemanticError{GoType: t, Err: err})
}
if ignored {
continue
}
hasAnyJSONField = true
f := structField{
// Allocate a new slice (len=N+1) to hold both
// the parent index (len=N) and the current index (len=1).
// Do this to avoid clobbering the memory of the parent index.
index: append(append(make([]int, 0, len(qe.index)+1), qe.index...), i),
typ: sf.Type,
fieldOptions: options,
}
if sf.Anonymous && !f.hasName {
if indirectType(f.typ).Kind() != reflect.Struct {
serr = orErrorf(serr, t, "embedded Go struct field %s of non-struct type must be explicitly given a JSON name", sf.Name)
} else {
f.inline = true // implied by use of Go embedding without an explicit name
}
}
if f.inline || f.unknown {
// Handle an inlined field that serializes to/from
// zero or more JSON object members.
switch f.fieldOptions {
case fieldOptions{name: f.name, quotedName: f.quotedName, inline: true}:
case fieldOptions{name: f.name, quotedName: f.quotedName, unknown: true}:
case fieldOptions{name: f.name, quotedName: f.quotedName, inline: true, unknown: true}:
serr = orErrorf(serr, t, "Go struct field %s cannot have both `inline` and `unknown` specified", sf.Name)
f.inline = false // let `unknown` take precedence
default:
serr = orErrorf(serr, t, "Go struct field %s cannot have any options other than `inline` or `unknown` specified", sf.Name)
if f.hasName {
continue // invalid inlined field; treat as ignored
}
f.fieldOptions = fieldOptions{name: f.name, quotedName: f.quotedName, inline: f.inline, unknown: f.unknown}
if f.inline && f.unknown {
f.inline = false // let `unknown` take precedence
}
}
// Reject any types with custom serialization otherwise
// it becomes impossible to know what sub-fields to inline.
tf := indirectType(f.typ)
if implementsAny(tf, allMethodTypes...) && tf != jsontextValueType {
serr = orErrorf(serr, t, "inlined Go struct field %s of type %s must not implement marshal or unmarshal methods", sf.Name, tf)
}
// Handle an inlined field that serializes to/from
// a finite number of JSON object members backed by a Go struct.
if tf.Kind() == reflect.Struct {
if f.unknown {
serr = orErrorf(serr, t, "inlined Go struct field %s of type %s with `unknown` tag must be a Go map of string key or a jsontext.Value", sf.Name, tf)
continue // invalid inlined field; treat as ignored
}
if qe.visitChildren {
queue = append(queue, queueEntry{tf, f.index, !seen[tf]})
}
seen[tf] = true
continue
} else if !sf.IsExported() {
serr = orErrorf(serr, t, "inlined Go struct field %s is not exported", sf.Name)
continue // invalid inlined field; treat as ignored
}
// Handle an inlined field that serializes to/from any number of
// JSON object members backed by a Go map or jsontext.Value.
switch {
case tf == jsontextValueType:
f.fncs = nil // specially handled in arshal_inlined.go
case tf.Kind() == reflect.Map && tf.Key().Kind() == reflect.String:
if implementsAny(tf.Key(), allMethodTypes...) {
serr = orErrorf(serr, t, "inlined map field %s of type %s must have a string key that does not implement marshal or unmarshal methods", sf.Name, tf)
continue // invalid inlined field; treat as ignored
}
f.fncs = lookupArshaler(tf.Elem())
default:
serr = orErrorf(serr, t, "inlined Go struct field %s of type %s must be a Go struct, Go map of string key, or jsontext.Value", sf.Name, tf)
continue // invalid inlined field; treat as ignored
}
// Reject multiple inlined fallback fields within the same struct.
if inlinedFallbackIndex >= 0 {
serr = orErrorf(serr, t, "inlined Go struct fields %s and %s cannot both be a Go map or jsontext.Value", t.Field(inlinedFallbackIndex).Name, sf.Name)
// Still append f to inlinedFallbacks as there is still a
// check for a dominant inlined fallback before returning.
}
inlinedFallbackIndex = i
inlinedFallbacks = append(inlinedFallbacks, f)
} else {
// Handle normal Go struct field that serializes to/from
// a single JSON object member.
// Unexported fields cannot be serialized except for
// embedded fields of a struct type,
// which might promote exported fields of their own.
if !sf.IsExported() {
tf := indirectType(f.typ)
if !(sf.Anonymous && tf.Kind() == reflect.Struct) {
serr = orErrorf(serr, t, "Go struct field %s is not exported", sf.Name)
continue
}
// Unfortunately, methods on the unexported field
// still cannot be called.
if implementsAny(tf, allMethodTypes...) ||
(f.omitzero && implementsAny(tf, isZeroerType)) {
serr = orErrorf(serr, t, "Go struct field %s is not exported for method calls", sf.Name)
continue
}
}
// Provide a function that uses a type's IsZero method.
switch {
case sf.Type.Kind() == reflect.Interface && sf.Type.Implements(isZeroerType):
f.isZero = func(va addressableValue) bool {
// Avoid panics calling IsZero on a nil interface or
// non-nil interface with nil pointer.
return va.IsNil() || (va.Elem().Kind() == reflect.Pointer && va.Elem().IsNil()) || va.Interface().(isZeroer).IsZero()
}
case sf.Type.Kind() == reflect.Pointer && sf.Type.Implements(isZeroerType):
f.isZero = func(va addressableValue) bool {
// Avoid panics calling IsZero on nil pointer.
return va.IsNil() || va.Interface().(isZeroer).IsZero()
}
case sf.Type.Implements(isZeroerType):
f.isZero = func(va addressableValue) bool { return va.Interface().(isZeroer).IsZero() }
case reflect.PointerTo(sf.Type).Implements(isZeroerType):
f.isZero = func(va addressableValue) bool { return va.Addr().Interface().(isZeroer).IsZero() }
}
// Provide a function that can determine whether the value would
// serialize as an empty JSON value.
switch sf.Type.Kind() {
case reflect.String, reflect.Map, reflect.Array, reflect.Slice:
f.isEmpty = func(va addressableValue) bool { return va.Len() == 0 }
case reflect.Pointer, reflect.Interface:
f.isEmpty = func(va addressableValue) bool { return va.IsNil() }
}
// Reject multiple fields with same name within the same struct.
if j, ok := namesIndex[f.name]; ok {
serr = orErrorf(serr, t, "Go struct fields %s and %s conflict over JSON object name %q", t.Field(j).Name, sf.Name, f.name)
// Still append f to allFields as there is still a
// check for a dominant field before returning.
}
namesIndex[f.name] = i
f.id = len(allFields)
f.fncs = lookupArshaler(sf.Type)
allFields = append(allFields, f)
}
}
// NOTE: New users to the json package are occasionally surprised that
// unexported fields are ignored. This occurs by necessity due to our
// inability to directly introspect such fields with Go reflection
// without the use of unsafe.
//
// To reduce friction here, refuse to serialize any Go struct that
// has no JSON serializable fields, has at least one Go struct field,
// and does not have any `json` tags present. For example,
// errors returned by errors.New would fail to serialize.
isEmptyStruct := t.NumField() == 0
if !isEmptyStruct && !hasAnyJSONTag && !hasAnyJSONField {
serr = cmp.Or(serr, &SemanticError{GoType: t, Err: errNoExportedFields})
}
}
// Sort the fields by exact name (breaking ties by depth and
// then by presence of an explicitly provided JSON name).
// Select the dominant field from each set of fields with the same name.
// If multiple fields have the same name, then the dominant field
// is the one that exists alone at the shallowest depth,
// or the one that is uniquely tagged with a JSON name.
// Otherwise, no dominant field exists for the set.
flattened := allFields[:0]
slices.SortStableFunc(allFields, func(x, y structField) int {
return cmp.Or(
strings.Compare(x.name, y.name),
cmp.Compare(len(x.index), len(y.index)),
boolsCompare(!x.hasName, !y.hasName))
})
for len(allFields) > 0 {
n := 1 // number of fields with the same exact name
for n < len(allFields) && allFields[n-1].name == allFields[n].name {
n++
}
if n == 1 || len(allFields[0].index) != len(allFields[1].index) || allFields[0].hasName != allFields[1].hasName {
flattened = append(flattened, allFields[0]) // only keep field if there is a dominant field
}
allFields = allFields[n:]
}
// Sort the fields according to a breadth-first ordering
// so that we can re-number IDs with the smallest possible values.
// This optimizes use of uintSet such that it fits in the 64-entry bit set.
slices.SortFunc(flattened, func(x, y structField) int {
return cmp.Compare(x.id, y.id)
})
for i := range flattened {
flattened[i].id = i
}
// Sort the fields according to a depth-first ordering
// as the typical order that fields are marshaled.
slices.SortFunc(flattened, func(x, y structField) int {
return slices.Compare(x.index, y.index)
})
// Compute the mapping of fields in the byActualName map.
// Pre-fold all names so that we can lookup folded names quickly.
fs = structFields{
flattened: flattened,
byActualName: make(map[string]*structField, len(flattened)),
byFoldedName: make(map[string][]*structField, len(flattened)),
}
for i, f := range fs.flattened {
foldedName := string(foldName([]byte(f.name)))
fs.byActualName[f.name] = &fs.flattened[i]
fs.byFoldedName[foldedName] = append(fs.byFoldedName[foldedName], &fs.flattened[i])
}
for foldedName, fields := range fs.byFoldedName {
if len(fields) > 1 {
// The precedence order for conflicting ignoreCase names
// is by breadth-first order, rather than depth-first order.
slices.SortFunc(fields, func(x, y *structField) int {
return cmp.Compare(x.id, y.id)
})
fs.byFoldedName[foldedName] = fields
}
}
if n := len(inlinedFallbacks); n == 1 || (n > 1 && len(inlinedFallbacks[0].index) != len(inlinedFallbacks[1].index)) {
fs.inlinedFallback = &inlinedFallbacks[0] // dominant inlined fallback field
}
fs.reindex()
return fs, serr
}
// indirectType strips a single level of pointer indirection from t,
// but only when the pointer type itself is unnamed.
// This mirrors Go's embedding rules, which permit embedding T or *T,
// but neither **T nor a named pointer type P.
// Any other type is returned unchanged.
func indirectType(t reflect.Type) reflect.Type {
	isUnnamedPointer := t.Kind() == reflect.Pointer && t.Name() == ""
	if !isUnnamedPointer {
		return t
	}
	return t.Elem()
}
// matchFoldedName reports whether name is an acceptable case-insensitive
// match for this field under the given options.
// The caller must ensure that foldName(f.name) == foldName(name).
//
// Case-insensitive matching applies when the field carries the `case:ignore`
// tag option, or when the MatchCaseInsensitiveNames call option is set and
// the field does not carry the `case:strict` tag option.
// In other words, the tag options win over the call option.
//
// The v1 notion of case-insensitivity was strings.EqualFold, which compares
// dashes and underscores strictly, whereas the v2 notion ignores them.
// Consequently, when the MatchCaseSensitiveDelimiter call option is set,
// a match additionally requires strings.EqualFold to hold.
func (f *structField) matchFoldedName(name []byte, flags *jsonflags.Flags) bool {
	caseInsensitive := f.casing == caseIgnore ||
		(flags.Get(jsonflags.MatchCaseInsensitiveNames) && f.casing != caseStrict)
	if !caseInsensitive {
		return false
	}
	if flags.Get(jsonflags.MatchCaseSensitiveDelimiter) {
		// Fall back to the stricter v1 definition of equality.
		return strings.EqualFold(string(name), f.name)
	}
	return true
}
const (
caseIgnore = 1
caseStrict = 2
)
// fieldOptions holds the options that parseFieldOptions extracts
// from the `json` tag of a Go struct field.
type fieldOptions struct {
	// name is the JSON object member name. It is the Go field name
	// unless the tag explicitly provides a valid name.
	name string
	quotedName string // quoted name per RFC 8785, section 3.2.2.2.
	// hasName reports whether the tag explicitly provided the name.
	hasName bool
	// nameNeedEscape is the result of jsonwire.NeedEscape for name.
	nameNeedEscape bool
	casing int8 // either 0, caseIgnore, or caseStrict
	// Each of the following reports whether the tag option
	// of the same name was specified.
	inline bool
	unknown bool
	omitzero bool
	omitempty bool
	string bool
	// format is the value given to the `format` tag option (if any).
	format string
}
// parseFieldOptions parses the `json` tag in a Go struct field as
// a structured set of options configuring parameters such as
// the JSON member name and other features.
//
// It reports ignored as true if the tag is exactly `json:"-"` or
// if the field is unexported and not embedded.
//
// Parsing continues past malformed input so that as many options as
// possible are populated, but only the first problem encountered is
// reported since every error is recorded through cmp.Or(err, ...).
func parseFieldOptions(sf reflect.StructField) (out fieldOptions, ignored bool, err error) {
	tag, hasTag := sf.Tag.Lookup("json")

	// Check whether this field is explicitly ignored.
	if tag == "-" {
		return fieldOptions{}, true, nil
	}

	// Check whether this field is unexported and not embedded,
	// which Go reflection cannot mutate for the sake of serialization.
	//
	// An embedded field of an unexported type is still capable of
	// forwarding exported fields, which may be JSON serialized.
	// This technically operates on the edge of what is permissible by
	// the Go language, but the most recent decision is to permit this.
	//
	// See https://go.dev/issue/24153 and https://go.dev/issue/32772.
	if !sf.IsExported() && !sf.Anonymous {
		// Tag options specified on an unexported field suggests user error.
		if hasTag {
			err = cmp.Or(err, fmt.Errorf("unexported Go struct field %s cannot have non-ignored `json:%q` tag", sf.Name, tag))
		}
		return fieldOptions{}, true, err
	}

	// Determine the JSON member name for this Go field. A user-specified name
	// may be provided as either an identifier or a single-quoted string.
	// The single-quoted string allows arbitrary characters in the name.
	// See https://go.dev/issue/2718 and https://go.dev/issue/3546.
	out.name = sf.Name // always starts with an uppercase character
	if len(tag) > 0 && !strings.HasPrefix(tag, ",") {
		// For better compatibility with v1, accept almost any unescaped name.
		// Here n is the length of the leading run of non-reserved characters.
		n := len(tag) - len(strings.TrimLeftFunc(tag, func(r rune) bool {
			return !strings.ContainsRune(",\\'\"`", r) // reserve comma, backslash, and quotes
		}))
		name := tag[:n]

		// If the next character is not a comma, then the name is either
		// malformed (if n > 0) or a single-quoted name.
		// In either case, call consumeTagOption to handle it further.
		var err2 error
		if !strings.HasPrefix(tag[n:], ",") && len(name) != len(tag) {
			name, n, err2 = consumeTagOption(tag)
			if err2 != nil {
				err = cmp.Or(err, fmt.Errorf("Go struct field %s has malformed `json` tag: %v", sf.Name, err2))
			}
		}
		if !utf8.ValidString(name) {
			err = cmp.Or(err, fmt.Errorf("Go struct field %s has JSON object name %q with invalid UTF-8", sf.Name, name))
			name = string([]rune(name)) // replace invalid UTF-8 with utf8.RuneError
		}
		// Only adopt the tag-provided name if it was well-formed;
		// otherwise the Go field name assigned above is retained.
		if err2 == nil {
			out.hasName = true
			out.name = name
		}
		tag = tag[n:]
	}
	b, _ := jsonwire.AppendQuote(nil, out.name, &jsonflags.Flags{})
	out.quotedName = string(b)
	out.nameNeedEscape = jsonwire.NeedEscape(out.name)

	// Handle any additional tag options (if any).
	var wasFormat bool
	seenOpts := make(map[string]bool)
	for len(tag) > 0 {
		// Consume comma delimiter.
		if tag[0] != ',' {
			err = cmp.Or(err, fmt.Errorf("Go struct field %s has malformed `json` tag: invalid character %q before next option (expecting ',')", sf.Name, tag[0]))
		} else {
			tag = tag[len(","):]
			if len(tag) == 0 {
				err = cmp.Or(err, fmt.Errorf("Go struct field %s has malformed `json` tag: invalid trailing ',' character", sf.Name))
				break
			}
		}

		// Consume and process the tag option.
		opt, n, err2 := consumeTagOption(tag)
		if err2 != nil {
			err = cmp.Or(err, fmt.Errorf("Go struct field %s has malformed `json` tag: %v", sf.Name, err2))
		}
		rawOpt := tag[:n]
		tag = tag[n:]
		switch {
		case wasFormat:
			err = cmp.Or(err, fmt.Errorf("Go struct field %s has `format` tag option that was not specified last", sf.Name))
		case strings.HasPrefix(rawOpt, "'") && strings.TrimFunc(opt, isLetterOrDigit) == "":
			err = cmp.Or(err, fmt.Errorf("Go struct field %s has unnecessarily quoted appearance of `%s` tag option; specify `%s` instead", sf.Name, rawOpt, opt))
		}
		// NOTE: Each break within this switch only exits the switch,
		// so the duplicate check below still runs for the option.
		switch opt {
		case "case":
			if !strings.HasPrefix(tag, ":") {
				err = cmp.Or(err, fmt.Errorf("Go struct field %s is missing value for `case` tag option; specify `case:ignore` or `case:strict` instead", sf.Name))
				break
			}
			tag = tag[len(":"):]
			// The variables declared here shadow the outer ones,
			// so the outer opt remains "case" for the duplicate check.
			opt, n, err2 := consumeTagOption(tag)
			if err2 != nil {
				err = cmp.Or(err, fmt.Errorf("Go struct field %s has malformed value for `case` tag option: %v", sf.Name, err2))
				break
			}
			rawOpt := tag[:n]
			tag = tag[n:]
			if strings.HasPrefix(rawOpt, "'") {
				err = cmp.Or(err, fmt.Errorf("Go struct field %s has unnecessarily quoted appearance of `case:%s` tag option; specify `case:%s` instead", sf.Name, rawOpt, opt))
			}
			switch opt {
			case "ignore":
				out.casing |= caseIgnore
			case "strict":
				out.casing |= caseStrict
			default:
				err = cmp.Or(err, fmt.Errorf("Go struct field %s has unknown `case:%s` tag value", sf.Name, rawOpt))
			}
		case "inline":
			out.inline = true
		case "unknown":
			out.unknown = true
		case "omitzero":
			out.omitzero = true
		case "omitempty":
			out.omitempty = true
		case "string":
			out.string = true
		case "format":
			if !strings.HasPrefix(tag, ":") {
				err = cmp.Or(err, fmt.Errorf("Go struct field %s is missing value for `format` tag option", sf.Name))
				break
			}
			tag = tag[len(":"):]
			// The variables declared here shadow the outer ones,
			// so the outer opt remains "format" for the duplicate check.
			opt, n, err2 := consumeTagOption(tag)
			if err2 != nil {
				err = cmp.Or(err, fmt.Errorf("Go struct field %s has malformed value for `format` tag option: %v", sf.Name, err2))
				break
			}
			tag = tag[n:]
			out.format = opt
			wasFormat = true
		default:
			// Reject keys that resemble one of the supported options.
			// This catches invalid mutants such as "omitEmpty" or "omit_empty".
			normOpt := strings.ReplaceAll(strings.ToLower(opt), "_", "")
			switch normOpt {
			case "case", "inline", "unknown", "omitzero", "omitempty", "string", "format":
				err = cmp.Or(err, fmt.Errorf("Go struct field %s has invalid appearance of `%s` tag option; specify `%s` instead", sf.Name, opt, normOpt))
			}

			// NOTE: Everything else is ignored. This does not mean it is
			// forward compatible to insert arbitrary tag options since
			// a future version of this package may understand that tag.
		}

		// Reject duplicates.
		switch {
		case out.casing == caseIgnore|caseStrict:
			err = cmp.Or(err, fmt.Errorf("Go struct field %s cannot have both `case:ignore` and `case:strict` tag options", sf.Name))
		case seenOpts[opt]:
			err = cmp.Or(err, fmt.Errorf("Go struct field %s has duplicate appearance of `%s` tag option", sf.Name, rawOpt))
		}
		seenOpts[opt] = true
	}
	return out, false, err
}
// consumeTagOption consumes the next option,
// which is either a Go identifier or a single-quoted string.
// It returns the parsed option along with the number of bytes of in
// that were consumed.
// If the next option is invalid, it returns all of in until the next comma,
// and reports an error.
func consumeTagOption(in string) (string, int, error) {
	// For legacy compatibility with v1, assume options are comma-separated.
	i := strings.IndexByte(in, ',')
	if i < 0 {
		i = len(in)
	}

	switch r, _ := utf8.DecodeRuneInString(in); {
	// Option as a Go identifier.
	case r == '_' || unicode.IsLetter(r):
		n := len(in) - len(strings.TrimLeftFunc(in, isLetterOrDigit))
		return in[:n], n, nil
	// Option as a single-quoted string.
	case r == '\'':
		// The grammar is nearly identical to a double-quoted Go string literal,
		// but uses single quotes as the terminators. The reason for a custom
		// grammar is because both backtick and double quotes cannot be used
		// verbatim in a struct tag.
		//
		// Convert a single-quoted string to a double-quote string and rely on
		// strconv.Unquote to handle the rest.
		var inEscape bool
		b := []byte{'"'}
		n := len(`'`)
		for len(in) > n {
			r, rn := utf8.DecodeRuneInString(in[n:])
			switch {
			case inEscape:
				if r == '\'' {
					b = b[:len(b)-1] // remove escape character: `\'` => `'`
				}
				inEscape = false
			case r == '\\':
				inEscape = true
			case r == '"':
				b = append(b, '\\') // insert escape character: `"` => `\"`
			case r == '\'':
				b = append(b, '"')
				n += len(`'`)
				out, err := strconv.Unquote(string(b))
				if err != nil {
					return in[:i], i, fmt.Errorf("invalid single-quoted string: %s", in[:n])
				}
				return out, n, nil
			}
			b = append(b, in[n:][:rn]...)
			n += rn
		}
		if n > 10 {
			// Limit the amount of context printed in the error.
			// The loop above only exits with n == len(in), so in[10] exists.
			// Back up to a rune boundary so that the truncation never
			// splits a multi-byte UTF-8 character in the error message.
			n = 10
			for n > 0 && !utf8.RuneStart(in[n]) {
				n--
			}
		}
		return in[:i], i, fmt.Errorf("single-quoted string not terminated: %s...", in[:n])
	case len(in) == 0:
		return in[:i], i, io.ErrUnexpectedEOF
	default:
		return in[:i], i, fmt.Errorf("invalid character %q at start of option (expecting Unicode letter or single quote)", r)
	}
}
// isLetterOrDigit reports whether r is an underscore, a Unicode letter,
// or a Unicode number (as determined by unicode.IsNumber).
func isLetterOrDigit(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsLetter(r):
		return true
	default:
		return unicode.IsNumber(r)
	}
}
// boolsCompare is a three-way comparison of x and y where false sorts
// before true. It returns -1 if x < y, 0 if x == y, and +1 if x > y.
func boolsCompare(x, y bool) int {
	if x == y {
		return 0
	}
	if y {
		// x is false and y is true.
		return -1
	}
	// x is true and y is false.
	return +1
}

View File

@ -0,0 +1,821 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"encoding"
"errors"
"reflect"
"testing"
"encoding/json/internal/jsontest"
"encoding/json/jsontext"
)
// unexported is an empty struct type with an unexported name.
// TestParseTagOptions embeds it to exercise how tag parsing treats
// an embedded field of an unexported type.
type unexported struct{}
// TestMakeStructFields checks, for a table of Go struct types,
// that makeStructFields produces the expected structFields and
// the expected underlying error.
//
// Each case only specifies the flattened and inlinedFallback portions
// of the wanted structFields; the lookup maps are reconstructed from
// the flattened list before comparison.
func TestMakeStructFields(t *testing.T) {
	type Embed struct {
		Foo string
	}
	// Recursive embeds a pointer to itself to check that
	// a self-referential inlined type is handled.
	type Recursive struct {
		A string
		*Recursive `json:",inline"`
		B string
	}
	type MapStringAny map[string]any
	tests := []struct {
		name jsontest.CaseName
		in any
		want structFields
		wantErr error
	}{{
		name: jsontest.Name("Names"),
		in: struct {
			F1 string
			F2 string `json:"-"`
			F3 string `json:"json_name"`
			f3 string
			F5 string `json:"json_name_nocase,case:ignore"`
		}{},
		want: structFields{
			flattened: []structField{
				{id: 0, index: []int{0}, typ: stringType, fieldOptions: fieldOptions{name: "F1", quotedName: `"F1"`}},
				{id: 1, index: []int{2}, typ: stringType, fieldOptions: fieldOptions{name: "json_name", quotedName: `"json_name"`, hasName: true}},
				{id: 2, index: []int{4}, typ: stringType, fieldOptions: fieldOptions{name: "json_name_nocase", quotedName: `"json_name_nocase"`, hasName: true, casing: caseIgnore}},
			},
		},
	}, {
		// The expected ids follow a breadth-first numbering,
		// while the flattened list is in depth-first order.
		name: jsontest.Name("BreadthFirstSearch"),
		in: struct {
			L1A string
			L1B struct {
				L2A string
				L2B struct {
					L3A string
				} `json:",inline"`
				L2C string
			} `json:",inline"`
			L1C string
			L1D struct {
				L2D string
				L2E struct {
					L3B string
				} `json:",inline"`
				L2F string
			} `json:",inline"`
			L1E string
		}{},
		want: structFields{
			flattened: []structField{
				{id: 0, index: []int{0}, typ: stringType, fieldOptions: fieldOptions{name: "L1A", quotedName: `"L1A"`}},
				{id: 3, index: []int{1, 0}, typ: stringType, fieldOptions: fieldOptions{name: "L2A", quotedName: `"L2A"`}},
				{id: 7, index: []int{1, 1, 0}, typ: stringType, fieldOptions: fieldOptions{name: "L3A", quotedName: `"L3A"`}},
				{id: 4, index: []int{1, 2}, typ: stringType, fieldOptions: fieldOptions{name: "L2C", quotedName: `"L2C"`}},
				{id: 1, index: []int{2}, typ: stringType, fieldOptions: fieldOptions{name: "L1C", quotedName: `"L1C"`}},
				{id: 5, index: []int{3, 0}, typ: stringType, fieldOptions: fieldOptions{name: "L2D", quotedName: `"L2D"`}},
				{id: 8, index: []int{3, 1, 0}, typ: stringType, fieldOptions: fieldOptions{name: "L3B", quotedName: `"L3B"`}},
				{id: 6, index: []int{3, 2}, typ: stringType, fieldOptions: fieldOptions{name: "L2F", quotedName: `"L2F"`}},
				{id: 2, index: []int{4}, typ: stringType, fieldOptions: fieldOptions{name: "L1E", quotedName: `"L1E"`}},
			},
		},
	}, {
		name: jsontest.Name("NameResolution"),
		in: struct {
			X1 struct {
				X struct {
					A string // loses in precedence to A
					B string // cancels out with X2.X.B
					D string // loses in precedence to D
				} `json:",inline"`
			} `json:",inline"`
			X2 struct {
				X struct {
					B string // cancels out with X1.X.B
					C string
					D string // loses in precedence to D
				} `json:",inline"`
			} `json:",inline"`
			A string // takes precedence over X1.X.A
			D string // takes precedence over X1.X.D and X2.X.D
		}{},
		want: structFields{
			flattened: []structField{
				{id: 2, index: []int{1, 0, 1}, typ: stringType, fieldOptions: fieldOptions{name: "C", quotedName: `"C"`}},
				{id: 0, index: []int{2}, typ: stringType, fieldOptions: fieldOptions{name: "A", quotedName: `"A"`}},
				{id: 1, index: []int{3}, typ: stringType, fieldOptions: fieldOptions{name: "D", quotedName: `"D"`}},
			},
		},
	}, {
		name: jsontest.Name("NameResolution/ExplicitNameUniquePrecedence"),
		in: struct {
			X1 struct {
				A string // loses in precedence to X2.A
			} `json:",inline"`
			X2 struct {
				A string `json:"A"`
			} `json:",inline"`
			X3 struct {
				A string // loses in precedence to X2.A
			} `json:",inline"`
		}{},
		want: structFields{
			flattened: []structField{
				{id: 0, index: []int{1, 0}, typ: stringType, fieldOptions: fieldOptions{hasName: true, name: "A", quotedName: `"A"`}},
			},
		},
	}, {
		name: jsontest.Name("NameResolution/ExplicitNameCancelsOut"),
		in: struct {
			X1 struct {
				A string // loses in precedence to X2.A or X3.A
			} `json:",inline"`
			X2 struct {
				A string `json:"A"` // cancels out with X3.A
			} `json:",inline"`
			X3 struct {
				A string `json:"A"` // cancels out with X2.A
			} `json:",inline"`
		}{},
		want: structFields{flattened: []structField{}},
	}, {
		name: jsontest.Name("Embed/Implicit"),
		in: struct {
			Embed
		}{},
		want: structFields{
			flattened: []structField{
				{id: 0, index: []int{0, 0}, typ: stringType, fieldOptions: fieldOptions{name: "Foo", quotedName: `"Foo"`}},
			},
		},
	}, {
		name: jsontest.Name("Embed/Explicit"),
		in: struct {
			Embed `json:",inline"`
		}{},
		want: structFields{
			flattened: []structField{
				{id: 0, index: []int{0, 0}, typ: stringType, fieldOptions: fieldOptions{name: "Foo", quotedName: `"Foo"`}},
			},
		},
	}, {
		name: jsontest.Name("Recursive"),
		in: struct {
			A string
			Recursive `json:",inline"`
			C string
		}{},
		want: structFields{
			flattened: []structField{
				{id: 0, index: []int{0}, typ: stringType, fieldOptions: fieldOptions{name: "A", quotedName: `"A"`}},
				{id: 2, index: []int{1, 2}, typ: stringType, fieldOptions: fieldOptions{name: "B", quotedName: `"B"`}},
				{id: 1, index: []int{2}, typ: stringType, fieldOptions: fieldOptions{name: "C", quotedName: `"C"`}},
			},
		},
	}, {
		name: jsontest.Name("InlinedFallback/Cancelation"),
		in: struct {
			X1 struct {
				X jsontext.Value `json:",inline"`
			} `json:",inline"`
			X2 struct {
				X map[string]any `json:",unknown"`
			} `json:",inline"`
		}{},
		want: structFields{},
	}, {
		name: jsontest.Name("InlinedFallback/Precedence"),
		in: struct {
			X1 struct {
				X jsontext.Value `json:",inline"`
			} `json:",inline"`
			X2 struct {
				X map[string]any `json:",unknown"`
			} `json:",inline"`
			X map[string]jsontext.Value `json:",unknown"`
		}{},
		want: structFields{
			inlinedFallback: &structField{id: 0, index: []int{2}, typ: T[map[string]jsontext.Value](), fieldOptions: fieldOptions{name: "X", quotedName: `"X"`, unknown: true}},
		},
	}, {
		name: jsontest.Name("InlinedFallback/InvalidImplicit"),
		in: struct {
			MapStringAny
		}{},
		want: structFields{
			flattened: []structField{
				{id: 0, index: []int{0}, typ: reflect.TypeOf(MapStringAny(nil)), fieldOptions: fieldOptions{name: "MapStringAny", quotedName: `"MapStringAny"`}},
			},
		},
		wantErr: errors.New("embedded Go struct field MapStringAny of non-struct type must be explicitly given a JSON name"),
	}, {
		name: jsontest.Name("InvalidUTF8"),
		in: struct {
			Name string `json:"'\\xde\\xad\\xbe\\xef'"`
		}{},
		want: structFields{
			flattened: []structField{
				{id: 0, index: []int{0}, typ: stringType, fieldOptions: fieldOptions{hasName: true, name: "\u07ad\ufffd\ufffd", quotedName: "\"\u07ad\ufffd\ufffd\"", nameNeedEscape: true}},
			},
		},
		wantErr: errors.New(`Go struct field Name has JSON object name "ޭ\xbe\xef" with invalid UTF-8`),
	}, {
		name: jsontest.Name("DuplicateName"),
		in: struct {
			A string `json:"same"`
			B string `json:"same"`
		}{},
		want: structFields{flattened: []structField{}},
		wantErr: errors.New(`Go struct fields A and B conflict over JSON object name "same"`),
	}, {
		name: jsontest.Name("BothInlineAndUnknown"),
		in: struct {
			A struct{} `json:",inline,unknown"`
		}{},
		wantErr: errors.New("Go struct field A cannot have both `inline` and `unknown` specified"),
	}, {
		name: jsontest.Name("InlineWithOptions"),
		in: struct {
			A struct{} `json:",inline,omitempty"`
		}{},
		wantErr: errors.New("Go struct field A cannot have any options other than `inline` or `unknown` specified"),
	}, {
		name: jsontest.Name("UnknownWithOptions"),
		in: struct {
			A map[string]any `json:",inline,omitempty"`
		}{},
		want: structFields{inlinedFallback: &structField{
			index: []int{0},
			typ: reflect.TypeFor[map[string]any](),
			fieldOptions: fieldOptions{
				name: "A",
				quotedName: `"A"`,
				inline: true,
			},
		}},
		wantErr: errors.New("Go struct field A cannot have any options other than `inline` or `unknown` specified"),
	}, {
		name: jsontest.Name("InlineTextMarshaler"),
		in: struct {
			A struct{ encoding.TextMarshaler } `json:",inline"`
		}{},
		want: structFields{flattened: []structField{{
			index: []int{0, 0},
			typ: reflect.TypeFor[encoding.TextMarshaler](),
			fieldOptions: fieldOptions{
				name: "TextMarshaler",
				quotedName: `"TextMarshaler"`,
			},
		}}},
		wantErr: errors.New(`inlined Go struct field A of type struct { encoding.TextMarshaler } must not implement marshal or unmarshal methods`),
	}, {
		name: jsontest.Name("InlineTextAppender"),
		in: struct {
			A struct{ encoding.TextAppender } `json:",inline"`
		}{},
		want: structFields{flattened: []structField{{
			index: []int{0, 0},
			typ: reflect.TypeFor[encoding.TextAppender](),
			fieldOptions: fieldOptions{
				name: "TextAppender",
				quotedName: `"TextAppender"`,
			},
		}}},
		wantErr: errors.New(`inlined Go struct field A of type struct { encoding.TextAppender } must not implement marshal or unmarshal methods`),
	}, {
		name: jsontest.Name("UnknownJSONMarshaler"),
		in: struct {
			A struct{ Marshaler } `json:",unknown"`
		}{},
		wantErr: errors.New(`inlined Go struct field A of type struct { json.Marshaler } must not implement marshal or unmarshal methods`),
	}, {
		name: jsontest.Name("InlineJSONMarshalerTo"),
		in: struct {
			A struct{ MarshalerTo } `json:",inline"`
		}{},
		want: structFields{flattened: []structField{{
			index: []int{0, 0},
			typ: reflect.TypeFor[MarshalerTo](),
			fieldOptions: fieldOptions{
				name: "MarshalerTo",
				quotedName: `"MarshalerTo"`,
			},
		}}},
		wantErr: errors.New(`inlined Go struct field A of type struct { json.MarshalerTo } must not implement marshal or unmarshal methods`),
	}, {
		name: jsontest.Name("UnknownTextUnmarshaler"),
		in: struct {
			A *struct{ encoding.TextUnmarshaler } `json:",unknown"`
		}{},
		wantErr: errors.New(`inlined Go struct field A of type struct { encoding.TextUnmarshaler } must not implement marshal or unmarshal methods`),
	}, {
		name: jsontest.Name("InlineJSONUnmarshaler"),
		in: struct {
			A *struct{ Unmarshaler } `json:",inline"`
		}{},
		want: structFields{flattened: []structField{{
			index: []int{0, 0},
			typ: reflect.TypeFor[Unmarshaler](),
			fieldOptions: fieldOptions{
				name: "Unmarshaler",
				quotedName: `"Unmarshaler"`,
			},
		}}},
		wantErr: errors.New(`inlined Go struct field A of type struct { json.Unmarshaler } must not implement marshal or unmarshal methods`),
	}, {
		name: jsontest.Name("UnknownJSONUnmarshalerFrom"),
		in: struct {
			A struct{ UnmarshalerFrom } `json:",unknown"`
		}{},
		wantErr: errors.New(`inlined Go struct field A of type struct { json.UnmarshalerFrom } must not implement marshal or unmarshal methods`),
	}, {
		name: jsontest.Name("UnknownStruct"),
		in: struct {
			A struct {
				X, Y, Z string
			} `json:",unknown"`
		}{},
		wantErr: errors.New("inlined Go struct field A of type struct { X string; Y string; Z string } with `unknown` tag must be a Go map of string key or a jsontext.Value"),
	}, {
		name: jsontest.Name("InlineUnsupported/MapIntKey"),
		in: struct {
			A map[int]any `json:",unknown"`
		}{},
		wantErr: errors.New(`inlined Go struct field A of type map[int]interface {} must be a Go struct, Go map of string key, or jsontext.Value`),
	}, {
		name: jsontest.Name("InlineUnsupported/MapTextMarshalerStringKey"),
		in: struct {
			A map[nocaseString]any `json:",inline"`
		}{},
		wantErr: errors.New(`inlined map field A of type map[json.nocaseString]interface {} must have a string key that does not implement marshal or unmarshal methods`),
	}, {
		name: jsontest.Name("InlineUnsupported/MapMarshalerStringKey"),
		in: struct {
			A map[stringMarshalEmpty]any `json:",inline"`
		}{},
		wantErr: errors.New(`inlined map field A of type map[json.stringMarshalEmpty]interface {} must have a string key that does not implement marshal or unmarshal methods`),
	}, {
		name: jsontest.Name("InlineUnsupported/DoublePointer"),
		in: struct {
			A **struct{} `json:",inline"`
		}{},
		wantErr: errors.New(`inlined Go struct field A of type *struct {} must be a Go struct, Go map of string key, or jsontext.Value`),
	}, {
		name: jsontest.Name("DuplicateInline"),
		in: struct {
			A map[string]any `json:",inline"`
			B jsontext.Value `json:",inline"`
		}{},
		wantErr: errors.New(`inlined Go struct fields A and B cannot both be a Go map or jsontext.Value`),
	}, {
		name: jsontest.Name("DuplicateEmbedInline"),
		in: struct {
			A MapStringAny `json:",inline"`
			B jsontext.Value `json:",inline"`
		}{},
		wantErr: errors.New(`inlined Go struct fields A and B cannot both be a Go map or jsontext.Value`),
	}}

	for _, tt := range tests {
		t.Run(tt.name.Name, func(t *testing.T) {
			got, err := makeStructFields(reflect.TypeOf(tt.in))

			// Sanity check that pointers are consistent.
			// Every pointer stored in the lookup maps must refer to
			// an element of the flattened slice.
			pointers := make(map[*structField]bool)
			for i := range got.flattened {
				pointers[&got.flattened[i]] = true
			}
			for _, f := range got.byActualName {
				if !pointers[f] {
					t.Errorf("%s: byActualName pointer not in flattened", tt.name.Where)
				}
			}
			for _, fs := range got.byFoldedName {
				for _, f := range fs {
					if !pointers[f] {
						t.Errorf("%s: byFoldedName pointer not in flattened", tt.name.Where)
					}
				}
			}

			// Zero out fields that are incomparable.
			for i := range got.flattened {
				got.flattened[i].fncs = nil
				got.flattened[i].isEmpty = nil
			}
			if got.inlinedFallback != nil {
				got.inlinedFallback.fncs = nil
				got.inlinedFallback.isEmpty = nil
			}

			// Reproduce maps in want.
			// The table only lists flattened fields, so the lookup maps
			// are rebuilt here with pointers into tt.want.flattened.
			tt.want.byActualName = make(map[string]*structField)
			for i := range tt.want.flattened {
				f := &tt.want.flattened[i]
				tt.want.byActualName[f.name] = f
			}
			tt.want.byFoldedName = make(map[string][]*structField)
			for i, f := range tt.want.flattened {
				foldedName := string(foldName([]byte(f.name)))
				tt.want.byFoldedName[foldedName] = append(tt.want.byFoldedName[foldedName], &tt.want.flattened[i])
			}

			// Only compare underlying error to simplify test logic.
			var gotErr error
			if err != nil {
				gotErr = err.Err
			}

			// Call reindex on want as well so that whatever state it
			// derives is present on both sides of the comparison.
			tt.want.reindex()
			if !reflect.DeepEqual(got, tt.want) || !reflect.DeepEqual(gotErr, tt.wantErr) {
				t.Errorf("%s: makeStructFields(%T):\n\tgot (%v, %v)\n\twant (%v, %v)", tt.name.Where, tt.in, got, gotErr, tt.want, tt.wantErr)
			}
		})
	}
}
// TestParseTagOptions exercises parseFieldOptions with a table of
// single-field structs. For each case it checks the parsed fieldOptions,
// whether the field is reported as ignored, and the returned error.
func TestParseTagOptions(t *testing.T) {
	tests := []struct {
		name        jsontest.CaseName
		in          any // must be a struct with a single field
		wantOpts    fieldOptions
		wantIgnored bool
		wantErr     error
	}{{
		name: jsontest.Name("GoName"),
		in: struct {
			FieldName int
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`},
	}, {
		name: jsontest.Name("GoNameWithOptions"),
		in: struct {
			FieldName int `json:",inline"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, inline: true},
	}, {
		name: jsontest.Name("Empty"),
		in: struct {
			V int `json:""`
		}{},
		wantOpts: fieldOptions{name: "V", quotedName: `"V"`},
	}, {
		name: jsontest.Name("Unexported"),
		in: struct {
			v int `json:"Hello"`
		}{},
		wantIgnored: true,
		wantErr:     errors.New("unexported Go struct field v cannot have non-ignored `json:\"Hello\"` tag"),
	}, {
		name: jsontest.Name("UnexportedEmpty"),
		in: struct {
			v int `json:""`
		}{},
		wantIgnored: true,
		wantErr:     errors.New("unexported Go struct field v cannot have non-ignored `json:\"\"` tag"),
	}, {
		name: jsontest.Name("EmbedUnexported"),
		in: struct {
			unexported
		}{},
		wantOpts: fieldOptions{name: "unexported", quotedName: `"unexported"`},
	}, {
		name: jsontest.Name("Ignored"),
		in: struct {
			V int `json:"-"`
		}{},
		wantIgnored: true,
	}, {
		name: jsontest.Name("IgnoredEmbedUnexported"),
		in: struct {
			unexported `json:"-"`
		}{},
		wantIgnored: true,
	}, {
		name: jsontest.Name("DashComma"),
		in: struct {
			V int `json:"-,"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "-", quotedName: `"-"`},
		wantErr:  errors.New("Go struct field V has malformed `json` tag: invalid trailing ',' character"),
	}, {
		name: jsontest.Name("QuotedDashName"),
		in: struct {
			V int `json:"'-'"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "-", quotedName: `"-"`},
	}, {
		name: jsontest.Name("LatinPunctuationName"),
		in: struct {
			V int `json:"$%-/"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "$%-/", quotedName: `"$%-/"`},
	}, {
		name: jsontest.Name("QuotedLatinPunctuationName"),
		in: struct {
			V int `json:"'$%-/'"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "$%-/", quotedName: `"$%-/"`},
	}, {
		name: jsontest.Name("LatinDigitsName"),
		in: struct {
			V int `json:"0123456789"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "0123456789", quotedName: `"0123456789"`},
	}, {
		name: jsontest.Name("QuotedLatinDigitsName"),
		in: struct {
			V int `json:"'0123456789'"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "0123456789", quotedName: `"0123456789"`},
	}, {
		// Covers all 26 uppercase Latin letters.
		name: jsontest.Name("LatinUppercaseName"),
		in: struct {
			V int `json:"ABCDEFGHIJKLMNOPQRSTUVWXYZ"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "ABCDEFGHIJKLMNOPQRSTUVWXYZ", quotedName: `"ABCDEFGHIJKLMNOPQRSTUVWXYZ"`},
	}, {
		name: jsontest.Name("LatinLowercaseName"),
		in: struct {
			V int `json:"abcdefghijklmnopqrstuvwxyz_"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "abcdefghijklmnopqrstuvwxyz_", quotedName: `"abcdefghijklmnopqrstuvwxyz_"`},
	}, {
		name: jsontest.Name("GreekName"),
		in: struct {
			V string `json:"Ελλάδα"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "Ελλάδα", quotedName: `"Ελλάδα"`},
	}, {
		name: jsontest.Name("QuotedGreekName"),
		in: struct {
			V string `json:"'Ελλάδα'"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "Ελλάδα", quotedName: `"Ελλάδα"`},
	}, {
		name: jsontest.Name("ChineseName"),
		in: struct {
			V string `json:"世界"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "世界", quotedName: `"世界"`},
	}, {
		name: jsontest.Name("QuotedChineseName"),
		in: struct {
			V string `json:"'世界'"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "世界", quotedName: `"世界"`},
	}, {
		name: jsontest.Name("PercentSlashName"),
		in: struct {
			V int `json:"text/html%"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "text/html%", quotedName: `"text/html%"`},
	}, {
		name: jsontest.Name("QuotedPercentSlashName"),
		in: struct {
			V int `json:"'text/html%'"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "text/html%", quotedName: `"text/html%"`},
	}, {
		name: jsontest.Name("PunctuationName"),
		in: struct {
			V string `json:"!#$%&()*+-./:;<=>?@[]^_{|}~ "`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "!#$%&()*+-./:;<=>?@[]^_{|}~ ", quotedName: `"!#$%&()*+-./:;<=>?@[]^_{|}~ "`, nameNeedEscape: true},
	}, {
		name: jsontest.Name("QuotedPunctuationName"),
		in: struct {
			V string `json:"'!#$%&()*+-./:;<=>?@[]^_{|}~ '"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "!#$%&()*+-./:;<=>?@[]^_{|}~ ", quotedName: `"!#$%&()*+-./:;<=>?@[]^_{|}~ "`, nameNeedEscape: true},
	}, {
		name: jsontest.Name("EmptyName"),
		in: struct {
			V int `json:"''"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "", quotedName: `""`},
	}, {
		name: jsontest.Name("SpaceName"),
		in: struct {
			V int `json:"' '"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: " ", quotedName: `" "`},
	}, {
		name: jsontest.Name("CommaQuotes"),
		in: struct {
			V int `json:"',\\'\"\\\"'"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: `,'""`, quotedName: `",'\"\""`, nameNeedEscape: true},
	}, {
		name: jsontest.Name("SingleComma"),
		in: struct {
			V int `json:","`
		}{},
		wantOpts: fieldOptions{name: "V", quotedName: `"V"`},
		wantErr:  errors.New("Go struct field V has malformed `json` tag: invalid trailing ',' character"),
	}, {
		name: jsontest.Name("SuperfluousCommas"),
		in: struct {
			V int `json:",,,,\"\",,inline,unknown,,,,"`
		}{},
		wantOpts: fieldOptions{name: "V", quotedName: `"V"`, inline: true, unknown: true},
		wantErr:  errors.New("Go struct field V has malformed `json` tag: invalid character ',' at start of option (expecting Unicode letter or single quote)"),
	}, {
		name: jsontest.Name("CaseAloneOption"),
		in: struct {
			FieldName int `json:",case"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`},
		wantErr:  errors.New("Go struct field FieldName is missing value for `case` tag option; specify `case:ignore` or `case:strict` instead"),
	}, {
		name: jsontest.Name("CaseIgnoreOption"),
		in: struct {
			FieldName int `json:",case:ignore"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, casing: caseIgnore},
	}, {
		name: jsontest.Name("CaseStrictOption"),
		in: struct {
			FieldName int `json:",case:strict"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, casing: caseStrict},
	}, {
		name: jsontest.Name("CaseUnknownOption"),
		in: struct {
			FieldName int `json:",case:unknown"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`},
		wantErr:  errors.New("Go struct field FieldName has unknown `case:unknown` tag value"),
	}, {
		name: jsontest.Name("CaseQuotedOption"),
		in: struct {
			FieldName int `json:",case:'ignore'"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, casing: caseIgnore},
		wantErr:  errors.New("Go struct field FieldName has unnecessarily quoted appearance of `case:'ignore'` tag option; specify `case:ignore` instead"),
	}, {
		name: jsontest.Name("BothCaseOptions"),
		in: struct {
			FieldName int `json:",case:ignore,case:strict"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, casing: caseIgnore | caseStrict},
		wantErr:  errors.New("Go struct field FieldName cannot have both `case:ignore` and `case:strict` tag options"),
	}, {
		name: jsontest.Name("InlineOption"),
		in: struct {
			FieldName int `json:",inline"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, inline: true},
	}, {
		name: jsontest.Name("UnknownOption"),
		in: struct {
			FieldName int `json:",unknown"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, unknown: true},
	}, {
		name: jsontest.Name("OmitZeroOption"),
		in: struct {
			FieldName int `json:",omitzero"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, omitzero: true},
	}, {
		name: jsontest.Name("OmitEmptyOption"),
		in: struct {
			FieldName int `json:",omitempty"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, omitempty: true},
	}, {
		name: jsontest.Name("StringOption"),
		in: struct {
			FieldName int `json:",string"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, string: true},
	}, {
		name: jsontest.Name("FormatOptionEqual"),
		in: struct {
			FieldName int `json:",format=fizzbuzz"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`},
		wantErr:  errors.New("Go struct field FieldName is missing value for `format` tag option"),
	}, {
		name: jsontest.Name("FormatOptionColon"),
		in: struct {
			FieldName int `json:",format:fizzbuzz"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, format: "fizzbuzz"},
	}, {
		name: jsontest.Name("FormatOptionQuoted"),
		in: struct {
			FieldName int `json:",format:'2006-01-02'"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, format: "2006-01-02"},
	}, {
		name: jsontest.Name("FormatOptionInvalid"),
		in: struct {
			FieldName int `json:",format:'2006-01-02"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`},
		wantErr:  errors.New("Go struct field FieldName has malformed value for `format` tag option: single-quoted string not terminated: '2006-01-0..."),
	}, {
		name: jsontest.Name("FormatOptionNotLast"),
		in: struct {
			FieldName int `json:",format:alpha,ordered"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, format: "alpha"},
		wantErr:  errors.New("Go struct field FieldName has `format` tag option that was not specified last"),
	}, {
		name: jsontest.Name("AllOptions"),
		in: struct {
			FieldName int `json:",case:ignore,inline,unknown,omitzero,omitempty,string,format:format"`
		}{},
		wantOpts: fieldOptions{
			name:       "FieldName",
			quotedName: `"FieldName"`,
			casing:     caseIgnore,
			inline:     true,
			unknown:    true,
			omitzero:   true,
			omitempty:  true,
			string:     true,
			format:     "format",
		},
	}, {
		name: jsontest.Name("AllOptionsQuoted"),
		in: struct {
			FieldName int `json:",'case':'ignore','inline','unknown','omitzero','omitempty','string','format':'format'"`
		}{},
		wantOpts: fieldOptions{
			name:       "FieldName",
			quotedName: `"FieldName"`,
			casing:     caseIgnore,
			inline:     true,
			unknown:    true,
			omitzero:   true,
			omitempty:  true,
			string:     true,
			format:     "format",
		},
		wantErr: errors.New("Go struct field FieldName has unnecessarily quoted appearance of `'case'` tag option; specify `case` instead"),
	}, {
		name: jsontest.Name("AllOptionsCaseSensitive"),
		in: struct {
			FieldName int `json:",CASE:IGNORE,INLINE,UNKNOWN,OMITZERO,OMITEMPTY,STRING,FORMAT:FORMAT"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`},
		wantErr:  errors.New("Go struct field FieldName has invalid appearance of `CASE` tag option; specify `case` instead"),
	}, {
		name: jsontest.Name("AllOptionsSpaceSensitive"),
		in: struct {
			FieldName int `json:", case:ignore , inline , unknown , omitzero , omitempty , string , format:format "`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`},
		wantErr:  errors.New("Go struct field FieldName has malformed `json` tag: invalid character ' ' at start of option (expecting Unicode letter or single quote)"),
	}, {
		name: jsontest.Name("UnknownTagOption"),
		in: struct {
			FieldName int `json:",inline,whoknows,string"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, inline: true, string: true},
	}, {
		name: jsontest.Name("MalformedQuotedString/MissingQuote"),
		in: struct {
			FieldName int `json:"'hello,string"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, string: true},
		wantErr:  errors.New("Go struct field FieldName has malformed `json` tag: single-quoted string not terminated: 'hello,str..."),
	}, {
		name: jsontest.Name("MalformedQuotedString/MissingComma"),
		in: struct {
			FieldName int `json:"'hello'inline,string"`
		}{},
		wantOpts: fieldOptions{hasName: true, name: "hello", quotedName: `"hello"`, inline: true, string: true},
		wantErr:  errors.New("Go struct field FieldName has malformed `json` tag: invalid character 'i' before next option (expecting ',')"),
	}, {
		name: jsontest.Name("MalformedQuotedString/InvalidEscape"),
		in: struct {
			FieldName int `json:"'hello\\u####',inline,string"`
		}{},
		wantOpts: fieldOptions{name: "FieldName", quotedName: `"FieldName"`, inline: true, string: true},
		wantErr:  errors.New("Go struct field FieldName has malformed `json` tag: invalid single-quoted string: 'hello\\u####'"),
	}, {
		name: jsontest.Name("MisnamedTag"),
		in: struct {
			V int `jsom:"Misnamed"`
		}{},
		wantOpts: fieldOptions{name: "V", quotedName: `"V"`},
	}}

	for _, tt := range tests {
		t.Run(tt.name.Name, func(t *testing.T) {
			// Each input struct has exactly one field; parse its tag.
			fs := reflect.TypeOf(tt.in).Field(0)
			gotOpts, gotIgnored, gotErr := parseFieldOptions(fs)
			if !reflect.DeepEqual(gotOpts, tt.wantOpts) || gotIgnored != tt.wantIgnored || !reflect.DeepEqual(gotErr, tt.wantErr) {
				t.Errorf("%s: parseFieldOptions(%T) = (\n\t%v,\n\t%v,\n\t%v\n), want (\n\t%v,\n\t%v,\n\t%v\n)", tt.name.Where, tt.in, gotOpts, gotIgnored, gotErr, tt.wantOpts, tt.wantIgnored, tt.wantErr)
			}
		})
	}
}

View File

@ -0,0 +1,58 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"unicode"
"unicode/utf8"
)
// foldName produces a canonical, case-folded form of a JSON name.
// Two names x and y satisfy foldName(x) == foldName(y) in roughly the
// same situations where strings.EqualFold(x, y) holds, except that
// underscores and dashes are dropped before comparing. This lets names
// written in different common conventions match each other.
func foldName(in []byte) []byte {
	// Keep this wrapper small enough to inline ("function outlining"),
	// so the scratch array can live in the caller's frame.
	// See https://blog.filippo.io/efficient-go-apis-with-the-inliner/
	var scratch [32]byte // large enough for most JSON names
	return appendFoldedName(scratch[:0], in)
}
// appendFoldedName appends the folded form of in to out and returns the
// extended slice. ASCII '_' and '-' are dropped, ASCII lowercase letters
// are upper-cased, other ASCII bytes are copied as is, and every
// multi-byte sequence is decoded as a rune and replaced by foldRune of it.
func appendFoldedName(out, in []byte) []byte {
	for len(in) > 0 {
		b := in[0]

		// Multi-byte Unicode: decode one rune and append its folded form.
		if b >= utf8.RuneSelf {
			r, size := utf8.DecodeRune(in)
			out = utf8.AppendRune(out, foldRune(r))
			in = in[size:]
			continue
		}

		// Single-byte ASCII.
		in = in[1:]
		switch {
		case b == '_' || b == '-':
			// Separators are ignored entirely.
		case 'a' <= b && b <= 'z':
			out = append(out, b-('a'-'A'))
		default:
			out = append(out, b)
		}
	}
	return out
}
// foldRune maps every rune of a Unicode simple-fold set to one common
// representative: the smallest rune of that set. It is built on
// unicode.SimpleFold.
//
// Invariant:
//
//	foldRune(x) == foldRune(y) ⇔ strings.EqualFold(string(x), string(y))
func foldRune(r rune) rune {
	// SimpleFold steps to the next larger rune in the fold set and wraps
	// around to the smallest one, so the first non-increasing step lands
	// on the smallest member.
	next := unicode.SimpleFold(r)
	for next > r {
		r, next = next, unicode.SimpleFold(next)
	}
	return next // smallest character in the fold set
}

View File

@ -0,0 +1,127 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"fmt"
"reflect"
"testing"
"unicode"
)
// equalFoldTestdata lists pairs of names together with whether they are
// expected to compare equal after folding (case-insensitively, ignoring
// ASCII underscores and dashes). It is shared by TestEqualFold and used
// as the seed corpus of FuzzEqualFold.
var equalFoldTestdata = []struct {
	in1, in2 string
	want     bool
}{
	{"", "", true},
	{"abc", "abc", true},
	{"ABcd", "ABcd", true},
	{"123abc", "123ABC", true},
	{"_1_2_-_3__--a-_-b-c-", "123ABC", true},
	{"αβδ", "ΑΒΔ", true},
	{"abc", "xyz", false},
	{"abc", "XYZ", false},
	{"abcdefghijk", "abcdefghijX", false},
	{"abcdefghijk", "abcdefghij\u212A", true},
	{"abcdefghijK", "abcdefghij\u212A", true},
	{"abcdefghijkz", "abcdefghij\u212Ay", false},
	{"abcdefghijKz", "abcdefghij\u212Ay", false},
	{"1", "2", false},
	{"utf-8", "US-ASCII", false},
	{"hello, world!", "hello, world!", true},
	{"hello, world!", "Hello, World!", true},
	{"hello, world!", "HELLO, WORLD!", true},
	{"hello, world!", "jello, world!", false},
	{"γειά, κόσμε!", "γειά, κόσμε!", true},
	{"γειά, κόσμε!", "Γειά, Κόσμε!", true},
	{"γειά, κόσμε!", "ΓΕΙΆ, ΚΌΣΜΕ!", true},
	{"γειά, κόσμε!", "ΛΕΙΆ, ΚΌΣΜΕ!", false},
	{"AESKey", "aesKey", true},
	{"γειά, κόσμε!", "Γ\xce_\xb5ιά, Κόσμε!", false},
	{"aeskey", "AESKEY", true},
	{"AESKEY", "aes_key", true},
	{"aes_key", "AES_KEY", true},
	{"AES_KEY", "aes-key", true},
	{"aes-key", "AES-KEY", true},
	{"AES-KEY", "aesKey", true},
	{"aesKey", "AesKey", true},
	{"AesKey", "AESKey", true},
	{"AESKey", "aeskey", true},
	{"DESKey", "aeskey", false},
	{"AES Key", "aeskey", false},
	// The non-ASCII underscore is written as an escape (U+FE4F WAVY LOW LINE)
	// so that it cannot be lost or overlooked in the source text.
	{"aes\uFE4Fkey", "aeskey", false}, // Unicode underscore not handled
	{"aes〰key", "aeskey", false},      // Unicode dash not handled
}
// TestEqualFold runs equalFold over every pair in equalFoldTestdata and
// reports each pair whose result differs from the expected one.
func TestEqualFold(t *testing.T) {
	for _, tc := range equalFoldTestdata {
		if got := equalFold([]byte(tc.in1), []byte(tc.in2)); got != tc.want {
			t.Errorf("equalFold(%q, %q) = %v, want %v", tc.in1, tc.in2, got, tc.want)
		}
	}
}
// equalFold reports whether x and y have the same folded form
// according to foldName.
func equalFold(x, y []byte) bool {
	foldedX, foldedY := foldName(x), foldName(y)
	return string(foldedX) == string(foldedY)
}
func TestFoldRune(t *testing.T) {
if testing.Short() {
t.Skip()
}
var foldSet []rune
for r := range rune(unicode.MaxRune + 1) {
// Derive all runes that are all part of the same fold set.
foldSet = foldSet[:0]
for r0 := r; r != r0 || len(foldSet) == 0; r = unicode.SimpleFold(r) {
foldSet = append(foldSet, r)
}
// Normalized form of each rune in a foldset must be the same and
// also be within the set itself.
var withinSet bool
rr0 := foldRune(foldSet[0])
for _, r := range foldSet {
withinSet = withinSet || rr0 == r
rr := foldRune(r)
if rr0 != rr {
t.Errorf("foldRune(%q) = %q, want %q", r, rr, rr0)
}
}
if !withinSet {
t.Errorf("foldRune(%q) = %q not in fold set %q", foldSet[0], rr0, string(foldSet))
}
}
}
// TestBenchmarkUnmarshalUnknown unmarshals an unknown field into a struct with
// varying number of fields. Since the unknown field does not directly match
// any known field by name, it must fall back on case-insensitive matching.
func TestBenchmarkUnmarshalUnknown(t *testing.T) {
in := []byte(`{"NameUnknown":null}`)
for _, n := range []int{1, 2, 5, 10, 20, 50, 100} {
unmarshal := Unmarshal
var fields []reflect.StructField
for i := range n {
fields = append(fields, reflect.StructField{
Name: fmt.Sprintf("Name%d", i),
Type: T[int](),
Tag: `json:",case:ignore"`,
})
}
out := reflect.New(reflect.StructOf(fields)).Interface()
t.Run(fmt.Sprintf("N%d", n), func(t *testing.T) {
if err := unmarshal(in, out); err != nil {
t.Fatalf("Unmarshal error: %v", err)
}
})
}
}

View File

@ -0,0 +1,39 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"bytes"
"testing"
)
// FuzzEqualFold fuzzes equalFold against a straightforward reference
// implementation: remove ASCII underscores and dashes from both inputs,
// then compare them with bytes.EqualFold. The corpus is seeded with
// equalFoldTestdata.
func FuzzEqualFold(f *testing.F) {
	for _, tt := range equalFoldTestdata {
		f.Add([]byte(tt.in1), []byte(tt.in2))
	}

	// dropSeparators is a bytes.Map callback; returning a negative rune
	// removes the character from the output.
	dropSeparators := func(r rune) rune {
		switch r {
		case '_', '-':
			return -1 // ignore underscores and dashes
		}
		return r
	}
	reference := func(x, y []byte) bool {
		return bytes.EqualFold(bytes.Map(dropSeparators, x), bytes.Map(dropSeparators, y))
	}

	f.Fuzz(func(t *testing.T, s1, s2 []byte) {
		// The optimized and the simplified implementations must agree.
		if got, want := equalFold(s1, s2), reference(s1, s2); got != want {
			t.Errorf("equalFold(%q, %q) = %v, want %v", s1, s2, got, want)
		}
	})
}

View File

@ -0,0 +1,109 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"os"
"os/exec"
"strings"
"testing"
)
// testInline reports whether TestInline should run.
//
// Whether a function is inlinable is dependent on the Go compiler version
// and also relies on the presence of the Go toolchain itself being installed.
// This test is disabled by default and explicitly enabled with an
// environment variable that is specified in our integration tests,
// which have fine control over exactly which Go version is being tested.
//
// The test is enabled when the TEST_INLINE environment variable is set
// to any non-empty value.
var testInline = os.Getenv("TEST_INLINE") != ""
// TestInline checks that a fixed list of functions and methods is reported
// as inlinable by the compiler. For each package it runs
// "go build -gcflags=-m", removes from the expectation set every name that
// appears in a "can inline" diagnostic, and reports whatever remains.
// The test is skipped unless testInline is set.
func TestInline(t *testing.T) {
	if !testInline {
		t.SkipNow()
	}

	// Package path => names that are expected to be inlinable.
	wantInlinable := map[string]map[string]bool{
		".": {
			"hash64":   true,
			"foldName": true, // thin wrapper over appendFoldedName
		},
		"./internal/jsonwire": {
			"ConsumeWhitespace":    true,
			"ConsumeNull":          true,
			"ConsumeFalse":         true,
			"ConsumeTrue":          true,
			"ConsumeSimpleString":  true,
			"ConsumeString":        true, // thin wrapper over consumeStringResumable
			"ConsumeSimpleNumber":  true,
			"ConsumeNumber":        true, // thin wrapper over consumeNumberResumable
			"UnquoteMayCopy":       true, // thin wrapper over unescapeString
			"HasSuffixByte":        true,
			"TrimSuffixByte":       true,
			"TrimSuffixString":     true,
			"TrimSuffixWhitespace": true,
		},
		"./jsontext": {
			"encoderState.NeedFlush":                  true,
			"Decoder.ReadToken":                       true, // thin wrapper over decoderState.ReadToken
			"Decoder.ReadValue":                       true, // thin wrapper over decoderState.ReadValue
			"Encoder.WriteToken":                      true, // thin wrapper over encoderState.WriteToken
			"Encoder.WriteValue":                      true, // thin wrapper over encoderState.WriteValue
			"decodeBuffer.needMore":                   true,
			"stateMachine.appendLiteral":              true,
			"stateMachine.appendNumber":               true,
			"stateMachine.appendString":               true,
			"stateMachine.Depth":                      true,
			"stateMachine.reset":                      true,
			"stateMachine.MayAppendDelim":             true,
			"stateMachine.needDelim":                  true,
			"stateMachine.popArray":                   true,
			"stateMachine.popObject":                  true,
			"stateMachine.pushArray":                  true,
			"stateMachine.pushObject":                 true,
			"stateEntry.Increment":                    true,
			"stateEntry.decrement":                    true,
			"stateEntry.isArray":                      true,
			"stateEntry.isObject":                     true,
			"stateEntry.Length":                       true,
			"stateEntry.needImplicitColon":            true,
			"stateEntry.needImplicitComma":            true,
			"stateEntry.NeedObjectName":               true,
			"stateEntry.needObjectValue":              true,
			"objectNameStack.reset":                   true,
			"objectNameStack.length":                  true,
			"objectNameStack.getUnquoted":             true,
			"objectNameStack.push":                    true,
			"objectNameStack.ReplaceLastQuotedOffset": true,
			"objectNameStack.replaceLastUnquotedName": true,
			"objectNameStack.pop":                     true,
			"objectNameStack.ensureCopiedBuffer":      true,
			"objectNamespace.insertQuoted":            true, // thin wrapper over objectNamespace.insert
			"objectNamespace.InsertUnquoted":          true, // thin wrapper over objectNamespace.insert
			"Token.String":                            true, // thin wrapper over Token.string
		},
	}

	for dir, remaining := range wantInlinable {
		out, err := exec.Command("go", "build", "-gcflags=-m", dir).CombinedOutput()
		if err != nil {
			t.Fatalf("exec.Command error: %v\n\n%s", err, out)
		}

		for _, line := range strings.Split(string(out), "\n") {
			_, name, found := strings.Cut(line, ": can inline ")
			if !found {
				continue
			}
			// Strip receiver decoration so that the reported name has the
			// same "Type.Method" shape as the keys above.
			for _, junk := range []string{"(", "*", ")"} {
				name = strings.ReplaceAll(name, junk, "")
			}
			delete(remaining, name)
		}

		// Anything still listed never showed up as inlinable.
		for name := range remaining {
			t.Errorf("%v is not inlinable, expected it to be", name)
		}
	}
}

View File

@ -0,0 +1,88 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"encoding/binary"
"math/bits"
)
// stringCache is a cache for strings converted from a []byte.
// It is a fixed-size array of 256 slots; makeString selects a slot from
// a hash of the bytes and overwrites whatever string the slot held before.
// The 4KiB size noted below presumably assumes 16-byte string headers
// (i.e. a 64-bit platform).
type stringCache = [256]string // 256*unsafe.Sizeof(string("")) => 4KiB
// makeString converts b to a string, reusing a previously allocated string
// from the cache c when the matching slot already holds equal contents.
// Otherwise a new string is allocated, stored in that slot (replacing the
// previous occupant), and returned. When c is nil, or the length of b is
// outside the cacheable range, b is simply converted without caching.
func makeString(c *stringCache, b []byte) string {
	const (
		minCachedLen = 2   // single byte strings are already interned by the runtime
		maxCachedLen = 256 // large enough for UUIDs, IPv6 addresses, SHA-256 checksums, etc.
	)
	n := len(b)
	if c == nil || n < minCachedLen || n > maxCachedLen {
		return string(b)
	}

	// Hash only a fixed-width prefix and suffix of the input so that the
	// cost of hashing does not grow with the string length.
	var h uint32
	switch {
	case n < 4: // 2 or 3 bytes, given the length check above
		head := binary.LittleEndian.Uint16(b[:2])
		tail := binary.LittleEndian.Uint16(b[n-2:])
		h = hash64(uint32(head), uint32(tail))
	case n < 8:
		head := binary.LittleEndian.Uint32(b[:4])
		tail := binary.LittleEndian.Uint32(b[n-4:])
		h = hash64(head, tail)
	default:
		head := binary.LittleEndian.Uint64(b[:8])
		tail := binary.LittleEndian.Uint64(b[n-8:])
		h = hash64(uint32(head), uint32(head>>32)) ^ hash64(uint32(tail), uint32(tail>>32))
	}

	// Probe the slot selected by the hash; on a miss, replace its content.
	slot := &c[h%uint32(len(c))]
	if *slot == string(b) {
		return *slot
	}
	fresh := string(b)
	*slot = fresh
	return fresh
}
// hash64 mixes two uint32 values into a single uint32 hash.
func hash64(lo, hi uint32) uint32 {
	// With avalanche=true this computes the XXH32 hash of an 8-byte string:
	//	var b [8]byte
	//	binary.LittleEndian.PutUint32(b[:4], lo)
	//	binary.LittleEndian.PutUint32(b[4:], hi)
	//	return xxhash.Sum32(b[:])
	const (
		prime1 = 0x9e3779b1
		prime2 = 0x85ebca77
		prime3 = 0xc2b2ae3d
		prime4 = 0x27d4eb2f
		prime5 = 0x165667b1
	)

	// The final mix (avalanche) step of XXH32 is skipped for performance
	// reasons. Empirical testing shows that the improvement in unbiased
	// distribution does not outweigh the extra computational cost.
	const avalanche = false

	// Seed with the input length (8 bytes), then absorb each word.
	acc := uint32(prime5) + 8
	acc = bits.RotateLeft32(acc+lo*prime3, 17) * prime4
	acc = bits.RotateLeft32(acc+hi*prime3, 17) * prime4
	if avalanche {
		acc ^= acc >> 15
		acc *= prime2
		acc ^= acc >> 13
		acc *= prime3
		acc ^= acc >> 16
	}
	return acc
}

View File

@ -0,0 +1,146 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"bytes"
"fmt"
"io"
"testing"
"encoding/json/internal/jsontest"
"encoding/json/jsontext"
)
// TestIntern verifies that makeString returns a string equal to its input,
// first for every suffix of the alphabet (down to the empty string) and
// then for 1000 distinct generated strings, all through one shared cache.
func TestIntern(t *testing.T) {
	var cache stringCache
	check := func(want string) {
		t.Helper()
		if got := makeString(&cache, []byte(want)); got != want {
			t.Fatalf("make = %v, want %v", got, want)
		}
	}

	const alphabet = "abcdefghijklmnopqrstuvwxyz"
	for start := 0; start <= len(alphabet); start++ {
		check(alphabet[start:])
	}
	for i := 0; i < 1000; i++ {
		check(fmt.Sprintf("test%b", i))
	}
}
// sink is assigned the result of every conversion in BenchmarkIntern,
// presumably so that the compiler cannot discard the benchmarked work.
var sink string
// BenchmarkIntern compares three ways of turning byte slices into strings
// (plain allocation, stringCache, and a Go map) across several datasets.
func BenchmarkIntern(b *testing.B) {
	// datasetStrings returns every JSON string token of the named
	// jsontest dataset that is not in object-name position.
	datasetStrings := func(name string) (out [][]byte) {
		var data []byte
		for _, ts := range jsontest.Data {
			if ts.Name == name {
				data = ts.Data()
			}
		}
		dec := jsontext.NewDecoder(bytes.NewReader(data))
		for {
			// Sample the decoder position before reading the token: inside
			// an object, an even member count means the next token is a name.
			kind, length := dec.StackIndex(dec.StackDepth())
			isObjectName := kind == '{' && length%2 == 0
			tok, err := dec.ReadToken()
			switch {
			case err == io.EOF:
				return out
			case err != nil:
				b.Fatalf("ReadToken error: %v", err)
			}
			if tok.Kind() == '"' && !isObjectName {
				out = append(out, []byte(tok.String()))
			}
		}
	}

	// Best is the best case scenario where every string is the same.
	var best [][]byte
	for range 1000 {
		best = append(best, []byte("hello, world!"))
	}

	// Repeat is a sequence of the same set of names repeated.
	// This commonly occurs when unmarshaling a JSON array of JSON objects,
	// where the set of all names is usually small.
	var repeat [][]byte
	repeatedNames := []string{"first_name", "last_name", "age", "address", "street_address", "city", "state", "postal_code", "phone_numbers", "gender"}
	for range 100 {
		for _, s := range repeatedNames {
			repeat = append(repeat, []byte(s))
		}
	}

	// Worst is the worst case scenario where every string is different
	// resulting in wasted time looking up a string that will never match.
	var worst [][]byte
	for i := range 1000 {
		worst = append(worst, []byte(fmt.Sprintf("%016x", i)))
	}

	tests := []struct {
		label string
		data  [][]byte
	}{
		{"Best", best},
		{"Repeat", repeat},
		// Synthea is all string values encountered in the Synthea FHIR dataset.
		{"Synthea", datasetStrings("SyntheaFhir")},
		// Twitter is all string values encountered in the Twitter dataset.
		{"Twitter", datasetStrings("TwitterStatus")},
		{"Worst", worst},
	}

	for _, tt := range tests {
		b.Run(tt.label, func(b *testing.B) {
			// Alloc simply heap allocates each string.
			// This provides an upper bound on the number of allocations.
			b.Run("Alloc", func(b *testing.B) {
				b.ReportAllocs()
				for range b.N {
					for _, raw := range tt.data {
						sink = string(raw)
					}
				}
			})

			// Cache interns strings using stringCache.
			// We want to optimize for having a faster runtime than Alloc,
			// and also keeping the number of allocations closer to GoMap.
			b.Run("Cache", func(b *testing.B) {
				b.ReportAllocs()
				for range b.N {
					var sc stringCache
					for _, raw := range tt.data {
						sink = makeString(&sc, raw)
					}
				}
			})

			// GoMap interns all strings in a simple Go map.
			// This provides a lower bound on the number of allocations.
			b.Run("GoMap", func(b *testing.B) {
				b.ReportAllocs()
				for range b.N {
					interned := make(map[string]string)
					for _, raw := range tt.data {
						s, ok := interned[string(raw)]
						if !ok {
							s = string(raw)
							interned[s] = s
						}
						sink = s
					}
				}
			})
		})
	}
}

View File

@ -0,0 +1,288 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"fmt"
"encoding/json/internal"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonopts"
)
// Options configure [Marshal], [MarshalWrite], [MarshalEncode],
// [Unmarshal], [UnmarshalRead], and [UnmarshalDecode] with specific features.
// Each function takes in a variadic list of options, where properties
// set in later options override the value of previously set properties.
//
// The Options type is identical to [encoding/json.Options] and
// [encoding/json/jsontext.Options]. Options from the other packages can
// be used interchangeably with functionality in this package.
//
// Options represent either a singular option or a set of options.
// It can be functionally thought of as a Go map of option properties
// (even though the underlying implementation avoids Go maps for performance).
//
// The constructors (e.g., [Deterministic]) return a singular option value:
//
//	opt := Deterministic(true)
//
// which is analogous to creating a single entry map:
//
//	opt := Options{"Deterministic": true}
//
// [JoinOptions] composes multiple options values together:
//
//	out := JoinOptions(opts...)
//
// which is analogous to making a new map and copying the options over:
//
//	out := make(Options)
//	for _, m := range opts {
//		for k, v := range m {
//			out[k] = v
//		}
//	}
//
// [GetOption] looks up the value of an option parameter:
//
//	v, ok := GetOption(opts, Deterministic)
//
// which is analogous to a Go map lookup:
//
//	v, ok := Options["Deterministic"]
//
// There is a single Options type, which is used with both marshal and unmarshal.
// Some options affect both operations, while others only affect one operation:
//
//   - [StringifyNumbers] affects marshaling and unmarshaling
//   - [Deterministic] affects marshaling only
//   - [FormatNilSliceAsNull] affects marshaling only
//   - [FormatNilMapAsNull] affects marshaling only
//   - [OmitZeroStructFields] affects marshaling only
//   - [MatchCaseInsensitiveNames] affects marshaling and unmarshaling
//   - [DiscardUnknownMembers] affects marshaling only
//   - [RejectUnknownMembers] affects unmarshaling only
//   - [WithMarshalers] affects marshaling only
//   - [WithUnmarshalers] affects unmarshaling only
//
// Options that do not affect a particular operation are ignored.
type Options = jsonopts.Options
// JoinOptions merges the given options, in order, into one Options value.
// When the same property is set more than once, the value from the
// later option wins.
func JoinOptions(srcs ...Options) Options {
	joined := new(jsonopts.Struct)
	joined.Join(srcs...)
	return joined
}
// GetOption returns the value stored in opts with the provided setter,
// reporting whether the value is present.
// The setter is the constructor function of the option being queried
// (such as [Deterministic]); it identifies which property to look up.
//
// Example usage:
//
//	v, ok := json.GetOption(opts, json.Deterministic)
//
// Options are most commonly introspected to alter the JSON representation of
// [MarshalerTo.MarshalJSONTo] and [UnmarshalerFrom.UnmarshalJSONFrom] methods, and
// [MarshalToFunc] and [UnmarshalFromFunc] functions.
// In such cases, the presence bit should generally be ignored.
func GetOption[T any](opts Options, setter func(T) Options) (T, bool) {
// The lookup itself is implemented by jsonopts.GetOption.
return jsonopts.GetOption(opts, setter)
}
// DefaultOptionsV2 is the full set of all options that define v2 semantics.
// It is equivalent to all options under [Options], [encoding/json.Options],
// and [encoding/json/jsontext.Options] being set to false or the zero value,
// except for the options related to whitespace formatting.
func DefaultOptionsV2() Options {
// Returns the address of the shared jsonopts.DefaultOptionsV2 value
// rather than a copy of it.
return &jsonopts.DefaultOptionsV2
}
// StringifyNumbers specifies that numeric Go types should be marshaled
// as a JSON string containing the equivalent JSON number value.
// When unmarshaling, numeric Go types are parsed from a JSON string
// containing the JSON number without any surrounding whitespace.
//
// According to RFC 8259, section 6, a JSON implementation may choose to
// limit the representation of a JSON number to an IEEE 754 binary64 value.
// This may cause decoders to lose precision for int64 and uint64 types.
// Quoting JSON numbers as a JSON string preserves the exact precision.
//
// This affects both marshaling and unmarshaling.
func StringifyNumbers(v bool) Options {
	// The flag is OR-ed with 1 for true and with 0 for false.
	if !v {
		return jsonflags.StringifyNumbers | 0
	}
	return jsonflags.StringifyNumbers | 1
}
// Deterministic specifies that the same input value will be serialized
// as the exact same output bytes. Different processes of
// the same program will serialize equal values to the same bytes,
// but different versions of the same program are not guaranteed
// to produce the exact same sequence of bytes.
//
// This only affects marshaling and is ignored when unmarshaling.
func Deterministic(v bool) Options {
	// The flag is OR-ed with 1 for true and with 0 for false.
	if !v {
		return jsonflags.Deterministic | 0
	}
	return jsonflags.Deterministic | 1
}
// FormatNilSliceAsNull specifies that a nil Go slice should marshal as a
// JSON null instead of the default representation as an empty JSON array
// (or an empty JSON string in the case of ~[]byte).
// Slice fields explicitly marked with `format:emitempty` still marshal
// as an empty JSON array.
//
// This only affects marshaling and is ignored when unmarshaling.
func FormatNilSliceAsNull(v bool) Options {
	// The flag is OR-ed with 1 for true and with 0 for false.
	if !v {
		return jsonflags.FormatNilSliceAsNull | 0
	}
	return jsonflags.FormatNilSliceAsNull | 1
}
// FormatNilMapAsNull specifies that a nil Go map should marshal as a
// JSON null instead of the default representation as an empty JSON object.
// Map fields explicitly marked with `format:emitempty` still marshal
// as an empty JSON object.
//
// This only affects marshaling and is ignored when unmarshaling.
func FormatNilMapAsNull(v bool) Options {
	// The flag is OR-ed with 1 for true and with 0 for false.
	if !v {
		return jsonflags.FormatNilMapAsNull | 0
	}
	return jsonflags.FormatNilMapAsNull | 1
}
// OmitZeroStructFields specifies that a Go struct should marshal in such a way
// that all struct fields that are zero are omitted from the marshaled output
// if the value is zero as determined by the "IsZero() bool" method if present,
// otherwise based on whether the field is the zero Go value.
// This is semantically equivalent to specifying the `omitzero` tag option
// on every field in a Go struct.
//
// This only affects marshaling and is ignored when unmarshaling.
func OmitZeroStructFields(v bool) Options {
	// The flag is OR-ed with 1 for true and with 0 for false.
	if !v {
		return jsonflags.OmitZeroStructFields | 0
	}
	return jsonflags.OmitZeroStructFields | 1
}
// MatchCaseInsensitiveNames specifies that JSON object members are matched
// against Go struct fields using a case-insensitive match of the name.
// Go struct fields explicitly marked with `case:strict` or `case:ignore`
// always use case-sensitive (or case-insensitive) name matching,
// regardless of the value of this option.
//
// This affects both marshaling and unmarshaling.
// For marshaling, this option may alter the detection of duplicate names
// (assuming [jsontext.AllowDuplicateNames] is false) from inlined fields
// if it matches one of the declared fields in the Go struct.
func MatchCaseInsensitiveNames(v bool) Options {
	// The flag is OR-ed with 1 for true and with 0 for false.
	if !v {
		return jsonflags.MatchCaseInsensitiveNames | 0
	}
	return jsonflags.MatchCaseInsensitiveNames | 1
}
// DiscardUnknownMembers specifies that marshaling should ignore any
// JSON object members stored in Go struct fields dedicated to storing
// unknown JSON object members.
//
// This only affects marshaling and is ignored when unmarshaling.
func DiscardUnknownMembers(v bool) Options {
	// The flag is OR-ed with 1 for true and with 0 for false.
	if !v {
		return jsonflags.DiscardUnknownMembers | 0
	}
	return jsonflags.DiscardUnknownMembers | 1
}
// RejectUnknownMembers specifies that unknown members should be rejected
// when unmarshaling a JSON object, regardless of whether there is a field
// to store unknown members.
//
// This only affects unmarshaling and is ignored when marshaling.
func RejectUnknownMembers(v bool) Options {
	// The flag is OR-ed with 1 for true and with 0 for false.
	if !v {
		return jsonflags.RejectUnknownMembers | 0
	}
	return jsonflags.RejectUnknownMembers | 1
}
// WithMarshalers specifies a list of type-specific marshalers to use,
// which can be used to override the default marshal behavior for values
// of particular types.
//
// This only affects marshaling and is ignored when unmarshaling.
func WithMarshalers(v *Marshalers) Options {
	// Reinterpret the pointer as the option wrapper type; nothing is copied.
	opt := (*marshalersOption)(v)
	return opt
}
// WithUnmarshalers specifies a list of type-specific unmarshalers to use,
// which can be used to override the default unmarshal behavior for values
// of particular types.
//
// This only affects unmarshaling and is ignored when marshaling.
func WithUnmarshalers(v *Unmarshalers) Options {
	// Reinterpret the pointer as the option wrapper type; nothing is copied.
	opt := (*unmarshalersOption)(v)
	return opt
}
// The marshaler and unmarshaler option types are declared in this package
// rather than in "jsonopts" so that "jsonopts" need not depend on "reflect".

// marshalersOption is Marshalers viewed as an option value.
type marshalersOption Marshalers

// unmarshalersOption is Unmarshalers viewed as an option value.
type unmarshalersOption Unmarshalers

// JSONOptions is an empty marker method on *marshalersOption.
func (*marshalersOption) JSONOptions(internal.NotForPublicUse) {}

// JSONOptions is an empty marker method on *unmarshalersOption.
func (*unmarshalersOption) JSONOptions(internal.NotForPublicUse) {}
// init installs hooks into "jsonopts" so that it can read and merge the
// marshalersOption and unmarshalersOption types declared in this package.
func init() {
	// Lookup hook: report the stored (un)marshalers and whether they were set.
	jsonopts.GetUnknownOption = func(src *jsonopts.Struct, zero jsonopts.Options) (any, bool) {
		switch zero.(type) {
		case *marshalersOption:
			if src.Flags.Has(jsonflags.Marshalers) {
				return src.Marshalers.(*Marshalers), true
			}
			return (*Marshalers)(nil), false
		case *unmarshalersOption:
			if src.Flags.Has(jsonflags.Unmarshalers) {
				return src.Unmarshalers.(*Unmarshalers), true
			}
			return (*Unmarshalers)(nil), false
		}
		panic(fmt.Sprintf("unknown option %T", zero))
	}

	// Merge hook: mark the flag as set and store the supplied value.
	jsonopts.JoinUnknownOption = func(dst *jsonopts.Struct, src jsonopts.Options) {
		switch opt := src.(type) {
		case *marshalersOption:
			dst.Flags.Set(jsonflags.Marshalers | 1)
			dst.Marshalers = (*Marshalers)(opt)
		case *unmarshalersOption:
			dst.Flags.Set(jsonflags.Unmarshalers | 1)
			dst.Unmarshalers = (*Unmarshalers)(opt)
		default:
			panic(fmt.Sprintf("unknown option %T", opt))
		}
	}
}

View File

@ -0,0 +1,483 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
// Large data benchmark.
// The JSON data is a summary of agl's changes in the
// go, webkit, and chromium open source projects.
// We benchmark converting between the JSON form
// and in-memory data structures.
package json
import (
"bytes"
"io"
"strings"
"testing"
"encoding/json/internal/jsontest"
)
// codeResponse is the top-level Go shape of the benchmark JSON document:
// a tree of codeNode values plus a username.
type codeResponse struct {
	Tree *codeNode `json:"tree"`
	Username string `json:"username"`
}
// codeNode is one node of the tree held by codeResponse.
// Each node carries a name, its child nodes, and several numeric fields.
type codeNode struct {
	Name string `json:"name"`
	Kids []*codeNode `json:"kids"`
	CLWeight float64 `json:"cl_weight"`
	Touches int `json:"touches"`
	MinT int64 `json:"min_t"`
	MaxT int64 `json:"max_t"`
	MeanT int64 `json:"mean_t"`
}
// codeJSON holds the raw benchmark document; it is nil until codeInit runs.
var codeJSON []byte

// codeStruct is the decoded form of codeJSON, populated by codeInit.
var codeStruct codeResponse
// codeInit loads the "GolangSource" entry of jsontest.Data into codeJSON,
// decodes it into codeStruct, and panics unless marshaling codeStruct again
// reproduces codeJSON byte for byte.
func codeInit() {
	var data []byte
	for _, entry := range jsontest.Data {
		if entry.Name == "GolangSource" {
			data = entry.Data()
		}
	}
	codeJSON = data
	if err := Unmarshal(codeJSON, &codeStruct); err != nil {
		panic("unmarshal code.json: " + err.Error())
	}
	var err error
	if data, err = Marshal(&codeStruct); err != nil {
		panic("marshal code.json: " + err.Error())
	}
	if !bytes.Equal(data, codeJSON) {
		println("different lengths", len(data), len(codeJSON))
		for i := 0; i < len(data) && i < len(codeJSON); i++ {
			if data[i] != codeJSON[i] {
				println("re-marshal: changed at byte", i)
				// Show up to 10 bytes of context on each side, clamped to the
				// slice bounds so that a mismatch near either end cannot cause
				// an out-of-range panic that hides this diagnostic.
				lo := max(i-10, 0)
				println("orig: ", string(codeJSON[lo:min(i+10, len(codeJSON))]))
				println("new: ", string(data[lo:min(i+10, len(data))]))
				break
			}
		}
		panic("re-marshal code.json: different result")
	}
}
// BenchmarkCodeEncoder measures encoding codeStruct through one Encoder per
// parallel goroutine, each writing to io.Discard.
func BenchmarkCodeEncoder(b *testing.B) {
	b.ReportAllocs()
	if codeJSON == nil {
		// Load the fixture with the timer paused.
		b.StopTimer()
		codeInit()
		b.StartTimer()
	}
	b.RunParallel(func(p *testing.PB) {
		e := NewEncoder(io.Discard)
		for p.Next() {
			err := e.Encode(&codeStruct)
			if err != nil {
				b.Fatalf("Encode error: %v", err)
			}
		}
	})
	b.SetBytes(int64(len(codeJSON)))
}
// BenchmarkCodeEncoderError encodes codeStruct like BenchmarkCodeEncoder and,
// in every iteration, also marshals a self-referential value that must fail.
func BenchmarkCodeEncoderError(b *testing.B) {
	b.ReportAllocs()
	if codeJSON == nil {
		// Load the fixture with the timer paused.
		b.StopTimer()
		codeInit()
		b.StartTimer()
	}

	// Trigger an error in Marshal with cyclic data.
	type Dummy struct {
		Name string
		Next *Dummy
	}
	cyclic := Dummy{Name: "Dummy"}
	cyclic.Next = &cyclic

	b.RunParallel(func(p *testing.PB) {
		e := NewEncoder(io.Discard)
		for p.Next() {
			err := e.Encode(&codeStruct)
			if err != nil {
				b.Fatalf("Encode error: %v", err)
			}
			_, err = Marshal(cyclic)
			if err == nil {
				b.Fatal("Marshal error: got nil, want non-nil")
			}
		}
	})
	b.SetBytes(int64(len(codeJSON)))
}
// BenchmarkCodeMarshal measures Marshal of codeStruct from parallel goroutines.
func BenchmarkCodeMarshal(b *testing.B) {
	b.ReportAllocs()
	if codeJSON == nil {
		// Load the fixture with the timer paused.
		b.StopTimer()
		codeInit()
		b.StartTimer()
	}
	b.RunParallel(func(p *testing.PB) {
		for p.Next() {
			_, err := Marshal(&codeStruct)
			if err != nil {
				b.Fatalf("Marshal error: %v", err)
			}
		}
	})
	b.SetBytes(int64(len(codeJSON)))
}
// BenchmarkCodeMarshalError marshals codeStruct like BenchmarkCodeMarshal and,
// in every iteration, also marshals a self-referential value that must fail.
func BenchmarkCodeMarshalError(b *testing.B) {
	b.ReportAllocs()
	if codeJSON == nil {
		// Load the fixture with the timer paused.
		b.StopTimer()
		codeInit()
		b.StartTimer()
	}

	// Trigger an error in Marshal with cyclic data.
	type Dummy struct {
		Name string
		Next *Dummy
	}
	cyclic := Dummy{Name: "Dummy"}
	cyclic.Next = &cyclic

	b.RunParallel(func(p *testing.PB) {
		for p.Next() {
			_, err := Marshal(&codeStruct)
			if err != nil {
				b.Fatalf("Marshal error: %v", err)
			}
			_, err = Marshal(cyclic)
			if err == nil {
				b.Fatal("Marshal error: got nil, want non-nil")
			}
		}
	})
	b.SetBytes(int64(len(codeJSON)))
}
// benchMarshalBytes returns a benchmark body that marshals a struct whose
// single []byte field holds n bytes of repeated "hello world" text.
func benchMarshalBytes(n int) func(*testing.B) {
	pattern := []byte("hello world")
	payload := bytes.Repeat(pattern, n/len(pattern)+1)[:n]
	// Use a struct pointer, to avoid an allocation when passing it as an
	// interface parameter to Marshal.
	v := &struct {
		Bytes []byte
	}{Bytes: payload}
	return func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_, err := Marshal(v)
			if err != nil {
				b.Fatalf("Marshal error: %v", err)
			}
		}
	}
}
// benchMarshalBytesError is like benchMarshalBytes, but the returned body
// also marshals a self-referential value that must fail on every iteration.
func benchMarshalBytesError(n int) func(*testing.B) {
	pattern := []byte("hello world")
	payload := bytes.Repeat(pattern, n/len(pattern)+1)[:n]
	// Use a struct pointer, to avoid an allocation when passing it as an
	// interface parameter to Marshal.
	v := &struct {
		Bytes []byte
	}{Bytes: payload}

	// Trigger an error in Marshal with cyclic data.
	type Dummy struct {
		Name string
		Next *Dummy
	}
	cyclic := Dummy{Name: "Dummy"}
	cyclic.Next = &cyclic

	return func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_, err := Marshal(v)
			if err != nil {
				b.Fatalf("Marshal error: %v", err)
			}
			_, err = Marshal(cyclic)
			if err == nil {
				b.Fatal("Marshal error: got nil, want non-nil")
			}
		}
	}
}
// BenchmarkMarshalBytes runs benchMarshalBytes as sub-benchmarks for three
// payload sizes.
func BenchmarkMarshalBytes(b *testing.B) {
	b.ReportAllocs()
	sizes := []struct {
		name string
		n    int
	}{
		// 32 fits within encodeState.scratch.
		{"32", 32},
		// 256 doesn't fit in encodeState.scratch, but is small enough to
		// allocate and avoid the slower base64.NewEncoder.
		{"256", 256},
		// 4096 is large enough that we want to avoid allocating for it.
		{"4096", 4096},
	}
	for _, size := range sizes {
		b.Run(size.name, benchMarshalBytes(size.n))
	}
}
// BenchmarkMarshalBytesError runs benchMarshalBytesError as sub-benchmarks
// for three payload sizes.
func BenchmarkMarshalBytesError(b *testing.B) {
	b.ReportAllocs()
	sizes := []struct {
		name string
		n    int
	}{
		// 32 fits within encodeState.scratch.
		{"32", 32},
		// 256 doesn't fit in encodeState.scratch, but is small enough to
		// allocate and avoid the slower base64.NewEncoder.
		{"256", 256},
		// 4096 is large enough that we want to avoid allocating for it.
		{"4096", 4096},
	}
	for _, size := range sizes {
		b.Run(size.name, benchMarshalBytesError(size.n))
	}
}
// BenchmarkMarshalMap measures Marshal of a three-entry map[string]int
// from parallel goroutines.
func BenchmarkMarshalMap(b *testing.B) {
	b.ReportAllocs()
	m := map[string]int{
		"key3": 3,
		"key2": 2,
		"key1": 1,
	}
	b.RunParallel(func(p *testing.PB) {
		for p.Next() {
			_, err := Marshal(m)
			if err != nil {
				b.Fatal("Marshal:", err)
			}
		}
	})
}
// BenchmarkCodeDecoder measures decoding codeJSON through one Decoder per
// parallel goroutine, refilling the Decoder's input buffer before each Decode.
func BenchmarkCodeDecoder(b *testing.B) {
	b.ReportAllocs()
	if codeJSON == nil {
		// Load the fixture with the timer paused.
		b.StopTimer()
		codeInit()
		b.StartTimer()
	}
	b.RunParallel(func(p *testing.PB) {
		var (
			buf bytes.Buffer
			r   codeResponse
		)
		dec := NewDecoder(&buf)
		for p.Next() {
			buf.Write(codeJSON)
			// hide EOF
			buf.WriteByte('\n')
			buf.WriteByte('\n')
			buf.WriteByte('\n')
			err := dec.Decode(&r)
			if err != nil {
				b.Fatalf("Decode error: %v", err)
			}
		}
	})
	b.SetBytes(int64(len(codeJSON)))
}
// BenchmarkUnicodeDecoder measures decoding a JSON string made of one escaped
// UTF-16 surrogate pair, rewinding the reader after every Decode.
func BenchmarkUnicodeDecoder(b *testing.B) {
	b.ReportAllocs()
	input := []byte(`"\uD83D\uDE01"`)
	b.SetBytes(int64(len(input)))
	rd := bytes.NewReader(input)
	dec := NewDecoder(rd)
	var out string
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		err := dec.Decode(&out)
		if err != nil {
			b.Fatalf("Decode error: %v", err)
		}
		// Rewind so the next iteration sees the same bytes again.
		rd.Seek(0, io.SeekStart)
	}
}
// BenchmarkDecoderStream measures decoding many small values from a single
// long-lived Decoder whose input buffer is topped up as it drains.
func BenchmarkDecoderStream(b *testing.B) {
	b.ReportAllocs()
	b.StopTimer()
	var buf bytes.Buffer
	dec := NewDecoder(&buf)
	// Decode one very large string first, while the timer is stopped.
	buf.WriteString(`"` + strings.Repeat("x", 1000000) + `"` + "\n\n\n")
	var x any
	if err := dec.Decode(&x); err != nil {
		b.Fatalf("Decode error: %v", err)
	}
	ones := strings.Repeat(" 1\n", 300000) + "\n\n\n"
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		// Each batch holds 300000 values, so refill once per 300000 iterations.
		if i%300000 == 0 {
			buf.WriteString(ones)
		}
		x = nil
		switch err := dec.Decode(&x); {
		case err != nil:
			b.Fatalf("Decode error: %v", err)
		case x != 1.0:
			// Report the decoded value; the original printed the loop counter.
			b.Fatalf("Decode: got %v want 1.0", x)
		}
	}
}
// BenchmarkCodeUnmarshal measures Unmarshal of codeJSON into a fresh
// codeResponse on every iteration.
func BenchmarkCodeUnmarshal(b *testing.B) {
	b.ReportAllocs()
	if codeJSON == nil {
		// Load the fixture with the timer paused.
		b.StopTimer()
		codeInit()
		b.StartTimer()
	}
	b.RunParallel(func(p *testing.PB) {
		for p.Next() {
			var r codeResponse
			err := Unmarshal(codeJSON, &r)
			if err != nil {
				b.Fatalf("Unmarshal error: %v", err)
			}
		}
	})
	b.SetBytes(int64(len(codeJSON)))
}
// BenchmarkCodeUnmarshalReuse measures Unmarshal of codeJSON into a
// codeResponse that each goroutine reuses across iterations.
func BenchmarkCodeUnmarshalReuse(b *testing.B) {
	b.ReportAllocs()
	if codeJSON == nil {
		// Load the fixture with the timer paused.
		b.StopTimer()
		codeInit()
		b.StartTimer()
	}
	b.RunParallel(func(p *testing.PB) {
		var r codeResponse
		for p.Next() {
			err := Unmarshal(codeJSON, &r)
			if err != nil {
				b.Fatalf("Unmarshal error: %v", err)
			}
		}
	})
	b.SetBytes(int64(len(codeJSON)))
}
// BenchmarkUnmarshalString measures Unmarshal of a short JSON string into a
// Go string.
func BenchmarkUnmarshalString(b *testing.B) {
	b.ReportAllocs()
	input := []byte(`"hello, world"`)
	b.RunParallel(func(p *testing.PB) {
		var s string
		for p.Next() {
			err := Unmarshal(input, &s)
			if err != nil {
				b.Fatalf("Unmarshal error: %v", err)
			}
		}
	})
}
// BenchmarkUnmarshalFloat64 measures Unmarshal of the JSON number 3.14 into
// a float64.
func BenchmarkUnmarshalFloat64(b *testing.B) {
	b.ReportAllocs()
	input := []byte(`3.14`)
	b.RunParallel(func(p *testing.PB) {
		var f float64
		for p.Next() {
			err := Unmarshal(input, &f)
			if err != nil {
				b.Fatalf("Unmarshal error: %v", err)
			}
		}
	})
}
// BenchmarkUnmarshalInt64 measures Unmarshal of the JSON number 3 into an int64.
func BenchmarkUnmarshalInt64(b *testing.B) {
	b.ReportAllocs()
	input := []byte(`3`)
	b.RunParallel(func(p *testing.PB) {
		var x int64
		for p.Next() {
			err := Unmarshal(input, &x)
			if err != nil {
				b.Fatalf("Unmarshal error: %v", err)
			}
		}
	})
}
// BenchmarkUnmarshalMap measures Unmarshal of a three-member JSON object
// into a map[string]string that each goroutine reuses.
func BenchmarkUnmarshalMap(b *testing.B) {
	b.ReportAllocs()
	input := []byte(`{"key1":"value1","key2":"value2","key3":"value3"}`)
	b.RunParallel(func(p *testing.PB) {
		dst := make(map[string]string, 3)
		for p.Next() {
			err := Unmarshal(input, &dst)
			if err != nil {
				b.Fatalf("Unmarshal error: %v", err)
			}
		}
	})
}
// BenchmarkIssue10335 measures Unmarshal of an object whose only member is a
// nested, whitespace-padded empty object into an empty struct.
func BenchmarkIssue10335(b *testing.B) {
	b.ReportAllocs()
	input := []byte(`{"a":{ }}`)
	b.RunParallel(func(p *testing.PB) {
		var dst struct{}
		for p.Next() {
			err := Unmarshal(input, &dst)
			if err != nil {
				b.Fatalf("Unmarshal error: %v", err)
			}
		}
	})
}
// BenchmarkIssue34127 measures Marshal of a struct whose string field carries
// the ",string" tag option.
func BenchmarkIssue34127(b *testing.B) {
	b.ReportAllocs()
	v := struct {
		Bar string `json:"bar,string"`
	}{
		Bar: `foobar`,
	}
	b.RunParallel(func(p *testing.PB) {
		for p.Next() {
			_, err := Marshal(&v)
			if err != nil {
				b.Fatalf("Marshal error: %v", err)
			}
		}
	})
}
// BenchmarkUnmapped measures Unmarshal of an object with several members
// into an empty struct, which has no field for any of them.
func BenchmarkUnmapped(b *testing.B) {
	b.ReportAllocs()
	input := []byte(`{"s": "hello", "y": 2, "o": {"x": 0}, "a": [1, 99, {"x": 1}]}`)
	b.RunParallel(func(p *testing.PB) {
		var dst struct{}
		for p.Next() {
			err := Unmarshal(input, &dst)
			if err != nil {
				b.Fatalf("Unmarshal error: %v", err)
			}
		}
	})
}
// BenchmarkEncodeMarshaler measures encoding a zero-valued struct with an int
// field and a RawMessage field through one Encoder per goroutine.
func BenchmarkEncodeMarshaler(b *testing.B) {
	b.ReportAllocs()
	v := struct {
		A int
		B RawMessage
	}{}
	b.RunParallel(func(p *testing.PB) {
		e := NewEncoder(io.Discard)
		for p.Next() {
			err := e.Encode(&v)
			if err != nil {
				b.Fatalf("Encode error: %v", err)
			}
		}
	})
}
// BenchmarkEncoderEncode measures creating a new Encoder and encoding a small
// two-field struct with it on every iteration.
func BenchmarkEncoderEncode(b *testing.B) {
	b.ReportAllocs()
	type T struct {
		X, Y string
	}
	v := &T{"foo", "bar"}
	b.RunParallel(func(p *testing.PB) {
		for p.Next() {
			e := NewEncoder(io.Discard)
			err := e.Encode(v)
			if err != nil {
				b.Fatalf("Encode error: %v", err)
			}
		}
	})
}

View File

@ -0,0 +1,253 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
// Represents JSON data structure using native Go types: booleans, floats,
// strings, arrays, and maps.
package json
import (
"cmp"
"fmt"
"reflect"
"strconv"
"encoding/json/internal/jsonwire"
"encoding/json/jsontext"
jsonv2 "encoding/json/v2"
)
// Unmarshal parses the JSON-encoded data and stores the result
// in the value pointed to by v. If v is nil or not a pointer,
// Unmarshal returns an [InvalidUnmarshalError].
//
// Unmarshal uses the inverse of the encodings that
// [Marshal] uses, allocating maps, slices, and pointers as necessary,
// with the following additional rules:
//
// To unmarshal JSON into a pointer, Unmarshal first handles the case of
// the JSON being the JSON literal null. In that case, Unmarshal sets
// the pointer to nil. Otherwise, Unmarshal unmarshals the JSON into
// the value pointed at by the pointer. If the pointer is nil, Unmarshal
// allocates a new value for it to point to.
//
// To unmarshal JSON into a value implementing [Unmarshaler],
// Unmarshal calls that value's [Unmarshaler.UnmarshalJSON] method, including
// when the input is a JSON null.
// Otherwise, if the value implements [encoding.TextUnmarshaler]
// and the input is a JSON quoted string, Unmarshal calls
// [encoding.TextUnmarshaler.UnmarshalText] with the unquoted form of the string.
//
// To unmarshal JSON into a struct, Unmarshal matches incoming object
// keys to the keys used by [Marshal] (either the struct field name or its tag),
// preferring an exact match but also accepting a case-insensitive match. By
// default, object keys which don't have a corresponding struct field are
// ignored (see [Decoder.DisallowUnknownFields] for an alternative).
//
// To unmarshal JSON into an interface value,
// Unmarshal stores one of these in the interface value:
//
// - bool, for JSON booleans
// - float64, for JSON numbers
// - string, for JSON strings
// - []any, for JSON arrays
// - map[string]any, for JSON objects
// - nil for JSON null
//
// To unmarshal a JSON array into a slice, Unmarshal resets the slice length
// to zero and then appends each element to the slice.
// As a special case, to unmarshal an empty JSON array into a slice,
// Unmarshal replaces the slice with a new empty slice.
//
// To unmarshal a JSON array into a Go array, Unmarshal decodes
// JSON array elements into corresponding Go array elements.
// If the Go array is smaller than the JSON array,
// the additional JSON array elements are discarded.
// If the JSON array is smaller than the Go array,
// the additional Go array elements are set to zero values.
//
// To unmarshal a JSON object into a map, Unmarshal first establishes a map to
// use. If the map is nil, Unmarshal allocates a new map. Otherwise Unmarshal
// reuses the existing map, keeping existing entries. Unmarshal then stores
// key-value pairs from the JSON object into the map. The map's key type must
// either be any string type, an integer, or implement [encoding.TextUnmarshaler].
//
// If the JSON-encoded data contain a syntax error, Unmarshal returns a [SyntaxError].
//
// If a JSON value is not appropriate for a given target type,
// or if a JSON number overflows the target type, Unmarshal
// skips that field and completes the unmarshaling as best it can.
// If no more serious errors are encountered, Unmarshal returns
// an [UnmarshalTypeError] describing the earliest such error. In any
// case, it's not guaranteed that all the remaining fields following
// the problematic one will be unmarshaled into the target object.
//
// The JSON null value unmarshals into an interface, map, pointer, or slice
// by setting that Go value to nil. Because null is often used in JSON to mean
// “not present,” unmarshaling a JSON null into any other Go type has no effect
// on the value and produces no error.
//
// When unmarshaling quoted strings, invalid UTF-8 or
// invalid UTF-16 surrogate pairs are not treated as an error.
// Instead, they are replaced by the Unicode replacement
// character U+FFFD.
func Unmarshal(data []byte, v any) error {
return jsonv2.Unmarshal(data, v, DefaultOptionsV1())
}
// Unmarshaler is the interface implemented by types
// that can unmarshal a JSON description of themselves.
// The input can be assumed to be a valid encoding of
// a JSON value. UnmarshalJSON must copy the JSON data
// if it wishes to retain the data after returning.
//
// It is declared as an alias of [jsonv2.Unmarshaler],
// so the two names denote the same type.
type Unmarshaler = jsonv2.Unmarshaler
// An UnmarshalTypeError describes a JSON value that was
// not appropriate for a value of a specific Go type.
type UnmarshalTypeError struct {
	Value  string       // description of JSON value - "bool", "array", "number -5"
	Type   reflect.Type // type of Go value it could not be assigned to
	Offset int64        // error occurred after reading Offset bytes
	Struct string       // name of the root type containing the field
	Field  string       // the full path from root node to the value
	Err    error        // may be nil
}

// Error builds the message from whichever of Value, Struct/Field, Type, and
// Err are populated; unset parts are left out.
func (e *UnmarshalTypeError) Error() string {
	msg := []byte("json: cannot unmarshal")
	if e.Value != "" {
		msg = append(msg, " JSON "+e.Value...)
	}
	msg = append(msg, " into"...)
	named := e.Field != ""
	if named {
		msg = append(msg, " "+e.Struct+"."+e.Field...)
	}
	if e.Type != nil {
		// " of" only joins the field path to the type when a path was written.
		if named {
			msg = append(msg, " of"...)
		}
		msg = append(msg, " Go type "+e.Type.String()...)
	}
	if e.Err != nil {
		msg = append(msg, ": "+e.Err.Error()...)
	}
	return string(msg)
}

// Unwrap returns the underlying error, which may be nil.
func (e *UnmarshalTypeError) Unwrap() error {
	return e.Err
}
// An UnmarshalFieldError describes a JSON object key that
// led to an unexported (and therefore unwritable) struct field.
//
// Deprecated: No longer used; kept for compatibility.
type UnmarshalFieldError struct {
	Key   string
	Type  reflect.Type
	Field reflect.StructField
}

// Error names the quoted object key, the unexported field, and the type.
func (e *UnmarshalFieldError) Error() string {
	quotedKey := strconv.Quote(e.Key)
	fieldName := e.Field.Name
	typeName := e.Type.String()
	return "json: cannot unmarshal object key " + quotedKey +
		" into unexported field " + fieldName +
		" of type " + typeName
}
// An InvalidUnmarshalError describes an invalid argument passed to [Unmarshal].
// (The argument to [Unmarshal] must be a non-nil pointer.)
type InvalidUnmarshalError struct {
	Type reflect.Type
}

// Error distinguishes three cases: an untyped nil argument, a non-pointer
// argument, and a nil pointer of some type.
func (e *InvalidUnmarshalError) Error() string {
	switch {
	case e.Type == nil:
		return "json: Unmarshal(nil)"
	case e.Type.Kind() != reflect.Pointer:
		return "json: Unmarshal(non-pointer " + e.Type.String() + ")"
	default:
		return "json: Unmarshal(nil " + e.Type.String() + ")"
	}
}
// A Number represents a JSON number literal.
type Number string

// String returns the number's literal text unchanged.
func (n Number) String() string {
	return string(n)
}

// Float64 parses the literal text as a 64-bit floating-point number.
func (n Number) Float64() (float64, error) {
	f, err := strconv.ParseFloat(n.String(), 64)
	return f, err
}

// Int64 parses the literal text as a base-10 64-bit signed integer.
func (n Number) Int64() (int64, error) {
	i, err := strconv.ParseInt(n.String(), 10, 64)
	return i, err
}
// numberType is the reflect.Type of Number; UnmarshalJSONFrom reports it as
// the GoType of the semantic errors it returns.
var numberType = reflect.TypeFor[Number]()
// MarshalJSONTo implements [jsonv2.MarshalerTo].
//
// An empty Number is written as 0. The number is wrapped in a JSON string
// when the StringifyNumbers option is set or when the encoder is positioned
// at an object name. Text that is not a valid JSON number yields an error.
func (n Number) MarshalJSONTo(enc *jsontext.Encoder) error {
	opts := enc.Options()
	stringify, _ := jsonv2.GetOption(opts, jsonv2.StringifyNumbers)
	if kind, length := enc.StackIndex(enc.StackDepth()); kind == '{' && length%2 == 0 {
		stringify = true // expecting a JSON object name
	}
	n = cmp.Or(n, "0")

	// buf is the complete value to write; digits is the part of it that
	// must be a valid JSON number.
	buf := enc.UnusedBuffer()
	var digits []byte
	if stringify {
		buf = append(buf, '"')
		buf = append(buf, n...)
		buf = append(buf, '"')
		digits = buf[len(`"`) : len(buf)-len(`"`)]
	} else {
		buf = append(buf, n...)
		digits = buf
	}

	consumed, err := jsonwire.ConsumeNumber(digits)
	if consumed != len(digits) || err != nil {
		return fmt.Errorf("cannot parse %q as JSON number: %w", buf, strconv.ErrSyntax)
	}
	return enc.WriteValue(buf)
}
// UnmarshalJSONFrom implements [jsonv2.UnmarshalerFrom].
//
// It accepts a JSON null, a JSON string whose content is a valid JSON number,
// or a bare JSON number. A bare number is rejected when the StringifyNumbers
// option is set or when the decoder is positioned at an object name.
// Anything else is reported as a [jsonv2.SemanticError].
func (n *Number) UnmarshalJSONFrom(dec *jsontext.Decoder) error {
	opts := dec.Options()
	stringify, _ := jsonv2.GetOption(opts, jsonv2.StringifyNumbers)
	if kind, length := dec.StackIndex(dec.StackDepth()); kind == '{' && length%2 == 0 {
		stringify = true // expecting a JSON object name
	}

	raw, err := dec.ReadValue()
	if err != nil {
		return err
	}

	kind := raw.Kind()
	switch kind {
	case 'n':
		// null clears the Number unless legacy merge semantics are enabled.
		legacy, _ := jsonv2.GetOption(opts, MergeWithLegacySemantics)
		if !legacy {
			*n = ""
		}
		return nil
	case '"':
		isVerbatim := jsonwire.ConsumeSimpleString(raw) == len(raw)
		unquoted := jsonwire.UnquoteMayCopy(raw, isVerbatim)
		consumed, err := jsonwire.ConsumeNumber(unquoted)
		if consumed != len(unquoted) || err != nil {
			return &jsonv2.SemanticError{JSONKind: raw.Kind(), JSONValue: raw.Clone(), GoType: numberType, Err: strconv.ErrSyntax}
		}
		*n = Number(unquoted)
		return nil
	case '0':
		if !stringify {
			*n = Number(raw)
			return nil
		}
	}
	return &jsonv2.SemanticError{JSONKind: kind, GoType: numberType}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,240 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
// Package json implements encoding and decoding of JSON as defined in
// RFC 7159. The mapping between JSON and Go values is described
// in the documentation for the Marshal and Unmarshal functions.
//
// See "JSON and Go" for an introduction to this package:
// https://golang.org/doc/articles/json_and_go.html
package json
import (
"reflect"
"strconv"
jsonv2 "encoding/json/v2"
)
// Marshal returns the JSON encoding of v.
//
// Marshal traverses the value v recursively.
// If an encountered value implements [Marshaler]
// and is not a nil pointer, Marshal calls [Marshaler.MarshalJSON]
// to produce JSON. If no [Marshaler.MarshalJSON] method is present but the
// value implements [encoding.TextMarshaler] instead, Marshal calls
// [encoding.TextMarshaler.MarshalText] and encodes the result as a JSON string.
// The nil pointer exception is not strictly necessary
// but mimics a similar, necessary exception in the behavior of
// [Unmarshaler.UnmarshalJSON].
//
// Otherwise, Marshal uses the following type-dependent default encodings:
//
// Boolean values encode as JSON booleans.
//
// Floating point, integer, and [Number] values encode as JSON numbers.
// NaN and +/-Inf values will return an [UnsupportedValueError].
//
// String values encode as JSON strings coerced to valid UTF-8,
// replacing invalid bytes with the Unicode replacement rune.
// So that the JSON will be safe to embed inside HTML <script> tags,
// the string is encoded using [HTMLEscape],
// which replaces "<", ">", "&", U+2028, and U+2029
// with "\u003c", "\u003e", "\u0026", "\u2028", and "\u2029".
// This replacement can be disabled when using an [Encoder],
// by calling [Encoder.SetEscapeHTML](false).
//
// Array and slice values encode as JSON arrays, except that
// []byte encodes as a base64-encoded string, and a nil slice
// encodes as the null JSON value.
//
// Struct values encode as JSON objects.
// Each exported struct field becomes a member of the object, using the
// field name as the object key, unless the field is omitted for one of the
// reasons given below.
//
// The encoding of each struct field can be customized by the format string
// stored under the "json" key in the struct field's tag.
// The format string gives the name of the field, possibly followed by a
// comma-separated list of options. The name may be empty in order to
// specify options without overriding the default field name.
//
// The "omitempty" option specifies that the field should be omitted
// from the encoding if the field has an empty value, defined as
// false, 0, a nil pointer, a nil interface value, and any array,
// slice, map, or string of length zero.
//
// As a special case, if the field tag is "-", the field is always omitted.
// Note that a field with name "-" can still be generated using the tag "-,".
//
// Examples of struct field tags and their meanings:
//
// // Field appears in JSON as key "myName".
// Field int `json:"myName"`
//
// // Field appears in JSON as key "myName" and
// // the field is omitted from the object if its value is empty,
// // as defined above.
// Field int `json:"myName,omitempty"`
//
// // Field appears in JSON as key "Field" (the default), but
// // the field is skipped if empty.
// // Note the leading comma.
// Field int `json:",omitempty"`
//
// // Field is ignored by this package.
// Field int `json:"-"`
//
// // Field appears in JSON as key "-".
// Field int `json:"-,"`
//
// The "omitzero" option specifies that the field should be omitted
// from the encoding if the field has a zero value, according to the following rules:
//
// 1) If the field type has an "IsZero() bool" method, that will be used to
// determine whether the value is zero.
//
// 2) Otherwise, the value is zero if it is the zero value for its type.
//
// If both "omitempty" and "omitzero" are specified, the field will be omitted
// if the value is either empty or zero (or both).
//
// The "string" option signals that a field is stored as JSON inside a
// JSON-encoded string. It applies only to fields of string, floating point,
// integer, or boolean types. This extra level of encoding is sometimes used
// when communicating with JavaScript programs:
//
// Int64String int64 `json:",string"`
//
// The key name will be used if it's a non-empty string consisting of
// only Unicode letters, digits, and ASCII punctuation except quotation
// marks, backslash, and comma.
//
// Embedded struct fields are usually marshaled as if their inner exported fields
// were fields in the outer struct, subject to the usual Go visibility rules amended
// as described in the next paragraph.
// An anonymous struct field with a name given in its JSON tag is treated as
// having that name, rather than being anonymous.
// An anonymous struct field of interface type is treated the same as having
// that type as its name, rather than being anonymous.
//
// The Go visibility rules for struct fields are amended for JSON when
// deciding which field to marshal or unmarshal. If there are
// multiple fields at the same level, and that level is the least
// nested (and would therefore be the nesting level selected by the
// usual Go rules), the following extra rules apply:
//
// 1) Of those fields, if any are JSON-tagged, only tagged fields are considered,
// even if there are multiple untagged fields that would otherwise conflict.
//
// 2) If there is exactly one field (tagged or not according to the first rule), that is selected.
//
// 3) Otherwise there are multiple fields, and all are ignored; no error occurs.
//
// Handling of anonymous struct fields is new in Go 1.1.
// Prior to Go 1.1, anonymous struct fields were ignored. To force ignoring of
// an anonymous struct field in both current and earlier versions, give the field
// a JSON tag of "-".
//
// Map values encode as JSON objects. The map's key type must either be a
// string, an integer type, or implement [encoding.TextMarshaler]. The map keys
// are sorted and used as JSON object keys by applying the following rules,
// subject to the UTF-8 coercion described for string values above:
// - keys of any string type are used directly
// - keys that implement [encoding.TextMarshaler] are marshaled
// - integer keys are converted to strings
//
// Pointer values encode as the value pointed to.
// A nil pointer encodes as the null JSON value.
//
// Interface values encode as the value contained in the interface.
// A nil interface value encodes as the null JSON value.
//
// Channel, complex, and function values cannot be encoded in JSON.
// Attempting to encode such a value causes Marshal to return
// an [UnsupportedTypeError].
//
// JSON cannot represent cyclic data structures and Marshal does not
// handle them. Passing cyclic structures to Marshal will result in
// an error.
func Marshal(v any) ([]byte, error) {
return jsonv2.Marshal(v, DefaultOptionsV1())
}
// MarshalIndent is like [Marshal] but applies [Indent] to format the output.
// Each JSON element in the output will begin on a new line beginning with prefix
// followed by one or more copies of indent according to the indentation nesting.
func MarshalIndent(v any, prefix, indent string) ([]byte, error) {
	// Marshal first, then indent the result into a new buffer.
	compact, err := Marshal(v)
	if err != nil {
		return nil, err
	}
	indented, err := appendIndent(nil, compact, prefix, indent)
	if err != nil {
		return nil, err
	}
	return indented, nil
}
// Marshaler is the interface implemented by types that
// can marshal themselves into valid JSON.
//
// It is declared as an alias of [jsonv2.Marshaler],
// so the two names denote the same type.
type Marshaler = jsonv2.Marshaler
// An UnsupportedTypeError is the error [Marshal] returns when asked
// to encode a value whose type cannot be represented.
type UnsupportedTypeError struct {
	Type reflect.Type // the type that could not be encoded
}

// Error reports the offending type by name.
func (e *UnsupportedTypeError) Error() string {
	typeName := e.Type.String()
	return "json: unsupported type: " + typeName
}
// An UnsupportedValueError is the error [Marshal] returns when asked
// to encode a value that cannot be represented.
type UnsupportedValueError struct {
	Value reflect.Value // NOTE(review): not set by transformMarshalError in this build; confirm before relying on it
	Str   string        // description of the value, used by Error
}

// Error reports the description stored in Str.
func (e *UnsupportedValueError) Error() string {
	description := e.Str
	return "json: unsupported value: " + description
}
// Before Go 1.2, an InvalidUTF8Error was returned by [Marshal] when
// attempting to encode a string value with invalid UTF-8 sequences.
// As of Go 1.2, [Marshal] instead coerces the string to valid UTF-8 by
// replacing invalid bytes with the Unicode replacement rune U+FFFD.
//
// Deprecated: No longer used; kept for compatibility.
type InvalidUTF8Error struct {
	S string // the whole string value that caused the error
}

// Error reports the offending string in Go-quoted form.
func (e *InvalidUTF8Error) Error() string {
	msg := []byte("json: invalid UTF-8 in string: ")
	msg = strconv.AppendQuote(msg, e.S)
	return string(msg)
}
// A MarshalerError wraps an error produced by a call to a
// [Marshaler.MarshalJSON] or [encoding.TextMarshaler.MarshalText] method.
type MarshalerError struct {
	Type       reflect.Type // type whose marshal method failed
	Err        error        // error returned by that method
	sourceFunc string       // name of the failing method; empty means "MarshalJSON"
}

// Error names the failing method, the type it was called for,
// and the underlying error.
func (e *MarshalerError) Error() string {
	method := "MarshalJSON"
	if e.sourceFunc != "" {
		method = e.sourceFunc
	}
	return "json: error calling " + method + " for type " + e.Type.String() + ": " + e.Err.Error()
}

// Unwrap returns the underlying error.
func (e *MarshalerError) Unwrap() error {
	return e.Err
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,76 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json_test
import (
"fmt"
"log"
"strings"
"encoding/json"
)
// Animal is an enumeration that is represented in JSON by its
// lower-case name rather than by its integer value.
type Animal int

const (
	Unknown Animal = iota
	Gopher
	Zebra
)

// UnmarshalJSON decodes a JSON string and maps it, ignoring case, to the
// matching Animal. Names that are not recognised become Unknown.
// If b is not a JSON string the decoding error is returned and
// the receiver is left untouched.
func (a *Animal) UnmarshalJSON(b []byte) error {
	var name string
	if err := json.Unmarshal(b, &name); err != nil {
		return err
	}
	byName := map[string]Animal{
		"gopher": Gopher,
		"zebra":  Zebra,
	}
	species, known := byName[strings.ToLower(name)]
	if !known {
		species = Unknown
	}
	*a = species
	return nil
}

// MarshalJSON encodes the Animal as a JSON string holding its lower-case
// name; any value other than Gopher or Zebra is encoded as "unknown".
func (a Animal) MarshalJSON() ([]byte, error) {
	name := "unknown"
	if a == Gopher {
		name = "gopher"
	} else if a == Zebra {
		name = "zebra"
	}
	return json.Marshal(name)
}
func Example_customMarshalJSON() {
	const blob = `["gopher","armadillo","zebra","unknown","gopher","bee","gopher","zebra"]`

	var zoo []Animal
	err := json.Unmarshal([]byte(blob), &zoo)
	if err != nil {
		log.Fatal(err)
	}

	// Tally how many times each Animal value was decoded.
	census := map[Animal]int{}
	for i := range zoo {
		census[zoo[i]]++
	}

	fmt.Printf("Zoo Census:\n* Gophers: %d\n* Zebras: %d\n* Unknown: %d\n",
		census[Gopher], census[Zebra], census[Unknown])

	// Output:
	// Zoo Census:
	// * Gophers: 3
	// * Zebras: 2
	// * Unknown: 3
}

View File

@ -0,0 +1,313 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json_test
import (
"bytes"
"fmt"
"io"
"log"
"os"
"strings"
"encoding/json"
)
func ExampleMarshal() {
	type ColorGroup struct {
		ID     int
		Name   string
		Colors []string
	}

	// Encode a struct value; exported fields become object members
	// named after the fields.
	encoded, err := json.Marshal(ColorGroup{
		ID:     1,
		Name:   "Reds",
		Colors: []string{"Crimson", "Red", "Ruby", "Maroon"},
	})
	if err != nil {
		fmt.Println("error:", err)
	}
	os.Stdout.Write(encoded)

	// Output:
	// {"ID":1,"Name":"Reds","Colors":["Crimson","Red","Ruby","Maroon"]}
}
func ExampleUnmarshal() {
	type Animal struct {
		Name  string
		Order string
	}

	jsonBlob := []byte(`[
{"Name": "Platypus", "Order": "Monotremata"},
{"Name": "Quoll", "Order": "Dasyuromorphia"}
]`)

	// Decode the JSON array into a slice of structs.
	var animals []Animal
	if err := json.Unmarshal(jsonBlob, &animals); err != nil {
		fmt.Println("error:", err)
	}
	fmt.Printf("%+v", animals)

	// Output:
	// [{Name:Platypus Order:Monotremata} {Name:Quoll Order:Dasyuromorphia}]
}
// This example uses a Decoder to decode a stream of distinct JSON values.
func ExampleDecoder() {
	const jsonStream = `
{"Name": "Ed", "Text": "Knock knock."}
{"Name": "Sam", "Text": "Who's there?"}
{"Name": "Ed", "Text": "Go fmt."}
{"Name": "Sam", "Text": "Go fmt who?"}
{"Name": "Ed", "Text": "Go fmt yourself!"}
`
	type Message struct {
		Name, Text string
	}

	dec := json.NewDecoder(strings.NewReader(jsonStream))
	for {
		var msg Message
		err := dec.Decode(&msg)
		if err == io.EOF {
			// The stream is exhausted.
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf("%s: %s\n", msg.Name, msg.Text)
	}

	// Output:
	// Ed: Knock knock.
	// Sam: Who's there?
	// Ed: Go fmt.
	// Sam: Go fmt who?
	// Ed: Go fmt yourself!
}
// This example uses a Decoder to read a JSON value one token at a time.
func ExampleDecoder_Token() {
	const jsonStream = `
{"Message": "Hello", "Array": [1, 2, 3], "Null": null, "Number": 1.234}
`
	dec := json.NewDecoder(strings.NewReader(jsonStream))
	for {
		tok, err := dec.Token()
		if err != nil {
			if err == io.EOF {
				break
			}
			log.Fatal(err)
		}

		// Print the token's Go type and value, and note whether the
		// enclosing array or object has further elements.
		fmt.Printf("%T: %v", tok, tok)
		if dec.More() {
			fmt.Printf(" (more)")
		}
		fmt.Printf("\n")
	}

	// Output:
	// json.Delim: { (more)
	// string: Message (more)
	// string: Hello (more)
	// string: Array (more)
	// json.Delim: [ (more)
	// float64: 1 (more)
	// float64: 2 (more)
	// float64: 3
	// json.Delim: ] (more)
	// string: Null (more)
	// <nil>: <nil> (more)
	// string: Number (more)
	// float64: 1.234
	// json.Delim: }
}
// This example uses a Decoder to decode a streaming array of JSON objects.
func ExampleDecoder_Decode_stream() {
	const jsonStream = `
[
{"Name": "Ed", "Text": "Knock knock."},
{"Name": "Sam", "Text": "Who's there?"},
{"Name": "Ed", "Text": "Go fmt."},
{"Name": "Sam", "Text": "Go fmt who?"},
{"Name": "Ed", "Text": "Go fmt yourself!"}
]
`
	type Message struct {
		Name, Text string
	}

	dec := json.NewDecoder(strings.NewReader(jsonStream))

	// Consume the array's opening bracket.
	opening, err := dec.Token()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%T: %v\n", opening, opening)

	// Decode one Message per element for as long as the array has more.
	for dec.More() {
		var msg Message
		if err := dec.Decode(&msg); err != nil {
			log.Fatal(err)
		}
		fmt.Printf("%v: %v\n", msg.Name, msg.Text)
	}

	// Consume the array's closing bracket.
	closing, err := dec.Token()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%T: %v\n", closing, closing)

	// Output:
	// json.Delim: [
	// Ed: Knock knock.
	// Sam: Who's there?
	// Ed: Go fmt.
	// Sam: Go fmt who?
	// Ed: Go fmt yourself!
	// json.Delim: ]
}
// This example uses RawMessage to delay parsing part of a JSON message.
func ExampleRawMessage_unmarshal() {
	type Color struct {
		Space string
		Point json.RawMessage // delay parsing until we know the color space
	}
	type RGB struct {
		R, G, B uint8
	}
	type YCbCr struct {
		Y      uint8
		Cb, Cr int8
	}

	input := []byte(`[
{"Space": "YCbCr", "Point": {"Y": 255, "Cb": 0, "Cr": -10}},
{"Space": "RGB", "Point": {"R": 98, "G": 218, "B": 255}}
]`)

	// First pass: decode the envelope, keeping each Point as raw JSON.
	var colors []Color
	if err := json.Unmarshal(input, &colors); err != nil {
		log.Fatalln("error:", err)
	}

	// Second pass: pick the destination type from Space and decode Point.
	for _, color := range colors {
		var dst any
		switch color.Space {
		case "RGB":
			dst = new(RGB)
		case "YCbCr":
			dst = new(YCbCr)
		}
		if err := json.Unmarshal(color.Point, dst); err != nil {
			log.Fatalln("error:", err)
		}
		fmt.Println(color.Space, dst)
	}

	// Output:
	// YCbCr &{255 0 -10}
	// RGB &{98 218 255}
}
// This example uses RawMessage to use a precomputed JSON during marshal.
func ExampleRawMessage_marshal() {
	header := json.RawMessage(`{"precomputed": true}`)

	envelope := struct {
		Header *json.RawMessage `json:"header"`
		Body   string           `json:"body"`
	}{
		Header: &header,
		Body:   "Hello Gophers!",
	}

	out, err := json.MarshalIndent(&envelope, "", "\t")
	if err != nil {
		fmt.Println("error:", err)
	}
	os.Stdout.Write(out)

	// Output:
	// {
	// "header": {
	// "precomputed": true
	// },
	// "body": "Hello Gophers!"
	// }
}
func ExampleIndent() {
	type Road struct {
		Name   string
		Number int
	}

	compact, err := json.Marshal([]Road{
		{"Diamond Fork", 29},
		{"Sheep Creek", 51},
	})
	if err != nil {
		log.Fatal(err)
	}

	// Re-format the compact encoding with "=" as the line prefix
	// and a tab per nesting level.
	var indented bytes.Buffer
	json.Indent(&indented, compact, "=", "\t")
	indented.WriteTo(os.Stdout)

	// Output:
	// [
	// = {
	// = "Name": "Diamond Fork",
	// = "Number": 29
	// = },
	// = {
	// = "Name": "Sheep Creek",
	// = "Number": 51
	// = }
	// =]
}
func ExampleMarshalIndent() {
	// The literal strings make it visible where the prefix and
	// the indent end up in the output.
	out, err := json.MarshalIndent(map[string]int{"a": 1, "b": 2}, "<prefix>", "<indent>")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(out))

	// Output:
	// {
	// <prefix><indent>"a": 1,
	// <prefix><indent>"b": 2
	// <prefix>}
}
func ExampleValid() {
	const (
		goodJSON = `{"example": 1}`
		badJSON  = `{"example":2:]}}`
	)

	goodOK := json.Valid([]byte(goodJSON))
	badOK := json.Valid([]byte(badJSON))
	fmt.Println(goodOK, badOK)

	// Output:
	// true false
}
func ExampleHTMLEscape() {
	src := []byte(`{"Name":"<b>HTML content</b>"}`)

	// The angle brackets inside the string come out as \u003c and \u003e.
	escaped := new(bytes.Buffer)
	json.HTMLEscape(escaped, src)
	escaped.WriteTo(os.Stdout)

	// Output:
	//{"Name":"\u003cb\u003eHTML content\u003c/b\u003e"}
}

View File

@ -0,0 +1,70 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json_test
import (
"fmt"
"log"
"strings"
"encoding/json"
)
// Size is an enumeration that is represented in text form by its
// lower-case name rather than by its integer value.
type Size int

const (
	Unrecognized Size = iota
	Small
	Large
)

// UnmarshalText maps text, ignoring case, to the matching Size.
// Text that is not recognised becomes Unrecognized; no error is ever returned.
func (s *Size) UnmarshalText(text []byte) error {
	name := strings.ToLower(string(text))
	if name == "small" {
		*s = Small
	} else if name == "large" {
		*s = Large
	} else {
		*s = Unrecognized
	}
	return nil
}

// MarshalText returns the lower-case name of the Size; any value other
// than Small or Large is rendered as "unrecognized".
func (s Size) MarshalText() ([]byte, error) {
	label := "unrecognized"
	if s == Small {
		label = "small"
	} else if s == Large {
		label = "large"
	}
	return []byte(label), nil
}
func Example_textMarshalJSON() {
	const blob = `["small","regular","large","unrecognized","small","normal","small","large"]`

	var inventory []Size
	err := json.Unmarshal([]byte(blob), &inventory)
	if err != nil {
		log.Fatal(err)
	}

	// Tally how many times each Size value was decoded.
	counts := map[Size]int{}
	for i := range inventory {
		counts[inventory[i]]++
	}

	fmt.Printf("Inventory Counts:\n* Small: %d\n* Large: %d\n* Unrecognized: %d\n",
		counts[Small], counts[Large], counts[Unrecognized])

	// Output:
	// Inventory Counts:
	// * Small: 3
	// * Large: 2
	// * Unrecognized: 3
}

View File

@ -0,0 +1,85 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"bytes"
"io"
"testing"
)
// FuzzUnmarshalJSON checks that any input that Unmarshal accepts for a given
// destination type can be marshaled again, and that the marshaled form can
// be unmarshaled back into the same destination.
func FuzzUnmarshalJSON(f *testing.F) {
	// Historical seed. It is deliberately kept as is, although it is not
	// valid JSON (unquoted object name 5, stray quote after 3e-9).
	f.Add([]byte(`{
"object": {
"slice": [
1,
2.0,
"3",
[4],
{5: {}}
]
},
"slice": [[]],
"string": ":)",
"int": 1e5,
"float": 3e-9"
}`))
	// Valid variant of the seed above, so that the corpus also starts
	// from an input that reaches the round-trip below.
	f.Add([]byte(`{"object":{"slice":[1,2.0,"3",[4],{"5":{}}]},"slice":[[]],"string":":)","int":1e5,"float":3e-9}`))

	f.Fuzz(func(t *testing.T, b []byte) {
		for _, typ := range []func() any{
			func() any { return new(any) },
			func() any { return new(map[string]any) },
			func() any { return new([]any) },
		} {
			i := typ()
			if err := Unmarshal(b, i); err != nil {
				// The input does not fit this destination type.
				// Move on to the next one instead of giving up:
				// a JSON array, for instance, is rejected by
				// map[string]any but accepted by []any.
				continue
			}

			encoded, err := Marshal(i)
			if err != nil {
				t.Fatalf("failed to marshal: %s", err)
			}

			if err := Unmarshal(encoded, i); err != nil {
				t.Fatalf("failed to roundtrip: %s", err)
			}
		}
	})
}
// FuzzDecoderToken feeds arbitrary input to a Decoder and reads tokens
// until the first error; it only checks that doing so does not crash.
func FuzzDecoderToken(f *testing.F) {
	seed := []byte(`{
"object": {
"slice": [
1,
2.0,
"3",
[4],
{5: {}}
]
},
"slice": [[]],
"string": ":)",
"int": 1e5,
"float": 3e-9"
}`)
	f.Add(seed)

	f.Fuzz(func(t *testing.T, b []byte) {
		d := NewDecoder(bytes.NewReader(b))
		for {
			if _, err := d.Token(); err == io.EOF {
				// End of input: nothing left to tokenize.
				break
			} else if err != nil {
				// Malformed input is expected; it is not a failure.
				return
			}
		}
	})
}

View File

@ -0,0 +1,133 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"bytes"
"strings"
"encoding/json/jsontext"
)
// HTMLEscape appends to dst the JSON-encoded src with the characters
// <, >, &, U+2028 and U+2029 inside string literals replaced by
// \u003c, \u003e, \u0026, \u2028 and \u2029, which makes the JSON safe
// to embed inside HTML <script> tags.
// For historical reasons, web browsers don't honor standard HTML
// escaping within <script> tags, so an alternative JSON encoding must be used.
func HTMLEscape(dst *bytes.Buffer, src []byte) {
	// Reserve room for the common case where nothing needs escaping,
	// then escape into the buffer's spare capacity.
	dst.Grow(len(src))
	escaped := appendHTMLEscape(dst.AvailableBuffer(), src)
	dst.Write(escaped)
}
// appendHTMLEscape appends src to dst, replacing each '<', '>' and '&' byte
// and each UTF-8 encoded U+2028 or U+2029 with its \uXXXX escape,
// and returns the extended slice.
func appendHTMLEscape(dst, src []byte) []byte {
	const hexDigits = "0123456789abcdef"

	// The characters can only appear in string literals,
	// so it is enough to scan byte by byte.
	// src[pending:i] has been scanned but not yet copied to dst.
	pending := 0
	for i := 0; i < len(src); i++ {
		switch c := src[i]; {
		case c == '<', c == '>', c == '&':
			dst = append(dst, src[pending:i]...)
			dst = append(dst, '\\', 'u', '0', '0', hexDigits[c>>4], hexDigits[c&0xF])
			pending = i + 1
		case c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8:
			// U+2028 and U+2029 are encoded as E2 80 A8 and E2 80 A9.
			dst = append(dst, src[pending:i]...)
			dst = append(dst, '\\', 'u', '2', '0', '2', hexDigits[src[i+2]&0xF])
			pending = i + len("\u2029")
		}
	}
	return append(dst, src[pending:]...)
}
// Compact appends to dst the JSON-encoded src with
// insignificant space characters elided.
// Nothing is written to dst if src cannot be formatted.
func Compact(dst *bytes.Buffer, src []byte) error {
	dst.Grow(len(src))
	compacted, err := jsontext.AppendFormat(dst.AvailableBuffer(), src,
		jsontext.AllowDuplicateNames(true),
		jsontext.AllowInvalidUTF8(true),
		jsontext.PreserveRawStrings(true))
	if err == nil {
		dst.Write(compacted)
		return nil
	}
	return transformSyntacticError(err)
}
// indentGrowthFactor specifies the growth factor of indenting JSON input.
// Empirically, the growth factor was measured to be between 1.4x and 1.8x
// for some set of compacted JSON with the indent being a single tab.
// Specify a growth factor slightly larger than what is observed
// to reduce the probability of allocation in appendIndent.
// A factor no higher than 2 ensures that wasted space never exceeds 50%.
const indentGrowthFactor = 2
// Indent appends to dst an indented form of the JSON-encoded src.
// Each element in a JSON object or array begins on a new,
// indented line beginning with prefix followed by one or more
// copies of indent according to the indentation nesting.
// The data appended to dst does not begin with the prefix nor
// any indentation, to make it easier to embed inside other formatted JSON data.
// Although leading space characters (space, tab, carriage return, newline)
// at the beginning of src are dropped, trailing space characters
// at the end of src are preserved and copied to dst.
// For example, if src has no trailing spaces, neither will dst;
// if src ends in a trailing newline, so will dst.
func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error {
	// Reserve space up front according to the expected growth, then
	// format into the buffer's spare capacity.
	dst.Grow(indentGrowthFactor * len(src))
	out, err := appendIndent(dst.AvailableBuffer(), src, prefix, indent)
	// Whatever appendIndent returned is written, also alongside an error.
	dst.Write(out)
	return err
}
// appendIndent appends to dst an indented form of the JSON-encoded src and
// returns the extended slice. Each line after the first starts with prefix
// followed by one copy of indent per nesting level. Trailing whitespace of
// src (space, tab, carriage return, newline) is copied to the output.
// If src cannot be formatted, dst is returned truncated to its original
// length together with the transformed error.
func appendIndent(dst, src []byte, prefix, indent string) ([]byte, error) {
	dstLen := len(dst)

	// In v2, only spaces and tabs are allowed, while v1 allowed any character.
	// For other characters, format with placeholder spaces of the correct
	// length and substitute the requested strings afterwards.
	invalidPrefix, invalidIndent := prefix, indent
	usePlaceholders := len(strings.Trim(prefix, " \t"))+len(strings.Trim(indent, " \t")) > 0
	if usePlaceholders {
		prefix = strings.Repeat(" ", len(prefix))
		indent = strings.Repeat(" ", len(indent))
	}

	dst, err := jsontext.AppendFormat(dst, src,
		jsontext.AllowDuplicateNames(true),
		jsontext.AllowInvalidUTF8(true),
		jsontext.PreserveRawStrings(true),
		jsontext.Multiline(true),
		jsontext.WithIndentPrefix(prefix),
		jsontext.WithIndent(indent))
	if err != nil {
		return dst[:dstLen], transformSyntacticError(err)
	}

	if usePlaceholders {
		// Overwrite the run of placeholder spaces that follows every
		// newline with the prefix and as many copies of the indent as fit.
		// This must happen before the trailing whitespace of src is
		// appended, so that only formatter output is rewritten.
		b := dst[dstLen:]
		for i := bytes.IndexByte(b, '\n'); i >= 0; i = bytes.IndexByte(b, '\n') {
			b = b[i+len("\n"):]
			n := len(b) - len(bytes.TrimLeft(b, " ")) // len(prefix)+n*len(indent)
			spaces := b[:n]
			spaces = spaces[copy(spaces, invalidPrefix):]
			for len(spaces) > 0 {
				spaces = spaces[copy(spaces, invalidIndent):]
			}
			b = b[n:]
		}
	}

	// In v2, trailing whitespace is discarded, while v1 preserved it.
	// Append it here, before returning: the results of this function are
	// unnamed, so an append performed in a deferred function would modify
	// only the local variable and never reach the caller.
	if n := len(src) - len(bytes.TrimRight(src, " \n\r\t")); n > 0 && len(dst) > dstLen {
		dst = append(dst, src[len(src)-n:]...)
	}
	return dst, nil
}

View File

@ -0,0 +1,153 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"fmt"
"reflect"
"strconv"
"strings"
"encoding/json/internal"
"encoding/json/jsontext"
jsonv2 "encoding/json/v2"
)
// init installs the v1-specific hooks into the internal package so that
// v2 can construct and translate v1 types (errors and [Number]).
func init() {
	// Raw JSON numbers are represented by the v1 Number type.
	internal.NewRawNumber = func() any { return new(Number) }
	internal.RawNumberOf = func(b []byte) any { return Number(b) }

	// Errors from marshal methods are wrapped in the v1 MarshalerError.
	internal.NewMarshalerError = func(val any, err error, funcName string) error {
		return &MarshalerError{
			Type:       reflect.TypeOf(val),
			Err:        err,
			sourceFunc: funcName,
		}
	}

	// Remaining errors are translated into their v1 equivalents.
	internal.TransformMarshalError = transformMarshalError
	internal.TransformUnmarshalError = transformUnmarshalError
}
// transformMarshalError converts an error produced while marshaling
// into the error type that v1 historically reported for it.
// The root argument is not used.
func transformMarshalError(root any, err error) error {
	// Historically, errors returned from Marshal methods were wrapped
	// in a [MarshalerError]. This is directly performed by the v2 package
	// via the injected [internal.NewMarshalerError] constructor
	// while operating under [ReportErrorsWithLegacySemantics].
	// Note that errors from a Marshal method were always wrapped,
	// even if wrapped for multiple layers.
	switch semErr, isSemantic := err.(*jsonv2.SemanticError); {
	case semErr != nil:
		if semErr.Err == nil {
			// Historically, this was only reported for unserializable types
			// like complex numbers, channels, functions, and unsafe.Pointers.
			return &UnsupportedTypeError{Type: semErr.GoType}
		}
		// Historically, this was only reported for NaN or ±Inf values
		// and cycles detected in the value.
		// The Val used to be populated with the reflect.Value,
		// but this is no longer supported.
		msg := semErr.Err.Error()
		if semErr.Err == internal.ErrCycle && semErr.GoType != nil {
			msg += " via " + semErr.GoType.String()
		}
		msg = strings.TrimPrefix(msg, "unsupported value: ")
		return &UnsupportedValueError{Str: msg}
	case isSemantic:
		// err holds a nil *jsonv2.SemanticError: answer with a nil
		// pointer of the v1 error type.
		return (*UnsupportedValueError)(nil)
	}

	// A MarshalerError is kept, with its inner error translated in place.
	if wrapped, _ := err.(*MarshalerError); wrapped != nil {
		wrapped.Err = transformSyntacticError(wrapped.Err)
		return wrapped
	}
	return transformSyntacticError(err)
}
// transformUnmarshalError converts an error produced while unmarshaling
// into the error type that v1 historically reported for it.
// root is the value that was passed to unmarshal; its type supplies
// the Struct name of a resulting [UnmarshalTypeError].
func transformUnmarshalError(root any, err error) error {
	// Historically, errors from Unmarshal methods were never wrapped and
	// returned verbatim while operating under [ReportErrorsWithLegacySemantics].
	semErr, isSemantic := err.(*jsonv2.SemanticError)
	if semErr == nil {
		if isSemantic {
			// err holds a nil *jsonv2.SemanticError: answer with a nil
			// pointer of the v1 error type.
			return (*UnmarshalTypeError)(nil)
		}
		return transformSyntacticError(err)
	}

	if semErr.Err == internal.ErrNonNilReference {
		return &InvalidUnmarshalError{semErr.GoType}
	}
	if semErr.Err == jsonv2.ErrUnknownName {
		return fmt.Errorf("json: unknown field %q", semErr.JSONPointer.LastToken())
	}

	// Historically, UnmarshalTypeError has always been inconsistent
	// about how it reported position information.
	//
	// The Struct field now points to the root type,
	// rather than some intermediate struct in the path.
	// This better matches the original intent of the field based
	// on how the Error message was formatted.
	//
	// For a representation closer to the historical representation,
	// we switch the '/'-delimited representation of a JSON pointer
	// to use a '.'-delimited representation. This may be ambiguous,
	// but the prior representation was always ambiguous as well.
	// Users that care about precise positions should use v2 errors
	// by disabling [ReportErrorsWithLegacySemantics].
	//
	// The introduction of a Err field is new to the v1-to-v2 migration
	// and allows us to preserve stronger error information
	// that may be surfaced by the v2 package.
	//
	// See https://go.dev/issue/43126

	// Describe the kind of JSON value that was encountered.
	var value string
	switch semErr.JSONKind {
	case 'n', '"', '0':
		value = semErr.JSONKind.String()
	case 'f', 't':
		value = "bool"
	case '[', ']':
		value = "array"
	case '{', '}':
		value = "object"
	}

	if len(semErr.JSONValue) > 0 {
		// A strconv failure for a numeric Go type is reported as a
		// "number" whose text is the unquoted value, without an inner error.
		if semErr.Err == strconv.ErrRange || semErr.Err == strconv.ErrSyntax {
			if goType := semErr.GoType; goType != nil {
				switch goType.Kind() {
				case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
					reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
					reflect.Float32, reflect.Float64:
					value = "number"
					if semErr.JSONKind == '"' {
						semErr.JSONValue, _ = jsontext.AppendUnquote(nil, semErr.JSONValue)
					}
					semErr.Err = nil
				}
			}
		}
		value += " " + string(semErr.JSONValue)
	}

	// The root type is named only when the error is located below the root.
	var rootName string
	if rootType := reflect.TypeOf(root); rootType != nil && semErr.JSONPointer != "" {
		if rootType.Kind() == reflect.Pointer {
			rootType = rootType.Elem()
		}
		rootName = rootType.Name()
	}

	// Turn the "/a/b" JSON pointer into the historical "a.b" field path.
	fieldPath := strings.TrimPrefix(string(semErr.JSONPointer), "/")
	fieldPath = strings.ReplaceAll(fieldPath, "/", ".")

	return &UnmarshalTypeError{
		Value:  value,
		Type:   semErr.GoType,
		Offset: semErr.ByteOffset,
		Struct: rootName,
		Field:  fieldPath,
		Err:    transformSyntacticError(semErr.Err),
	}
}

View File

@ -0,0 +1,528 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
// Migrating to v2
//
// This package (i.e., [encoding/json]) is now formally known as the v1 package
// since a v2 package now exists at [encoding/json/v2].
// All the behavior of the v1 package is implemented in terms of
// the v2 package with the appropriate set of options specified that
// preserve the historical behavior of v1.
//
// The [jsonv2.Marshal] function is the newer equivalent of v1 [Marshal].
// The [jsonv2.Unmarshal] function is the newer equivalent of v1 [Unmarshal].
// The v2 functions have the same calling signature as the v1 equivalent
// except that they take in variadic [Options] arguments that can be specified
// to alter the behavior of marshal or unmarshal. Both v1 and v2 generally
// behave in similar ways, but there are some notable differences.
//
// The following is a list of differences between v1 and v2:
//
// - In v1, JSON object members are unmarshaled into a Go struct using a
// case-insensitive name match with the JSON name of the fields.
// In contrast, v2 matches fields using an exact, case-sensitive match.
// The [jsonv2.MatchCaseInsensitiveNames] and [MatchCaseSensitiveDelimiter]
// options control this behavior difference. To explicitly specify a Go struct
// field to use a particular name matching scheme, either the `case:ignore`
// or the `case:strict` field option can be specified.
// Field-specified options take precedence over caller-specified options.
//
// - In v1, when marshaling a Go struct, a field marked as `omitempty`
// is omitted if the field value is an "empty" Go value, which is defined as
// false, 0, a nil pointer, a nil interface value, and
// any empty array, slice, map, or string. In contrast, v2 redefines
// `omitempty` to omit a field if it encodes as an "empty" JSON value,
// which is defined as a JSON null, or an empty JSON string, object, or array.
// The [OmitEmptyWithLegacyDefinition] option controls this behavior difference.
// Note that `omitempty` behaves identically in both v1 and v2 for a
// Go array, slice, map, or string (assuming no user-defined MarshalJSON method
// overrides the default representation). Existing usages of `omitempty` on a
// Go bool, number, pointer, or interface value should migrate to specifying
// `omitzero` instead (which is identically supported in both v1 and v2).
//
// - In v1, a Go struct field marked as `string` can be used to quote a
// Go string, bool, or number as a JSON string. It does not recursively
// take effect on composite Go types. In contrast, v2 restricts
// the `string` option to only quote a Go number as a JSON string.
// It does recursively take effect on Go numbers within a composite Go type.
// The [StringifyWithLegacySemantics] option controls this behavior difference.
//
// - In v1, a nil Go slice or Go map is marshaled as a JSON null.
// In contrast, v2 marshals a nil Go slice or Go map as
// an empty JSON array or JSON object, respectively.
// The [jsonv2.FormatNilSliceAsNull] and [jsonv2.FormatNilMapAsNull] options
// control this behavior difference. To explicitly specify a Go struct field
// to use a particular representation for nil, either the `format:emitempty`
// or `format:emitnull` field option can be specified.
// Field-specified options take precedence over caller-specified options.
//
// - In v1, a Go array may be unmarshaled from a JSON array of any length.
// In contrast, in v2 a Go array must be unmarshaled from a JSON array
// of the same length, otherwise it results in an error.
// The [UnmarshalArrayFromAnyLength] option controls this behavior difference.
//
// - In v1, a Go byte array is represented as a JSON array of JSON numbers.
// In contrast, in v2 a Go byte array is represented as a Base64-encoded JSON string.
// The [FormatBytesWithLegacySemantics] option controls this behavior difference.
// To explicitly specify a Go struct field to use a particular representation,
// either the `format:array` or `format:base64` field option can be specified.
// Field-specified options take precedence over caller-specified options.
//
// - In v1, MarshalJSON methods declared on a pointer receiver are only called
// if the Go value is addressable. In contrast, in v2 a MarshalJSON method
// is always callable regardless of addressability.
// The [CallMethodsWithLegacySemantics] option controls this behavior difference.
//
// - In v1, MarshalJSON and UnmarshalJSON methods are never called for Go map keys.
// In contrast, in v2 a MarshalJSON or UnmarshalJSON method is eligible for
// being called for Go map keys.
// The [CallMethodsWithLegacySemantics] option controls this behavior difference.
//
// - In v1, a Go map is marshaled in a deterministic order.
// In contrast, in v2 a Go map is marshaled in a non-deterministic order.
// The [jsonv2.Deterministic] option controls this behavior difference.
//
// - In v1, JSON strings are encoded with HTML-specific or JavaScript-specific
// characters being escaped. In contrast, in v2 JSON strings use the minimal
// encoding and only escape if required by the JSON grammar.
// The [jsontext.EscapeForHTML] and [jsontext.EscapeForJS] options
// control this behavior difference.
//
// - In v1, bytes of invalid UTF-8 within a string are silently replaced with
// the Unicode replacement character. In contrast, in v2 the presence of
// invalid UTF-8 results in an error. The [jsontext.AllowInvalidUTF8] option
// controls this behavior difference.
//
// - In v1, a JSON object with duplicate names is permitted.
// In contrast, in v2 a JSON object with duplicate names results in an error.
// The [jsontext.AllowDuplicateNames] option controls this behavior difference.
//
// - In v1, when unmarshaling a JSON null into a non-empty Go value it will
// inconsistently either zero out the value or do nothing.
// In contrast, in v2 unmarshaling a JSON null will consistently and always
// zero out the underlying Go value. The [MergeWithLegacySemantics] option
// controls this behavior difference.
//
// - In v1, when unmarshaling a JSON value into a non-zero Go value,
// it merges into the original Go value for array elements, slice elements,
// struct fields (but not map values),
// pointer values, and interface values (only if a non-nil pointer).
// In contrast, in v2 unmarshal merges into the Go value
// for struct fields, map values, pointer values, and interface values.
// In general, the v2 semantic merges when unmarshaling a JSON object,
// otherwise it replaces the value. The [MergeWithLegacySemantics] option
// controls this behavior difference.
//
// - In v1, a [time.Duration] is represented as a JSON number containing
// the decimal number of nanoseconds. In contrast, in v2 a [time.Duration]
// is represented as a JSON string containing the formatted duration
// (e.g., "1h2m3.456s") according to [time.Duration.String].
// The [FormatTimeWithLegacySemantics] option controls this behavior difference.
// To explicitly specify a Go struct field to use a particular representation,
// either the `format:nano` or `format:units` field option can be specified.
// Field-specified options take precedence over caller-specified options.
//
// - In v1, errors are never reported at runtime for Go struct types
// that have some form of structural error (e.g., a malformed tag option).
// In contrast, v2 reports a runtime error for Go types that are invalid
// as they relate to JSON serialization. For example, a Go struct
// with only unexported fields cannot be serialized.
// The [ReportErrorsWithLegacySemantics] option controls this behavior difference.
//
// As mentioned, the entirety of v1 is implemented in terms of v2,
// where options are implicitly specified to opt into legacy behavior.
// For example, [Marshal] directly calls [jsonv2.Marshal] with [DefaultOptionsV1].
// Similarly, [Unmarshal] directly calls [jsonv2.Unmarshal] with [DefaultOptionsV1].
// The [DefaultOptionsV1] option represents the set of all options that specify
// default v1 behavior.
//
// For many of the behavior differences, there are Go struct field options
// that the author of a Go type can specify to control the behavior such that
// the type is represented identically in JSON under either v1 or v2 semantics.
//
// The availability of [DefaultOptionsV1] and [jsonv2.DefaultOptionsV2],
// where later options take precedence over earlier ones, allows for
// a gradual migration from v1 to v2. For example:
//
// - jsonv1.Marshal(v)
// uses default v1 semantics.
//
// - jsonv2.Marshal(v, jsonv1.DefaultOptionsV1())
// is semantically equivalent to jsonv1.Marshal
// and thus uses default v1 semantics.
//
// - jsonv2.Marshal(v, jsonv1.DefaultOptionsV1(), jsontext.AllowDuplicateNames(false))
// uses mostly v1 semantics, but opts into one particular v2-specific behavior.
//
// - jsonv2.Marshal(v, jsonv1.CallMethodsWithLegacySemantics(true))
// uses mostly v2 semantics, but opts into one particular v1-specific behavior.
//
// - jsonv2.Marshal(v, ..., jsonv2.DefaultOptionsV2())
// is semantically equivalent to jsonv2.Marshal since
// jsonv2.DefaultOptionsV2 overrides any options specified earlier
// and thus uses default v2 semantics.
//
// - jsonv2.Marshal(v)
// uses default v2 semantics.
//
// All new usages of "json" in Go should use the v2 package,
// but the v1 package will forever remain supported.
package json
import (
"encoding"
"encoding/json/internal/jsonflags"
"encoding/json/internal/jsonopts"
"encoding/json/jsontext"
jsonv2 "encoding/json/v2"
)
// Reference the encoding, jsonv2, and jsontext packages to assist pkgsite
// in being able to hotlink references to those packages.
// The blank identifiers only name a type from each package;
// they declare no usable variables.
var (
_ encoding.TextMarshaler
_ encoding.TextUnmarshaler
_ jsonv2.Options
_ jsontext.Options
)
// Options are a set of options to configure the v2 "json" package
// to operate with v1 semantics for particular features.
// Values of this type can be passed to v2 functions like
// [jsonv2.Marshal] or [jsonv2.Unmarshal].
// Instead of referencing this type, use [jsonv2.Options].
//
// See the "Migrating to v2" section for guidance on how to migrate usage
// of "json" from using v1 to using v2 instead.
//
// Options is declared as an alias of the internal jsonopts.Options type.
type Options = jsonopts.Options
// DefaultOptionsV1 is the full set of all options that define v1 semantics.
// It is equivalent to the following boolean options being set to true:
//
// - [CallMethodsWithLegacySemantics]
// - [EscapeInvalidUTF8]
// - [FormatBytesWithLegacySemantics]
// - [FormatTimeWithLegacySemantics]
// - [MatchCaseSensitiveDelimiter]
// - [MergeWithLegacySemantics]
// - [OmitEmptyWithLegacyDefinition]
// - [ReportErrorsWithLegacySemantics]
// - [StringifyWithLegacySemantics]
// - [UnmarshalArrayFromAnyLength]
// - [jsonv2.Deterministic]
// - [jsonv2.FormatNilMapAsNull]
// - [jsonv2.FormatNilSliceAsNull]
// - [jsonv2.MatchCaseInsensitiveNames]
// - [jsontext.AllowDuplicateNames]
// - [jsontext.AllowInvalidUTF8]
// - [jsontext.EscapeForHTML]
// - [jsontext.EscapeForJS]
// - [jsontext.PreserveRawString]
//
// All other boolean options are set to false.
// All non-boolean options are set to the zero value,
// except for [jsontext.WithIndent], which defaults to "\t".
//
// The [Marshal] and [Unmarshal] functions in this package are
// semantically identical to calling the v2 equivalents with this option:
//
// jsonv2.Marshal(v, jsonv1.DefaultOptionsV1())
// jsonv2.Unmarshal(b, v, jsonv1.DefaultOptionsV1())
func DefaultOptionsV1() Options {
// The result is the address of the package-level jsonopts.DefaultOptionsV1
// value rather than a copy of it.
return &jsonopts.DefaultOptionsV1
}
// CallMethodsWithLegacySemantics specifies that calling of type-provided
// marshal and unmarshal methods follow legacy semantics:
//
// - When marshaling, a marshal method declared on a pointer receiver
// is only called if the Go value is addressable.
// Values obtained from an interface or map element are not addressable.
// Values obtained from a pointer or slice element are addressable.
// Values obtained from an array element or struct field inherit
// the addressability of the parent. In contrast, the v2 semantic
// is to always call marshal methods regardless of addressability.
//
// - When marshaling or unmarshaling, the [Marshaler] or [Unmarshaler]
// methods are ignored for map keys. However, [encoding.TextMarshaler]
// or [encoding.TextUnmarshaler] are still callable.
// In contrast, the v2 semantic is to serialize map keys
// like any other value (with regard to calling methods),
// which may include calling [Marshaler] or [Unmarshaler] methods,
// where it is the implementation's responsibility to represent the
// Go value as a JSON string (as required for JSON object names).
//
// - When marshaling, if a map key value implements a marshal method
// and is a nil pointer, then it is serialized as an empty JSON string.
// In contrast, the v2 semantic is to report an error.
//
// - When marshaling, if an interface type implements a marshal method
// and the interface value is a nil pointer to a concrete type,
// then the marshal method is always called.
// In contrast, the v2 semantic is to never directly call methods
// on interface values and to instead defer evaluation based upon
// the underlying concrete value. Similar to non-interface values,
// marshal methods are not called on nil pointers and
// are instead serialized as a JSON null.
//
// This affects either marshaling or unmarshaling.
// The v1 default is true.
func CallMethodsWithLegacySemantics(v bool) Options {
	// The trailing 1 or 0 carries the requested boolean alongside the flag.
	if !v {
		return jsonflags.CallMethodsWithLegacySemantics | 0
	}
	return jsonflags.CallMethodsWithLegacySemantics | 1
}
// EscapeInvalidUTF8 specifies that when encoding a [jsontext.String]
// with bytes of invalid UTF-8, such bytes are escaped as
// a hexadecimal Unicode codepoint (i.e., \ufffd).
// In contrast, the v2 default is to use the minimal representation,
// which is to encode invalid UTF-8 as the Unicode replacement rune itself
// (without any form of escaping).
//
// This only affects encoding and is ignored when decoding.
// The v1 default is true.
func EscapeInvalidUTF8(v bool) Options {
	// The trailing 1 or 0 carries the requested boolean alongside the flag.
	if !v {
		return jsonflags.EscapeInvalidUTF8 | 0
	}
	return jsonflags.EscapeInvalidUTF8 | 1
}
// FormatBytesWithLegacySemantics specifies that handling of
// []~byte and [N]~byte types follow legacy semantics:
//
// - A Go [N]~byte is always treated as a normal Go array
// in contrast to the v2 default of treating [N]byte as
// using some form of binary data encoding (RFC 4648).
//
// - A Go []~byte is to be treated as using some form of
// binary data encoding (RFC 4648) in contrast to the v2 default
// of only treating []byte as such. In particular, v2 does not
// treat slices of named byte types as representing binary data.
//
// - When marshaling, if a named byte implements a marshal method,
// then the slice is serialized as a JSON array of elements,
// each of which call the marshal method.
//
// - When unmarshaling, if the input is a JSON array,
// then unmarshal into the []~byte as if it were a normal Go slice.
// In contrast, the v2 default is to report an error unmarshaling
// a JSON array when expecting some form of binary data encoding.
//
// - When unmarshaling, '\r' and '\n' characters are ignored
// within the encoded "base32" and "base64" data.
// In contrast, the v2 default is to report an error in order to be
// strictly compliant with RFC 4648, section 3.3,
// which specifies that non-alphabet characters must be rejected.
//
// This affects either marshaling or unmarshaling.
// The v1 default is true.
func FormatBytesWithLegacySemantics(v bool) Options {
	// The trailing 1 or 0 carries the requested boolean alongside the flag.
	if !v {
		return jsonflags.FormatBytesWithLegacySemantics | 0
	}
	return jsonflags.FormatBytesWithLegacySemantics | 1
}
// FormatTimeWithLegacySemantics specifies that [time] types are formatted
// with legacy semantics:
//
// - When marshaling or unmarshaling, a [time.Duration] is formatted as
// a JSON number representing the number of nanoseconds.
// In contrast, the default v2 behavior uses a JSON string
// with the duration formatted with [time.Duration.String].
// If a duration field has a `format` tag option,
// then the specified formatting takes precedence.
//
// - When unmarshaling, a [time.Time] follows loose adherence to RFC 3339.
// In particular, it permits historically incorrect representations,
// allowing for deviations in hour format, sub-second separator,
// and timezone representation. In contrast, the default v2 behavior
// is to strictly comply with the grammar specified in RFC 3339.
//
// This affects either marshaling or unmarshaling.
// The v1 default is true.
func FormatTimeWithLegacySemantics(v bool) Options {
	// The trailing 1 or 0 carries the requested boolean alongside the flag.
	if !v {
		return jsonflags.FormatTimeWithLegacySemantics | 0
	}
	return jsonflags.FormatTimeWithLegacySemantics | 1
}
// MatchCaseSensitiveDelimiter specifies that underscores and dashes are
// not to be ignored when performing case-insensitive name matching which
// occurs under [jsonv2.MatchCaseInsensitiveNames] or the `case:ignore` tag option.
// Thus, case-insensitive name matching is identical to [strings.EqualFold].
// Use of this option diminishes the ability of case-insensitive matching
// to be able to match common case variants (e.g., "foo_bar" with "fooBar").
//
// This affects either marshaling or unmarshaling.
// The v1 default is true.
func MatchCaseSensitiveDelimiter(v bool) Options {
	// The trailing 1 or 0 carries the requested boolean alongside the flag.
	if !v {
		return jsonflags.MatchCaseSensitiveDelimiter | 0
	}
	return jsonflags.MatchCaseSensitiveDelimiter | 1
}
// MergeWithLegacySemantics specifies that unmarshaling into a non-zero
// Go value follows legacy semantics:
//
// - When unmarshaling a JSON null, this preserves the original Go value
// if the kind is a bool, int, uint, float, string, array, or struct.
// Otherwise, it zeros the Go value.
// In contrast, the default v2 behavior is to consistently and always
// zero the Go value when unmarshaling a JSON null into it.
//
// - When unmarshaling a JSON value other than null, this merges into
// the original Go value for array elements, slice elements,
// struct fields (but not map values),
// pointer values, and interface values (only if a non-nil pointer).
// In contrast, the default v2 behavior is to merge into the Go value
// for struct fields, map values, pointer values, and interface values.
// In general, the v2 semantic merges when unmarshaling a JSON object,
// otherwise it replaces the original value.
//
// This only affects unmarshaling and is ignored when marshaling.
// The v1 default is true.
func MergeWithLegacySemantics(v bool) Options {
	// The trailing 1 or 0 carries the requested boolean alongside the flag.
	if !v {
		return jsonflags.MergeWithLegacySemantics | 0
	}
	return jsonflags.MergeWithLegacySemantics | 1
}
// OmitEmptyWithLegacyDefinition specifies that the `omitempty` tag option
// follows a definition of empty where a field is omitted if the Go value is
// false, 0, a nil pointer, a nil interface value,
// or any empty array, slice, map, or string.
// This overrides the v2 semantic where a field is empty if the value
// marshals as a JSON null or an empty JSON string, object, or array.
//
// The v1 and v2 definitions of `omitempty` are practically the same for
// Go strings, slices, arrays, and maps. Usages of `omitempty` on
// Go bools, ints, uints, floats, pointers, and interfaces should migrate to use
// the `omitzero` tag option, which omits a field if it is the zero Go value.
//
// This only affects marshaling and is ignored when unmarshaling.
// The v1 default is true.
func OmitEmptyWithLegacyDefinition(v bool) Options {
	// The trailing 1 or 0 carries the requested boolean alongside the flag.
	if !v {
		return jsonflags.OmitEmptyWithLegacyDefinition | 0
	}
	return jsonflags.OmitEmptyWithLegacyDefinition | 1
}
// ReportErrorsWithLegacySemantics specifies that Marshal and Unmarshal
// should report errors with legacy semantics:
//
// - When marshaling or unmarshaling, the returned error values are
// usually of types such as [SyntaxError], [MarshalerError],
// [UnsupportedTypeError], [UnsupportedValueError],
// [InvalidUnmarshalError], or [UnmarshalTypeError].
// In contrast, the v2 semantic is to always return errors as either
// [jsonv2.SemanticError] or [jsontext.SyntacticError].
//
// - When marshaling, if a user-defined marshal method reports an error,
// it is always wrapped in a [MarshalerError], even if the error itself
// is already a [MarshalerError], which may lead to multiple redundant
// layers of wrapping. In contrast, the v2 semantic is to
// always wrap an error within [jsonv2.SemanticError]
// unless it is already a semantic error.
//
// - When unmarshaling, if a user-defined unmarshal method reports an error,
// it is never wrapped and reported verbatim. In contrast, the v2 semantic
// is to always wrap an error within [jsonv2.SemanticError]
// unless it is already a semantic error.
//
// - When marshaling or unmarshaling, if a Go struct contains type errors
// (e.g., conflicting names or malformed field tags), then such errors
// are ignored and the Go struct uses a best-effort representation.
// In contrast, the v2 semantic is to report a runtime error.
//
// - When unmarshaling, the syntactic structure of the JSON input
// is fully validated before performing the semantic unmarshaling
// of the JSON data into the Go value. Practically speaking,
// this means that JSON input with syntactic errors does not result
// in any mutations of the target Go value. In contrast, the v2 semantic
// is to perform a streaming decode and gradually unmarshal the JSON input
// into the target Go value, which means that the Go value may be
// partially mutated when a syntactic error is encountered.
//
// - When unmarshaling, a semantic error does not immediately terminate the
// unmarshal procedure, but rather evaluation continues.
// When unmarshal returns, only the first semantic error is reported.
// In contrast, the v2 semantic is to terminate unmarshal the moment
// an error is encountered.
//
// This affects either marshaling or unmarshaling.
// The v1 default is true.
func ReportErrorsWithLegacySemantics(v bool) Options {
	// The trailing 1 or 0 carries the requested boolean alongside the flag.
	if !v {
		return jsonflags.ReportErrorsWithLegacySemantics | 0
	}
	return jsonflags.ReportErrorsWithLegacySemantics | 1
}
// StringifyWithLegacySemantics specifies that the `string` tag option
// may stringify bools and string values. It only takes effect on fields
// where the top-level type is a bool, string, numeric kind, or a pointer to
// such a kind. Specifically, `string` will not stringify bool, string,
// or numeric kinds within a composite data type
// (e.g., array, slice, struct, map, or interface).
//
// When marshaling, such Go values are serialized as their usual
// JSON representation, but quoted within a JSON string.
// When unmarshaling, such Go values must be deserialized from
// a JSON string containing their usual JSON representation.
// A JSON null quoted in a JSON string is a valid substitute for JSON null
// while unmarshaling into a Go value that `string` takes effect on.
//
// This affects either marshaling or unmarshaling.
// The v1 default is true.
func StringifyWithLegacySemantics(v bool) Options {
	// The trailing 1 or 0 carries the requested boolean alongside the flag.
	if !v {
		return jsonflags.StringifyWithLegacySemantics | 0
	}
	return jsonflags.StringifyWithLegacySemantics | 1
}
// UnmarshalArrayFromAnyLength specifies that Go arrays can be unmarshaled
// from input JSON arrays of any length. If the JSON array is too short,
// then the remaining Go array elements are zeroed. If the JSON array
// is too long, then the excess JSON array elements are skipped over.
//
// This only affects unmarshaling and is ignored when marshaling.
// The v1 default is true.
func UnmarshalArrayFromAnyLength(v bool) Options {
	// The trailing 1 or 0 carries the requested boolean alongside the flag.
	if !v {
		return jsonflags.UnmarshalArrayFromAnyLength | 0
	}
	return jsonflags.UnmarshalArrayFromAnyLength | 1
}
// unmarshalAnyWithRawNumber specifies that a JSON number unmarshaled into
// an empty Go interface should be stored as a Number rather than a float64.
func unmarshalAnyWithRawNumber(v bool) Options {
	// The trailing 1 or 0 carries the requested boolean alongside the flag.
	if !v {
		return jsonflags.UnmarshalAnyWithRawNumber | 0
	}
	return jsonflags.UnmarshalAnyWithRawNumber | 1
}

View File

@ -0,0 +1,82 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"errors"
"io"
"strings"
"encoding/json/internal"
"encoding/json/internal/jsonflags"
"encoding/json/jsontext"
)
// export exposes internal functionality of the "jsontext" package.
// It is the value returned by jsontext.Internal.Export when given
// &internal.AllowInternalUse.
// NOTE(review): that argument presumably acts as the proof of permission
// for using the internal API — confirm against jsontext.
var export = jsontext.Internal.Export(&internal.AllowInternalUse)
// Valid reports whether data is a valid JSON encoding.
// It is true exactly when checkValid finds no error in data.
func Valid(data []byte) bool {
	err := checkValid(data)
	return err == nil
}
// checkValid reads a single JSON value from data and then checks for
// end of input. Any failure is passed through transformSyntacticError
// before being returned; nil means data held exactly one valid value.
func checkValid(data []byte) error {
	dec := export.GetBufferedDecoder(data)
	defer export.PutBufferedDecoder(dec)

	state := export.Decoder(dec)
	state.Struct.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1)

	_, err := dec.ReadValue()
	if err == nil {
		// Only look for trailing input once the value itself was read cleanly.
		err = state.CheckEOF()
	}
	if err != nil {
		return transformSyntacticError(err)
	}
	return nil
}
// A SyntaxError is a description of a JSON syntax error.
// [Unmarshal] will return a SyntaxError if the JSON can't be parsed.
type SyntaxError struct {
msg string // description of error
Offset int64 // error occurred after reading Offset bytes
}
// Error returns the description of the syntax error,
// which makes *SyntaxError usable as an error value.
func (e *SyntaxError) Error() string { return e.msg }
// errUnexpectedEnd is substituted for io.ErrUnexpectedEOF inside
// syntactic errors by transformSyntacticError.
var errUnexpectedEnd = errors.New("unexpected end of JSON input")

// transformSyntacticError rewrites an error coming from the jsontext
// package into the form returned by this package:
//
//   - a non-nil *jsontext.SyntacticError becomes a *SyntaxError carrying
//     the same byte offset and a message adjusted by syntaxErrorReplacer;
//   - a nil *jsontext.SyntacticError becomes a nil *SyntaxError;
//   - an IO error is unwrapped one level;
//   - anything else is returned unchanged.
//
// Note that the Err field of a non-nil *jsontext.SyntacticError argument
// is overwritten when it equals io.ErrUnexpectedEOF.
func transformSyntacticError(err error) error {
	serr, isSyntactic := err.(*jsontext.SyntacticError)
	if !isSyntactic {
		if export.IsIOError(err) {
			return errors.Unwrap(err) // v1 historically did not wrap IO errors
		}
		return err
	}
	if serr == nil {
		// A typed nil pointer in yields a typed nil pointer out.
		return (*SyntaxError)(nil)
	}

	if serr.Err == io.ErrUnexpectedEOF {
		serr.Err = errUnexpectedEnd
	}
	msg := serr.Err.Error()
	// Drop the " (expecting ...)" suffix, except for messages about literals.
	if cut := strings.Index(msg, " (expecting"); cut >= 0 && !strings.Contains(msg, " in literal") {
		msg = msg[:cut]
	}
	return &SyntaxError{Offset: serr.ByteOffset, msg: syntaxErrorReplacer.Replace(msg)}
}
// syntaxErrorReplacer rewrites selected phrases of a v2 error message so
// that it reads like the historical v1 rendering of syntax errors.
// For example, v2 says "object name" (the RFC 8259 term), whereas v1
// says "object key", a term that does not appear in JSON literature.
var syntaxErrorReplacer = func() *strings.Replacer {
	// Flat list of (v2 phrase, v1 phrase) pairs; the order is significant.
	rewrites := []string{
		"object name", "object key",
		"at start of value", "looking for beginning of value",
		"at start of string", "looking for beginning of object key string",
		"after object value", "after object key:value pair",
		"in number", "in numeric literal",
	}
	return strings.NewReplacer(rewrites...)
}()

View File

@ -0,0 +1,306 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"bytes"
"math"
"math/rand"
"reflect"
"strings"
"testing"
)
// indentNewlines inserts a tab after every newline in s, so that a
// multi-line value lines up under a tab-indented label in test output.
func indentNewlines(s string) string {
	return strings.ReplaceAll(s, "\n", "\n\t")
}
// stripWhitespace returns s with every space, newline, carriage return,
// and tab removed.
func stripWhitespace(s string) string {
	dropSpace := func(r rune) rune {
		switch r {
		case ' ', '\n', '\r', '\t':
			return -1 // a negative result tells strings.Map to drop the rune
		}
		return r
	}
	return strings.Map(dropSpace, s)
}
// TestValid checks Valid against a table of well-formed and malformed inputs.
func TestValid(t *testing.T) {
	cases := []struct {
		CaseName
		data string
		ok   bool
	}{
		{Name(""), `foo`, false},
		{Name(""), `}{`, false},
		{Name(""), `{]`, false},
		{Name(""), `{}`, true},
		{Name(""), `{"foo":"bar"}`, true},
		{Name(""), `{"foo":"bar","bar":{"baz":["qux"]}}`, true},
	}
	for _, tc := range cases {
		t.Run(tc.Name, func(t *testing.T) {
			got := Valid([]byte(tc.data))
			if got == tc.ok {
				return
			}
			t.Errorf("%s: Valid(`%s`) = %v, want %v", tc.Where, tc.data, got, tc.ok)
		})
	}
}
// TestCompactAndIndent checks that Compact and Indent convert between the
// compact and the tab-indented rendering of each table entry, and that
// each of them leaves input already in its own output form unchanged.
func TestCompactAndIndent(t *testing.T) {
	tests := []struct {
		CaseName
		compact string
		indent  string
	}{
		{Name(""), `1`, `1`},
		{Name(""), `{}`, `{}`},
		{Name(""), `[]`, `[]`},
		{Name(""), `{"":2}`, "{\n\t\"\": 2\n}"},
		{Name(""), `[3]`, "[\n\t3\n]"},
		{Name(""), `[1,2,3]`, "[\n\t1,\n\t2,\n\t3\n]"},
		{Name(""), `{"x":1}`, "{\n\t\"x\": 1\n}"},
		// The expected output is spelled with explicit \n\t escapes so that
		// it cannot be altered by reformatting of this source file.
		{Name(""), `[true,false,null,"x",1,1.5,0,-5e+2]`, "[\n\ttrue,\n\tfalse,\n\tnull,\n\t\"x\",\n\t1,\n\t1.5,\n\t0,\n\t-5e+2\n]"},
		{Name(""), "{\"\":\"<>&\u2028\u2029\"}", "{\n\t\"\": \"<>&\u2028\u2029\"\n}"}, // See golang.org/issue/34070
	}
	var buf bytes.Buffer
	for _, tt := range tests {
		t.Run(tt.Name, func(t *testing.T) {
			// Compact of already-compact input is the identity.
			buf.Reset()
			if err := Compact(&buf, []byte(tt.compact)); err != nil {
				t.Errorf("%s: Compact error: %v", tt.Where, err)
			} else if got := buf.String(); got != tt.compact {
				t.Errorf("%s: Compact:\n\tgot:  %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.compact))
			}

			// Compact of indented input yields the compact form.
			buf.Reset()
			if err := Compact(&buf, []byte(tt.indent)); err != nil {
				t.Errorf("%s: Compact error: %v", tt.Where, err)
			} else if got := buf.String(); got != tt.compact {
				t.Errorf("%s: Compact:\n\tgot:  %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.compact))
			}

			// Indent of already-indented input is the identity.
			buf.Reset()
			if err := Indent(&buf, []byte(tt.indent), "", "\t"); err != nil {
				t.Errorf("%s: Indent error: %v", tt.Where, err)
			} else if got := buf.String(); got != tt.indent {
				t.Errorf("%s: Indent:\n\tgot:  %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.indent))
			}

			// Indent of compact input yields the indented form.
			buf.Reset()
			if err := Indent(&buf, []byte(tt.compact), "", "\t"); err != nil {
				t.Errorf("%s: Indent error: %v", tt.Where, err)
			} else if got := buf.String(); got != tt.indent {
				t.Errorf("%s: Indent:\n\tgot:  %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.indent))
			}
		})
	}
}
// TestCompactSeparators checks how Compact treats U+2028 and U+2029
// inside an object name: per the table below, only the insignificant
// whitespace around the colon is removed and the characters themselves
// are carried over to the output as they appear in the input.
func TestCompactSeparators(t *testing.T) {
	cases := []struct {
		CaseName
		in, compact string
	}{
		{Name(""), "{\"\u2028\": 1}", "{\"\u2028\":1}"},
		{Name(""), "{\"\u2029\" :2}", "{\"\u2029\":2}"},
	}
	for _, tc := range cases {
		t.Run(tc.Name, func(t *testing.T) {
			var out bytes.Buffer
			err := Compact(&out, []byte(tc.in))
			if err != nil {
				t.Errorf("%s: Compact error: %v", tc.Where, err)
				return
			}
			if got := out.String(); got != tc.compact {
				t.Errorf("%s: Compact:\n\tgot:  %s\n\twant: %s", tc.Where, indentNewlines(got), indentNewlines(tc.compact))
			}
		})
	}
}
// Tests of a large random structure.

// TestCompactBig checks that Compact leaves the freshly generated jsonBig
// unchanged.
func TestCompactBig(t *testing.T) {
	initBig()

	var out bytes.Buffer
	err := Compact(&out, jsonBig)
	if err != nil {
		t.Fatalf("Compact error: %v", err)
	}
	if got := out.Bytes(); !bytes.Equal(got, jsonBig) {
		t.Error("Compact:")
		diff(t, got, jsonBig)
	}
}
// TestIndentBig checks three properties of Indent on the generated jsonBig:
// the output differs in length from the input, indenting twice equals
// indenting once, and Compact undoes Indent.
func TestIndentBig(t *testing.T) {
	t.Parallel()
	initBig()

	var indented bytes.Buffer
	if err := Indent(&indented, jsonBig, "", "\t"); err != nil {
		t.Fatalf("Indent error: %v", err)
	}
	once := indented.Bytes()
	// jsonBig is compact (no unnecessary spaces);
	// indenting should make it bigger
	if len(once) == len(jsonBig) {
		t.Fatalf("Indent did not expand the input")
	}

	// should be idempotent
	var scratch bytes.Buffer
	if err := Indent(&scratch, once, "", "\t"); err != nil {
		t.Fatalf("Indent error: %v", err)
	}
	if twice := scratch.Bytes(); !bytes.Equal(twice, once) {
		t.Error("Indent(Indent(jsonBig)) != Indent(jsonBig):")
		diff(t, twice, once)
		return
	}

	// should get back to original
	scratch.Reset()
	if err := Compact(&scratch, once); err != nil {
		t.Fatalf("Compact error: %v", err)
	}
	if restored := scratch.Bytes(); !bytes.Equal(restored, jsonBig) {
		t.Error("Compact(Indent(jsonBig)) != jsonBig:")
		diff(t, restored, jsonBig)
	}
}
// TestIndentErrors checks that Indent reports the expected *SyntaxError,
// including its message and offset, for malformed input.
func TestIndentErrors(t *testing.T) {
	tests := []struct {
		CaseName
		in  string
		err error
	}{
		{Name(""), `{"X": "foo", "Y"}`, &SyntaxError{"invalid character '}' after object key", len64(`{"X": "foo", "Y"`)}},
		{Name(""), `{"X": "foo" "Y": "bar"}`, &SyntaxError{"invalid character '\"' after object key:value pair", len64(`{"X": "foo" `)}},
	}
	for _, tt := range tests {
		t.Run(tt.Name, func(t *testing.T) {
			var buf bytes.Buffer
			err := Indent(&buf, []byte(tt.in), "", "")
			// Compare unconditionally: every entry expects an error, so a
			// nil result from Indent must fail the test as well.
			if !reflect.DeepEqual(err, tt.err) {
				t.Fatalf("%s: Indent error:\n\tgot:  %v\n\twant: %v", tt.Where, err, tt.err)
			}
		})
	}
}
// diff reports, through t.Errorf, the first index at which a and b differ
// (or at which the shorter one ends), together with a short excerpt of
// each slice that starts up to 10 bytes before that index.
func diff(t *testing.T, a, b []byte) {
	t.Helper()

	// Advance over the common prefix.
	at := 0
	for at < len(a) && at < len(b) && a[at] == b[at] {
		at++
	}

	start := at - 10
	if start < 0 {
		start = 0
	}
	t.Errorf("diverge at %d: «%s» vs «%s»", at, trim(a[start:]), trim(b[start:]))
}
// trim returns at most the first 20 bytes of b.
func trim(b []byte) []byte {
	const limit = 20
	if len(b) > limit {
		return b[:limit]
	}
	return b
}
// Generate a random JSON object.

// jsonBig holds the encoding produced by the most recent call to initBig.
var jsonBig []byte

// initBig marshals a randomly generated value into jsonBig.
// The value is built with size 10000, or 100 under -short.
// It panics if marshaling fails.
func initBig() {
	size := 10000
	if testing.Short() {
		size = 100
	}
	encoded, err := Marshal(genValue(size))
	if err != nil {
		panic(err)
	}
	jsonBig = encoded
}
// genValue returns a random value. When n > 1 it is an array or a map
// built by genArray or genMap with the same n; otherwise it is a random
// bool, float64, or string.
func genValue(n int) any {
	if n > 1 {
		// rand.Intn(2) is either 0 or 1, so exactly one branch applies.
		if rand.Intn(2) == 0 {
			return genArray(n)
		}
		return genMap(n)
	}
	switch rand.Intn(3) {
	case 0:
		return rand.Intn(2) == 0
	case 1:
		return rand.NormFloat64()
	default: // 2 is the only remaining outcome of rand.Intn(3)
		return genString(30)
	}
}
// genString returns a string of random runes. Its length in runes is
// drawn from a normal distribution scaled by stddev, and each rune is
// drawn from a normal distribution and capped at 0x10ffff.
func genString(stddev float64) string {
	length := int(math.Abs(rand.NormFloat64()*stddev + stddev/2))
	runes := make([]rune, 0, length)
	for len(runes) < length {
		code := math.Abs(rand.NormFloat64()*64 + 32)
		if code > 0x10ffff {
			code = 0x10ffff
		}
		runes = append(runes, rune(code))
	}
	return string(runes)
}
// genArray returns a slice of random values. The element count is random,
// then clamped to at most n and at least 1; n is divided among the
// elements as the size argument passed to genValue.
func genArray(n int) []any {
	count := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2)))
	if count > n {
		count = n
	}
	if count < 1 {
		count = 1
	}
	elems := make([]any, 0, count)
	for i := 0; i < count; i++ {
		share := ((i+1)*n)/count - (i*n)/count
		elems = append(elems, genValue(share))
	}
	return elems
}
func genMap(n int) map[string]any {
f := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2)))
if f > n {
f = n
}
if n > 0 && f == 0 {
f = 1
}
x := make(map[string]any)
for i := 0; i < f; i++ {
x[genString(10)] = genValue(((i+1)*n)/f - (i*n)/f)
}
return x
}

View File

@ -0,0 +1,231 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"bytes"
"io"
"encoding/json/jsontext"
jsonv2 "encoding/json/v2"
)
// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
dec *jsontext.Decoder // underlying reader of JSON values and tokens, created by NewDecoder
opts jsonv2.Options // options passed to jsonv2.Unmarshal by Decode
err error // error from a failed read in Decode; returned by every later Decode call
}
// NewDecoder returns a new decoder that reads from r.
// The decoder starts out with the options from DefaultOptionsV1.
//
// The decoder introduces its own buffering and may
// read data from r beyond the JSON values requested.
func NewDecoder(r io.Reader) *Decoder {
	// jsontext implements optimizations for *bytes.Buffer that also limit
	// how the buffer may be used: for example, it cannot be written to
	// while the jsontext.Decoder is using it. Wrapping the buffer in an
	// anonymous struct hides its concrete type from jsontext.
	if _, isBuffer := r.(*bytes.Buffer); isBuffer {
		r = struct{ io.Reader }{r}
	}
	d := &Decoder{opts: DefaultOptionsV1()}
	d.dec = jsontext.NewDecoder(r, d.opts)
	return d
}
// UseNumber causes the Decoder to unmarshal a number into an
// interface value as a [Number] instead of as a float64.
func (dec *Decoder) UseNumber() {
	enabled, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber)
	if enabled {
		return // already in effect; leave the options untouched
	}
	dec.opts = jsonv2.JoinOptions(dec.opts, unmarshalAnyWithRawNumber(true))
}
// DisallowUnknownFields causes the Decoder to return an error when the
// destination is a struct and the input contains object keys which do not
// match any non-ignored, exported fields in the destination.
func (dec *Decoder) DisallowUnknownFields() {
	rejecting, _ := jsonv2.GetOption(dec.opts, jsonv2.RejectUnknownMembers)
	if rejecting {
		return // already in effect; leave the options untouched
	}
	dec.opts = jsonv2.JoinOptions(dec.opts, jsonv2.RejectUnknownMembers(true))
}
// Decode reads the next JSON-encoded value from its
// input and stores it in the value pointed to by v.
//
// Once reading a value has failed, the resulting error is remembered
// and returned by every subsequent call.
//
// See the documentation for [Unmarshal] for details about
// the conversion of JSON into a Go value.
func (dec *Decoder) Decode(v any) error {
	if dec.err != nil {
		return dec.err
	}
	raw, err := dec.dec.ReadValue()
	if err == nil {
		return jsonv2.Unmarshal(raw, v, dec.opts)
	}

	dec.err = transformSyntacticError(err)
	if dec.err == errUnexpectedEnd {
		// NOTE: Decode has always been inconsistent with Unmarshal
		// with regard to the exact error value for truncated input.
		dec.err = io.ErrUnexpectedEOF
	}
	return dec.err
}
// Buffered returns a reader of the data remaining in the Decoder's
// buffer. The reader is valid until the next call to [Decoder.Decode].
func (dec *Decoder) Buffered() io.Reader {
	unread := dec.dec.UnreadBuffer()
	return bytes.NewReader(unread)
}
// An Encoder writes JSON values to an output stream.
type Encoder struct {
w io.Writer // destination that Encode writes each encoded value to
opts jsonv2.Options // options passed to jsonv2.MarshalWrite by Encode
err error // error from a failed write to w; returned by every later Encode call
buf bytes.Buffer // scratch buffer that receives the marshaled value
indentBuf bytes.Buffer // scratch buffer that receives the indented form when indentation is enabled
indentPrefix string // prefix argument passed to Indent; set by SetIndent
indentValue string // indent argument passed to Indent; set by SetIndent
}
// NewEncoder returns a new encoder that writes to w.
// The encoder starts out with the options from DefaultOptionsV1.
func NewEncoder(w io.Writer) *Encoder {
	return &Encoder{
		w:    w,
		opts: DefaultOptionsV1(),
	}
}
// Encode writes the JSON encoding of v to the stream,
// followed by a newline character.
//
// A failed write to the underlying writer is remembered and returned by
// every subsequent call. Marshaling and indentation errors are returned
// but not remembered.
//
// See the documentation for [Marshal] for details about the
// conversion of Go values to JSON.
func (enc *Encoder) Encode(v any) error {
	if enc.err != nil {
		return enc.err
	}

	out := &enc.buf
	out.Reset()
	if err := jsonv2.MarshalWrite(out, v, enc.opts); err != nil {
		return err
	}

	// Indentation is enabled when either the prefix or the indent is non-empty.
	if enc.indentPrefix != "" || enc.indentValue != "" {
		enc.indentBuf.Reset()
		err := Indent(&enc.indentBuf, out.Bytes(), enc.indentPrefix, enc.indentValue)
		if err != nil {
			return err
		}
		out = &enc.indentBuf
	}

	out.WriteByte('\n')
	_, err := enc.w.Write(out.Bytes())
	if err != nil {
		enc.err = err
	}
	return err
}
// SetIndent instructs the encoder to format each subsequent encoded
// value as if indented by the package-level function
// Indent(dst, src, prefix, indent).
// Calling SetIndent("", "") disables indentation.
func (enc *Encoder) SetIndent(prefix, indent string) {
	enc.indentPrefix, enc.indentValue = prefix, indent
}
// SetEscapeHTML specifies whether problematic HTML characters
// should be escaped inside JSON quoted strings.
// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
// to avoid certain safety problems that can arise when embedding JSON in HTML.
//
// In non-HTML settings where the escaping interferes with the readability
// of the output, SetEscapeHTML(false) disables this behavior.
func (enc *Encoder) SetEscapeHTML(on bool) {
	current, _ := jsonv2.GetOption(enc.opts, jsontext.EscapeForHTML)
	if current == on {
		return // requested setting is already in effect
	}
	enc.opts = jsonv2.JoinOptions(enc.opts, jsontext.EscapeForHTML(on))
}
// RawMessage is a raw encoded JSON value.
// It implements [Marshaler] and [Unmarshaler] and can
// be used to delay JSON decoding or precompute a JSON encoding.
//
// RawMessage is declared as an alias of [jsontext.Value],
// not as a distinct named type.
type RawMessage = jsontext.Value
// A Token holds a value of one of these types:
//
// - [Delim], for the four JSON delimiters [ ] { }
// - bool, for JSON booleans
// - float64, for JSON numbers (when [Decoder.UseNumber] has not been called)
// - [Number], for JSON numbers (when [Decoder.UseNumber] has been called)
// - string, for JSON string literals
// - nil, for JSON null
type Token any
// A Delim is a JSON array or object delimiter, one of [ ] { or }.
type Delim rune

// String returns the delimiter as a one-character string.
func (d Delim) String() string {
	return string(rune(d))
}
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, [io.EOF].
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type [Delim]
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	tok, err := dec.dec.ReadToken()
	if err != nil {
		return nil, transformSyntacticError(err)
	}

	kind := tok.Kind()
	switch kind {
	case '{', '}', '[', ']':
		return Delim(kind), nil
	case '"':
		return tok.String(), nil
	case '0':
		// Numbers are returned as float64 unless UseNumber has been called.
		asNumber, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber)
		if !asNumber {
			return tok.Float(), nil
		}
		return Number(tok.String()), nil
	case 't':
		return true, nil
	case 'f':
		return false, nil
	case 'n':
		return nil, nil
	}
	panic("unreachable")
}
// More reports whether there is another element in the
// current array or object being parsed.
func (dec *Decoder) More() bool {
	next := dec.dec.PeekKind()
	if next <= 0 {
		// No positive kind was reported for the upcoming token.
		return false
	}
	return next != ']' && next != '}'
}
// InputOffset returns the input stream byte offset of the current decoder
// position. The offset gives the location of the end of the most recently
// returned token and the beginning of the next token.
func (dec *Decoder) InputOffset() int64 {
	offset := dec.dec.InputOffset()
	return offset
}

Some files were not shown because too many files have changed in this diff Show More