diff --git a/go.mod b/go.mod index e6de236f19..ebbfa4f604 100644 --- a/go.mod +++ b/go.mod @@ -4,5 +4,5 @@ go 1.14 require ( github.com/valyala/fasthttp v1.16.0 - golang.org/x/sys v0.0.0-20201026173827-119d4633e4d1 + golang.org/x/sys v0.0.0-20201101102859-da207088b7d1 ) diff --git a/go.sum b/go.sum index 0c99bf2f9c..1d959dc901 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,11 @@ github.com/andybalholm/brotli v1.0.0 h1:7UCwP93aiSfvWpapti8g88vVVGp2qqtGyePsSuDafo4= github.com/andybalholm/brotli v1.0.0/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= +github.com/andybalholm/brotli v1.0.1 h1:KqhlKozYbRtJvsPrrEeXcO+N2l6NYT5A2QAFmSULpEc= +github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/klauspost/compress v1.10.7 h1:7rix8v8GpI3ZBb0nSozFRgbtXKv+hOe+qfEpZqybrAg= github.com/klauspost/compress v1.10.7/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.11.2 h1:MiK62aErc3gIiVEtyzKfeOHgW7atJb5g/KNX5m3c2nQ= +github.com/klauspost/compress v1.11.2/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.16.0 h1:9zAqOYLl8Tuy3E5R6ckzGDJ1g8+pw15oQp2iL9Jl6gQ= @@ -17,7 +21,7 @@ golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20u golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200602225109-6fdc65e7d980 h1:OjiUf46hAmXblsZdnoSXsEUSKU8r1UEzcL5RVZ4gO9Y= golang.org/x/sys v0.0.0-20200602225109-6fdc65e7d980/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201026173827-119d4633e4d1 h1:/DtoiOYKoQCcIFXQjz07RnWNPRCbqmSXSpgEzhC9ZHM= -golang.org/x/sys v0.0.0-20201026173827-119d4633e4d1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201101102859-da207088b7d1 h1:a/mKvvZr9Jcc8oKfcmgzyp7OwF73JPWsQLvH1z2Kxck= +golang.org/x/sys v0.0.0-20201101102859-da207088b7d1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/internal/encoding/ascii/valid.go b/internal/encoding/ascii/valid.go index f583050180..b16e0433ea 100644 --- a/internal/encoding/ascii/valid.go +++ b/internal/encoding/ascii/valid.go @@ -14,12 +14,12 @@ func ValidString(s string) bool { return valid(unsafe.Pointer(&s), uintptr(len(s))) } -// ValidByte returns true if b is an ASCII character. +// ValidBytes returns true if b is an ASCII character. func ValidByte(b byte) bool { return b <= 0x7f } -// ValidRune returns true if r is an ASCII character. +// ValidBytes returns true if b is an ASCII character. func ValidRune(r rune) bool { return r <= 0x7f } @@ -59,22 +59,22 @@ func valid(s unsafe.Pointer, n uintptr) bool { return (x & 0x80808080) == 0 } -// ValidPrint returns true if b contains only printable ASCII characters. +// Valid returns true if b contains only printable ASCII characters. func ValidPrint(b []byte) bool { return validPrint(unsafe.Pointer(&b), uintptr(len(b))) } -// ValidPrintString returns true if s contains only printable ASCII characters. +// ValidString returns true if s contains only printable ASCII characters. func ValidPrintString(s string) bool { return validPrint(unsafe.Pointer(&s), uintptr(len(s))) } -// ValidPrintByte returns true if b is an ASCII character. +// ValidBytes returns true if b is an ASCII character. func ValidPrintByte(b byte) bool { return 0x20 <= b && b <= 0x7e } -// ValidPrintRune returns true if r is an ASCII character. +// ValidBytes returns true if b is an ASCII character. func ValidPrintRune(r rune) bool { return 0x20 <= r && r <= 0x7e } diff --git a/internal/encoding/json/codec.go b/internal/encoding/json/codec.go index cf7216d9e7..371da3f1b8 100644 --- a/internal/encoding/json/codec.go +++ b/internal/encoding/json/codec.go @@ -675,18 +675,21 @@ func appendStructFields(fields []structField, t reflect.Type, offset uintptr, se } for i := range fields { - fields[i].json = encodeString(fields[i].name, 0) - fields[i].html = encodeString(fields[i].name, EscapeHTML) + name := fields[i].name + fields[i].json = encodeKeyFragment(name, 0) + fields[i].html = encodeKeyFragment(name, EscapeHTML) } sort.Slice(fields, func(i, j int) bool { return fields[i].index < fields[j].index }) return fields } -func encodeString(s string, flags AppendFlags) string { - b := make([]byte, 0, len(s)+2) +func encodeKeyFragment(s string, flags AppendFlags) string { + b := make([]byte, 1, len(s)+4) + b[0] = ',' e := encoder{flags: flags} b, _ = e.encodeString(b, unsafe.Pointer(&s)) + b = append(b, ':') return *(*string)(unsafe.Pointer(&b)) } diff --git a/internal/encoding/json/encode.go b/internal/encoding/json/encode.go index e6e1e29c21..d0b0b3ce0a 100644 --- a/internal/encoding/json/encode.go +++ b/internal/encoding/json/encode.go @@ -27,47 +27,47 @@ func (e encoder) encodeBool(b []byte, p unsafe.Pointer) ([]byte, error) { } func (e encoder) encodeInt(b []byte, p unsafe.Pointer) ([]byte, error) { - return strconv.AppendInt(b, int64(*(*int)(p)), 10), nil + return appendInt(b, int64(*(*int)(p))), nil } func (e encoder) encodeInt8(b []byte, p unsafe.Pointer) ([]byte, error) { - return strconv.AppendInt(b, int64(*(*int8)(p)), 10), nil + return appendInt(b, int64(*(*int8)(p))), nil } func (e encoder) encodeInt16(b []byte, p unsafe.Pointer) ([]byte, error) { - return strconv.AppendInt(b, int64(*(*int16)(p)), 10), nil + return appendInt(b, int64(*(*int16)(p))), nil } func (e encoder) encodeInt32(b []byte, p unsafe.Pointer) ([]byte, error) { - return strconv.AppendInt(b, int64(*(*int32)(p)), 10), nil + return appendInt(b, int64(*(*int32)(p))), nil } func (e encoder) encodeInt64(b []byte, p unsafe.Pointer) ([]byte, error) { - return strconv.AppendInt(b, *(*int64)(p), 10), nil + return appendInt(b, *(*int64)(p)), nil } func (e encoder) encodeUint(b []byte, p unsafe.Pointer) ([]byte, error) { - return strconv.AppendUint(b, uint64(*(*uint)(p)), 10), nil + return appendUint(b, uint64(*(*uint)(p))), nil } func (e encoder) encodeUintptr(b []byte, p unsafe.Pointer) ([]byte, error) { - return strconv.AppendUint(b, uint64(*(*uintptr)(p)), 10), nil + return appendUint(b, uint64(*(*uintptr)(p))), nil } func (e encoder) encodeUint8(b []byte, p unsafe.Pointer) ([]byte, error) { - return strconv.AppendUint(b, uint64(*(*uint8)(p)), 10), nil + return appendUint(b, uint64(*(*uint8)(p))), nil } func (e encoder) encodeUint16(b []byte, p unsafe.Pointer) ([]byte, error) { - return strconv.AppendUint(b, uint64(*(*uint16)(p)), 10), nil + return appendUint(b, uint64(*(*uint16)(p))), nil } func (e encoder) encodeUint32(b []byte, p unsafe.Pointer) ([]byte, error) { - return strconv.AppendUint(b, uint64(*(*uint32)(p)), 10), nil + return appendUint(b, uint64(*(*uint32)(p))), nil } func (e encoder) encodeUint64(b []byte, p unsafe.Pointer) ([]byte, error) { - return strconv.AppendUint(b, *(*uint64)(p), 10), nil + return appendUint(b, *(*uint64)(p)), nil } func (e encoder) encodeFloat32(b []byte, p unsafe.Pointer) ([]byte, error) { @@ -130,12 +130,21 @@ func (e encoder) encodeNumber(b []byte, p unsafe.Pointer) ([]byte, error) { func (e encoder) encodeString(b []byte, p unsafe.Pointer) ([]byte, error) { s := *(*string)(p) + if len(s) == 0 { + return append(b, `""`...), nil + } i := 0 j := 0 escapeHTML := (e.flags & EscapeHTML) != 0 b = append(b, '"') + if len(s) >= 8 { + if j = escapeIndex(s, escapeHTML); j < 0 { + return append(append(b, s...), '"'), nil + } + } + for j < len(s) { c := s[j] @@ -532,6 +541,8 @@ func (e encoder) encodeStruct(b []byte, p unsafe.Pointer, st *structType) ([]byt var n int b = append(b, '{') + escapeHTML := (e.flags & EscapeHTML) != 0 + for i := range st.fields { f := &st.fields[i] v := unsafe.Pointer(uintptr(p) + f.offset) @@ -540,7 +551,7 @@ func (e encoder) encodeStruct(b []byte, p unsafe.Pointer, st *structType) ([]byt continue } - if (e.flags & EscapeHTML) != 0 { + if escapeHTML { k = f.html } else { k = f.json @@ -549,12 +560,11 @@ func (e encoder) encodeStruct(b []byte, p unsafe.Pointer, st *structType) ([]byt lengthBeforeKey := len(b) if n != 0 { - b = append(b, ',') + b = append(b, k...) + } else { + b = append(b, k[1:]...) } - b = append(b, k...) - b = append(b, ':') - if b, err = f.codec.encode(e, b, v); err != nil { if err == (rollback{}) { b = b[:lengthBeforeKey] diff --git a/internal/encoding/json/int.go b/internal/encoding/json/int.go new file mode 100644 index 0000000000..b53149cbd7 --- /dev/null +++ b/internal/encoding/json/int.go @@ -0,0 +1,98 @@ +package json + +import ( + "unsafe" +) + +var endianness int + +func init() { + var b [2]byte + *(*uint16)(unsafe.Pointer(&b)) = uint16(0xABCD) + + switch b[0] { + case 0xCD: + endianness = 0 // LE + case 0xAB: + endianness = 1 // BE + default: + panic("could not determine endianness") + } +} + +// "00010203...96979899" cast to []uint16 +var intLELookup = [100]uint16{ + 0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930, + 0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731, 0x3831, 0x3931, + 0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632, 0x3732, 0x3832, 0x3932, + 0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533, 0x3633, 0x3733, 0x3833, 0x3933, + 0x3034, 0x3134, 0x3234, 0x3334, 0x3434, 0x3534, 0x3634, 0x3734, 0x3834, 0x3934, + 0x3035, 0x3135, 0x3235, 0x3335, 0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935, + 0x3036, 0x3136, 0x3236, 0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936, + 0x3037, 0x3137, 0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937, + 0x3038, 0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938, + 0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839, 0x3939, +} + +var intBELookup = [100]uint16{ + 0x3030, 0x3031, 0x3032, 0x3033, 0x3034, 0x3035, 0x3036, 0x3037, 0x3038, 0x3039, + 0x3130, 0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137, 0x3138, 0x3139, + 0x3230, 0x3231, 0x3232, 0x3233, 0x3234, 0x3235, 0x3236, 0x3237, 0x3238, 0x3239, + 0x3330, 0x3331, 0x3332, 0x3333, 0x3334, 0x3335, 0x3336, 0x3337, 0x3338, 0x3339, + 0x3430, 0x3431, 0x3432, 0x3433, 0x3434, 0x3435, 0x3436, 0x3437, 0x3438, 0x3439, + 0x3530, 0x3531, 0x3532, 0x3533, 0x3534, 0x3535, 0x3536, 0x3537, 0x3538, 0x3539, + 0x3630, 0x3631, 0x3632, 0x3633, 0x3634, 0x3635, 0x3636, 0x3637, 0x3638, 0x3639, + 0x3730, 0x3731, 0x3732, 0x3733, 0x3734, 0x3735, 0x3736, 0x3737, 0x3738, 0x3739, + 0x3830, 0x3831, 0x3832, 0x3833, 0x3834, 0x3835, 0x3836, 0x3837, 0x3838, 0x3839, + 0x3930, 0x3931, 0x3932, 0x3933, 0x3934, 0x3935, 0x3936, 0x3937, 0x3938, 0x3939, +} + +var intLookup = [2]*[100]uint16{&intLELookup, &intBELookup} + +func appendInt(b []byte, n int64) []byte { + return formatInteger(b, uint64(n), n < 0) +} + +func appendUint(b []byte, n uint64) []byte { + return formatInteger(b, n, false) +} + +func formatInteger(out []byte, n uint64, negative bool) []byte { + if !negative { + if n < 10 { + return append(out, byte(n+'0')) + } else if n < 100 { + u := intLELookup[n] + return append(out, byte(u), byte(u>>8)) + } + } else { + n = -n + } + + lookup := intLookup[endianness] + + var b [22]byte + u := (*[11]uint16)(unsafe.Pointer(&b)) + i := 11 + + for n >= 100 { + j := n % 100 + n /= 100 + i-- + u[i] = lookup[j] + } + + i-- + u[i] = lookup[n] + + i *= 2 // convert to byte index + if n < 10 { + i++ // remove leading zero + } + if negative { + i-- + b[i] = '-' + } + + return append(out, b[i:]...) +} diff --git a/internal/encoding/json/int_test.go b/internal/encoding/json/int_test.go new file mode 100644 index 0000000000..46113750aa --- /dev/null +++ b/internal/encoding/json/int_test.go @@ -0,0 +1,58 @@ +package json + +import ( + "math" + "strconv" + "testing" +) + +func TestAppendInt(t *testing.T) { + var ints []int64 + for i := 0; i < 64; i++ { + u := uint64(1) << i + ints = append(ints, int64(u-1), int64(u), int64(u+1), -int64(u)) + } + + var std [20]byte + var our [20]byte + + for _, i := range ints { + expected := strconv.AppendInt(std[:], i, 10) + actual := appendInt(our[:], i) + if string(expected) != string(actual) { + t.Fatalf("appendInt(%d) = %v, expected = %v", i, string(actual), string(expected)) + } + } +} + +func benchStd(b *testing.B, n int64) { + var buf [20]byte + b.ResetTimer() + for i := 0; i < b.N; i++ { + strconv.AppendInt(buf[:0], n, 10) + } +} + +func benchNew(b *testing.B, n int64) { + var buf [20]byte + b.ResetTimer() + for i := 0; i < b.N; i++ { + appendInt(buf[:0], n) + } +} + +func BenchmarkAppendIntStd1(b *testing.B) { + benchStd(b, 1) +} + +func BenchmarkAppendInt1(b *testing.B) { + benchNew(b, 1) +} + +func BenchmarkAppendIntStdMinI64(b *testing.B) { + benchStd(b, math.MinInt64) +} + +func BenchmarkAppendIntMinI64(b *testing.B) { + benchNew(b, math.MinInt64) +} diff --git a/internal/encoding/json/parse.go b/internal/encoding/json/parse.go index e483f063a8..a033d47bf3 100644 --- a/internal/encoding/json/parse.go +++ b/internal/encoding/json/parse.go @@ -371,16 +371,16 @@ func parseNumber(b []byte) (v, r []byte, err error) { func parseUnicode(b []byte) (rune, int, error) { if len(b) < 4 { - return 0, 0, syntaxError(b, "unicode code point must have at least 4 characters") + return 0, len(b), syntaxError(b, "unicode code point must have at least 4 characters") } u, r, err := parseUintHex(b[:4]) if err != nil { - return 0, 0, syntaxError(b, "parsing unicode code point: %s", err) + return 0, 4, syntaxError(b, "parsing unicode code point: %s", err) } if len(r) != 0 { - return 0, 0, syntaxError(b, "invalid unicode code point") + return 0, 4, syntaxError(b, "invalid unicode code point") } return rune(u), 4, nil @@ -411,7 +411,7 @@ func parseStringFast(b []byte) ([]byte, []byte, bool, error) { case 'u': _, n, err := parseUnicode(b[i+1:]) if err != nil { - return nil, b, false, err + return nil, b[i+1+n:], false, err } i += n default: diff --git a/internal/encoding/json/reflect.go b/internal/encoding/json/reflect.go index 3f852f1cdb..be9344ed88 100644 --- a/internal/encoding/json/reflect.go +++ b/internal/encoding/json/reflect.go @@ -1,4 +1,4 @@ -// +build go1.15 +// +build go1.16 package json diff --git a/internal/encoding/json/reflect_optimize.go b/internal/encoding/json/reflect_optimize.go index ab8fc9eebf..31eea727db 100644 --- a/internal/encoding/json/reflect_optimize.go +++ b/internal/encoding/json/reflect_optimize.go @@ -1,4 +1,4 @@ -// +build !go1.15 +// +build !go1.16 package json diff --git a/internal/encoding/json/string.go b/internal/encoding/json/string.go new file mode 100644 index 0000000000..ae7f57b85d --- /dev/null +++ b/internal/encoding/json/string.go @@ -0,0 +1,71 @@ +package json + +import ( + "math/bits" + "reflect" + "unsafe" +) + +const ( + lsb = 0x0101010101010101 + msb = 0x8080808080808080 +) + +// escapeIndex finds the index of the first char in `s` that requires escaping. +// A char requires escaping if it's outside of the range of [0x20, 0x7F] or if +// it includes a double quote or backslash. If the escapeHTML mode is enabled, +// the chars <, > and & also require escaping. If no chars in `s` require +// escaping, the return value is -1. +func escapeIndex(s string, escapeHTML bool) int { + chunks := stringToUint64(s) + for _, n := range chunks { + // combine masks before checking for the MSB of each byte. We include + // `n` in the mask to check whether any of the *input* byte MSBs were + // set (i.e. the byte was outside the ASCII range). + mask := n | below(n, 0x20) | contains(n, '"') | contains(n, '\\') + if escapeHTML { + mask |= contains(n, '<') | contains(n, '>') | contains(n, '&') + } + if (mask & msb) != 0 { + return bits.TrailingZeros64(mask&msb) / 8 + } + } + + for i := len(chunks) * 8; i < len(s); i++ { + c := s[i] + if c < 0x20 || c > 0x7f || c == '"' || c == '\\' || (escapeHTML && (c == '<' || c == '>' || c == '&')) { + return i + } + } + + return -1 +} + +// below return a mask that can be used to determine if any of the bytes +// in `n` are below `b`. If a byte's MSB is set in the mask then that byte was +// below `b`. The result is only valid if `b`, and each byte in `n`, is below +// 0x80. +func below(n uint64, b byte) uint64 { + return n - expand(b) +} + +// contains returns a mask that can be used to determine if any of the +// bytes in `n` are equal to `b`. If a byte's MSB is set in the mask then +// that byte is equal to `b`. The result is only valid if `b`, and each +// byte in `n`, is below 0x80. +func contains(n uint64, b byte) uint64 { + return (n ^ expand(b)) - lsb +} + +// expand puts the specified byte into each of the 8 bytes of a uint64. +func expand(b byte) uint64 { + return lsb * uint64(b) +} + +func stringToUint64(s string) []uint64 { + return *(*[]uint64)(unsafe.Pointer(&reflect.SliceHeader{ + Data: ((*reflect.StringHeader)(unsafe.Pointer(&s))).Data, + Len: len(s) / 8, + Cap: len(s) / 8, + })) +} diff --git a/internal/encoding/json/token.go b/internal/encoding/json/token.go index b79940182c..9cac4db32e 100644 --- a/internal/encoding/json/token.go +++ b/internal/encoding/json/token.go @@ -189,6 +189,10 @@ skipLoop: case ':': t.isKey = false case ',': + if len(t.stack) == 0 { + t.Err = syntaxError(t.json, "found unexpected comma") + return false + } if t.is(inObject) { t.isKey = true }