Source file src/encoding/json/internal/jsonwire/encode.go

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.jsonv2
     6  
     7  package jsonwire
     8  
     9  import (
    10  	"math"
    11  	"slices"
    12  	"strconv"
    13  	"unicode/utf16"
    14  	"unicode/utf8"
    15  
    16  	"encoding/json/internal/jsonflags"
    17  )
    18  
    19  // escapeASCII reports whether the ASCII character needs to be escaped.
    20  // It conservatively assumes EscapeForHTML.
    21  var escapeASCII = [...]uint8{
    22  	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // escape control characters
    23  	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // escape control characters
    24  	0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // escape '"' and '&'
    25  	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, // escape '<' and '>'
    26  	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    27  	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // escape '\\'
    28  	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    29  	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    30  }
    31  
    32  // NeedEscape reports whether src needs escaping of any characters.
    33  // It conservatively assumes EscapeForHTML and EscapeForJS.
    34  // It reports true for inputs with invalid UTF-8.
    35  func NeedEscape[Bytes ~[]byte | ~string](src Bytes) bool {
    36  	var i int
    37  	for uint(len(src)) > uint(i) {
    38  		if c := src[i]; c < utf8.RuneSelf {
    39  			if escapeASCII[c] > 0 {
    40  				return true
    41  			}
    42  			i++
    43  		} else {
    44  			r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[i:])))
    45  			if r == utf8.RuneError || r == '\u2028' || r == '\u2029' {
    46  				return true
    47  			}
    48  			i += rn
    49  		}
    50  	}
    51  	return false
    52  }
    53  
    54  // AppendQuote appends src to dst as a JSON string per RFC 7159, section 7.
    55  //
    56  // It takes in flags and respects the following:
    57  //   - EscapeForHTML escapes '<', '>', and '&'.
    58  //   - EscapeForJS escapes '\u2028' and '\u2029'.
    59  //   - AllowInvalidUTF8 avoids reporting an error for invalid UTF-8.
    60  //
    61  // Regardless of whether AllowInvalidUTF8 is specified,
    62  // invalid bytes are replaced with the Unicode replacement character ('\ufffd').
    63  // If no escape flags are set, then the shortest representable form is used,
    64  // which is also the canonical form for strings (RFC 8785, section 3.2.2.2).
    65  func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, flags *jsonflags.Flags) ([]byte, error) {
    66  	var i, n int
    67  	var hasInvalidUTF8 bool
    68  	dst = slices.Grow(dst, len(`"`)+len(src)+len(`"`))
    69  	dst = append(dst, '"')
    70  	for uint(len(src)) > uint(n) {
    71  		if c := src[n]; c < utf8.RuneSelf {
    72  			// Handle single-byte ASCII.
    73  			n++
    74  			if escapeASCII[c] == 0 {
    75  				continue // no escaping possibly needed
    76  			}
    77  			// Handle escaping of single-byte ASCII.
    78  			if !(c == '<' || c == '>' || c == '&') || flags.Get(jsonflags.EscapeForHTML) {
    79  				dst = append(dst, src[i:n-1]...)
    80  				dst = appendEscapedASCII(dst, c)
    81  				i = n
    82  			}
    83  		} else {
    84  			// Handle multi-byte Unicode.
    85  			r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:])))
    86  			n += rn
    87  			if r != utf8.RuneError && r != '\u2028' && r != '\u2029' {
    88  				continue // no escaping possibly needed
    89  			}
    90  			// Handle escaping of multi-byte Unicode.
    91  			switch {
    92  			case isInvalidUTF8(r, rn):
    93  				hasInvalidUTF8 = true
    94  				dst = append(dst, src[i:n-rn]...)
    95  				dst = append(dst, "\ufffd"...)
    96  				i = n
    97  			case (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS):
    98  				dst = append(dst, src[i:n-rn]...)
    99  				dst = appendEscapedUnicode(dst, r)
   100  				i = n
   101  			}
   102  		}
   103  	}
   104  	dst = append(dst, src[i:n]...)
   105  	dst = append(dst, '"')
   106  	if hasInvalidUTF8 && !flags.Get(jsonflags.AllowInvalidUTF8) {
   107  		return dst, ErrInvalidUTF8
   108  	}
   109  	return dst, nil
   110  }
   111  
   112  func appendEscapedASCII(dst []byte, c byte) []byte {
   113  	switch c {
   114  	case '"', '\\':
   115  		dst = append(dst, '\\', c)
   116  	case '\b':
   117  		dst = append(dst, "\\b"...)
   118  	case '\f':
   119  		dst = append(dst, "\\f"...)
   120  	case '\n':
   121  		dst = append(dst, "\\n"...)
   122  	case '\r':
   123  		dst = append(dst, "\\r"...)
   124  	case '\t':
   125  		dst = append(dst, "\\t"...)
   126  	default:
   127  		dst = appendEscapedUTF16(dst, uint16(c))
   128  	}
   129  	return dst
   130  }
   131  
   132  func appendEscapedUnicode(dst []byte, r rune) []byte {
   133  	if r1, r2 := utf16.EncodeRune(r); r1 != '\ufffd' && r2 != '\ufffd' {
   134  		dst = appendEscapedUTF16(dst, uint16(r1))
   135  		dst = appendEscapedUTF16(dst, uint16(r2))
   136  	} else {
   137  		dst = appendEscapedUTF16(dst, uint16(r))
   138  	}
   139  	return dst
   140  }
   141  
   142  func appendEscapedUTF16(dst []byte, x uint16) []byte {
   143  	const hex = "0123456789abcdef"
   144  	return append(dst, '\\', 'u', hex[(x>>12)&0xf], hex[(x>>8)&0xf], hex[(x>>4)&0xf], hex[(x>>0)&0xf])
   145  }
   146  
   147  // ReformatString consumes a JSON string from src and appends it to dst,
   148  // reformatting it if necessary according to the specified flags.
   149  // It returns the appended output and the number of consumed input bytes.
   150  func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
   151  	// TODO: Should this update ValueFlags as input?
   152  	var valFlags ValueFlags
   153  	n, err := ConsumeString(&valFlags, src, !flags.Get(jsonflags.AllowInvalidUTF8))
   154  	if err != nil {
   155  		return dst, n, err
   156  	}
   157  
   158  	// If the output requires no special escapes, and the input
   159  	// is already in canonical form or should be preserved verbatim,
   160  	// then directly copy the input to the output.
   161  	if !flags.Get(jsonflags.AnyEscape) &&
   162  		(valFlags.IsCanonical() || flags.Get(jsonflags.PreserveRawStrings)) {
   163  		dst = append(dst, src[:n]...) // copy the string verbatim
   164  		return dst, n, nil
   165  	}
   166  
   167  	// Under [jsonflags.PreserveRawStrings], any pre-escaped sequences
   168  	// remain escaped, however we still need to respect the
   169  	// [jsonflags.EscapeForHTML] and [jsonflags.EscapeForJS] options.
   170  	if flags.Get(jsonflags.PreserveRawStrings) {
   171  		var i, lastAppendIndex int
   172  		for i < n {
   173  			if c := src[i]; c < utf8.RuneSelf {
   174  				if (c == '<' || c == '>' || c == '&') && flags.Get(jsonflags.EscapeForHTML) {
   175  					dst = append(dst, src[lastAppendIndex:i]...)
   176  					dst = appendEscapedASCII(dst, c)
   177  					lastAppendIndex = i + 1
   178  				}
   179  				i++
   180  			} else {
   181  				r, rn := utf8.DecodeRune(truncateMaxUTF8(src[i:]))
   182  				if (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS) {
   183  					dst = append(dst, src[lastAppendIndex:i]...)
   184  					dst = appendEscapedUnicode(dst, r)
   185  					lastAppendIndex = i + rn
   186  				}
   187  				i += rn
   188  			}
   189  		}
   190  		return append(dst, src[lastAppendIndex:n]...), n, nil
   191  	}
   192  
   193  	// The input contains characters that might need escaping,
   194  	// unnecessary escape sequences, or invalid UTF-8.
   195  	// Perform a round-trip unquote and quote to properly reformat
   196  	// these sequences according the current flags.
   197  	b, _ := AppendUnquote(nil, src[:n])
   198  	dst, _ = AppendQuote(dst, b, flags)
   199  	return dst, n, nil
   200  }
   201  
   202  // AppendFloat appends src to dst as a JSON number per RFC 7159, section 6.
   203  // It formats numbers similar to the ES6 number-to-string conversion.
   204  // See https://go.dev/issue/14135.
   205  //
   206  // The output is identical to ECMA-262, 6th edition, section 7.1.12.1 and with
   207  // RFC 8785, section 3.2.2.3 for 64-bit floating-point numbers except for -0,
   208  // which is formatted as -0 instead of just 0.
   209  //
   210  // For 32-bit floating-point numbers,
   211  // the output is a 32-bit equivalent of the algorithm.
   212  // Note that ECMA-262 specifies no algorithm for 32-bit numbers.
   213  func AppendFloat(dst []byte, src float64, bits int) []byte {
   214  	if bits == 32 {
   215  		src = float64(float32(src))
   216  	}
   217  
   218  	abs := math.Abs(src)
   219  	fmt := byte('f')
   220  	if abs != 0 {
   221  		if bits == 64 && (float64(abs) < 1e-6 || float64(abs) >= 1e21) ||
   222  			bits == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) {
   223  			fmt = 'e'
   224  		}
   225  	}
   226  	dst = strconv.AppendFloat(dst, src, fmt, -1, bits)
   227  	if fmt == 'e' {
   228  		// Clean up e-09 to e-9.
   229  		n := len(dst)
   230  		if n >= 4 && dst[n-4] == 'e' && dst[n-3] == '-' && dst[n-2] == '0' {
   231  			dst[n-2] = dst[n-1]
   232  			dst = dst[:n-1]
   233  		}
   234  	}
   235  	return dst
   236  }
   237  
   238  // ReformatNumber consumes a JSON string from src and appends it to dst,
   239  // canonicalizing it if specified.
   240  // It returns the appended output and the number of consumed input bytes.
   241  func ReformatNumber(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
   242  	n, err := ConsumeNumber(src)
   243  	if err != nil {
   244  		return dst, n, err
   245  	}
   246  	if !flags.Get(jsonflags.CanonicalizeNumbers) {
   247  		dst = append(dst, src[:n]...) // copy the number verbatim
   248  		return dst, n, nil
   249  	}
   250  
   251  	// Identify the kind of number.
   252  	var isFloat bool
   253  	for _, c := range src[:n] {
   254  		if c == '.' || c == 'e' || c == 'E' {
   255  			isFloat = true // has fraction or exponent
   256  			break
   257  		}
   258  	}
   259  
   260  	// Check if need to canonicalize this kind of number.
   261  	switch {
   262  	case string(src[:n]) == "-0":
   263  		break // canonicalize -0 as 0 regardless of kind
   264  	case isFloat:
   265  		if !flags.Get(jsonflags.CanonicalizeRawFloats) {
   266  			dst = append(dst, src[:n]...) // copy the number verbatim
   267  			return dst, n, nil
   268  		}
   269  	default:
   270  		// As an optimization, we can copy integer numbers below 2⁵³ verbatim
   271  		// since the canonical form is always identical.
   272  		const maxExactIntegerDigits = 16 // len(strconv.AppendUint(nil, 1<<53, 10))
   273  		if !flags.Get(jsonflags.CanonicalizeRawInts) || n < maxExactIntegerDigits {
   274  			dst = append(dst, src[:n]...) // copy the number verbatim
   275  			return dst, n, nil
   276  		}
   277  	}
   278  
   279  	// Parse and reformat the number (which uses a canonical format).
   280  	fv, _ := strconv.ParseFloat(string(src[:n]), 64)
   281  	switch {
   282  	case fv == 0:
   283  		fv = 0 // normalize negative zero as just zero
   284  	case math.IsInf(fv, +1):
   285  		fv = +math.MaxFloat64
   286  	case math.IsInf(fv, -1):
   287  		fv = -math.MaxFloat64
   288  	}
   289  	return AppendFloat(dst, fv, 64), n, nil
   290  }
   291  

View as plain text