Source file src/internal/pkgbits/encoder.go

     1  // Copyright 2021 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package pkgbits
     6  
     7  import (
     8  	"bytes"
     9  	"crypto/sha256"
    10  	"encoding/binary"
    11  	"go/constant"
    12  	"io"
    13  	"math/big"
    14  	"runtime"
    15  	"strings"
    16  )
    17  
// A PkgEncoder provides methods for encoding a package's Unified IR
// export data.
//
// Element bitstreams are accumulated in memory, grouped by section,
// and are written out as a whole by DumpTo.
type PkgEncoder struct {
	// version of the bitstream.
	version Version

	// elems holds the bitstream for previously encoded elements,
	// indexed first by section and then by element index within
	// that section.
	elems [numRelocs][]string

	// stringsIdx maps previously encoded strings to their index within
	// the SectionString section, to allow deduplication. That is,
	// elems[SectionString][stringsIdx[s]] == s (if present).
	stringsIdx map[string]RelElemIdx

	// syncFrames is the number of frames to write at each sync
	// marker. A negative value means sync markers are omitted.
	syncFrames int
}
    36  
    37  // SyncMarkers reports whether pw uses sync markers.
    38  func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
    39  
    40  // NewPkgEncoder returns an initialized PkgEncoder.
    41  //
    42  // syncFrames is the number of caller frames that should be serialized
    43  // at Sync points. Serializing additional frames results in larger
    44  // export data files, but can help diagnosing desync errors in
    45  // higher-level Unified IR reader/writer code. If syncFrames is
    46  // negative, then sync markers are omitted entirely.
    47  func NewPkgEncoder(version Version, syncFrames int) PkgEncoder {
    48  	return PkgEncoder{
    49  		version:    version,
    50  		stringsIdx: make(map[string]RelElemIdx),
    51  		syncFrames: syncFrames,
    52  	}
    53  }
    54  
    55  // DumpTo writes the package's encoded data to out0 and returns the
    56  // package fingerprint.
    57  func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
    58  	h := sha256.New()
    59  	out := io.MultiWriter(out0, h)
    60  
    61  	writeUint32 := func(x uint32) {
    62  		assert(binary.Write(out, binary.LittleEndian, x) == nil)
    63  	}
    64  
    65  	writeUint32(uint32(pw.version))
    66  
    67  	if pw.version.Has(Flags) {
    68  		var flags uint32
    69  		if pw.SyncMarkers() {
    70  			flags |= flagSyncMarkers
    71  		}
    72  		writeUint32(flags)
    73  	}
    74  
    75  	// TODO(markfreeman): Also can use delta encoding to write section ends,
    76  	// but not as impactful.
    77  	var sum uint32
    78  	for _, elems := range &pw.elems {
    79  		sum += uint32(len(elems))
    80  		writeUint32(sum)
    81  	}
    82  
    83  	// TODO(markfreeman): Use delta encoding to store element ends and inflate
    84  	// back to this representation during decoding; the numbers will be much
    85  	// smaller.
    86  	sum = 0
    87  	for _, elems := range &pw.elems {
    88  		for _, elem := range elems {
    89  			sum += uint32(len(elem))
    90  			writeUint32(sum)
    91  		}
    92  	}
    93  
    94  	// Write elemData.
    95  	for _, elems := range &pw.elems {
    96  		for _, elem := range elems {
    97  			_, err := io.WriteString(out, elem)
    98  			assert(err == nil)
    99  		}
   100  	}
   101  
   102  	// Write fingerprint.
   103  	copy(fingerprint[:], h.Sum(nil))
   104  	_, err := out0.Write(fingerprint[:])
   105  	assert(err == nil)
   106  
   107  	return
   108  }
   109  
   110  // StringIdx adds a string value to the strings section, if not
   111  // already present, and returns its index.
   112  func (pw *PkgEncoder) StringIdx(s string) RelElemIdx {
   113  	if idx, ok := pw.stringsIdx[s]; ok {
   114  		assert(pw.elems[SectionString][idx] == s)
   115  		return idx
   116  	}
   117  
   118  	idx := RelElemIdx(len(pw.elems[SectionString]))
   119  	pw.elems[SectionString] = append(pw.elems[SectionString], s)
   120  	pw.stringsIdx[s] = idx
   121  	return idx
   122  }
   123  
   124  // NewEncoder returns an Encoder for a new element within the given
   125  // section, and encodes the given SyncMarker as the start of the
   126  // element bitstream.
   127  func (pw *PkgEncoder) NewEncoder(k SectionKind, marker SyncMarker) *Encoder {
   128  	e := pw.NewEncoderRaw(k)
   129  	e.Sync(marker)
   130  	return e
   131  }
   132  
   133  // NewEncoderRaw returns an Encoder for a new element within the given
   134  // section.
   135  //
   136  // Most callers should use NewEncoder instead.
   137  func (pw *PkgEncoder) NewEncoderRaw(k SectionKind) *Encoder {
   138  	idx := RelElemIdx(len(pw.elems[k]))
   139  	pw.elems[k] = append(pw.elems[k], "") // placeholder
   140  
   141  	return &Encoder{
   142  		p:   pw,
   143  		k:   k,
   144  		Idx: idx,
   145  	}
   146  }
   147  
// An Encoder provides methods for encoding an individual element's
// bitstream data.
//
// Data is accumulated in memory and only becomes part of the owning
// PkgEncoder's output once Flush is called.
type Encoder struct {
	// p is the PkgEncoder that this element belongs to.
	p *PkgEncoder

	// Relocs is the element's reference table: the (section, index)
	// pairs the element refers to, in order of first use.
	Relocs []RefTableEntry
	// RelocMap maps each entry of Relocs to its position in Relocs.
	// It is allocated lazily on the first relocation.
	RelocMap map[RefTableEntry]uint32
	Data     bytes.Buffer // accumulated element bitstream data

	// encodingRelocHeader is set by Flush while the relocation header
	// is being written; Sync omits stack frames while it is set.
	encodingRelocHeader bool

	k   SectionKind // section that the element belongs to
	Idx RelElemIdx  // index within relocation section
}
   162  
   163  // Flush finalizes the element's bitstream and returns its [RelElemIdx].
   164  func (w *Encoder) Flush() RelElemIdx {
   165  	var sb strings.Builder
   166  
   167  	// Backup the data so we write the relocations at the front.
   168  	var tmp bytes.Buffer
   169  	io.Copy(&tmp, &w.Data)
   170  
   171  	// TODO(mdempsky): Consider writing these out separately so they're
   172  	// easier to strip, along with function bodies, so that we can prune
   173  	// down to just the data that's relevant to go/types.
   174  	if w.encodingRelocHeader {
   175  		panic("encodingRelocHeader already true; recursive flush?")
   176  	}
   177  	w.encodingRelocHeader = true
   178  	w.Sync(SyncRelocs)
   179  	w.Len(len(w.Relocs))
   180  	for _, rEnt := range w.Relocs {
   181  		w.Sync(SyncReloc)
   182  		w.Len(int(rEnt.Kind))
   183  		w.Len(int(rEnt.Idx))
   184  	}
   185  
   186  	io.Copy(&sb, &w.Data)
   187  	io.Copy(&sb, &tmp)
   188  	w.p.elems[w.k][w.Idx] = sb.String()
   189  
   190  	return w.Idx
   191  }
   192  
   193  func (w *Encoder) checkErr(err error) {
   194  	if err != nil {
   195  		panicf("unexpected encoding error: %v", err)
   196  	}
   197  }
   198  
   199  func (w *Encoder) rawUvarint(x uint64) {
   200  	var buf [binary.MaxVarintLen64]byte
   201  	n := binary.PutUvarint(buf[:], x)
   202  	_, err := w.Data.Write(buf[:n])
   203  	w.checkErr(err)
   204  }
   205  
   206  func (w *Encoder) rawVarint(x int64) {
   207  	// Zig-zag encode.
   208  	ux := uint64(x) << 1
   209  	if x < 0 {
   210  		ux = ^ux
   211  	}
   212  
   213  	w.rawUvarint(ux)
   214  }
   215  
   216  func (w *Encoder) rawReloc(k SectionKind, idx RelElemIdx) int {
   217  	e := RefTableEntry{k, idx}
   218  	if w.RelocMap != nil {
   219  		if i, ok := w.RelocMap[e]; ok {
   220  			return int(i)
   221  		}
   222  	} else {
   223  		w.RelocMap = make(map[RefTableEntry]uint32)
   224  	}
   225  
   226  	i := len(w.Relocs)
   227  	w.RelocMap[e] = uint32(i)
   228  	w.Relocs = append(w.Relocs, e)
   229  	return i
   230  }
   231  
   232  func (w *Encoder) Sync(m SyncMarker) {
   233  	if !w.p.SyncMarkers() {
   234  		return
   235  	}
   236  
   237  	// Writing out stack frame string references requires working
   238  	// relocations, but writing out the relocations themselves involves
   239  	// sync markers. To prevent infinite recursion, we simply trim the
   240  	// stack frame for sync markers within the relocation header.
   241  	var frames []string
   242  	if !w.encodingRelocHeader && w.p.syncFrames > 0 {
   243  		pcs := make([]uintptr, w.p.syncFrames)
   244  		n := runtime.Callers(2, pcs)
   245  		frames = fmtFrames(pcs[:n]...)
   246  	}
   247  
   248  	// TODO(mdempsky): Save space by writing out stack frames as a
   249  	// linked list so we can share common stack frames.
   250  	w.rawUvarint(uint64(m))
   251  	w.rawUvarint(uint64(len(frames)))
   252  	for _, frame := range frames {
   253  		w.rawUvarint(uint64(w.rawReloc(SectionString, w.p.StringIdx(frame))))
   254  	}
   255  }
   256  
   257  // Bool encodes and writes a bool value into the element bitstream,
   258  // and then returns the bool value.
   259  //
   260  // For simple, 2-alternative encodings, the idiomatic way to call Bool
   261  // is something like:
   262  //
   263  //	if w.Bool(x != 0) {
   264  //		// alternative #1
   265  //	} else {
   266  //		// alternative #2
   267  //	}
   268  //
   269  // For multi-alternative encodings, use Code instead.
   270  func (w *Encoder) Bool(b bool) bool {
   271  	w.Sync(SyncBool)
   272  	var x byte
   273  	if b {
   274  		x = 1
   275  	}
   276  	err := w.Data.WriteByte(x)
   277  	w.checkErr(err)
   278  	return b
   279  }
   280  
   281  // Int64 encodes and writes an int64 value into the element bitstream.
   282  func (w *Encoder) Int64(x int64) {
   283  	w.Sync(SyncInt64)
   284  	w.rawVarint(x)
   285  }
   286  
   287  // Uint64 encodes and writes a uint64 value into the element bitstream.
   288  func (w *Encoder) Uint64(x uint64) {
   289  	w.Sync(SyncUint64)
   290  	w.rawUvarint(x)
   291  }
   292  
   293  // Len encodes and writes a non-negative int value into the element bitstream.
   294  func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
   295  
   296  // Int encodes and writes an int value into the element bitstream.
   297  func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
   298  
   299  // Uint encodes and writes a uint value into the element bitstream.
   300  func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
   301  
   302  // Reloc encodes and writes a relocation for the given (section,
   303  // index) pair into the element bitstream.
   304  //
   305  // Note: Only the index is formally written into the element
   306  // bitstream, so bitstream decoders must know from context which
   307  // section an encoded relocation refers to.
   308  func (w *Encoder) Reloc(k SectionKind, idx RelElemIdx) {
   309  	w.Sync(SyncUseReloc)
   310  	w.Len(w.rawReloc(k, idx))
   311  }
   312  
   313  // Code encodes and writes a Code value into the element bitstream.
   314  func (w *Encoder) Code(c Code) {
   315  	w.Sync(c.Marker())
   316  	w.Len(c.Value())
   317  }
   318  
   319  // String encodes and writes a string value into the element
   320  // bitstream.
   321  //
   322  // Internally, strings are deduplicated by adding them to the strings
   323  // section (if not already present), and then writing a relocation
   324  // into the element bitstream.
   325  func (w *Encoder) String(s string) {
   326  	w.StringRef(w.p.StringIdx(s))
   327  }
   328  
   329  // StringRef writes a reference to the given index, which must be a
   330  // previously encoded string value.
   331  func (w *Encoder) StringRef(idx RelElemIdx) {
   332  	w.Sync(SyncString)
   333  	w.Reloc(SectionString, idx)
   334  }
   335  
   336  // Strings encodes and writes a variable-length slice of strings into
   337  // the element bitstream.
   338  func (w *Encoder) Strings(ss []string) {
   339  	w.Len(len(ss))
   340  	for _, s := range ss {
   341  		w.String(s)
   342  	}
   343  }
   344  
   345  // Value encodes and writes a constant.Value into the element
   346  // bitstream.
   347  func (w *Encoder) Value(val constant.Value) {
   348  	w.Sync(SyncValue)
   349  	if w.Bool(val.Kind() == constant.Complex) {
   350  		w.scalar(constant.Real(val))
   351  		w.scalar(constant.Imag(val))
   352  	} else {
   353  		w.scalar(val)
   354  	}
   355  }
   356  
   357  func (w *Encoder) scalar(val constant.Value) {
   358  	switch v := constant.Val(val).(type) {
   359  	default:
   360  		panicf("unhandled %v (%v)", val, val.Kind())
   361  	case bool:
   362  		w.Code(ValBool)
   363  		w.Bool(v)
   364  	case string:
   365  		w.Code(ValString)
   366  		w.String(v)
   367  	case int64:
   368  		w.Code(ValInt64)
   369  		w.Int64(v)
   370  	case *big.Int:
   371  		w.Code(ValBigInt)
   372  		w.bigInt(v)
   373  	case *big.Rat:
   374  		w.Code(ValBigRat)
   375  		w.bigInt(v.Num())
   376  		w.bigInt(v.Denom())
   377  	case *big.Float:
   378  		w.Code(ValBigFloat)
   379  		w.bigFloat(v)
   380  	}
   381  }
   382  
   383  func (w *Encoder) bigInt(v *big.Int) {
   384  	b := v.Bytes()
   385  	w.String(string(b)) // TODO: More efficient encoding.
   386  	w.Bool(v.Sign() < 0)
   387  }
   388  
   389  func (w *Encoder) bigFloat(v *big.Float) {
   390  	b := v.Append(nil, 'p', -1)
   391  	w.String(string(b)) // TODO: More efficient encoding.
   392  }
   393  
   394  // Version reports the version of the bitstream.
   395  func (w *Encoder) Version() Version { return w.p.version }
   396  

View as plain text