build_read.go

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
// This file is a lightly modified copy of go/build/read.go with unused parts
// removed.
     7  
     8  package modindex
     9  
    10  import (
    11  	"bufio"
    12  	"bytes"
    13  	"errors"
    14  	"fmt"
    15  	"go/ast"
    16  	"go/build"
    17  	"go/parser"
    18  	"go/scanner"
    19  	"go/token"
    20  	"io"
    21  	"strconv"
    22  	"strings"
    23  	"unicode"
    24  	"unicode/utf8"
    25  )
    26  
// importReader is a small byte-level scanner over Go source text.
// Its methods read the package clause and import declarations, and can
// afterwards scan the remaining input for //go:embed comments.
type importReader struct {
	b    *bufio.Reader  // underlying input
	buf  []byte         // bytes appended by readByte; readByteNoBuf drains it from the front
	peek byte           // byte saved by peekByte for the next read; 0 means none is pending
	err  error          // first error recorded (read error other than io.EOF, errNUL, or errSyntax)
	eof  bool           // set once the underlying reader has returned io.EOF
	nerr int            // number of peekByte calls made while err is set; guards against looping
	pos  token.Position // current position, updated by readByteNoBuf
}
    36  
// bom is the UTF-8 encoding of the byte order mark (U+FEFF).
var bom = []byte{0xef, 0xbb, 0xbf}
    38  
    39  func newImportReader(name string, r io.Reader) *importReader {
    40  	b := bufio.NewReader(r)
    41  	// Remove leading UTF-8 BOM.
    42  	// Per https://golang.org/ref/spec#Source_code_representation:
    43  	// a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
    44  	// if it is the first Unicode code point in the source text.
    45  	if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
    46  		b.Discard(3)
    47  	}
    48  	return &importReader{
    49  		b: b,
    50  		pos: token.Position{
    51  			Filename: name,
    52  			Line:     1,
    53  			Column:   1,
    54  		},
    55  	}
    56  }
    57  
    58  func isIdent(c byte) bool {
    59  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
    60  }
    61  
// Errors recorded in importReader.err.
// errSyntax is set by syntaxError when the input cannot be scanned as
// expected; errNUL is set by readByte and readByteNoBuf when a NUL byte
// is read from the underlying reader.
var (
	errSyntax = errors.New("syntax error")
	errNUL    = errors.New("unexpected NUL in input")
)
    66  
    67  // syntaxError records a syntax error, but only if an I/O error has not already been recorded.
    68  func (r *importReader) syntaxError() {
    69  	if r.err == nil {
    70  		r.err = errSyntax
    71  	}
    72  }
    73  
    74  // readByte reads the next byte from the input, saves it in buf, and returns it.
    75  // If an error occurs, readByte records the error in r.err and returns 0.
    76  func (r *importReader) readByte() byte {
    77  	c, err := r.b.ReadByte()
    78  	if err == nil {
    79  		r.buf = append(r.buf, c)
    80  		if c == 0 {
    81  			err = errNUL
    82  		}
    83  	}
    84  	if err != nil {
    85  		if err == io.EOF {
    86  			r.eof = true
    87  		} else if r.err == nil {
    88  			r.err = err
    89  		}
    90  		c = 0
    91  	}
    92  	return c
    93  }
    94  
    95  // readByteNoBuf is like readByte but doesn't buffer the byte.
    96  // It exhausts r.buf before reading from r.b.
    97  func (r *importReader) readByteNoBuf() byte {
    98  	var c byte
    99  	var err error
   100  	if len(r.buf) > 0 {
   101  		c = r.buf[0]
   102  		r.buf = r.buf[1:]
   103  	} else {
   104  		c, err = r.b.ReadByte()
   105  		if err == nil && c == 0 {
   106  			err = errNUL
   107  		}
   108  	}
   109  
   110  	if err != nil {
   111  		if err == io.EOF {
   112  			r.eof = true
   113  		} else if r.err == nil {
   114  			r.err = err
   115  		}
   116  		return 0
   117  	}
   118  	r.pos.Offset++
   119  	if c == '\n' {
   120  		r.pos.Line++
   121  		r.pos.Column = 1
   122  	} else {
   123  		r.pos.Column++
   124  	}
   125  	return c
   126  }
   127  
// peekByte returns the next byte from the input reader but does not advance beyond it.
// If skipSpace is set, peekByte skips leading spaces and comments.
//
// The returned byte is also stored in r.peek, so the next call starts from it.
// Once r.err is set, peekByte returns 0 without reading anything; it panics if
// it is called more than 10000 times in that state.
func (r *importReader) peekByte(skipSpace bool) byte {
	if r.err != nil {
		if r.nerr++; r.nerr > 10000 {
			panic("go/build: import reader looping")
		}
		return 0
	}

	// Use r.peek as first input byte.
	// Don't just return r.peek here: it might have been left by peekByte(false)
	// and this might be peekByte(true).
	c := r.peek
	if c == 0 {
		c = r.readByte()
	}
	for r.err == nil && !r.eof {
		if skipSpace {
			// For the purposes of this reader, semicolons are never necessary to
			// understand the input and are treated as spaces.
			switch c {
			case ' ', '\f', '\t', '\r', '\n', ';':
				c = r.readByte()
				continue

			case '/':
				c = r.readByte()
				if c == '/' {
					// Line comment: read up to and including the newline,
					// or until EOF or an error.
					for c != '\n' && r.err == nil && !r.eof {
						c = r.readByte()
					}
				} else if c == '*' {
					// Block comment: c holds the previous byte and c1 the most
					// recent one, so the loop stops once they spell "*/".
					// Reaching EOF first records a syntax error, which also
					// ends the loop.
					var c1 byte
					for (c != '*' || c1 != '/') && r.err == nil {
						if r.eof {
							r.syntaxError()
						}
						c, c1 = c1, r.readByte()
					}
				} else {
					// A '/' that does not begin a comment is a syntax error here.
					r.syntaxError()
				}
				// Read the byte after the comment and examine it on the next iteration.
				c = r.readByte()
				continue
			}
		}
		break
	}
	r.peek = c
	return r.peek
}
   180  
   181  // nextByte is like peekByte but advances beyond the returned byte.
   182  func (r *importReader) nextByte(skipSpace bool) byte {
   183  	c := r.peekByte(skipSpace)
   184  	r.peek = 0
   185  	return c
   186  }
   187  
// goEmbed is the text that findEmbed expects immediately after "//"
// in a //go:embed comment.
var goEmbed = []byte("go:embed")
   189  
// findEmbed advances the input reader to the next //go:embed comment.
// It reports whether it found a comment.
// (Otherwise it found an error or EOF.)
//
// first must be true on the first call and false on later calls; it tells
// findEmbed whether the reader starts at the beginning of a line.
// When findEmbed returns true, the reader has consumed "//go:embed" and the
// space or tab that follows it. All bytes are read with readByteNoBuf,
// so r.pos tracks the reader's position.
func (r *importReader) findEmbed(first bool) bool {
	// The import block scan stopped after a non-space character,
	// so the reader is not at the start of a line on the first call.
	// After that, each //go:embed extraction leaves the reader
	// at the end of a line.
	startLine := !first
	var c byte
	for r.err == nil && !r.eof {
		c = r.readByteNoBuf()
	Reswitch:
		// Reswitch is the re-entry point for a byte c that has already been
		// read but not yet classified.
		switch c {
		default:
			startLine = false

		case '\n':
			startLine = true

		case ' ', '\t':
			// leave startLine alone

		case '"':
			// Interpreted string literal: skip to the closing quote,
			// stepping over backslash escapes. EOF inside the literal
			// records a syntax error.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '"' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}
			// Reached only when the loop above stopped because r.err is set.
			goto Reswitch

		case '`':
			// Raw string literal: skip to the closing backquote; there are
			// no escapes to handle.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '`' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '\'':
			// Rune literal: skip to the closing quote, stepping over
			// backslash escapes.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '\'' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '/':
			// Possibly the start of a comment; the next byte decides.
			c = r.readByteNoBuf()
			switch c {
			default:
				// Not a comment: classify the byte after the '/' normally.
				startLine = false
				goto Reswitch

			case '*':
				// Block comment: c holds the previous byte and c1 the most
				// recent one, so the loop stops once they spell "*/".
				var c1 byte
				for (c != '*' || c1 != '/') && r.err == nil {
					if r.eof {
						r.syntaxError()
					}
					c, c1 = c1, r.readByteNoBuf()
				}
				startLine = false

			case '/':
				// Line comment. It counts as a //go:embed comment only when
				// nothing but spaces and tabs precede it on the line.
				if startLine {
					// Try to read this as a //go:embed comment.
					for i := range goEmbed {
						c = r.readByteNoBuf()
						if c != goEmbed[i] {
							goto SkipSlashSlash
						}
					}
					c = r.readByteNoBuf()
					if c == ' ' || c == '\t' {
						// Found one!
						return true
					}
				}
			SkipSlashSlash:
				// Consume the rest of the line, including its newline, so the
				// next byte read is at the start of a line.
				for c != '\n' && r.err == nil && !r.eof {
					c = r.readByteNoBuf()
				}
				startLine = true
			}
		}
	}
	return false
}
   311  
   312  // readKeyword reads the given keyword from the input.
   313  // If the keyword is not present, readKeyword records a syntax error.
   314  func (r *importReader) readKeyword(kw string) {
   315  	r.peekByte(true)
   316  	for i := 0; i < len(kw); i++ {
   317  		if r.nextByte(false) != kw[i] {
   318  			r.syntaxError()
   319  			return
   320  		}
   321  	}
   322  	if isIdent(r.peekByte(false)) {
   323  		r.syntaxError()
   324  	}
   325  }
   326  
   327  // readIdent reads an identifier from the input.
   328  // If an identifier is not present, readIdent records a syntax error.
   329  func (r *importReader) readIdent() {
   330  	c := r.peekByte(true)
   331  	if !isIdent(c) {
   332  		r.syntaxError()
   333  		return
   334  	}
   335  	for isIdent(r.peekByte(false)) {
   336  		r.peek = 0
   337  	}
   338  }
   339  
   340  // readString reads a quoted string literal from the input.
   341  // If an identifier is not present, readString records a syntax error.
   342  func (r *importReader) readString() {
   343  	switch r.nextByte(true) {
   344  	case '`':
   345  		for r.err == nil {
   346  			if r.nextByte(false) == '`' {
   347  				break
   348  			}
   349  			if r.eof {
   350  				r.syntaxError()
   351  			}
   352  		}
   353  	case '"':
   354  		for r.err == nil {
   355  			c := r.nextByte(false)
   356  			if c == '"' {
   357  				break
   358  			}
   359  			if r.eof || c == '\n' {
   360  				r.syntaxError()
   361  			}
   362  			if c == '\\' {
   363  				r.nextByte(false)
   364  			}
   365  		}
   366  	default:
   367  		r.syntaxError()
   368  	}
   369  }
   370  
   371  // readImport reads an import clause - optional identifier followed by quoted string -
   372  // from the input.
   373  func (r *importReader) readImport() {
   374  	c := r.peekByte(true)
   375  	if c == '.' {
   376  		r.peek = 0
   377  	} else if isIdent(c) {
   378  		r.readIdent()
   379  	}
   380  	r.readString()
   381  }
   382  
   383  // readComments is like io.ReadAll, except that it only reads the leading
   384  // block of comments in the file.
   385  func readComments(f io.Reader) ([]byte, error) {
   386  	r := newImportReader("", f)
   387  	r.peekByte(true)
   388  	if r.err == nil && !r.eof {
   389  		// Didn't reach EOF, so must have found a non-space byte. Remove it.
   390  		r.buf = r.buf[:len(r.buf)-1]
   391  	}
   392  	return r.buf, r.err
   393  }
   394  
// readGoInfo expects a Go file as input and reads the file up to and including the import section.
// It records what it learned in *info.
// If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
// info.imports and info.embeds.
//
// It only returns an error if there are problems reading the file,
// not for syntax errors in the file itself.
// (It also returns an error if the parser yields an import path literal
// that strconv.Unquote rejects.)
func readGoInfo(f io.Reader, info *fileInfo) error {
	r := newImportReader(info.name, f)

	// Scan the package clause, then every declaration that starts with 'i',
	// each of which is read as an import declaration.
	r.readKeyword("package")
	r.readIdent()
	for r.peekByte(true) == 'i' {
		r.readKeyword("import")
		if r.peekByte(true) == '(' {
			// Parenthesized group: read import clauses until the closing ')'.
			r.nextByte(false)
			for r.peekByte(true) != ')' && r.err == nil {
				r.readImport()
			}
			r.nextByte(false)
		} else {
			r.readImport()
		}
	}

	info.header = r.buf

	// If we stopped successfully before EOF, we read a byte that told us we were done.
	// Return all but that last byte, which would cause a syntax error if we let it through.
	if r.err == nil && !r.eof {
		info.header = r.buf[:len(r.buf)-1]
	}

	// If we stopped for a syntax error, consume the whole file so that
	// we are sure we don't change the errors that go/parser returns.
	if r.err == errSyntax {
		r.err = nil
		for r.err == nil && !r.eof {
			r.readByte()
		}
		info.header = r.buf
	}
	if r.err != nil {
		return r.err
	}

	// Without a file set there is nothing to parse; the header is all the caller gets.
	if info.fset == nil {
		return nil
	}

	// Parse file header & record imports.
	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
	if info.parseErr != nil {
		return nil
	}

	// hasEmbed records whether the file imports the "embed" package.
	hasEmbed := false
	for _, decl := range info.parsed.Decls {
		d, ok := decl.(*ast.GenDecl)
		if !ok {
			continue
		}
		for _, dspec := range d.Specs {
			spec, ok := dspec.(*ast.ImportSpec)
			if !ok {
				continue
			}
			quoted := spec.Path.Value
			path, err := strconv.Unquote(quoted)
			if err != nil {
				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
			}
			if !isValidImport(path) {
				// The parser used to return a parse error for invalid import paths, but
				// no longer does, so check for and create the error here instead.
				info.parseErr = scanner.Error{Pos: info.fset.Position(spec.Pos()), Msg: "invalid import path: " + path}
				info.imports = nil
				return nil
			}
			if path == "embed" {
				hasEmbed = true
			}

			// Use the spec's own doc comment; for a declaration with a single
			// spec, fall back to the declaration's doc comment.
			doc := spec.Doc
			if doc == nil && len(d.Specs) == 1 {
				doc = d.Doc
			}
			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
		}
	}

	// Extract directives.
	// Only comment groups positioned before the package clause are examined;
	// each comment beginning with "//go:" is recorded.
	for _, group := range info.parsed.Comments {
		if group.Pos() >= info.parsed.Package {
			break
		}
		for _, c := range group.List {
			if strings.HasPrefix(c.Text, "//go:") {
				info.directives = append(info.directives, build.Directive{Text: c.Text, Pos: info.fset.Position(c.Slash)})
			}
		}
	}

	// If the file imports "embed",
	// we have to look for //go:embed comments
	// in the remainder of the file.
	// The compiler will enforce the mapping of comments to
	// declared variables. We just need to know the patterns.
	// If there were //go:embed comments earlier in the file
	// (near the package statement or imports), the compiler
	// will reject them. They can be (and have already been) ignored.
	if hasEmbed {
		var line []byte
		for first := true; r.findEmbed(first); first = false {
			line = line[:0]
			// pos is the reader's position just after findEmbed returned,
			// which is where the directive's arguments begin.
			pos := r.pos
			// Collect the rest of the line, not including the newline.
			for {
				c := r.readByteNoBuf()
				if c == '\n' || r.err != nil || r.eof {
					break
				}
				line = append(line, c)
			}
			// Add args if line is well-formed.
			// Ignore badly-formed lines - the compiler will report them when it finds them,
			// and we can pretend they are not there to help go list succeed with what it knows.
			embs, err := parseGoEmbed(string(line), pos)
			if err == nil {
				info.embeds = append(info.embeds, embs...)
			}
		}
	}

	return nil
}
   530  
   531  // isValidImport checks if the import is a valid import using the more strict
   532  // checks allowed by the implementation restriction in https://go.dev/ref/spec#Import_declarations.
   533  // It was ported from the function of the same name that was removed from the
   534  // parser in CL 424855, when the parser stopped doing these checks.
   535  func isValidImport(s string) bool {
   536  	const illegalChars = `!"#$%&'()*,:;<=>?[\]^{|}` + "`\uFFFD"
   537  	for _, r := range s {
   538  		if !unicode.IsGraphic(r) || unicode.IsSpace(r) || strings.ContainsRune(illegalChars, r) {
   539  			return false
   540  		}
   541  	}
   542  	return s != ""
   543  }
   544  
   545  // parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
   546  // It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
   547  // This is based on a similar function in cmd/compile/internal/gc/noder.go;
   548  // this version calculates position information as well.
   549  func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) {
   550  	trimBytes := func(n int) {
   551  		pos.Offset += n
   552  		pos.Column += utf8.RuneCountInString(args[:n])
   553  		args = args[n:]
   554  	}
   555  	trimSpace := func() {
   556  		trim := strings.TrimLeftFunc(args, unicode.IsSpace)
   557  		trimBytes(len(args) - len(trim))
   558  	}
   559  
   560  	var list []fileEmbed
   561  	for trimSpace(); args != ""; trimSpace() {
   562  		var path string
   563  		pathPos := pos
   564  	Switch:
   565  		switch args[0] {
   566  		default:
   567  			i := len(args)
   568  			for j, c := range args {
   569  				if unicode.IsSpace(c) {
   570  					i = j
   571  					break
   572  				}
   573  			}
   574  			path = args[:i]
   575  			trimBytes(i)
   576  
   577  		case '`':
   578  			var ok bool
   579  			path, _, ok = strings.Cut(args[1:], "`")
   580  			if !ok {
   581  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   582  			}
   583  			trimBytes(1 + len(path) + 1)
   584  
   585  		case '"':
   586  			i := 1
   587  			for ; i < len(args); i++ {
   588  				if args[i] == '\\' {
   589  					i++
   590  					continue
   591  				}
   592  				if args[i] == '"' {
   593  					q, err := strconv.Unquote(args[:i+1])
   594  					if err != nil {
   595  						return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
   596  					}
   597  					path = q
   598  					trimBytes(i + 1)
   599  					break Switch
   600  				}
   601  			}
   602  			if i >= len(args) {
   603  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   604  			}
   605  		}
   606  
   607  		if args != "" {
   608  			r, _ := utf8.DecodeRuneInString(args)
   609  			if !unicode.IsSpace(r) {
   610  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   611  			}
   612  		}
   613  		list = append(list, fileEmbed{path, pathPos})
   614  	}
   615  	return list, nil
   616  }
   617
View as plain text