Source file src/cmd/internal/obj/x86/obj6.go

     1  // Inferno utils/6l/pass.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/src"
    37  	"cmd/internal/sys"
    38  	"internal/abi"
    39  	"log"
    40  	"math"
    41  	"path"
    42  	"strings"
    43  )
    44  
    45  func CanUse1InsnTLS(ctxt *obj.Link) bool {
    46  	if isAndroid {
    47  		// Android uses a global variable for the tls offset.
    48  		return false
    49  	}
    50  
    51  	if ctxt.Arch.Family == sys.I386 {
    52  		switch ctxt.Headtype {
    53  		case objabi.Hlinux,
    54  			objabi.Hplan9,
    55  			objabi.Hwindows:
    56  			return false
    57  		}
    58  
    59  		return true
    60  	}
    61  
    62  	switch ctxt.Headtype {
    63  	case objabi.Hplan9, objabi.Hwindows:
    64  		return false
    65  	case objabi.Hlinux, objabi.Hfreebsd:
    66  		return !ctxt.Flag_shared
    67  	}
    68  
    69  	return true
    70  }
    71  
    72  func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
        	// progedit rewrites the single instruction p in place so that later
        	// passes see a canonical form. In order, it: adjusts TLS accesses to
        	// the 1- or 2-instruction form chosen by CanUse1InsnTLS, loads the TLS
        	// offset from runtime.tls_g on Android and Windows, normalizes a few
        	// operand encodings, moves floating-point constants into memory
        	// symbols, and finally applies the GOT (-dynlink) and 386 PC-relative
        	// (-shared) rewrites. Several steps append new instructions after p,
        	// allocated with newprog.
        	//
    73  	// Thread-local storage references use the TLS pseudo-register.
    74  	// As a register, TLS refers to the thread-local storage base, and it
    75  	// can only be loaded into another register:
    76  	//
    77  	//         MOVQ TLS, AX
    78  	//
    79  	// An offset from the thread-local storage base is written off(reg)(TLS*1).
    80  	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
    81  	// indexing from the loaded TLS base. This emits a relocation so that
    82  	// if the linker needs to adjust the offset, it can. For example:
    83  	//
    84  	//         MOVQ TLS, AX
    85  	//         MOVQ 0(AX)(TLS*1), CX // load g into CX
    86  	//
    87  	// On systems that support direct access to the TLS memory, this
    88  	// pair of instructions can be reduced to a direct TLS memory reference:
    89  	//
    90  	//         MOVQ 0(TLS), CX // load g into CX
    91  	//
    92  	// The 2-instruction and 1-instruction forms correspond to the two code
    93  	// sequences for loading a TLS variable in the local exec model given in "ELF
    94  	// Handling For Thread-Local Storage".
    95  	//
    96  	// We apply this rewrite on systems that support the 1-instruction form.
    97  	// The decision is made using only the operating system and the -shared flag,
    98  	// not the link mode. If some link modes on a particular operating system
    99  	// require the 2-instruction form, then all builds for that operating system
   100  	// will use the 2-instruction form, so that the link mode decision can be
   101  	// delayed to link time.
   102  	//
   103  	// In this way, all supported systems use identical instructions to
   104  	// access TLS, and they are rewritten appropriately first here in
   105  	// liblink and then finally using relocations in the linker.
   106  	//
   107  	// When -shared is passed, we leave the code in the 2-instruction form but
   108  	// assemble (and relocate) them in different ways to generate the initial
   109  	// exec code sequence. It's a bit of a fluke that this is possible without
   110  	// rewriting the instructions more comprehensively, and it only does because
   111  	// we only support a single TLS variable (g).
   112  
   113  	if CanUse1InsnTLS(ctxt) {
   114  		// Reduce 2-instruction sequence to 1-instruction sequence.
   115  		// Sequences like
   116  		//	MOVQ TLS, BX
   117  		//	... off(BX)(TLS*1) ...
   118  		// become
   119  		//	NOP
   120  		//	... off(TLS) ...
   121  		//
   122  		// TODO(rsc): Remove the Hsolaris special case. It exists only to
   123  		// guarantee we are producing byte-identical binaries as before this code.
   124  		// But it should be unnecessary.
   125  		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris {
   126  			obj.Nopout(p)
   127  		}
        		// A source operand of the form off(reg)(TLS*1) becomes off(TLS).
   128  		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
   129  			p.From.Reg = REG_TLS
   130  			p.From.Scale = 0
   131  			p.From.Index = REG_NONE
   132  		}
   133  
        		// Likewise for a destination operand.
   134  		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
   135  			p.To.Reg = REG_TLS
   136  			p.To.Scale = 0
   137  			p.To.Index = REG_NONE
   138  		}
   139  	} else {
   140  		// load_g, below, always inserts the 1-instruction sequence. Rewrite it
   141  		// as the 2-instruction sequence if necessary.
   142  		//	MOVQ 0(TLS), BX
   143  		// becomes
   144  		//	MOVQ TLS, BX
   145  		//	MOVQ 0(BX)(TLS*1), BX
   146  		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
        			// q is the second instruction (the indexed load); p is
        			// then turned into the plain "MOV TLS, reg".
   147  			q := obj.Appendp(p, newprog)
   148  			q.As = p.As
   149  			q.From = p.From
   150  			q.From.Type = obj.TYPE_MEM
   151  			q.From.Reg = p.To.Reg
   152  			q.From.Index = REG_TLS
   153  			q.From.Scale = 2 // TODO: use 1
   154  			q.To = p.To
   155  			p.From.Type = obj.TYPE_REG
   156  			p.From.Reg = REG_TLS
   157  			p.From.Index = REG_NONE
   158  			p.From.Offset = 0
   159  		}
   160  	}
   161  
   162  	// Android and Windows use a tls offset determined at runtime. Rewrite
   163  	//	MOVQ TLS, BX
   164  	// to
   165  	//	MOVQ runtime.tls_g(SB), BX
   166  	if (isAndroid || ctxt.Headtype == objabi.Hwindows) &&
   167  		(p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
   168  		p.From.Type = obj.TYPE_MEM
   169  		p.From.Name = obj.NAME_EXTERN
   170  		p.From.Reg = REG_NONE
   171  		p.From.Sym = ctxt.Lookup("runtime.tls_g")
   172  		p.From.Index = REG_NONE
   173  		if ctxt.Headtype == objabi.Hwindows {
   174  			// Windows requires an additional indirection
   175  			// to retrieve the TLS pointer,
   176  			// as runtime.tls_g contains the TLS offset from GS or FS.
   177  			// on AMD64 add
   178  			//	MOVQ 0(BX)(GS*1), BX
   179  			// on 386 add
   180  			//	MOVQ 0(BX)(FS*1), BX
        			// The appended instruction reuses p.As, so it has the same
        			// opcode as the MOV being rewritten.
   181  			q := obj.Appendp(p, newprog)
   182  			q.As = p.As
   183  			q.From = obj.Addr{}
   184  			q.From.Type = obj.TYPE_MEM
   185  			q.From.Reg = p.To.Reg
   186  			if ctxt.Arch.Family == sys.AMD64 {
   187  				q.From.Index = REG_GS
   188  			} else {
   189  				q.From.Index = REG_FS
   190  			}
   191  			q.From.Scale = 1
   192  			q.From.Offset = 0
   193  			q.To = p.To
   194  		}
   195  	}
   196  
   197  	// TODO: Remove.
        	// On windows/amd64 and on plan9, a TLS index with scale 1 is changed to
        	// scale 2 on either operand.
        	// NOTE(review): presumably scale 2 is a marker the encoder treats
        	// specially for (TLS*n) operands rather than a real scale - confirm
        	// against the instruction encoder.
   198  	if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 {
   199  		if p.From.Scale == 1 && p.From.Index == REG_TLS {
   200  			p.From.Scale = 2
   201  		}
   202  		if p.To.Scale == 1 && p.To.Index == REG_TLS {
   203  			p.To.Scale = 2
   204  		}
   205  	}
   206  
   207  	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
   208  	// That's what the tables expect.
   209  	switch p.As {
   210  	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
        		// A TYPE_MEM operand with no name, base, index or symbol carries
        		// only an offset; retag it as a constant.
   211  		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
   212  			p.To.Type = obj.TYPE_CONST
   213  		}
   214  	}
   215  
   216  	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
   217  	switch p.As {
   218  	case obj.ACALL, obj.AJMP, obj.ARET:
   219  		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
   220  			p.To.Type = obj.TYPE_BRANCH
   221  		}
   222  	}
   223  
   224  	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
        	// On AMD64 this applies to every TYPE_ADDR source; on other
        	// architectures addresses of NAME_EXTERN/NAME_STATIC symbols are left
        	// as MOV instructions.
   225  	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
   226  		switch p.As {
   227  		case AMOVL:
   228  			p.As = ALEAL
   229  			p.From.Type = obj.TYPE_MEM
   230  		case AMOVQ:
   231  			p.As = ALEAQ
   232  			p.From.Type = obj.TYPE_MEM
   233  		}
   234  	}
   235  
   236  	// Rewrite float constants to values stored in memory.
   237  	switch p.As {
   238  	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
   239  	case AMOVSS:
   240  		if p.From.Type == obj.TYPE_FCONST {
   241  			//  f == 0 can't be used here due to -0, so use Float64bits
   242  			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
   243  				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
   244  					p.As = AXORPS
   245  					p.From = p.To
        					// Leave the switch: the constant no longer needs
        					// to be placed in memory.
   246  					break
   247  				}
   248  			}
   249  		}
        		// Any other MOVSS constant is handled like the single-precision
        		// instructions below.
   250  		fallthrough
   251  
   252  	case AFMOVF,
   253  		AFADDF,
   254  		AFSUBF,
   255  		AFSUBRF,
   256  		AFMULF,
   257  		AFDIVF,
   258  		AFDIVRF,
   259  		AFCOMF,
   260  		AFCOMFP,
   261  		AADDSS,
   262  		ASUBSS,
   263  		AMULSS,
   264  		ADIVSS,
   265  		ACOMISS,
   266  		AUCOMISS:
        		// Replace the constant with a reference to the symbol returned
        		// by ctxt.Float32Sym for its float32 value.
   267  		if p.From.Type == obj.TYPE_FCONST {
   268  			f32 := float32(p.From.Val.(float64))
   269  			p.From.Type = obj.TYPE_MEM
   270  			p.From.Name = obj.NAME_EXTERN
   271  			p.From.Sym = ctxt.Float32Sym(f32)
   272  			p.From.Offset = 0
   273  		}
   274  
   275  	case AMOVSD:
   276  		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
   277  		if p.From.Type == obj.TYPE_FCONST {
   278  			//  f == 0 can't be used here due to -0, so use Float64bits
   279  			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
   280  				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
   281  					p.As = AXORPS
   282  					p.From = p.To
        					// Leave the switch: the constant no longer needs
        					// to be placed in memory.
   283  					break
   284  				}
   285  			}
   286  		}
        		// Any other MOVSD constant is handled like the double-precision
        		// instructions below.
   287  		fallthrough
   288  
   289  	case AFMOVD,
   290  		AFADDD,
   291  		AFSUBD,
   292  		AFSUBRD,
   293  		AFMULD,
   294  		AFDIVD,
   295  		AFDIVRD,
   296  		AFCOMD,
   297  		AFCOMDP,
   298  		AADDSD,
   299  		ASUBSD,
   300  		AMULSD,
   301  		ADIVSD,
   302  		ACOMISD,
   303  		AUCOMISD:
        		// Replace the constant with a reference to the symbol returned
        		// by ctxt.Float64Sym.
   304  		if p.From.Type == obj.TYPE_FCONST {
   305  			f64 := p.From.Val.(float64)
   306  			p.From.Type = obj.TYPE_MEM
   307  			p.From.Name = obj.NAME_EXTERN
   308  			p.From.Sym = ctxt.Float64Sym(f64)
   309  			p.From.Offset = 0
   310  		}
   311  	}
   312  
        	// With -dynlink, route accesses to global data through the GOT.
   313  	if ctxt.Flag_dynlink {
   314  		rewriteToUseGot(ctxt, p, newprog)
   315  	}
   316  
        	// With -shared on 386, make symbol references PC-relative.
   317  	if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
   318  		rewriteToPcrel(ctxt, p, newprog)
   319  	}
   320  }
   321  
   322  // rewriteToUseGot rewrites p, if necessary, to access global data via the global offset table.
        //
        // The scratch register is R15 on AMD64 and CX otherwise, except that a
        // LEAL whose destination is neither its base nor its index register uses
        // the destination itself. New instructions are allocated with newprog and
        // appended after p; in the CALL and MOVx-through-GOT cases p itself is
        // turned into a NOP once its replacement has been appended.
   323  func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
   324  	var lea, mov obj.As
   325  	var reg int16
   326  	if ctxt.Arch.Family == sys.AMD64 {
   327  		lea = ALEAQ
   328  		mov = AMOVQ
   329  		reg = REG_R15
   330  	} else {
   331  		lea = ALEAL
   332  		mov = AMOVL
   333  		reg = REG_CX
   334  		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
   335  			// Special case: clobber the destination register with
   336  			// the PC so we don't have to clobber CX.
   337  			// The SSA backend depends on CX not being clobbered across LEAL.
   338  			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
   339  			reg = p.To.Reg
   340  		}
   341  	}
   342  
   343  	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
   344  		//     ADUFFxxx $offset
   345  		// becomes
   346  		//     $MOV runtime.duffxxx@GOT, $reg
   347  		//     $LEA $offset($reg), $reg
   348  		//     CALL $reg
   349  		// (we use LEAx rather than ADDx because ADDx clobbers
   350  		// flags and duffzero on 386 does not otherwise do so).
   351  		var sym *obj.LSym
   352  		if p.As == obj.ADUFFZERO {
   353  			sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
   354  		} else {
   355  			sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
   356  		}
        		// p is reused as the GOT load; the LEA (p1) and the indirect
        		// CALL (p2) are appended after it.
   357  		offset := p.To.Offset
   358  		p.As = mov
   359  		p.From.Type = obj.TYPE_MEM
   360  		p.From.Name = obj.NAME_GOTREF
   361  		p.From.Sym = sym
   362  		p.To.Type = obj.TYPE_REG
   363  		p.To.Reg = reg
   364  		p.To.Offset = 0
   365  		p.To.Sym = nil
   366  		p1 := obj.Appendp(p, newprog)
   367  		p1.As = lea
   368  		p1.From.Type = obj.TYPE_MEM
   369  		p1.From.Offset = offset
   370  		p1.From.Reg = reg
   371  		p1.To.Type = obj.TYPE_REG
   372  		p1.To.Reg = reg
   373  		p2 := obj.Appendp(p1, newprog)
   374  		p2.As = obj.ACALL
   375  		p2.To.Type = obj.TYPE_REG
   376  		p2.To.Reg = reg
   377  	}
   378  
   379  	// We only care about global data: NAME_EXTERN means a global
   380  	// symbol in the Go sense, and Sym.Local() is true for a few
   381  	// internally defined symbols.
   382  	if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
   383  		// $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
   384  		p.As = mov
   385  		p.From.Type = obj.TYPE_ADDR
   386  	}
   387  	if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
   388  		// $MOV $sym, Rx becomes $MOV sym@GOT, Rx
   389  		// $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
   390  		// On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
   391  		cmplxdest := false
   392  		pAs := p.As
   393  		var dest obj.Addr
   394  		if p.To.Type != obj.TYPE_REG || pAs != mov {
   395  			if ctxt.Arch.Family == sys.AMD64 {
   396  				ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
   397  			}
        			// Remember the original opcode and destination, and make p
        			// load the address into the scratch register instead.
   398  			cmplxdest = true
   399  			dest = p.To
   400  			p.As = mov
   401  			p.To.Type = obj.TYPE_REG
   402  			p.To.Reg = reg
   403  			p.To.Sym = nil
   404  			p.To.Name = obj.NAME_NONE
   405  		}
   406  		p.From.Type = obj.TYPE_MEM
   407  		p.From.Name = obj.NAME_GOTREF
        		// q tracks the last instruction of the sequence emitted so far.
   408  		q := p
   409  		if p.From.Offset != 0 {
        			// Apply the symbol offset with a separate LEA, since the
        			// GOT load itself is rewritten to carry offset 0.
   410  			q = obj.Appendp(p, newprog)
   411  			q.As = lea
   412  			q.From.Type = obj.TYPE_MEM
   413  			q.From.Reg = p.To.Reg
   414  			q.From.Offset = p.From.Offset
   415  			q.To = p.To
   416  			p.From.Offset = 0
   417  		}
   418  		if cmplxdest {
        			// Finish with the original instruction, now reading the
        			// scratch register.
   419  			q = obj.Appendp(q, newprog)
   420  			q.As = pAs
   421  			q.To = dest
   422  			q.From.Type = obj.TYPE_REG
   423  			q.From.Reg = reg
   424  		}
   425  	}
   426  	if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
   427  		ctxt.Diag("don't know how to handle %v with -dynlink", p)
   428  	}
   429  	var source *obj.Addr
   430  	// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
   431  	// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
   432  	// An addition may be inserted between the two MOVs if there is an offset.
        	// (R15 here stands for the scratch register reg chosen above.)
        	// source is the operand that names the non-local global; if neither
        	// operand does, there is nothing more to rewrite.
   433  	if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
   434  		if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
   435  			ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
   436  		}
   437  		source = &p.From
   438  	} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
   439  		source = &p.To
   440  	} else {
   441  		return
   442  	}
   443  	if p.As == obj.ACALL {
   444  		// When dynlinking on 386, almost any call might end up being a call
   445  		// to a PLT, so make sure the GOT pointer is loaded into BX.
   446  		// RegTo2 is set on the replacement call insn to stop it being
   447  		// processed when it is in turn passed to progedit.
   448  		//
   449  		// We disable open-coded defers in buildssa() on 386 ONLY with shared
   450  		// libraries because of this extra code added before deferreturn calls.
   451  		//
   452  		// computeDeferReturn in cmd/link/internal/ld/pcln.go depends
   453  		// on the size of these instructions.
   454  		if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
   455  			return
   456  		}
        		// p1 loads the GOT address into BX; p2 is a copy of the call.
   457  		p1 := obj.Appendp(p, newprog)
   458  		p2 := obj.Appendp(p1, newprog)
   459  
   460  		p1.As = ALEAL
   461  		p1.From.Type = obj.TYPE_MEM
   462  		p1.From.Name = obj.NAME_STATIC
   463  		p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_")
   464  		p1.To.Type = obj.TYPE_REG
   465  		p1.To.Reg = REG_BX
   466  
   467  		p2.As = p.As
   468  		p2.Scond = p.Scond
   469  		p2.From = p.From
   470  		if p.RestArgs != nil {
   471  			p2.RestArgs = append(p2.RestArgs, p.RestArgs...)
   472  		}
   473  		p2.Reg = p.Reg
   474  		p2.To = p.To
   475  		// p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
   476  		// in ../pass.go complain, so set it back to TYPE_MEM here, until p2
   477  		// itself gets passed to progedit.
   478  		p2.To.Type = obj.TYPE_MEM
   479  		p2.RegTo2 = 1
   480  
   481  		obj.Nopout(p)
   482  		return
   483  
   484  	}
        	// NOTE(review): these opcodes are left untouched even though one of
        	// their operands names a global symbol - presumably because the symbol
        	// is not accessed as data; confirm.
   485  	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
   486  		return
   487  	}
   488  	if source.Type != obj.TYPE_MEM {
   489  		ctxt.Diag("don't know how to handle %v with -dynlink", p)
   490  	}
        	// p1 loads the symbol's GOT entry into reg; p2 repeats the original
        	// instruction with the global operand replaced by (reg).
   491  	p1 := obj.Appendp(p, newprog)
   492  	p2 := obj.Appendp(p1, newprog)
   493  
   494  	p1.As = mov
   495  	p1.From.Type = obj.TYPE_MEM
   496  	p1.From.Sym = source.Sym
   497  	p1.From.Name = obj.NAME_GOTREF
   498  	p1.To.Type = obj.TYPE_REG
   499  	p1.To.Reg = reg
   500  
   501  	p2.As = p.As
   502  	p2.From = p.From
   503  	p2.To = p.To
   504  	if from3 := p.GetFrom3(); from3 != nil {
   505  		p2.AddRestSource(*from3)
   506  	}
        	// NOTE(review): the tests below look only at Name and do not repeat
        	// the !Sym.Local() check used to choose source above, so a local
        	// NAME_EXTERN From combined with a non-local NAME_EXTERN To would
        	// rewrite From although source is &p.To - confirm this cannot occur.
   507  	if p.From.Name == obj.NAME_EXTERN {
   508  		p2.From.Reg = reg
   509  		p2.From.Name = obj.NAME_NONE
   510  		p2.From.Sym = nil
   511  	} else if p.To.Name == obj.NAME_EXTERN {
   512  		p2.To.Reg = reg
   513  		p2.To.Name = obj.NAME_NONE
   514  		p2.To.Sym = nil
   515  	} else {
        		// Not expected to be reached: source was taken from one of
        		// these two operands above and p has not been modified since.
   516  		return
   517  	}
   518  	obj.Nopout(p)
   519  }
   520  
   521  func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
   522  	// RegTo2 is set on the instructions we insert here so they don't get
   523  	// processed twice.
   524  	if p.RegTo2 != 0 {
   525  		return
   526  	}
   527  	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
   528  		return
   529  	}
   530  	// Any Prog (aside from the above special cases) with an Addr with Name ==
   531  	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
   532  	// inserted before it.
   533  	isName := func(a *obj.Addr) bool {
   534  		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
   535  			return false
   536  		}
   537  		if a.Sym.Type == objabi.STLSBSS {
   538  			return false
   539  		}
   540  		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
   541  	}
   542  
   543  	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
   544  		// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
   545  		// to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
   546  		// respectively.
   547  		if p.To.Type != obj.TYPE_REG {
   548  			q := obj.Appendp(p, newprog)
   549  			q.As = p.As
   550  			q.From.Type = obj.TYPE_REG
   551  			q.From.Reg = REG_CX
   552  			q.To = p.To
   553  			p.As = AMOVL
   554  			p.To.Type = obj.TYPE_REG
   555  			p.To.Reg = REG_CX
   556  			p.To.Sym = nil
   557  			p.To.Name = obj.NAME_NONE
   558  		}
   559  	}
   560  
   561  	if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) {
   562  		return
   563  	}
   564  	var dst int16 = REG_CX
   565  	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
   566  		dst = p.To.Reg
   567  		// Why? See the comment near the top of rewriteToUseGot above.
   568  		// AMOVLs might be introduced by the GOT rewrites.
   569  	}
   570  	q := obj.Appendp(p, newprog)
   571  	q.RegTo2 = 1
   572  	r := obj.Appendp(q, newprog)
   573  	r.RegTo2 = 1
   574  	q.As = obj.ACALL
   575  	thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
   576  	q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
   577  	q.To.Type = obj.TYPE_MEM
   578  	q.To.Name = obj.NAME_EXTERN
   579  	r.As = p.As
   580  	r.Scond = p.Scond
   581  	r.From = p.From
   582  	r.RestArgs = p.RestArgs
   583  	r.Reg = p.Reg
   584  	r.To = p.To
   585  	if isName(&p.From) {
   586  		r.From.Reg = dst
   587  	}
   588  	if isName(&p.To) {
   589  		r.To.Reg = dst
   590  	}
   591  	if p.GetFrom3() != nil && isName(p.GetFrom3()) {
   592  		r.GetFrom3().Reg = dst
   593  	}
   594  	obj.Nopout(p)
   595  }
   596  
   597  // Prog.mark
   598  const (
   599  	markBit = 1 << 0 // used in errorCheck to avoid duplicate work
   600  )
   601  
   602  func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
        	// preprocess expands the prologue and epilogue of the function cursym.
        	// It decides whether a frame pointer is saved, records the argument
        	// and local sizes, marks small leaf functions NOSPLIT, emits the
        	// stack-split check, adjusts SP for the frame, rebases NAME_AUTO and
        	// NAME_PARAM offsets, records SP adjustments (Spadj) for
        	// PUSH/POP/ADJSP, and rewrites each RET into the frame teardown
        	// sequence. New instructions are allocated with newprog.
        	//
        	// Nothing is done when there is no TEXT instruction or nothing
        	// follows it.
   603  	if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
   604  		return
   605  	}
   606  
        	// The frame size is taken from the TEXT instruction's To.Offset;
        	// negative sizes are treated as 0.
   607  	p := cursym.Func().Text
   608  	autoffset := int32(p.To.Offset)
   609  	if autoffset < 0 {
   610  		autoffset = 0
   611  	}
   612  
        	// hasCall records whether the body contains a CALL or a
        	// DUFFCOPY/DUFFZERO pseudo-instruction.
   613  	hasCall := false
   614  	for q := p; q != nil; q = q.Link {
   615  		if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
   616  			hasCall = true
   617  			break
   618  		}
   619  	}
   620  
   621  	var bpsize int
   622  	if ctxt.Arch.Family == sys.AMD64 &&
   623  		!p.From.Sym.NoFrame() && // (1) below
   624  		!(autoffset == 0 && !hasCall) { // (2) below
   625  		// Make room to save a base pointer.
   626  		// There are 2 cases we must avoid:
   627  		// 1) If noframe is set (which we do for functions which tail call).
   628  		// For performance, we also want to avoid:
   629  		// 2) Frameless leaf functions
   630  		bpsize = ctxt.Arch.PtrSize
   631  		autoffset += int32(bpsize)
   632  		p.To.Offset += int64(bpsize)
   633  	} else {
        		// No base pointer is saved; mark the function NOFRAME.
   634  		bpsize = 0
   635  		p.From.Sym.Set(obj.AttrNoFrame, true)
   636  	}
   637  
        	// Record the argument size (from the TEXT instruction's To.Val) and
        	// the local frame size on the function.
   638  	textarg := int64(p.To.Val.(int32))
   639  	cursym.Func().Args = int32(textarg)
   640  	cursym.Func().Locals = int32(p.To.Offset)
   641  
   642  	// TODO(rsc): Remove.
   643  	if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 {
   644  		cursym.Func().Locals = 0
   645  	}
   646  
        	// On AMD64, a function with a frame smaller than StackSmall that is
        	// not already NOSPLIT is marked NOSPLIT when the scan below classifies
        	// it as a leaf.
   647  	// TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
   648  	if ctxt.Arch.Family == sys.AMD64 && autoffset < abi.StackSmall && !p.From.Sym.NoSplit() {
   649  		leaf := true
   650  	LeafSearch:
   651  		for q := p; q != nil; q = q.Link {
   652  			switch q.As {
   653  			case obj.ACALL:
   654  				// Treat common runtime calls that take no arguments
   655  				// the same as duffcopy and duffzero.
   656  
   657  				// Note that of these functions, panicBounds does
   658  				// use some stack, but its stack together with the
   659  				// < StackSmall used by this function is still
   660  				// less than stackNosplit. See issue 31219.
   661  				if !isZeroArgRuntimeCall(q.To.Sym) {
   662  					leaf = false
   663  					break LeafSearch
   664  				}
   665  				fallthrough
   666  			case obj.ADUFFCOPY, obj.ADUFFZERO:
        				// With such a call present, the frame must also stay
        				// below StackSmall-8 to count as a leaf.
   667  				if autoffset >= abi.StackSmall-8 {
   668  					leaf = false
   669  					break LeafSearch
   670  				}
   671  			}
   672  		}
   673  
   674  		if leaf {
   675  			p.From.Sym.Set(obj.AttrNoSplit, true)
   676  		}
   677  	}
   678  
   679  	if !p.From.Sym.NoSplit() {
   680  		// Emit split check.
   681  		p = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg))
   682  	}
   683  
   684  	if bpsize > 0 {
   685  		// Save caller's BP
   686  		p = obj.Appendp(p, newprog)
   687  
   688  		p.As = APUSHQ
   689  		p.From.Type = obj.TYPE_REG
   690  		p.From.Reg = REG_BP
   691  
   692  		// Move current frame to BP
   693  		p = obj.Appendp(p, newprog)
   694  
   695  		p.As = AMOVQ
   696  		p.From.Type = obj.TYPE_REG
   697  		p.From.Reg = REG_SP
   698  		p.To.Type = obj.TYPE_REG
   699  		p.To.Reg = REG_BP
   700  	}
   701  
        	// The total frame size must be a multiple of the register size.
   702  	if autoffset%int32(ctxt.Arch.RegSize) != 0 {
   703  		ctxt.Diag("unaligned stack size %d", autoffset)
   704  	}
   705  
   706  	// localoffset is autoffset discounting the frame pointer,
   707  	// which has already been allocated in the stack.
   708  	localoffset := autoffset - int32(bpsize)
   709  	if localoffset != 0 {
   710  		p = obj.Appendp(p, newprog)
   711  		p.As = AADJSP
   712  		p.From.Type = obj.TYPE_CONST
   713  		p.From.Offset = int64(localoffset)
   714  		p.Spadj = localoffset
   715  	}
   716  
   717  	// Delve debugger would like the next instruction to be noted as the end of the function prologue.
   718  	// TODO: are there other cases (e.g., wrapper functions) that need marking?
   719  	if autoffset != 0 {
   720  		p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
   721  	}
   722  
        	// Walk every instruction of the function. deltasp accumulates the SP
        	// adjustments made so far by PUSH/POP/ADJSP (including the prologue
        	// instructions emitted above) and is used to rebase NAME_AUTO and
        	// NAME_PARAM operand offsets.
   723  	var deltasp int32
   724  	for p = cursym.Func().Text; p != nil; p = p.Link {
   725  		pcsize := ctxt.Arch.RegSize
   726  		switch p.From.Name {
   727  		case obj.NAME_AUTO:
   728  			p.From.Offset += int64(deltasp) - int64(bpsize)
   729  		case obj.NAME_PARAM:
   730  			p.From.Offset += int64(deltasp) + int64(pcsize)
   731  		}
   732  		if p.GetFrom3() != nil {
   733  			switch p.GetFrom3().Name {
   734  			case obj.NAME_AUTO:
   735  				p.GetFrom3().Offset += int64(deltasp) - int64(bpsize)
   736  			case obj.NAME_PARAM:
   737  				p.GetFrom3().Offset += int64(deltasp) + int64(pcsize)
   738  			}
   739  		}
   740  		switch p.To.Name {
   741  		case obj.NAME_AUTO:
   742  			p.To.Offset += int64(deltasp) - int64(bpsize)
   743  		case obj.NAME_PARAM:
   744  			p.To.Offset += int64(deltasp) + int64(pcsize)
   745  		}
   746  
        		// Every case except RET ends in continue; only RET reaches the
        		// epilogue code after the switch.
   747  		switch p.As {
   748  		default:
        			// An instruction (other than CMPL/CMPQ) with SP as its
        			// register destination sets FuncFlagSPWrite once per
        			// function. Outside assembly input (!ctxt.IsAsm) this is
        			// reported and the process exits.
   749  			if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.As != ACMPL && p.As != ACMPQ {
   750  				f := cursym.Func()
   751  				if f.FuncFlag&abi.FuncFlagSPWrite == 0 {
   752  					f.FuncFlag |= abi.FuncFlagSPWrite
   753  					if ctxt.Debugvlog || !ctxt.IsAsm {
   754  						ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p)
   755  						if !ctxt.IsAsm {
   756  							ctxt.Diag("invalid auto-SPWRITE in non-assembly")
   757  							ctxt.DiagFlush()
   758  							log.Fatalf("bad SPWRITE")
   759  						}
   760  					}
   761  				}
   762  			}
   763  			continue
   764  
   765  		case APUSHL, APUSHFL:
   766  			deltasp += 4
   767  			p.Spadj = 4
   768  			continue
   769  
   770  		case APUSHQ, APUSHFQ:
   771  			deltasp += 8
   772  			p.Spadj = 8
   773  			continue
   774  
   775  		case APUSHW, APUSHFW:
   776  			deltasp += 2
   777  			p.Spadj = 2
   778  			continue
   779  
   780  		case APOPL, APOPFL:
   781  			deltasp -= 4
   782  			p.Spadj = -4
   783  			continue
   784  
   785  		case APOPQ, APOPFQ:
   786  			deltasp -= 8
   787  			p.Spadj = -8
   788  			continue
   789  
   790  		case APOPW, APOPFW:
   791  			deltasp -= 2
   792  			p.Spadj = -2
   793  			continue
   794  
   795  		case AADJSP:
   796  			p.Spadj = int32(p.From.Offset)
   797  			deltasp += int32(p.From.Offset)
   798  			continue
   799  
   800  		case obj.ARET:
   801  			// do nothing
   802  		}
   803  
        		// At a RET the accumulated SP adjustment must equal the frame
        		// size.
   804  		if autoffset != deltasp {
   805  			ctxt.Diag("%s: unbalanced PUSH/POP", cursym)
   806  		}
   807  
        		// Expand RET into: ADJSP -localoffset (if any locals),
        		// POPQ BP (if BP was saved), RET. The original instruction is
        		// reused as the first instruction of that sequence.
   808  		if autoffset != 0 {
   809  			to := p.To // Keep To attached to RET for retjmp below
   810  			p.To = obj.Addr{}
   811  			if localoffset != 0 {
   812  				p.As = AADJSP
   813  				p.From.Type = obj.TYPE_CONST
   814  				p.From.Offset = int64(-localoffset)
   815  				p.Spadj = -localoffset
   816  				p = obj.Appendp(p, newprog)
   817  			}
   818  
   819  			if bpsize > 0 {
   820  				// Restore caller's BP
   821  				p.As = APOPQ
   822  				p.To.Type = obj.TYPE_REG
   823  				p.To.Reg = REG_BP
   824  				p.Spadj = -int32(bpsize)
   825  				p = obj.Appendp(p, newprog)
   826  			}
   827  
   828  			p.As = obj.ARET
   829  			p.To = to
   830  
   831  			// If there are instructions following
   832  			// this ARET, they come from a branch
   833  			// with the same stackframe, so undo
   834  			// the cleanup.
   835  			p.Spadj = +autoffset
   836  		}
   837  
        		// A RET that names a symbol is a tail jump to that symbol.
   838  		if p.To.Sym != nil { // retjmp
   839  			p.As = obj.AJMP
   840  		}
   841  	}
   842  }
   843  
   844  func isZeroArgRuntimeCall(s *obj.LSym) bool {
   845  	if s == nil {
   846  		return false
   847  	}
   848  	switch s.Name {
   849  	case "runtime.panicdivide", "runtime.panicwrap", "runtime.panicshift", "runtime.panicBounds", "runtime.panicExtend":
   850  		return true
   851  	}
   852  	return false
   853  }
   854  
   855  func indir_cx(ctxt *obj.Link, a *obj.Addr) {
   856  	a.Type = obj.TYPE_MEM
   857  	a.Reg = REG_CX
   858  }
   859  
   860  // loadG ensures the G is loaded into a register (either CX or REGG),
   861  // appending instructions to p if necessary. It returns the new last
   862  // instruction and the G register.
   863  func loadG(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc) (*obj.Prog, int16) {
   864  	if ctxt.Arch.Family == sys.AMD64 && cursym.ABI() == obj.ABIInternal {
   865  		// Use the G register directly in ABIInternal
   866  		return p, REGG
   867  	}
   868  
   869  	var regg int16 = REG_CX
   870  	if ctxt.Arch.Family == sys.AMD64 {
   871  		regg = REGG // == REG_R14
   872  	}
   873  
   874  	p = obj.Appendp(p, newprog)
   875  	p.As = AMOVQ
   876  	if ctxt.Arch.PtrSize == 4 {
   877  		p.As = AMOVL
   878  	}
   879  	p.From.Type = obj.TYPE_MEM
   880  	p.From.Reg = REG_TLS
   881  	p.From.Offset = 0
   882  	p.To.Type = obj.TYPE_REG
   883  	p.To.Reg = regg
   884  
   885  	// Rewrite TLS instruction if necessary.
   886  	next := p.Link
   887  	progedit(ctxt, p, newprog)
   888  	for p.Link != next {
   889  		p = p.Link
   890  		progedit(ctxt, p, newprog)
   891  	}
   892  
   893  	if p.From.Index == REG_TLS {
   894  		p.From.Scale = 2
   895  	}
   896  
   897  	return p, regg
   898  }
   899  
// stacksplit appends code after p to check for a stack split.
// It appends to (does not overwrite) p.
// The G register is loaded here, via loadG, as part of the emitted sequence.
// Returns the last new instruction of the inline check, which is the value
// returned by ctxt.EndUnsafePoint after the conditional jump.
//
// framesize selects one of three check sequences (see the branches on
// abi.StackSmall and abi.StackBig below). The slow path (spill register
// arguments, call the runtime morestack routine, unspill, jump back) is
// appended after the current last instruction of the function rather than
// inline. textarg is not referenced in this function.
func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) *obj.Prog {
	// Pointer-sized opcodes; the 64-bit forms are the default and are
	// swapped for the 32-bit forms on 386.
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ
	push, pop := APUSHQ, APOPQ

	if ctxt.Arch.Family == sys.I386 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
		push, pop = APUSHL, APOPL
	}

	tmp := int16(REG_AX) // use AX for 32-bit
	if ctxt.Arch.Family == sys.AMD64 {
		// Avoid register parameters.
		tmp = int16(REGENTRYTMP0)
	}

	if ctxt.Flag_maymorestack != "" {
		// Emit a call to the function named by Flag_maymorestack before the
		// check, preserving register arguments and, when the function needs
		// it, the context register around the call.
		p = cursym.Func().SpillRegisterArgs(p, newprog)

		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = push
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REGCTXT
		}

		// We call maymorestack with an ABI matching the
		// caller's ABI. Since this is the first thing that
		// happens in the function, we have to be consistent
		// with the caller about CPU state (notably,
		// fixed-meaning registers).

		p = obj.Appendp(p, newprog)
		p.As = obj.ACALL
		p.To.Type = obj.TYPE_BRANCH
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI())

		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = pop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REGCTXT
		}

		p = cursym.Func().UnspillRegisterArgs(p, newprog)
	}

	// Jump back to here after morestack returns.
	// (The jump targets startPred.Link, the instruction following this one.)
	startPred := p

	// Load G register
	var rg int16
	p, rg = loadG(ctxt, cursym, p, newprog)

	// q1 is the extra carry-set branch to the slow path; it is only
	// emitted by the huge-frame sequence and stays nil otherwise.
	var q1 *obj.Prog
	if framesize <= abi.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(p, newprog)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		// Mark the stack bound check and morestack call async nonpreemptible.
		// If we get preempted here, when resumed the preemption request is
		// cleared, but we'll still call morestack, which will double the stack
		// unnecessarily. See issue #35470.
		p = ctxt.StartUnsafePoint(p, newprog)
	} else if framesize <= abi.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), tmp
		//	CMPQ tmp, stackguard
		p = obj.Appendp(p, newprog)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - abi.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above
	} else {
		// Such a large stack we need to protect against underflow.
		// The runtime guarantees SP > abi.StackBig, but
		// framesize is large enough that SP-framesize may
		// underflow, causing a direct comparison with the
		// stack guard to incorrectly succeed. We explicitly
		// guard against underflow.
		//
		//	MOVQ	SP, tmp
		//	SUBQ	$(framesize - StackSmall), tmp
		//	// If subtraction wrapped (carry set), morestack.
		//	JCS	label-of-call-to-morestack
		//	CMPQ	tmp, stackguard

		p = obj.Appendp(p, newprog)

		p.As = mov
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above

		p = obj.Appendp(p, newprog)
		p.As = sub
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(framesize) - abi.StackSmall
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = obj.Appendp(p, newprog)
		p.As = AJCS
		p.To.Type = obj.TYPE_BRANCH
		q1 = p // target is filled in once the slow path exists

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	}

	// common: conditional branch to the slow path; its target is set
	// at the end, once the slow path has been emitted.
	jls := obj.Appendp(p, newprog)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	end := ctxt.EndUnsafePoint(jls, newprog, -1)

	// Walk to the current last instruction of the function.
	var last *obj.Prog
	for last = cursym.Func().Text; last.Link != nil; last = last.Link {
	}

	// Now we are at the end of the function, but logically
	// we are still in function prologue. We need to fix the
	// SP data and PCDATA.
	spfix := obj.Appendp(last, newprog)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	pcdata := ctxt.EmitEntryStackMap(cursym, spfix, newprog)
	spill := ctxt.StartUnsafePoint(pcdata, newprog)
	pcdata = cursym.Func().SpillRegisterArgs(spill, newprog)

	call := obj.Appendp(pcdata, newprog)
	call.Pos = cursym.Func().Text.Pos
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	// Pick the morestack variant: morestackc for C functions,
	// morestack_noctxt when the function does not need a context,
	// plain morestack otherwise.
	morestack := "runtime.morestack"
	switch {
	case cursym.CFunc():
		morestack = "runtime.morestackc"
	case !cursym.Func().Text.From.Sym.NeedCtxt():
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = ctxt.Lookup(morestack)
	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
	callend := call
	progedit(ctxt, callend, newprog)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link, newprog)
	}

	// The instructions which unspill regs should be preemptible.
	pcdata = ctxt.EndUnsafePoint(callend, newprog, -1)
	unspill := cursym.Func().UnspillRegisterArgs(pcdata, newprog)

	// Jump back to the instruction following startPred. The +framesize
	// Spadj cancels the -framesize recorded on spfix above.
	jmp := obj.Appendp(unspill, newprog)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.To.SetTarget(startPred.Link)
	jmp.Spadj = +framesize

	// Both conditional branches enter the slow path at spill.
	jls.To.SetTarget(spill)
	if q1 != nil {
		q1.To.SetTarget(spill)
	}

	return end
}
  1120  
  1121  func isR15(r int16) bool {
  1122  	return r == REG_R15 || r == REG_R15B
  1123  }
  1124  func addrMentionsR15(a *obj.Addr) bool {
  1125  	if a == nil {
  1126  		return false
  1127  	}
  1128  	return isR15(a.Reg) || isR15(a.Index)
  1129  }
  1130  func progMentionsR15(p *obj.Prog) bool {
  1131  	return addrMentionsR15(&p.From) || addrMentionsR15(&p.To) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3())
  1132  }
  1133  
  1134  func addrUsesGlobal(a *obj.Addr) bool {
  1135  	if a == nil {
  1136  		return false
  1137  	}
  1138  	return a.Name == obj.NAME_EXTERN && !a.Sym.Local()
  1139  }
  1140  func progUsesGlobal(p *obj.Prog) bool {
  1141  	if p.As == obj.ACALL || p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
  1142  		// These opcodes don't use a GOT to access their argument (see rewriteToUseGot),
  1143  		// or R15 would be dead at them anyway.
  1144  		return false
  1145  	}
  1146  	if p.As == ALEAQ {
  1147  		// The GOT entry is placed directly in the destination register; R15 is not used.
  1148  		return false
  1149  	}
  1150  	return addrUsesGlobal(&p.From) || addrUsesGlobal(&p.To) || addrUsesGlobal(p.GetFrom3())
  1151  }
  1152  
  1153  type rwMask int
  1154  
  1155  const (
  1156  	readFrom rwMask = 1 << iota
  1157  	readTo
  1158  	readReg
  1159  	readFrom3
  1160  	writeFrom
  1161  	writeTo
  1162  	writeReg
  1163  	writeFrom3
  1164  )
  1165  
  1166  // progRW returns a mask describing the effects of the instruction p.
  1167  // Note: this isn't exhaustively accurate. It is only currently used for detecting
  1168  // reads/writes to R15, so SSE register behavior isn't fully correct, and
  1169  // other weird cases (e.g. writes to DX by CLD) also aren't captured.
  1170  func progRW(p *obj.Prog) rwMask {
  1171  	var m rwMask
  1172  	// Default for most instructions
  1173  	if p.From.Type != obj.TYPE_NONE {
  1174  		m |= readFrom
  1175  	}
  1176  	if p.To.Type != obj.TYPE_NONE {
  1177  		// Most x86 instructions update the To value
  1178  		m |= readTo | writeTo
  1179  	}
  1180  	if p.Reg != 0 {
  1181  		m |= readReg
  1182  	}
  1183  	if p.GetFrom3() != nil {
  1184  		m |= readFrom3
  1185  	}
  1186  
  1187  	// Lots of exceptions to the above defaults.
  1188  	name := p.As.String()
  1189  	if strings.HasPrefix(name, "MOV") || strings.HasPrefix(name, "PMOV") {
  1190  		// MOV instructions don't read To.
  1191  		m &^= readTo
  1192  	}
  1193  	switch p.As {
  1194  	case APOPW, APOPL, APOPQ,
  1195  		ALEAL, ALEAQ,
  1196  		AIMUL3W, AIMUL3L, AIMUL3Q,
  1197  		APEXTRB, APEXTRW, APEXTRD, APEXTRQ, AVPEXTRB, AVPEXTRW, AVPEXTRD, AVPEXTRQ, AEXTRACTPS,
  1198  		ABSFW, ABSFL, ABSFQ, ABSRW, ABSRL, ABSRQ, APOPCNTW, APOPCNTL, APOPCNTQ, ALZCNTW, ALZCNTL, ALZCNTQ,
  1199  		ASHLXL, ASHLXQ, ASHRXL, ASHRXQ, ASARXL, ASARXQ:
  1200  		// These instructions are pure writes to To. They don't use its old value.
  1201  		m &^= readTo
  1202  	case AXORL, AXORQ:
  1203  		// Register-clearing idiom doesn't read previous value.
  1204  		if p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_REG && p.From.Reg == p.To.Reg {
  1205  			m &^= readFrom | readTo
  1206  		}
  1207  	case AMULXL, AMULXQ:
  1208  		// These are write-only to both To and From3.
  1209  		m &^= readTo | readFrom3
  1210  		m |= writeFrom3
  1211  	}
  1212  	return m
  1213  }
  1214  
  1215  // progReadsR15 reports whether p reads the register R15.
  1216  func progReadsR15(p *obj.Prog) bool {
  1217  	m := progRW(p)
  1218  	if m&readFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
  1219  		return true
  1220  	}
  1221  	if m&readTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) {
  1222  		return true
  1223  	}
  1224  	if m&readReg != 0 && isR15(p.Reg) {
  1225  		return true
  1226  	}
  1227  	if m&readFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) {
  1228  		return true
  1229  	}
  1230  	// reads of the index registers
  1231  	if p.From.Type == obj.TYPE_MEM && (isR15(p.From.Reg) || isR15(p.From.Index)) {
  1232  		return true
  1233  	}
  1234  	if p.To.Type == obj.TYPE_MEM && (isR15(p.To.Reg) || isR15(p.To.Index)) {
  1235  		return true
  1236  	}
  1237  	if f3 := p.GetFrom3(); f3 != nil && f3.Type == obj.TYPE_MEM && (isR15(f3.Reg) || isR15(f3.Index)) {
  1238  		return true
  1239  	}
  1240  	return false
  1241  }
  1242  
  1243  // progWritesR15 reports whether p writes the register R15.
  1244  func progWritesR15(p *obj.Prog) bool {
  1245  	m := progRW(p)
  1246  	if m&writeFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
  1247  		return true
  1248  	}
  1249  	if m&writeTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) {
  1250  		return true
  1251  	}
  1252  	if m&writeReg != 0 && isR15(p.Reg) {
  1253  		return true
  1254  	}
  1255  	if m&writeFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) {
  1256  		return true
  1257  	}
  1258  	return false
  1259  }
  1260  
  1261  func errorCheck(ctxt *obj.Link, s *obj.LSym) {
  1262  	// When dynamic linking, R15 is used to access globals. Reject code that
  1263  	// uses R15 after a global variable access.
  1264  	if !ctxt.Flag_dynlink {
  1265  		return
  1266  	}
  1267  
  1268  	// Flood fill all the instructions where R15's value is junk.
  1269  	// If there are any uses of R15 in that set, report an error.
  1270  	var work []*obj.Prog
  1271  	var mentionsR15 bool
  1272  	for p := s.Func().Text; p != nil; p = p.Link {
  1273  		if progUsesGlobal(p) {
  1274  			work = append(work, p)
  1275  			p.Mark |= markBit
  1276  		}
  1277  		if progMentionsR15(p) {
  1278  			mentionsR15 = true
  1279  		}
  1280  	}
  1281  	if mentionsR15 {
  1282  		for len(work) > 0 {
  1283  			p := work[len(work)-1]
  1284  			work = work[:len(work)-1]
  1285  			if progReadsR15(p) {
  1286  				pos := ctxt.PosTable.Pos(p.Pos)
  1287  				ctxt.Diag("%s:%s: when dynamic linking, R15 is clobbered by a global variable access and is used here: %v", path.Base(pos.Filename()), pos.LineNumber(), p)
  1288  				break // only report one error
  1289  			}
  1290  			if progWritesR15(p) {
  1291  				// R15 is overwritten by this instruction. Its value is not junk any more.
  1292  				continue
  1293  			}
  1294  			if q := p.To.Target(); q != nil && q.Mark&markBit == 0 {
  1295  				q.Mark |= markBit
  1296  				work = append(work, q)
  1297  			}
  1298  			if p.As == obj.AJMP || p.As == obj.ARET {
  1299  				continue // no fallthrough
  1300  			}
  1301  			if q := p.Link; q != nil && q.Mark&markBit == 0 {
  1302  				q.Mark |= markBit
  1303  				work = append(work, q)
  1304  			}
  1305  		}
  1306  	}
  1307  
  1308  	// Clean up.
  1309  	for p := s.Func().Text; p != nil; p = p.Link {
  1310  		p.Mark &^= markBit
  1311  	}
  1312  }
  1313  
// unaryDst is the set of opcodes installed as the UnaryDst field of both
// Linkamd64 and Link386 below. Judging by the name, these are instructions
// whose single operand is a destination; confirm against the documentation
// of obj.LinkArch.UnaryDst.
var unaryDst = map[obj.As]bool{
	ABSWAPL:     true,
	ABSWAPQ:     true,
	ACLDEMOTE:   true,
	ACLFLUSH:    true,
	ACLFLUSHOPT: true,
	ACLWB:       true,
	ACMPXCHG16B: true,
	ACMPXCHG8B:  true,
	ADECB:       true,
	ADECL:       true,
	ADECQ:       true,
	ADECW:       true,
	AFBSTP:      true,
	AFFREE:      true,
	AFLDENV:     true,
	AFSAVE:      true,
	AFSTCW:      true,
	AFSTENV:     true,
	AFSTSW:      true,
	AFXSAVE64:   true,
	AFXSAVE:     true,
	AINCB:       true,
	AINCL:       true,
	AINCQ:       true,
	AINCW:       true,
	ANEGB:       true,
	ANEGL:       true,
	ANEGQ:       true,
	ANEGW:       true,
	ANOTB:       true,
	ANOTL:       true,
	ANOTQ:       true,
	ANOTW:       true,
	APOPL:       true,
	APOPQ:       true,
	APOPW:       true,
	ARDFSBASEL:  true,
	ARDFSBASEQ:  true,
	ARDGSBASEL:  true,
	ARDGSBASEQ:  true,
	ARDPID:      true,
	ARDRANDL:    true,
	ARDRANDQ:    true,
	ARDRANDW:    true,
	ARDSEEDL:    true,
	ARDSEEDQ:    true,
	ARDSEEDW:    true,
	ASETCC:      true,
	ASETCS:      true,
	ASETEQ:      true,
	ASETGE:      true,
	ASETGT:      true,
	ASETHI:      true,
	ASETLE:      true,
	ASETLS:      true,
	ASETLT:      true,
	ASETMI:      true,
	ASETNE:      true,
	ASETOC:      true,
	ASETOS:      true,
	ASETPC:      true,
	ASETPL:      true,
	ASETPS:      true,
	ASGDT:       true,
	ASIDT:       true,
	ASLDTL:      true,
	ASLDTQ:      true,
	ASLDTW:      true,
	ASMSWL:      true,
	ASMSWQ:      true,
	ASMSWW:      true,
	ASTMXCSR:    true,
	ASTRL:       true,
	ASTRQ:       true,
	ASTRW:       true,
	AXSAVE64:    true,
	AXSAVE:      true,
	AXSAVEC64:   true,
	AXSAVEC:     true,
	AXSAVEOPT64: true,
	AXSAVEOPT:   true,
	AXSAVES64:   true,
	AXSAVES:     true,
}
  1399  
// Linkamd64 is the obj.LinkArch for sys.ArchAMD64. It installs this
// package's hooks: instinit, errorCheck, preprocess, span6, progedit and
// populateSeh, together with the unaryDst opcode set and the AMD64 DWARF
// register table.
var Linkamd64 = obj.LinkArch{
	Arch:           sys.ArchAMD64,
	Init:           instinit,
	ErrorCheck:     errorCheck,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	SEH:            populateSeh,
	UnaryDst:       unaryDst,
	DWARFRegisters: AMD64DWARFRegisters,
}
  1411  
// Link386 is the obj.LinkArch for sys.Arch386. It installs this package's
// instinit, preprocess, span6 and progedit hooks, together with the unaryDst
// opcode set and the X86 DWARF register table. No ErrorCheck or SEH hook is
// set for this architecture.
var Link386 = obj.LinkArch{
	Arch:           sys.Arch386,
	Init:           instinit,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: X86DWARFRegisters,
}
  1421  

View as plain text