Source file src/cmd/compile/internal/x86/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package x86
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  
    11  	"cmd/compile/internal/base"
    12  	"cmd/compile/internal/ir"
    13  	"cmd/compile/internal/logopt"
    14  	"cmd/compile/internal/ssa"
    15  	"cmd/compile/internal/ssagen"
    16  	"cmd/compile/internal/types"
    17  	"cmd/internal/obj"
    18  	"cmd/internal/obj/x86"
    19  	"internal/abi"
    20  )
    21  
    22  // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
    23  func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
    24  	flive := b.FlagsLiveAtEnd
    25  	for _, c := range b.ControlValues() {
    26  		flive = c.Type.IsFlags() || flive
    27  	}
    28  	for i := len(b.Values) - 1; i >= 0; i-- {
    29  		v := b.Values[i]
    30  		if flive && v.Op == ssa.Op386MOVLconst {
    31  			// The "mark" is any non-nil Aux value.
    32  			v.Aux = ssa.AuxMark
    33  		}
    34  		if v.Type.IsFlags() {
    35  			flive = false
    36  		}
    37  		for _, a := range v.Args {
    38  			if a.Type.IsFlags() {
    39  				flive = true
    40  			}
    41  		}
    42  	}
    43  }
    44  
    45  // loadByType returns the load instruction of the given type.
    46  func loadByType(t *types.Type) obj.As {
    47  	// Avoid partial register write
    48  	if !t.IsFloat() {
    49  		switch t.Size() {
    50  		case 1:
    51  			return x86.AMOVBLZX
    52  		case 2:
    53  			return x86.AMOVWLZX
    54  		}
    55  	}
    56  	// Otherwise, there's no difference between load and store opcodes.
    57  	return storeByType(t)
    58  }
    59  
    60  // storeByType returns the store instruction of the given type.
    61  func storeByType(t *types.Type) obj.As {
    62  	width := t.Size()
    63  	if t.IsFloat() {
    64  		switch width {
    65  		case 4:
    66  			return x86.AMOVSS
    67  		case 8:
    68  			return x86.AMOVSD
    69  		}
    70  	} else {
    71  		switch width {
    72  		case 1:
    73  			return x86.AMOVB
    74  		case 2:
    75  			return x86.AMOVW
    76  		case 4:
    77  			return x86.AMOVL
    78  		}
    79  	}
    80  	panic("bad store type")
    81  }
    82  
    83  // moveByType returns the reg->reg move instruction of the given type.
    84  func moveByType(t *types.Type) obj.As {
    85  	if t.IsFloat() {
    86  		switch t.Size() {
    87  		case 4:
    88  			return x86.AMOVSS
    89  		case 8:
    90  			return x86.AMOVSD
    91  		default:
    92  			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
    93  		}
    94  	} else {
    95  		switch t.Size() {
    96  		case 1:
    97  			// Avoids partial register write
    98  			return x86.AMOVL
    99  		case 2:
   100  			return x86.AMOVL
   101  		case 4:
   102  			return x86.AMOVL
   103  		default:
   104  			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
   105  		}
   106  	}
   107  }
   108  
   109  // opregreg emits instructions for
   110  //
   111  //	dest := dest(To) op src(From)
   112  //
   113  // and also returns the created obj.Prog so it
   114  // may be further adjusted (offset, scale, etc).
   115  func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
   116  	p := s.Prog(op)
   117  	p.From.Type = obj.TYPE_REG
   118  	p.To.Type = obj.TYPE_REG
   119  	p.To.Reg = dest
   120  	p.From.Reg = src
   121  	return p
   122  }
   123  
   124  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   125  	switch v.Op {
   126  	case ssa.Op386ADDL:
   127  		r := v.Reg()
   128  		r1 := v.Args[0].Reg()
   129  		r2 := v.Args[1].Reg()
   130  		switch {
   131  		case r == r1:
   132  			p := s.Prog(v.Op.Asm())
   133  			p.From.Type = obj.TYPE_REG
   134  			p.From.Reg = r2
   135  			p.To.Type = obj.TYPE_REG
   136  			p.To.Reg = r
   137  		case r == r2:
   138  			p := s.Prog(v.Op.Asm())
   139  			p.From.Type = obj.TYPE_REG
   140  			p.From.Reg = r1
   141  			p.To.Type = obj.TYPE_REG
   142  			p.To.Reg = r
   143  		default:
   144  			p := s.Prog(x86.ALEAL)
   145  			p.From.Type = obj.TYPE_MEM
   146  			p.From.Reg = r1
   147  			p.From.Scale = 1
   148  			p.From.Index = r2
   149  			p.To.Type = obj.TYPE_REG
   150  			p.To.Reg = r
   151  		}
   152  
   153  	// 2-address opcode arithmetic
   154  	case ssa.Op386SUBL,
   155  		ssa.Op386MULL,
   156  		ssa.Op386ANDL,
   157  		ssa.Op386ORL,
   158  		ssa.Op386XORL,
   159  		ssa.Op386SHLL,
   160  		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
   161  		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
   162  		ssa.Op386ROLL, ssa.Op386ROLW, ssa.Op386ROLB,
   163  		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
   164  		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
   165  		ssa.Op386PXOR,
   166  		ssa.Op386ADCL,
   167  		ssa.Op386SBBL:
   168  		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
   169  
   170  	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
   171  		// output 0 is carry/borrow, output 1 is the low 32 bits.
   172  		opregreg(s, v.Op.Asm(), v.Reg0(), v.Args[1].Reg())
   173  
   174  	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
   175  		// output 0 is carry/borrow, output 1 is the low 32 bits.
   176  		p := s.Prog(v.Op.Asm())
   177  		p.From.Type = obj.TYPE_CONST
   178  		p.From.Offset = v.AuxInt
   179  		p.To.Type = obj.TYPE_REG
   180  		p.To.Reg = v.Reg0()
   181  
   182  	case ssa.Op386DIVL, ssa.Op386DIVW,
   183  		ssa.Op386DIVLU, ssa.Op386DIVWU,
   184  		ssa.Op386MODL, ssa.Op386MODW,
   185  		ssa.Op386MODLU, ssa.Op386MODWU:
   186  
   187  		// Arg[0] is already in AX as it's the only register we allow
   188  		// and AX is the only output
   189  		x := v.Args[1].Reg()
   190  
   191  		// CPU faults upon signed overflow, which occurs when most
   192  		// negative int is divided by -1.
   193  		var j *obj.Prog
   194  		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
   195  			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {
   196  
   197  			if ssa.DivisionNeedsFixUp(v) {
   198  				var c *obj.Prog
   199  				switch v.Op {
   200  				case ssa.Op386DIVL, ssa.Op386MODL:
   201  					c = s.Prog(x86.ACMPL)
   202  					j = s.Prog(x86.AJEQ)
   203  
   204  				case ssa.Op386DIVW, ssa.Op386MODW:
   205  					c = s.Prog(x86.ACMPW)
   206  					j = s.Prog(x86.AJEQ)
   207  				}
   208  				c.From.Type = obj.TYPE_REG
   209  				c.From.Reg = x
   210  				c.To.Type = obj.TYPE_CONST
   211  				c.To.Offset = -1
   212  
   213  				j.To.Type = obj.TYPE_BRANCH
   214  			}
   215  			// sign extend the dividend
   216  			switch v.Op {
   217  			case ssa.Op386DIVL, ssa.Op386MODL:
   218  				s.Prog(x86.ACDQ)
   219  			case ssa.Op386DIVW, ssa.Op386MODW:
   220  				s.Prog(x86.ACWD)
   221  			}
   222  		}
   223  
   224  		// for unsigned ints, we sign extend by setting DX = 0
   225  		// signed ints were sign extended above
   226  		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
   227  			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
   228  			c := s.Prog(x86.AXORL)
   229  			c.From.Type = obj.TYPE_REG
   230  			c.From.Reg = x86.REG_DX
   231  			c.To.Type = obj.TYPE_REG
   232  			c.To.Reg = x86.REG_DX
   233  		}
   234  
   235  		p := s.Prog(v.Op.Asm())
   236  		p.From.Type = obj.TYPE_REG
   237  		p.From.Reg = x
   238  
   239  		// signed division, rest of the check for -1 case
   240  		if j != nil {
   241  			j2 := s.Prog(obj.AJMP)
   242  			j2.To.Type = obj.TYPE_BRANCH
   243  
   244  			var n *obj.Prog
   245  			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
   246  				// n * -1 = -n
   247  				n = s.Prog(x86.ANEGL)
   248  				n.To.Type = obj.TYPE_REG
   249  				n.To.Reg = x86.REG_AX
   250  			} else {
   251  				// n % -1 == 0
   252  				n = s.Prog(x86.AXORL)
   253  				n.From.Type = obj.TYPE_REG
   254  				n.From.Reg = x86.REG_DX
   255  				n.To.Type = obj.TYPE_REG
   256  				n.To.Reg = x86.REG_DX
   257  			}
   258  
   259  			j.To.SetTarget(n)
   260  			j2.To.SetTarget(s.Pc())
   261  		}
   262  
   263  	case ssa.Op386HMULL, ssa.Op386HMULLU:
   264  		// the frontend rewrites constant division by 8/16/32 bit integers into
   265  		// HMUL by a constant
   266  		// SSA rewrites generate the 64 bit versions
   267  
   268  		// Arg[0] is already in AX as it's the only register we allow
   269  		// and DX is the only output we care about (the high bits)
   270  		p := s.Prog(v.Op.Asm())
   271  		p.From.Type = obj.TYPE_REG
   272  		p.From.Reg = v.Args[1].Reg()
   273  
   274  		// IMULB puts the high portion in AH instead of DL,
   275  		// so move it to DL for consistency
   276  		if v.Type.Size() == 1 {
   277  			m := s.Prog(x86.AMOVB)
   278  			m.From.Type = obj.TYPE_REG
   279  			m.From.Reg = x86.REG_AH
   280  			m.To.Type = obj.TYPE_REG
   281  			m.To.Reg = x86.REG_DX
   282  		}
   283  
   284  	case ssa.Op386MULLU:
   285  		// Arg[0] is already in AX as it's the only register we allow
   286  		// results lo in AX
   287  		p := s.Prog(v.Op.Asm())
   288  		p.From.Type = obj.TYPE_REG
   289  		p.From.Reg = v.Args[1].Reg()
   290  
   291  	case ssa.Op386MULLQU:
   292  		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
   293  		p := s.Prog(v.Op.Asm())
   294  		p.From.Type = obj.TYPE_REG
   295  		p.From.Reg = v.Args[1].Reg()
   296  
   297  	case ssa.Op386AVGLU:
   298  		// compute (x+y)/2 unsigned.
   299  		// Do a 32-bit add, the overflow goes into the carry.
   300  		// Shift right once and pull the carry back into the 31st bit.
   301  		p := s.Prog(x86.AADDL)
   302  		p.From.Type = obj.TYPE_REG
   303  		p.To.Type = obj.TYPE_REG
   304  		p.To.Reg = v.Reg()
   305  		p.From.Reg = v.Args[1].Reg()
   306  		p = s.Prog(x86.ARCRL)
   307  		p.From.Type = obj.TYPE_CONST
   308  		p.From.Offset = 1
   309  		p.To.Type = obj.TYPE_REG
   310  		p.To.Reg = v.Reg()
   311  
   312  	case ssa.Op386ADDLconst:
   313  		r := v.Reg()
   314  		a := v.Args[0].Reg()
   315  		if r == a {
   316  			if v.AuxInt == 1 {
   317  				p := s.Prog(x86.AINCL)
   318  				p.To.Type = obj.TYPE_REG
   319  				p.To.Reg = r
   320  				return
   321  			}
   322  			if v.AuxInt == -1 {
   323  				p := s.Prog(x86.ADECL)
   324  				p.To.Type = obj.TYPE_REG
   325  				p.To.Reg = r
   326  				return
   327  			}
   328  			p := s.Prog(v.Op.Asm())
   329  			p.From.Type = obj.TYPE_CONST
   330  			p.From.Offset = v.AuxInt
   331  			p.To.Type = obj.TYPE_REG
   332  			p.To.Reg = r
   333  			return
   334  		}
   335  		p := s.Prog(x86.ALEAL)
   336  		p.From.Type = obj.TYPE_MEM
   337  		p.From.Reg = a
   338  		p.From.Offset = v.AuxInt
   339  		p.To.Type = obj.TYPE_REG
   340  		p.To.Reg = r
   341  
   342  	case ssa.Op386MULLconst:
   343  		r := v.Reg()
   344  		p := s.Prog(v.Op.Asm())
   345  		p.From.Type = obj.TYPE_CONST
   346  		p.From.Offset = v.AuxInt
   347  		p.To.Type = obj.TYPE_REG
   348  		p.To.Reg = r
   349  		p.AddRestSourceReg(v.Args[0].Reg())
   350  
   351  	case ssa.Op386SUBLconst,
   352  		ssa.Op386ADCLconst,
   353  		ssa.Op386SBBLconst,
   354  		ssa.Op386ANDLconst,
   355  		ssa.Op386ORLconst,
   356  		ssa.Op386XORLconst,
   357  		ssa.Op386SHLLconst,
   358  		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
   359  		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
   360  		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
   361  		p := s.Prog(v.Op.Asm())
   362  		p.From.Type = obj.TYPE_CONST
   363  		p.From.Offset = v.AuxInt
   364  		p.To.Type = obj.TYPE_REG
   365  		p.To.Reg = v.Reg()
   366  	case ssa.Op386SBBLcarrymask:
   367  		r := v.Reg()
   368  		p := s.Prog(v.Op.Asm())
   369  		p.From.Type = obj.TYPE_REG
   370  		p.From.Reg = r
   371  		p.To.Type = obj.TYPE_REG
   372  		p.To.Reg = r
   373  	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
   374  		r := v.Args[0].Reg()
   375  		i := v.Args[1].Reg()
   376  		p := s.Prog(x86.ALEAL)
   377  		switch v.Op {
   378  		case ssa.Op386LEAL1:
   379  			p.From.Scale = 1
   380  			if i == x86.REG_SP {
   381  				r, i = i, r
   382  			}
   383  		case ssa.Op386LEAL2:
   384  			p.From.Scale = 2
   385  		case ssa.Op386LEAL4:
   386  			p.From.Scale = 4
   387  		case ssa.Op386LEAL8:
   388  			p.From.Scale = 8
   389  		}
   390  		p.From.Type = obj.TYPE_MEM
   391  		p.From.Reg = r
   392  		p.From.Index = i
   393  		ssagen.AddAux(&p.From, v)
   394  		p.To.Type = obj.TYPE_REG
   395  		p.To.Reg = v.Reg()
   396  	case ssa.Op386LEAL:
   397  		p := s.Prog(x86.ALEAL)
   398  		p.From.Type = obj.TYPE_MEM
   399  		p.From.Reg = v.Args[0].Reg()
   400  		ssagen.AddAux(&p.From, v)
   401  		p.To.Type = obj.TYPE_REG
   402  		p.To.Reg = v.Reg()
   403  	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
   404  		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
   405  		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
   406  	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
   407  		// Go assembler has swapped operands for UCOMISx relative to CMP,
   408  		// must account for that right here.
   409  		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
   410  	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
   411  		p := s.Prog(v.Op.Asm())
   412  		p.From.Type = obj.TYPE_REG
   413  		p.From.Reg = v.Args[0].Reg()
   414  		p.To.Type = obj.TYPE_CONST
   415  		p.To.Offset = v.AuxInt
   416  	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
   417  		p := s.Prog(v.Op.Asm())
   418  		p.From.Type = obj.TYPE_CONST
   419  		p.From.Offset = v.AuxInt
   420  		p.To.Type = obj.TYPE_REG
   421  		p.To.Reg = v.Args[0].Reg()
   422  	case ssa.Op386CMPLload, ssa.Op386CMPWload, ssa.Op386CMPBload:
   423  		p := s.Prog(v.Op.Asm())
   424  		p.From.Type = obj.TYPE_MEM
   425  		p.From.Reg = v.Args[0].Reg()
   426  		ssagen.AddAux(&p.From, v)
   427  		p.To.Type = obj.TYPE_REG
   428  		p.To.Reg = v.Args[1].Reg()
   429  	case ssa.Op386CMPLconstload, ssa.Op386CMPWconstload, ssa.Op386CMPBconstload:
   430  		sc := v.AuxValAndOff()
   431  		p := s.Prog(v.Op.Asm())
   432  		p.From.Type = obj.TYPE_MEM
   433  		p.From.Reg = v.Args[0].Reg()
   434  		ssagen.AddAux2(&p.From, v, sc.Off64())
   435  		p.To.Type = obj.TYPE_CONST
   436  		p.To.Offset = sc.Val64()
   437  	case ssa.Op386MOVLconst:
   438  		x := v.Reg()
   439  
   440  		// If flags aren't live (indicated by v.Aux == nil),
   441  		// then we can rewrite MOV $0, AX into XOR AX, AX.
   442  		if v.AuxInt == 0 && v.Aux == nil {
   443  			p := s.Prog(x86.AXORL)
   444  			p.From.Type = obj.TYPE_REG
   445  			p.From.Reg = x
   446  			p.To.Type = obj.TYPE_REG
   447  			p.To.Reg = x
   448  			break
   449  		}
   450  
   451  		p := s.Prog(v.Op.Asm())
   452  		p.From.Type = obj.TYPE_CONST
   453  		p.From.Offset = v.AuxInt
   454  		p.To.Type = obj.TYPE_REG
   455  		p.To.Reg = x
   456  	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
   457  		x := v.Reg()
   458  		p := s.Prog(v.Op.Asm())
   459  		p.From.Type = obj.TYPE_FCONST
   460  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   461  		p.To.Type = obj.TYPE_REG
   462  		p.To.Reg = x
   463  	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
   464  		p := s.Prog(x86.ALEAL)
   465  		p.From.Type = obj.TYPE_MEM
   466  		p.From.Name = obj.NAME_EXTERN
   467  		f := math.Float64frombits(uint64(v.AuxInt))
   468  		if v.Op == ssa.Op386MOVSDconst1 {
   469  			p.From.Sym = base.Ctxt.Float64Sym(f)
   470  		} else {
   471  			p.From.Sym = base.Ctxt.Float32Sym(float32(f))
   472  		}
   473  		p.To.Type = obj.TYPE_REG
   474  		p.To.Reg = v.Reg()
   475  	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
   476  		p := s.Prog(v.Op.Asm())
   477  		p.From.Type = obj.TYPE_MEM
   478  		p.From.Reg = v.Args[0].Reg()
   479  		p.To.Type = obj.TYPE_REG
   480  		p.To.Reg = v.Reg()
   481  
   482  	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
   483  		p := s.Prog(v.Op.Asm())
   484  		p.From.Type = obj.TYPE_MEM
   485  		p.From.Reg = v.Args[0].Reg()
   486  		ssagen.AddAux(&p.From, v)
   487  		p.To.Type = obj.TYPE_REG
   488  		p.To.Reg = v.Reg()
   489  	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1,
   490  		ssa.Op386MOVSDloadidx8, ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4, ssa.Op386MOVWloadidx2:
   491  		r := v.Args[0].Reg()
   492  		i := v.Args[1].Reg()
   493  		p := s.Prog(v.Op.Asm())
   494  		p.From.Type = obj.TYPE_MEM
   495  		switch v.Op {
   496  		case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
   497  			if i == x86.REG_SP {
   498  				r, i = i, r
   499  			}
   500  			p.From.Scale = 1
   501  		case ssa.Op386MOVSDloadidx8:
   502  			p.From.Scale = 8
   503  		case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
   504  			p.From.Scale = 4
   505  		case ssa.Op386MOVWloadidx2:
   506  			p.From.Scale = 2
   507  		}
   508  		p.From.Reg = r
   509  		p.From.Index = i
   510  		ssagen.AddAux(&p.From, v)
   511  		p.To.Type = obj.TYPE_REG
   512  		p.To.Reg = v.Reg()
   513  	case ssa.Op386ADDLloadidx4, ssa.Op386SUBLloadidx4, ssa.Op386MULLloadidx4,
   514  		ssa.Op386ANDLloadidx4, ssa.Op386ORLloadidx4, ssa.Op386XORLloadidx4:
   515  		p := s.Prog(v.Op.Asm())
   516  		p.From.Type = obj.TYPE_MEM
   517  		p.From.Reg = v.Args[1].Reg()
   518  		p.From.Index = v.Args[2].Reg()
   519  		p.From.Scale = 4
   520  		ssagen.AddAux(&p.From, v)
   521  		p.To.Type = obj.TYPE_REG
   522  		p.To.Reg = v.Reg()
   523  	case ssa.Op386ADDLload, ssa.Op386SUBLload, ssa.Op386MULLload,
   524  		ssa.Op386ANDLload, ssa.Op386ORLload, ssa.Op386XORLload,
   525  		ssa.Op386ADDSDload, ssa.Op386ADDSSload, ssa.Op386SUBSDload, ssa.Op386SUBSSload,
   526  		ssa.Op386MULSDload, ssa.Op386MULSSload, ssa.Op386DIVSSload, ssa.Op386DIVSDload:
   527  		p := s.Prog(v.Op.Asm())
   528  		p.From.Type = obj.TYPE_MEM
   529  		p.From.Reg = v.Args[1].Reg()
   530  		ssagen.AddAux(&p.From, v)
   531  		p.To.Type = obj.TYPE_REG
   532  		p.To.Reg = v.Reg()
   533  	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore,
   534  		ssa.Op386ADDLmodify, ssa.Op386SUBLmodify, ssa.Op386ANDLmodify, ssa.Op386ORLmodify, ssa.Op386XORLmodify:
   535  		p := s.Prog(v.Op.Asm())
   536  		p.From.Type = obj.TYPE_REG
   537  		p.From.Reg = v.Args[1].Reg()
   538  		p.To.Type = obj.TYPE_MEM
   539  		p.To.Reg = v.Args[0].Reg()
   540  		ssagen.AddAux(&p.To, v)
   541  	case ssa.Op386ADDLconstmodify:
   542  		sc := v.AuxValAndOff()
   543  		val := sc.Val()
   544  		if val == 1 || val == -1 {
   545  			var p *obj.Prog
   546  			if val == 1 {
   547  				p = s.Prog(x86.AINCL)
   548  			} else {
   549  				p = s.Prog(x86.ADECL)
   550  			}
   551  			off := sc.Off64()
   552  			p.To.Type = obj.TYPE_MEM
   553  			p.To.Reg = v.Args[0].Reg()
   554  			ssagen.AddAux2(&p.To, v, off)
   555  			break
   556  		}
   557  		fallthrough
   558  	case ssa.Op386ANDLconstmodify, ssa.Op386ORLconstmodify, ssa.Op386XORLconstmodify:
   559  		sc := v.AuxValAndOff()
   560  		off := sc.Off64()
   561  		val := sc.Val64()
   562  		p := s.Prog(v.Op.Asm())
   563  		p.From.Type = obj.TYPE_CONST
   564  		p.From.Offset = val
   565  		p.To.Type = obj.TYPE_MEM
   566  		p.To.Reg = v.Args[0].Reg()
   567  		ssagen.AddAux2(&p.To, v, off)
   568  	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1,
   569  		ssa.Op386MOVSDstoreidx8, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4, ssa.Op386MOVWstoreidx2,
   570  		ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
   571  		r := v.Args[0].Reg()
   572  		i := v.Args[1].Reg()
   573  		p := s.Prog(v.Op.Asm())
   574  		p.From.Type = obj.TYPE_REG
   575  		p.From.Reg = v.Args[2].Reg()
   576  		p.To.Type = obj.TYPE_MEM
   577  		switch v.Op {
   578  		case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
   579  			if i == x86.REG_SP {
   580  				r, i = i, r
   581  			}
   582  			p.To.Scale = 1
   583  		case ssa.Op386MOVSDstoreidx8:
   584  			p.To.Scale = 8
   585  		case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4,
   586  			ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
   587  			p.To.Scale = 4
   588  		case ssa.Op386MOVWstoreidx2:
   589  			p.To.Scale = 2
   590  		}
   591  		p.To.Reg = r
   592  		p.To.Index = i
   593  		ssagen.AddAux(&p.To, v)
   594  	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
   595  		p := s.Prog(v.Op.Asm())
   596  		p.From.Type = obj.TYPE_CONST
   597  		sc := v.AuxValAndOff()
   598  		p.From.Offset = sc.Val64()
   599  		p.To.Type = obj.TYPE_MEM
   600  		p.To.Reg = v.Args[0].Reg()
   601  		ssagen.AddAux2(&p.To, v, sc.Off64())
   602  	case ssa.Op386ADDLconstmodifyidx4:
   603  		sc := v.AuxValAndOff()
   604  		val := sc.Val()
   605  		if val == 1 || val == -1 {
   606  			var p *obj.Prog
   607  			if val == 1 {
   608  				p = s.Prog(x86.AINCL)
   609  			} else {
   610  				p = s.Prog(x86.ADECL)
   611  			}
   612  			off := sc.Off64()
   613  			p.To.Type = obj.TYPE_MEM
   614  			p.To.Reg = v.Args[0].Reg()
   615  			p.To.Scale = 4
   616  			p.To.Index = v.Args[1].Reg()
   617  			ssagen.AddAux2(&p.To, v, off)
   618  			break
   619  		}
   620  		fallthrough
   621  	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1,
   622  		ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
   623  		p := s.Prog(v.Op.Asm())
   624  		p.From.Type = obj.TYPE_CONST
   625  		sc := v.AuxValAndOff()
   626  		p.From.Offset = sc.Val64()
   627  		r := v.Args[0].Reg()
   628  		i := v.Args[1].Reg()
   629  		switch v.Op {
   630  		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
   631  			p.To.Scale = 1
   632  			if i == x86.REG_SP {
   633  				r, i = i, r
   634  			}
   635  		case ssa.Op386MOVWstoreconstidx2:
   636  			p.To.Scale = 2
   637  		case ssa.Op386MOVLstoreconstidx4,
   638  			ssa.Op386ADDLconstmodifyidx4, ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
   639  			p.To.Scale = 4
   640  		}
   641  		p.To.Type = obj.TYPE_MEM
   642  		p.To.Reg = r
   643  		p.To.Index = i
   644  		ssagen.AddAux2(&p.To, v, sc.Off64())
   645  	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
   646  		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
   647  		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
   648  		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
   649  		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
   650  	case ssa.Op386DUFFZERO:
   651  		p := s.Prog(obj.ADUFFZERO)
   652  		p.To.Type = obj.TYPE_ADDR
   653  		p.To.Sym = ir.Syms.Duffzero
   654  		p.To.Offset = v.AuxInt
   655  	case ssa.Op386DUFFCOPY:
   656  		p := s.Prog(obj.ADUFFCOPY)
   657  		p.To.Type = obj.TYPE_ADDR
   658  		p.To.Sym = ir.Syms.Duffcopy
   659  		p.To.Offset = v.AuxInt
   660  
   661  	case ssa.OpCopy: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
   662  		if v.Type.IsMemory() {
   663  			return
   664  		}
   665  		x := v.Args[0].Reg()
   666  		y := v.Reg()
   667  		if x != y {
   668  			opregreg(s, moveByType(v.Type), y, x)
   669  		}
   670  	case ssa.OpLoadReg:
   671  		if v.Type.IsFlags() {
   672  			v.Fatalf("load flags not implemented: %v", v.LongString())
   673  			return
   674  		}
   675  		p := s.Prog(loadByType(v.Type))
   676  		ssagen.AddrAuto(&p.From, v.Args[0])
   677  		p.To.Type = obj.TYPE_REG
   678  		p.To.Reg = v.Reg()
   679  
   680  	case ssa.OpStoreReg:
   681  		if v.Type.IsFlags() {
   682  			v.Fatalf("store flags not implemented: %v", v.LongString())
   683  			return
   684  		}
   685  		p := s.Prog(storeByType(v.Type))
   686  		p.From.Type = obj.TYPE_REG
   687  		p.From.Reg = v.Args[0].Reg()
   688  		ssagen.AddrAuto(&p.To, v)
   689  	case ssa.Op386LoweredGetClosurePtr:
   690  		// Closure pointer is DX.
   691  		ssagen.CheckLoweredGetClosurePtr(v)
   692  	case ssa.Op386LoweredGetG:
   693  		r := v.Reg()
   694  		// See the comments in cmd/internal/obj/x86/obj6.go
   695  		// near CanUse1InsnTLS for a detailed explanation of these instructions.
   696  		if x86.CanUse1InsnTLS(base.Ctxt) {
   697  			// MOVL (TLS), r
   698  			p := s.Prog(x86.AMOVL)
   699  			p.From.Type = obj.TYPE_MEM
   700  			p.From.Reg = x86.REG_TLS
   701  			p.To.Type = obj.TYPE_REG
   702  			p.To.Reg = r
   703  		} else {
   704  			// MOVL TLS, r
   705  			// MOVL (r)(TLS*1), r
   706  			p := s.Prog(x86.AMOVL)
   707  			p.From.Type = obj.TYPE_REG
   708  			p.From.Reg = x86.REG_TLS
   709  			p.To.Type = obj.TYPE_REG
   710  			p.To.Reg = r
   711  			q := s.Prog(x86.AMOVL)
   712  			q.From.Type = obj.TYPE_MEM
   713  			q.From.Reg = r
   714  			q.From.Index = x86.REG_TLS
   715  			q.From.Scale = 1
   716  			q.To.Type = obj.TYPE_REG
   717  			q.To.Reg = r
   718  		}
   719  
   720  	case ssa.Op386LoweredGetCallerPC:
   721  		p := s.Prog(x86.AMOVL)
   722  		p.From.Type = obj.TYPE_MEM
   723  		p.From.Offset = -4 // PC is stored 4 bytes below first parameter.
   724  		p.From.Name = obj.NAME_PARAM
   725  		p.To.Type = obj.TYPE_REG
   726  		p.To.Reg = v.Reg()
   727  
   728  	case ssa.Op386LoweredGetCallerSP:
   729  		// caller's SP is the address of the first arg
   730  		p := s.Prog(x86.AMOVL)
   731  		p.From.Type = obj.TYPE_ADDR
   732  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize // 0 on 386, just to be consistent with other architectures
   733  		p.From.Name = obj.NAME_PARAM
   734  		p.To.Type = obj.TYPE_REG
   735  		p.To.Reg = v.Reg()
   736  
   737  	case ssa.Op386LoweredWB:
   738  		p := s.Prog(obj.ACALL)
   739  		p.To.Type = obj.TYPE_MEM
   740  		p.To.Name = obj.NAME_EXTERN
   741  		// AuxInt encodes how many buffer entries we need.
   742  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
   743  
   744  	case ssa.Op386LoweredPanicBoundsRR, ssa.Op386LoweredPanicBoundsRC, ssa.Op386LoweredPanicBoundsCR, ssa.Op386LoweredPanicBoundsCC,
   745  		ssa.Op386LoweredPanicExtendRR, ssa.Op386LoweredPanicExtendRC:
   746  		// Compute the constant we put in the PCData entry for this call.
   747  		code, signed := ssa.BoundsKind(v.AuxInt).Code()
   748  		xIsReg := false
   749  		yIsReg := false
   750  		xVal := 0
   751  		yVal := 0
   752  		extend := false
   753  		switch v.Op {
   754  		case ssa.Op386LoweredPanicBoundsRR:
   755  			xIsReg = true
   756  			xVal = int(v.Args[0].Reg() - x86.REG_AX)
   757  			yIsReg = true
   758  			yVal = int(v.Args[1].Reg() - x86.REG_AX)
   759  		case ssa.Op386LoweredPanicExtendRR:
   760  			extend = true
   761  			xIsReg = true
   762  			hi := int(v.Args[0].Reg() - x86.REG_AX)
   763  			lo := int(v.Args[1].Reg() - x86.REG_AX)
   764  			xVal = hi<<2 + lo // encode 2 register numbers
   765  			yIsReg = true
   766  			yVal = int(v.Args[2].Reg() - x86.REG_AX)
   767  		case ssa.Op386LoweredPanicBoundsRC:
   768  			xIsReg = true
   769  			xVal = int(v.Args[0].Reg() - x86.REG_AX)
   770  			c := v.Aux.(ssa.PanicBoundsC).C
   771  			if c >= 0 && c <= abi.BoundsMaxConst {
   772  				yVal = int(c)
   773  			} else {
   774  				// Move constant to a register
   775  				yIsReg = true
   776  				if yVal == xVal {
   777  					yVal = 1
   778  				}
   779  				p := s.Prog(x86.AMOVL)
   780  				p.From.Type = obj.TYPE_CONST
   781  				p.From.Offset = c
   782  				p.To.Type = obj.TYPE_REG
   783  				p.To.Reg = x86.REG_AX + int16(yVal)
   784  			}
   785  		case ssa.Op386LoweredPanicExtendRC:
   786  			extend = true
   787  			xIsReg = true
   788  			hi := int(v.Args[0].Reg() - x86.REG_AX)
   789  			lo := int(v.Args[1].Reg() - x86.REG_AX)
   790  			xVal = hi<<2 + lo // encode 2 register numbers
   791  			c := v.Aux.(ssa.PanicBoundsC).C
   792  			if c >= 0 && c <= abi.BoundsMaxConst {
   793  				yVal = int(c)
   794  			} else {
   795  				// Move constant to a register
   796  				for yVal == hi || yVal == lo {
   797  					yVal++
   798  				}
   799  				p := s.Prog(x86.AMOVL)
   800  				p.From.Type = obj.TYPE_CONST
   801  				p.From.Offset = c
   802  				p.To.Type = obj.TYPE_REG
   803  				p.To.Reg = x86.REG_AX + int16(yVal)
   804  			}
   805  		case ssa.Op386LoweredPanicBoundsCR:
   806  			yIsReg = true
   807  			yVal := int(v.Args[0].Reg() - x86.REG_AX)
   808  			c := v.Aux.(ssa.PanicBoundsC).C
   809  			if c >= 0 && c <= abi.BoundsMaxConst {
   810  				xVal = int(c)
   811  			} else if signed && int64(int32(c)) == c || !signed && int64(uint32(c)) == c {
   812  				// Move constant to a register
   813  				xIsReg = true
   814  				if xVal == yVal {
   815  					xVal = 1
   816  				}
   817  				p := s.Prog(x86.AMOVL)
   818  				p.From.Type = obj.TYPE_CONST
   819  				p.From.Offset = c
   820  				p.To.Type = obj.TYPE_REG
   821  				p.To.Reg = x86.REG_AX + int16(xVal)
   822  			} else {
   823  				// Move constant to two registers
   824  				extend = true
   825  				xIsReg = true
   826  				hi := 0
   827  				lo := 1
   828  				if hi == yVal {
   829  					hi = 2
   830  				}
   831  				if lo == yVal {
   832  					lo = 2
   833  				}
   834  				xVal = hi<<2 + lo
   835  				p := s.Prog(x86.AMOVL)
   836  				p.From.Type = obj.TYPE_CONST
   837  				p.From.Offset = c >> 32
   838  				p.To.Type = obj.TYPE_REG
   839  				p.To.Reg = x86.REG_AX + int16(hi)
   840  				p = s.Prog(x86.AMOVL)
   841  				p.From.Type = obj.TYPE_CONST
   842  				p.From.Offset = int64(int32(c))
   843  				p.To.Type = obj.TYPE_REG
   844  				p.To.Reg = x86.REG_AX + int16(lo)
   845  			}
   846  		case ssa.Op386LoweredPanicBoundsCC:
   847  			c := v.Aux.(ssa.PanicBoundsCC).Cx
   848  			if c >= 0 && c <= abi.BoundsMaxConst {
   849  				xVal = int(c)
   850  			} else if signed && int64(int32(c)) == c || !signed && int64(uint32(c)) == c {
   851  				// Move constant to a register
   852  				xIsReg = true
   853  				p := s.Prog(x86.AMOVL)
   854  				p.From.Type = obj.TYPE_CONST
   855  				p.From.Offset = c
   856  				p.To.Type = obj.TYPE_REG
   857  				p.To.Reg = x86.REG_AX + int16(xVal)
   858  			} else {
   859  				// Move constant to two registers
   860  				extend = true
   861  				xIsReg = true
   862  				hi := 0
   863  				lo := 1
   864  				xVal = hi<<2 + lo
   865  				p := s.Prog(x86.AMOVL)
   866  				p.From.Type = obj.TYPE_CONST
   867  				p.From.Offset = c >> 32
   868  				p.To.Type = obj.TYPE_REG
   869  				p.To.Reg = x86.REG_AX + int16(hi)
   870  				p = s.Prog(x86.AMOVL)
   871  				p.From.Type = obj.TYPE_CONST
   872  				p.From.Offset = int64(int32(c))
   873  				p.To.Type = obj.TYPE_REG
   874  				p.To.Reg = x86.REG_AX + int16(lo)
   875  			}
   876  			c = v.Aux.(ssa.PanicBoundsCC).Cy
   877  			if c >= 0 && c <= abi.BoundsMaxConst {
   878  				yVal = int(c)
   879  			} else {
   880  				// Move constant to a register
   881  				yIsReg = true
   882  				yVal = 2
   883  				p := s.Prog(x86.AMOVL)
   884  				p.From.Type = obj.TYPE_CONST
   885  				p.From.Offset = c
   886  				p.To.Type = obj.TYPE_REG
   887  				p.To.Reg = x86.REG_AX + int16(yVal)
   888  			}
   889  		}
   890  		c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
   891  
   892  		p := s.Prog(obj.APCDATA)
   893  		p.From.SetConst(abi.PCDATA_PanicBounds)
   894  		p.To.SetConst(int64(c))
   895  		p = s.Prog(obj.ACALL)
   896  		p.To.Type = obj.TYPE_MEM
   897  		p.To.Name = obj.NAME_EXTERN
   898  		if extend {
   899  			p.To.Sym = ir.Syms.PanicExtend
   900  		} else {
   901  			p.To.Sym = ir.Syms.PanicBounds
   902  		}
   903  
   904  	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
   905  		s.Call(v)
   906  	case ssa.Op386CALLtail:
   907  		s.TailCall(v)
   908  	case ssa.Op386NEGL,
   909  		ssa.Op386BSWAPL,
   910  		ssa.Op386NOTL:
   911  		p := s.Prog(v.Op.Asm())
   912  		p.To.Type = obj.TYPE_REG
   913  		p.To.Reg = v.Reg()
   914  	case ssa.Op386BSFL, ssa.Op386BSFW,
   915  		ssa.Op386BSRL, ssa.Op386BSRW,
   916  		ssa.Op386SQRTSS, ssa.Op386SQRTSD:
   917  		p := s.Prog(v.Op.Asm())
   918  		p.From.Type = obj.TYPE_REG
   919  		p.From.Reg = v.Args[0].Reg()
   920  		p.To.Type = obj.TYPE_REG
   921  		p.To.Reg = v.Reg()
   922  	case ssa.Op386SETEQ, ssa.Op386SETNE,
   923  		ssa.Op386SETL, ssa.Op386SETLE,
   924  		ssa.Op386SETG, ssa.Op386SETGE,
   925  		ssa.Op386SETGF, ssa.Op386SETGEF,
   926  		ssa.Op386SETB, ssa.Op386SETBE,
   927  		ssa.Op386SETORD, ssa.Op386SETNAN,
   928  		ssa.Op386SETA, ssa.Op386SETAE,
   929  		ssa.Op386SETO:
   930  		p := s.Prog(v.Op.Asm())
   931  		p.To.Type = obj.TYPE_REG
   932  		p.To.Reg = v.Reg()
   933  
   934  	case ssa.Op386SETNEF:
   935  		p := s.Prog(v.Op.Asm())
   936  		p.To.Type = obj.TYPE_REG
   937  		p.To.Reg = v.Reg()
   938  		q := s.Prog(x86.ASETPS)
   939  		q.To.Type = obj.TYPE_REG
   940  		q.To.Reg = x86.REG_AX
   941  		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)
   942  
   943  	case ssa.Op386SETEQF:
   944  		p := s.Prog(v.Op.Asm())
   945  		p.To.Type = obj.TYPE_REG
   946  		p.To.Reg = v.Reg()
   947  		q := s.Prog(x86.ASETPC)
   948  		q.To.Type = obj.TYPE_REG
   949  		q.To.Reg = x86.REG_AX
   950  		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
   951  
   952  	case ssa.Op386InvertFlags:
   953  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
   954  	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
   955  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
   956  	case ssa.Op386REPSTOSL:
   957  		s.Prog(x86.AREP)
   958  		s.Prog(x86.ASTOSL)
   959  	case ssa.Op386REPMOVSL:
   960  		s.Prog(x86.AREP)
   961  		s.Prog(x86.AMOVSL)
   962  	case ssa.Op386LoweredNilCheck:
   963  		// Issue a load which will fault if the input is nil.
   964  		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
   965  		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
   966  		// but it doesn't have false dependency on AX.
   967  		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
   968  		// That trades clobbering flags for clobbering a register.
   969  		p := s.Prog(x86.ATESTB)
   970  		p.From.Type = obj.TYPE_REG
   971  		p.From.Reg = x86.REG_AX
   972  		p.To.Type = obj.TYPE_MEM
   973  		p.To.Reg = v.Args[0].Reg()
   974  		ssagen.AddAux(&p.To, v)
   975  		if logopt.Enabled() {
   976  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
   977  		}
   978  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
   979  			base.WarnfAt(v.Pos, "generated nil check")
   980  		}
   981  	case ssa.Op386LoweredCtz32:
   982  		// BSFL in, out
   983  		p := s.Prog(x86.ABSFL)
   984  		p.From.Type = obj.TYPE_REG
   985  		p.From.Reg = v.Args[0].Reg()
   986  		p.To.Type = obj.TYPE_REG
   987  		p.To.Reg = v.Reg()
   988  
   989  		// JNZ 2(PC)
   990  		p1 := s.Prog(x86.AJNE)
   991  		p1.To.Type = obj.TYPE_BRANCH
   992  
   993  		// MOVL $32, out
   994  		p2 := s.Prog(x86.AMOVL)
   995  		p2.From.Type = obj.TYPE_CONST
   996  		p2.From.Offset = 32
   997  		p2.To.Type = obj.TYPE_REG
   998  		p2.To.Reg = v.Reg()
   999  
  1000  		// NOP (so the JNZ has somewhere to land)
  1001  		nop := s.Prog(obj.ANOP)
  1002  		p1.To.SetTarget(nop)
  1003  	case ssa.Op386LoweredCtz64:
  1004  		if v.Args[0].Reg() == v.Reg() {
  1005  			v.Fatalf("input[0] and output in the same register %s", v.LongString())
  1006  		}
  1007  		if v.Args[1].Reg() == v.Reg() {
  1008  			v.Fatalf("input[1] and output in the same register %s", v.LongString())
  1009  		}
  1010  
  1011  		// BSFL arg0, out
  1012  		p := s.Prog(x86.ABSFL)
  1013  		p.From.Type = obj.TYPE_REG
  1014  		p.From.Reg = v.Args[0].Reg()
  1015  		p.To.Type = obj.TYPE_REG
  1016  		p.To.Reg = v.Reg()
  1017  
  1018  		// JNZ 5(PC)
  1019  		p1 := s.Prog(x86.AJNE)
  1020  		p1.To.Type = obj.TYPE_BRANCH
  1021  
  1022  		// BSFL arg1, out
  1023  		p2 := s.Prog(x86.ABSFL)
  1024  		p2.From.Type = obj.TYPE_REG
  1025  		p2.From.Reg = v.Args[1].Reg()
  1026  		p2.To.Type = obj.TYPE_REG
  1027  		p2.To.Reg = v.Reg()
  1028  
  1029  		// JNZ 2(PC)
  1030  		p3 := s.Prog(x86.AJNE)
  1031  		p3.To.Type = obj.TYPE_BRANCH
  1032  
  1033  		// MOVL $32, out
  1034  		p4 := s.Prog(x86.AMOVL)
  1035  		p4.From.Type = obj.TYPE_CONST
  1036  		p4.From.Offset = 32
  1037  		p4.To.Type = obj.TYPE_REG
  1038  		p4.To.Reg = v.Reg()
  1039  
  1040  		// ADDL $32, out
  1041  		p5 := s.Prog(x86.AADDL)
  1042  		p5.From.Type = obj.TYPE_CONST
  1043  		p5.From.Offset = 32
  1044  		p5.To.Type = obj.TYPE_REG
  1045  		p5.To.Reg = v.Reg()
  1046  		p3.To.SetTarget(p5)
  1047  
  1048  		// NOP (so the JNZ has somewhere to land)
  1049  		nop := s.Prog(obj.ANOP)
  1050  		p1.To.SetTarget(nop)
  1051  
  1052  	case ssa.OpClobber:
  1053  		p := s.Prog(x86.AMOVL)
  1054  		p.From.Type = obj.TYPE_CONST
  1055  		p.From.Offset = 0xdeaddead
  1056  		p.To.Type = obj.TYPE_MEM
  1057  		p.To.Reg = x86.REG_SP
  1058  		ssagen.AddAux(&p.To, v)
  1059  	case ssa.OpClobberReg:
  1060  		// TODO: implement for clobberdead experiment. Nop is ok for now.
  1061  	default:
  1062  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1063  	}
  1064  }
  1065  
// blockJump maps a conditional block kind to its pair of branch
// instructions: asm jumps when the block's condition holds, and invasm
// is the inverted jump used when the taken successor is laid out
// immediately after the block (so only the not-taken path needs a jump).
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386OS:  {x86.AJOS, x86.AJOC},
	ssa.Block386OC:  {x86.AJOC, x86.AJOS},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}
  1084  
// eqfJumps encodes the two-jump sequence for a floating-point equality
// branch (Block386EQF): the comparison is "equal" only when ZF is set
// AND PF is clear (PF set means the operands were unordered/NaN), so a
// single conditional jump does not suffice — compare the SETEQF
// lowering above, which ANDs SETEQ with SETPC. The outer index selects
// which successor is laid out next; Jump/Index give the opcode and the
// successor each jump targets.
var eqfJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
// nefJumps encodes the two-jump sequence for a floating-point
// inequality branch (Block386NEF): "not equal" is true when ZF is clear
// OR PF is set (unordered/NaN compares as unequal) — compare the SETNEF
// lowering above, which ORs SETNE with SETPS. Layout mirrors eqfJumps.
var nefJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}
  1093  
  1094  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  1095  	switch b.Kind {
  1096  	case ssa.BlockPlain, ssa.BlockDefer:
  1097  		if b.Succs[0].Block() != next {
  1098  			p := s.Prog(obj.AJMP)
  1099  			p.To.Type = obj.TYPE_BRANCH
  1100  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  1101  		}
  1102  	case ssa.BlockExit, ssa.BlockRetJmp:
  1103  	case ssa.BlockRet:
  1104  		s.Prog(obj.ARET)
  1105  
  1106  	case ssa.Block386EQF:
  1107  		s.CombJump(b, next, &eqfJumps)
  1108  
  1109  	case ssa.Block386NEF:
  1110  		s.CombJump(b, next, &nefJumps)
  1111  
  1112  	case ssa.Block386EQ, ssa.Block386NE,
  1113  		ssa.Block386LT, ssa.Block386GE,
  1114  		ssa.Block386LE, ssa.Block386GT,
  1115  		ssa.Block386OS, ssa.Block386OC,
  1116  		ssa.Block386ULT, ssa.Block386UGT,
  1117  		ssa.Block386ULE, ssa.Block386UGE:
  1118  		jmp := blockJump[b.Kind]
  1119  		switch next {
  1120  		case b.Succs[0].Block():
  1121  			s.Br(jmp.invasm, b.Succs[1].Block())
  1122  		case b.Succs[1].Block():
  1123  			s.Br(jmp.asm, b.Succs[0].Block())
  1124  		default:
  1125  			if b.Likely != ssa.BranchUnlikely {
  1126  				s.Br(jmp.asm, b.Succs[0].Block())
  1127  				s.Br(obj.AJMP, b.Succs[1].Block())
  1128  			} else {
  1129  				s.Br(jmp.invasm, b.Succs[1].Block())
  1130  				s.Br(obj.AJMP, b.Succs[0].Block())
  1131  			}
  1132  		}
  1133  	default:
  1134  		b.Fatalf("branch not implemented: %s", b.LongString())
  1135  	}
  1136  }
  1137  

View as plain text