Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/logopt"
    10  	"cmd/compile/internal/reflectdata"
    11  	"cmd/compile/internal/types"
    12  	"cmd/internal/obj"
    13  	"cmd/internal/obj/s390x"
    14  	"cmd/internal/objabi"
    15  	"cmd/internal/src"
    16  	"encoding/binary"
    17  	"fmt"
    18  	"internal/buildcfg"
    19  	"io"
    20  	"math"
    21  	"math/bits"
    22  	"os"
    23  	"path/filepath"
    24  	"strings"
    25  )
    26  
    27  type deadValueChoice bool
    28  
    29  const (
    30  	leaveDeadValues  deadValueChoice = false
    31  	removeDeadValues                 = true
    32  )
    33  
    34  // deadcode indicates whether rewrite should try to remove any values that become dead.
    35  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    36  	// repeat rewrites until we find no more rewrites
    37  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    38  	pendingLines.clear()
    39  	debug := f.pass.debug
    40  	if debug > 1 {
    41  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    42  	}
    43  	// if the number of rewrite iterations reaches itersLimit we will
    44  	// at that point turn on cycle detection. Instead of a fixed limit,
    45  	// size the limit according to func size to allow for cases such
    46  	// as the one in issue #66773.
    47  	itersLimit := f.NumBlocks()
    48  	if itersLimit < 20 {
    49  		itersLimit = 20
    50  	}
    51  	var iters int
    52  	var states map[string]bool
    53  	for {
    54  		change := false
    55  		deadChange := false
    56  		for _, b := range f.Blocks {
    57  			var b0 *Block
    58  			if debug > 1 {
    59  				b0 = new(Block)
    60  				*b0 = *b
    61  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    62  			}
    63  			for i, c := range b.ControlValues() {
    64  				for c.Op == OpCopy {
    65  					c = c.Args[0]
    66  					b.ReplaceControl(i, c)
    67  				}
    68  			}
    69  			if rb(b) {
    70  				change = true
    71  				if debug > 1 {
    72  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    73  				}
    74  			}
    75  			for j, v := range b.Values {
    76  				var v0 *Value
    77  				if debug > 1 {
    78  					v0 = new(Value)
    79  					*v0 = *v
    80  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    81  				}
    82  				if v.Uses == 0 && v.removeable() {
    83  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    84  						// Reset any values that are now unused, so that we decrement
    85  						// the use count of all of its arguments.
    86  						// Not quite a deadcode pass, because it does not handle cycles.
    87  						// But it should help Uses==1 rules to fire.
    88  						v.reset(OpInvalid)
    89  						deadChange = true
    90  					}
    91  					// No point rewriting values which aren't used.
    92  					continue
    93  				}
    94  
    95  				vchange := phielimValue(v)
    96  				if vchange && debug > 1 {
    97  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
    98  				}
    99  
   100  				// Eliminate copy inputs.
   101  				// If any copy input becomes unused, mark it
   102  				// as invalid and discard its argument. Repeat
   103  				// recursively on the discarded argument.
   104  				// This phase helps remove phantom "dead copy" uses
    105  				// of a value so that an x.Uses==1 rule condition
   106  				// fires reliably.
   107  				for i, a := range v.Args {
   108  					if a.Op != OpCopy {
   109  						continue
   110  					}
   111  					aa := copySource(a)
   112  					v.SetArg(i, aa)
   113  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   114  					// to hold it.  The first candidate is the value that will replace a (aa),
   115  					// if it shares the same block and line and is eligible.
   116  					// The second option is v, which has a as an input.  Because aa is earlier in
   117  					// the data flow, it is the better choice.
   118  					if a.Pos.IsStmt() == src.PosIsStmt {
   119  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   120  							aa.Pos = aa.Pos.WithIsStmt()
   121  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   122  							v.Pos = v.Pos.WithIsStmt()
   123  						} else {
   124  							// Record the lost line and look for a new home after all rewrites are complete.
   125  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   126  							// line to appear in more than one block, but only one block is stored, so if both end
   127  							// up here, then one will be lost.
   128  							pendingLines.set(a.Pos, int32(a.Block.ID))
   129  						}
   130  						a.Pos = a.Pos.WithNotStmt()
   131  					}
   132  					vchange = true
   133  					for a.Uses == 0 {
   134  						b := a.Args[0]
   135  						a.reset(OpInvalid)
   136  						a = b
   137  					}
   138  				}
   139  				if vchange && debug > 1 {
   140  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   141  				}
   142  
   143  				// apply rewrite function
   144  				if rv(v) {
   145  					vchange = true
   146  					// If value changed to a poor choice for a statement boundary, move the boundary
   147  					if v.Pos.IsStmt() == src.PosIsStmt {
   148  						if k := nextGoodStatementIndex(v, j, b); k != j {
   149  							v.Pos = v.Pos.WithNotStmt()
   150  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   151  						}
   152  					}
   153  				}
   154  
   155  				change = change || vchange
   156  				if vchange && debug > 1 {
   157  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   158  				}
   159  			}
   160  		}
   161  		if !change && !deadChange {
   162  			break
   163  		}
   164  		iters++
   165  		if (iters > itersLimit || debug >= 2) && change {
   166  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   167  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   168  			// and the maximum value encountered during make.bash is 12.
   169  			// Start checking for cycles. (This is too expensive to do routinely.)
   170  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   171  			if states == nil {
   172  				states = make(map[string]bool)
   173  			}
   174  			h := f.rewriteHash()
   175  			if _, ok := states[h]; ok {
   176  				// We've found a cycle.
   177  				// To diagnose it, set debug to 2 and start again,
   178  				// so that we'll print all rules applied until we complete another cycle.
   179  				// If debug is already >= 2, we've already done that, so it's time to crash.
   180  				if debug < 2 {
   181  					debug = 2
   182  					states = make(map[string]bool)
   183  				} else {
   184  					f.Fatalf("rewrite cycle detected")
   185  				}
   186  			}
   187  			states[h] = true
   188  		}
   189  	}
   190  	// remove clobbered values
   191  	for _, b := range f.Blocks {
   192  		j := 0
   193  		for i, v := range b.Values {
   194  			vl := v.Pos
   195  			if v.Op == OpInvalid {
   196  				if v.Pos.IsStmt() == src.PosIsStmt {
   197  					pendingLines.set(vl, int32(b.ID))
   198  				}
   199  				f.freeValue(v)
   200  				continue
   201  			}
   202  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) {
   203  				if pl, ok := pendingLines.get(vl); ok && pl == int32(b.ID) {
   204  					pendingLines.remove(vl)
   205  					v.Pos = v.Pos.WithIsStmt()
   206  				}
   207  			}
   208  			if i != j {
   209  				b.Values[j] = v
   210  			}
   211  			j++
   212  		}
   213  		if pl, ok := pendingLines.get(b.Pos); ok && pl == int32(b.ID) {
   214  			b.Pos = b.Pos.WithIsStmt()
   215  			pendingLines.remove(b.Pos)
   216  		}
   217  		b.truncateValues(j)
   218  	}
   219  }
   220  
   221  // Common functions called from rewriting rules
   222  
   223  func is64BitFloat(t *types.Type) bool {
   224  	return t.Size() == 8 && t.IsFloat()
   225  }
   226  
   227  func is32BitFloat(t *types.Type) bool {
   228  	return t.Size() == 4 && t.IsFloat()
   229  }
   230  
   231  func is64BitInt(t *types.Type) bool {
   232  	return t.Size() == 8 && t.IsInteger()
   233  }
   234  
   235  func is32BitInt(t *types.Type) bool {
   236  	return t.Size() == 4 && t.IsInteger()
   237  }
   238  
   239  func is16BitInt(t *types.Type) bool {
   240  	return t.Size() == 2 && t.IsInteger()
   241  }
   242  
   243  func is8BitInt(t *types.Type) bool {
   244  	return t.Size() == 1 && t.IsInteger()
   245  }
   246  
   247  func isPtr(t *types.Type) bool {
   248  	return t.IsPtrShaped()
   249  }
   250  
   251  func copyCompatibleType(t1, t2 *types.Type) bool {
   252  	if t1.Size() != t2.Size() {
   253  		return false
   254  	}
   255  	if t1.IsInteger() {
   256  		return t2.IsInteger()
   257  	}
   258  	if isPtr(t1) {
   259  		return isPtr(t2)
   260  	}
   261  	return t1.Compare(t2) == types.CMPeq
   262  }
   263  
   264  // mergeSym merges two symbolic offsets. There is no real merging of
   265  // offsets, we just pick the non-nil one.
   266  func mergeSym(x, y Sym) Sym {
   267  	if x == nil {
   268  		return y
   269  	}
   270  	if y == nil {
   271  		return x
   272  	}
   273  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   274  }
   275  
   276  func canMergeSym(x, y Sym) bool {
   277  	return x == nil || y == nil
   278  }
   279  
   280  // canMergeLoadClobber reports whether the load can be merged into target without
   281  // invalidating the schedule.
   282  // It also checks that the other non-load argument x is something we
   283  // are ok with clobbering.
   284  func canMergeLoadClobber(target, load, x *Value) bool {
   285  	// The register containing x is going to get clobbered.
   286  	// Don't merge if we still need the value of x.
   287  	// We don't have liveness information here, but we can
   288  	// approximate x dying with:
   289  	//  1) target is x's only use.
   290  	//  2) target is not in a deeper loop than x.
   291  	switch {
   292  	case x.Uses == 2 && x.Op == OpPhi && len(x.Args) == 2 && (x.Args[0] == target || x.Args[1] == target) && target.Uses == 1:
   293  		// This is a simple detector to determine that x is probably
   294  		// not live after target. (It does not need to be perfect,
   295  		// regalloc will issue a reg-reg move to save it if we are wrong.)
   296  		// We have:
   297  		//   x = Phi(?, target)
   298  		//   target = Op(load, x)
   299  		// Because target has only one use as a Phi argument, we can schedule it
   300  		// very late. Hopefully, later than the other use of x. (The other use died
   301  		// between x and target, or exists on another branch entirely).
   302  	case x.Uses > 1:
   303  		return false
   304  	}
   305  	loopnest := x.Block.Func.loopnest()
   306  	loopnest.calculateDepths()
   307  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   308  		return false
   309  	}
   310  	return canMergeLoad(target, load)
   311  }
   312  
   313  // canMergeLoad reports whether the load can be merged into target without
   314  // invalidating the schedule.
   315  func canMergeLoad(target, load *Value) bool {
   316  	if target.Block.ID != load.Block.ID {
   317  		// If the load is in a different block do not merge it.
   318  		return false
   319  	}
   320  
   321  	// We can't merge the load into the target if the load
   322  	// has more than one use.
   323  	if load.Uses != 1 {
   324  		return false
   325  	}
   326  
   327  	mem := load.MemoryArg()
   328  
   329  	// We need the load's memory arg to still be alive at target. That
   330  	// can't be the case if one of target's args depends on a memory
   331  	// state that is a successor of load's memory arg.
   332  	//
   333  	// For example, it would be invalid to merge load into target in
   334  	// the following situation because newmem has killed oldmem
   335  	// before target is reached:
   336  	//     load = read ... oldmem
   337  	//   newmem = write ... oldmem
   338  	//     arg0 = read ... newmem
   339  	//   target = add arg0 load
   340  	//
   341  	// If the argument comes from a different block then we can exclude
   342  	// it immediately because it must dominate load (which is in the
   343  	// same block as target).
   344  	var args []*Value
   345  	for _, a := range target.Args {
   346  		if a != load && a.Block.ID == target.Block.ID {
   347  			args = append(args, a)
   348  		}
   349  	}
   350  
   351  	// memPreds contains memory states known to be predecessors of load's
   352  	// memory state. It is lazily initialized.
   353  	var memPreds map[*Value]bool
   354  	for i := 0; len(args) > 0; i++ {
   355  		const limit = 100
   356  		if i >= limit {
   357  			// Give up if we have done a lot of iterations.
   358  			return false
   359  		}
   360  		v := args[len(args)-1]
   361  		args = args[:len(args)-1]
   362  		if target.Block.ID != v.Block.ID {
   363  			// Since target and load are in the same block
   364  			// we can stop searching when we leave the block.
   365  			continue
   366  		}
   367  		if v.Op == OpPhi {
   368  			// A Phi implies we have reached the top of the block.
   369  			// The memory phi, if it exists, is always
   370  			// the first logical store in the block.
   371  			continue
   372  		}
   373  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   374  			// We could handle this situation however it is likely
   375  			// to be very rare.
   376  			return false
   377  		}
   378  		if v.Op.SymEffect()&SymAddr != 0 {
   379  			// This case prevents an operation that calculates the
   380  			// address of a local variable from being forced to schedule
   381  			// before its corresponding VarDef.
   382  			// See issue 28445.
   383  			//   v1 = LOAD ...
   384  			//   v2 = VARDEF
   385  			//   v3 = LEAQ
   386  			//   v4 = CMPQ v1 v3
   387  			// We don't want to combine the CMPQ with the load, because
   388  			// that would force the CMPQ to schedule before the VARDEF, which
   389  			// in turn requires the LEAQ to schedule before the VARDEF.
   390  			return false
   391  		}
   392  		if v.Type.IsMemory() {
   393  			if memPreds == nil {
   394  				// Initialise a map containing memory states
   395  				// known to be predecessors of load's memory
   396  				// state.
   397  				memPreds = make(map[*Value]bool)
   398  				m := mem
   399  				const limit = 50
   400  				for i := 0; i < limit; i++ {
   401  					if m.Op == OpPhi {
   402  						// The memory phi, if it exists, is always
   403  						// the first logical store in the block.
   404  						break
   405  					}
   406  					if m.Block.ID != target.Block.ID {
   407  						break
   408  					}
   409  					if !m.Type.IsMemory() {
   410  						break
   411  					}
   412  					memPreds[m] = true
   413  					if len(m.Args) == 0 {
   414  						break
   415  					}
   416  					m = m.MemoryArg()
   417  				}
   418  			}
   419  
   420  			// We can merge if v is a predecessor of mem.
   421  			//
   422  			// For example, we can merge load into target in the
   423  			// following scenario:
   424  			//      x = read ... v
   425  			//    mem = write ... v
   426  			//   load = read ... mem
   427  			// target = add x load
   428  			if memPreds[v] {
   429  				continue
   430  			}
   431  			return false
   432  		}
   433  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   434  			// If v takes mem as an input then we know mem
   435  			// is valid at this point.
   436  			continue
   437  		}
   438  		for _, a := range v.Args {
   439  			if target.Block.ID == a.Block.ID {
   440  				args = append(args, a)
   441  			}
   442  		}
   443  	}
   444  
   445  	return true
   446  }
   447  
   448  // isSameCall reports whether aux is the same as the given named symbol.
   449  func isSameCall(aux Aux, name string) bool {
   450  	fn := aux.(*AuxCall).Fn
   451  	return fn != nil && fn.String() == name
   452  }
   453  
   454  // canLoadUnaligned reports if the architecture supports unaligned load operations.
   455  func canLoadUnaligned(c *Config) bool {
   456  	return c.ctxt.Arch.Alignment == 1
   457  }
   458  
   459  // nlzX returns the number of leading zeros.
   460  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   461  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   462  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   463  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   464  
   465  // ntzX returns the number of trailing zeros.
   466  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   467  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   468  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   469  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   470  
   471  func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
   472  func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
   473  func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
   474  func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
   475  func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
   476  
   477  // nto returns the number of trailing ones.
   478  func nto(x int64) int64 {
   479  	return int64(ntz64(^x))
   480  }
   481  
    482  // logX returns the base-2 logarithm of n.
    483  // n must be a positive power of 2 (isPowerOfTwo returns true).
   484  func log8(n int8) int64 {
   485  	return int64(bits.Len8(uint8(n))) - 1
   486  }
   487  func log16(n int16) int64 {
   488  	return int64(bits.Len16(uint16(n))) - 1
   489  }
   490  func log32(n int32) int64 {
   491  	return int64(bits.Len32(uint32(n))) - 1
   492  }
   493  func log64(n int64) int64 {
   494  	return int64(bits.Len64(uint64(n))) - 1
   495  }
   496  
   497  // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
   498  // Rounds down.
   499  func log2uint32(n int64) int64 {
   500  	return int64(bits.Len32(uint32(n))) - 1
   501  }
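
        // Editor's note: illustrative examples, not part of the original source.
        //
        //	log8(8)       == 3
        //	log64(1)      == 0
        //	log2uint32(0) == -1 // by the convention noted above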
   502  
    503  // isPowerOfTwo reports whether n is a power of 2.
   504  func isPowerOfTwo[T int8 | int16 | int32 | int64](n T) bool {
   505  	return n > 0 && n&(n-1) == 0
   506  }
   507  
   508  // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
   509  func isUint64PowerOfTwo(in int64) bool {
   510  	n := uint64(in)
   511  	return n > 0 && n&(n-1) == 0
   512  }
   513  
   514  // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
   515  func isUint32PowerOfTwo(in int64) bool {
   516  	n := uint64(uint32(in))
   517  	return n > 0 && n&(n-1) == 0
   518  }
   519  
   520  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   521  func is32Bit(n int64) bool {
   522  	return n == int64(int32(n))
   523  }
   524  
   525  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   526  func is16Bit(n int64) bool {
   527  	return n == int64(int16(n))
   528  }
   529  
   530  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   531  func is8Bit(n int64) bool {
   532  	return n == int64(int8(n))
   533  }
   534  
   535  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   536  func isU8Bit(n int64) bool {
   537  	return n == int64(uint8(n))
   538  }
   539  
   540  // is12Bit reports whether n can be represented as a signed 12 bit integer.
   541  func is12Bit(n int64) bool {
   542  	return -(1<<11) <= n && n < (1<<11)
   543  }
   544  
   545  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   546  func isU12Bit(n int64) bool {
   547  	return 0 <= n && n < (1<<12)
   548  }
   549  
   550  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   551  func isU16Bit(n int64) bool {
   552  	return n == int64(uint16(n))
   553  }
   554  
   555  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   556  func isU32Bit(n int64) bool {
   557  	return n == int64(uint32(n))
   558  }
   559  
   560  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   561  func is20Bit(n int64) bool {
   562  	return -(1<<19) <= n && n < (1<<19)
   563  }
   564  
   565  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   566  func b2i(b bool) int64 {
   567  	if b {
   568  		return 1
   569  	}
   570  	return 0
   571  }
   572  
   573  // b2i32 translates a boolean value to 0 or 1.
   574  func b2i32(b bool) int32 {
   575  	if b {
   576  		return 1
   577  	}
   578  	return 0
   579  }
   580  
   581  func canMulStrengthReduce(config *Config, x int64) bool {
   582  	_, ok := config.mulRecipes[x]
   583  	return ok
   584  }
   585  func canMulStrengthReduce32(config *Config, x int32) bool {
   586  	_, ok := config.mulRecipes[int64(x)]
   587  	return ok
   588  }
   589  
   590  // mulStrengthReduce returns v*x evaluated at the location
   591  // (block and source position) of m.
   592  // canMulStrengthReduce must have returned true.
   593  func mulStrengthReduce(m *Value, v *Value, x int64) *Value {
   594  	return v.Block.Func.Config.mulRecipes[x].build(m, v)
   595  }
   596  
   597  // mulStrengthReduce32 returns v*x evaluated at the location
   598  // (block and source position) of m.
   599  // canMulStrengthReduce32 must have returned true.
   600  // The upper 32 bits of m might be set to junk.
   601  func mulStrengthReduce32(m *Value, v *Value, x int32) *Value {
   602  	return v.Block.Func.Config.mulRecipes[int64(x)].build(m, v)
   603  }
   604  
   605  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   606  // A shift is bounded if it is shifting by less than the width of the shifted value.
   607  func shiftIsBounded(v *Value) bool {
   608  	return v.AuxInt != 0
   609  }
   610  
   611  // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
   612  // generated code as much as possible.
   613  func canonLessThan(x, y *Value) bool {
   614  	if x.Op != y.Op {
   615  		return x.Op < y.Op
   616  	}
   617  	if !x.Pos.SameFileAndLine(y.Pos) {
   618  		return x.Pos.Before(y.Pos)
   619  	}
   620  	return x.ID < y.ID
   621  }
   622  
   623  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   624  // of the mantissa. It will panic if the truncation results in lost information.
   625  func truncate64Fto32F(f float64) float32 {
   626  	if !isExactFloat32(f) {
   627  		panic("truncate64Fto32F: truncation is not exact")
   628  	}
   629  	if !math.IsNaN(f) {
   630  		return float32(f)
   631  	}
   632  	// NaN bit patterns aren't necessarily preserved across conversion
   633  	// instructions so we need to do the conversion manually.
   634  	b := math.Float64bits(f)
   635  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   636  	//          | sign                  | exponent   | mantissa       |
   637  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   638  	return math.Float32frombits(r)
   639  }
   640  
   641  // extend32Fto64F converts a float32 value to a float64 value preserving the bit
   642  // pattern of the mantissa.
   643  func extend32Fto64F(f float32) float64 {
   644  	if !math.IsNaN(float64(f)) {
   645  		return float64(f)
   646  	}
   647  	// NaN bit patterns aren't necessarily preserved across conversion
   648  	// instructions so we need to do the conversion manually.
   649  	b := uint64(math.Float32bits(f))
   650  	//   | sign                  | exponent      | mantissa                    |
   651  	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
   652  	return math.Float64frombits(r)
   653  }
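
        // Editor's note (illustrative, not part of the original source): a float32 has
        // 1 sign, 8 exponent and 23 mantissa bits, while a float64 has 1, 11 and 52,
        // so the NaN payload above is shifted left by 52-23 = 29 bits. For example,
        // the float32 NaN with bits 0x7fc00001 extends to the float64 bit pattern
        // 0x7ff8000020000000 (payload 0x400001 << 29).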
   654  
   655  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   656  func DivisionNeedsFixUp(v *Value) bool {
   657  	return v.AuxInt == 0
   658  }
   659  
   660  // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
   661  func auxFrom64F(f float64) int64 {
   662  	if f != f {
   663  		panic("can't encode a NaN in AuxInt field")
   664  	}
   665  	return int64(math.Float64bits(f))
   666  }
   667  
   668  // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
   669  func auxFrom32F(f float32) int64 {
   670  	if f != f {
   671  		panic("can't encode a NaN in AuxInt field")
   672  	}
   673  	return int64(math.Float64bits(extend32Fto64F(f)))
   674  }
   675  
   676  // auxTo32F decodes a float32 from the AuxInt value provided.
   677  func auxTo32F(i int64) float32 {
   678  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   679  }
   680  
   681  // auxTo64F decodes a float64 from the AuxInt value provided.
   682  func auxTo64F(i int64) float64 {
   683  	return math.Float64frombits(uint64(i))
   684  }
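
        // Editor's note: both float32 and float64 constants are stored in AuxInt as
        // float64 bit patterns, so the helpers above round-trip, e.g. (illustrative,
        // not part of the original source):
        //
        //	auxTo64F(auxFrom64F(2.5)) == 2.5
        //	auxTo32F(auxFrom32F(1.5)) == 1.5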
   685  
   686  func auxIntToBool(i int64) bool {
   687  	if i == 0 {
   688  		return false
   689  	}
   690  	return true
   691  }
   692  func auxIntToInt8(i int64) int8 {
   693  	return int8(i)
   694  }
   695  func auxIntToInt16(i int64) int16 {
   696  	return int16(i)
   697  }
   698  func auxIntToInt32(i int64) int32 {
   699  	return int32(i)
   700  }
   701  func auxIntToInt64(i int64) int64 {
   702  	return i
   703  }
   704  func auxIntToUint8(i int64) uint8 {
   705  	return uint8(i)
   706  }
   707  func auxIntToFloat32(i int64) float32 {
   708  	return float32(math.Float64frombits(uint64(i)))
   709  }
   710  func auxIntToFloat64(i int64) float64 {
   711  	return math.Float64frombits(uint64(i))
   712  }
   713  func auxIntToValAndOff(i int64) ValAndOff {
   714  	return ValAndOff(i)
   715  }
   716  func auxIntToArm64BitField(i int64) arm64BitField {
   717  	return arm64BitField(i)
   718  }
   719  func auxIntToInt128(x int64) int128 {
   720  	if x != 0 {
   721  		panic("nonzero int128 not allowed")
   722  	}
   723  	return 0
   724  }
   725  func auxIntToFlagConstant(x int64) flagConstant {
   726  	return flagConstant(x)
   727  }
   728  
   729  func auxIntToOp(cc int64) Op {
   730  	return Op(cc)
   731  }
   732  
   733  func boolToAuxInt(b bool) int64 {
   734  	if b {
   735  		return 1
   736  	}
   737  	return 0
   738  }
   739  func int8ToAuxInt(i int8) int64 {
   740  	return int64(i)
   741  }
   742  func int16ToAuxInt(i int16) int64 {
   743  	return int64(i)
   744  }
   745  func int32ToAuxInt(i int32) int64 {
   746  	return int64(i)
   747  }
   748  func int64ToAuxInt(i int64) int64 {
   749  	return int64(i)
   750  }
   751  func uint8ToAuxInt(i uint8) int64 {
   752  	return int64(int8(i))
   753  }
   754  func float32ToAuxInt(f float32) int64 {
   755  	return int64(math.Float64bits(float64(f)))
   756  }
   757  func float64ToAuxInt(f float64) int64 {
   758  	return int64(math.Float64bits(f))
   759  }
   760  func valAndOffToAuxInt(v ValAndOff) int64 {
   761  	return int64(v)
   762  }
   763  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   764  	return int64(v)
   765  }
   766  func int128ToAuxInt(x int128) int64 {
   767  	if x != 0 {
   768  		panic("nonzero int128 not allowed")
   769  	}
   770  	return 0
   771  }
   772  func flagConstantToAuxInt(x flagConstant) int64 {
   773  	return int64(x)
   774  }
   775  
   776  func opToAuxInt(o Op) int64 {
   777  	return int64(o)
   778  }
   779  
   780  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   781  type Aux interface {
   782  	CanBeAnSSAAux()
   783  }
   784  
   785  // for now only used to mark moves that need to avoid clobbering flags
   786  type auxMark bool
   787  
   788  func (auxMark) CanBeAnSSAAux() {}
   789  
   790  var AuxMark auxMark
   791  
   792  // stringAux wraps string values for use in Aux.
   793  type stringAux string
   794  
   795  func (stringAux) CanBeAnSSAAux() {}
   796  
   797  func auxToString(i Aux) string {
   798  	return string(i.(stringAux))
   799  }
   800  func auxToSym(i Aux) Sym {
   801  	// TODO: kind of a hack - allows nil interface through
   802  	s, _ := i.(Sym)
   803  	return s
   804  }
   805  func auxToType(i Aux) *types.Type {
   806  	return i.(*types.Type)
   807  }
   808  func auxToCall(i Aux) *AuxCall {
   809  	return i.(*AuxCall)
   810  }
   811  func auxToS390xCCMask(i Aux) s390x.CCMask {
   812  	return i.(s390x.CCMask)
   813  }
   814  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   815  	return i.(s390x.RotateParams)
   816  }
   817  
   818  func StringToAux(s string) Aux {
   819  	return stringAux(s)
   820  }
   821  func symToAux(s Sym) Aux {
   822  	return s
   823  }
   824  func callToAux(s *AuxCall) Aux {
   825  	return s
   826  }
   827  func typeToAux(t *types.Type) Aux {
   828  	return t
   829  }
   830  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   831  	return c
   832  }
   833  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   834  	return r
   835  }
   836  
   837  // uaddOvf reports whether unsigned a+b would overflow.
   838  func uaddOvf(a, b int64) bool {
   839  	return uint64(a)+uint64(b) < uint64(a)
   840  }
   841  
   842  // loadLSymOffset simulates reading a word at an offset into a
   843  // read-only symbol's runtime memory. If it would read a pointer to
   844  // another symbol, that symbol is returned. Otherwise, it returns nil.
   845  func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
   846  	if lsym.Type != objabi.SRODATA {
   847  		return nil
   848  	}
   849  
   850  	for _, r := range lsym.R {
   851  		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
   852  			return r.Sym
   853  		}
   854  	}
   855  
   856  	return nil
   857  }
   858  
   859  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   860  	v.Op = OpStaticLECall
   861  	auxcall := v.Aux.(*AuxCall)
   862  	auxcall.Fn = sym
   863  	// Remove first arg
   864  	v.Args[0].Uses--
   865  	copy(v.Args[0:], v.Args[1:])
   866  	v.Args[len(v.Args)-1] = nil // aid GC
   867  	v.Args = v.Args[:len(v.Args)-1]
   868  	if f := v.Block.Func; f.pass.debug > 0 {
   869  		f.Warnl(v.Pos, "de-virtualizing call")
   870  	}
   871  	return v
   872  }
   873  
   874  // isSamePtr reports whether p1 and p2 point to the same address.
   875  func isSamePtr(p1, p2 *Value) bool {
   876  	if p1 == p2 {
   877  		return true
   878  	}
   879  	if p1.Op != p2.Op {
   880  		for p1.Op == OpOffPtr && p1.AuxInt == 0 {
   881  			p1 = p1.Args[0]
   882  		}
   883  		for p2.Op == OpOffPtr && p2.AuxInt == 0 {
   884  			p2 = p2.Args[0]
   885  		}
   886  		if p1 == p2 {
   887  			return true
   888  		}
   889  		if p1.Op != p2.Op {
   890  			return false
   891  		}
   892  	}
   893  	switch p1.Op {
   894  	case OpOffPtr:
   895  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   896  	case OpAddr, OpLocalAddr:
   897  		return p1.Aux == p2.Aux
   898  	case OpAddPtr:
   899  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   900  	}
   901  	return false
   902  }
   903  
   904  func isStackPtr(v *Value) bool {
   905  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   906  		v = v.Args[0]
   907  	}
   908  	return v.Op == OpSP || v.Op == OpLocalAddr
   909  }
   910  
   911  // disjoint reports whether the memory region specified by [p1:p1+n1)
   912  // does not overlap with [p2:p2+n2).
   913  // A return value of false does not imply the regions overlap.
   914  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   915  	if n1 == 0 || n2 == 0 {
   916  		return true
   917  	}
   918  	if p1 == p2 {
   919  		return false
   920  	}
   921  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   922  		base, offset = ptr, 0
   923  		for base.Op == OpOffPtr {
   924  			offset += base.AuxInt
   925  			base = base.Args[0]
   926  		}
   927  		if opcodeTable[base.Op].nilCheck {
   928  			base = base.Args[0]
   929  		}
   930  		return base, offset
   931  	}
   932  
   933  	// Run types-based analysis
   934  	if disjointTypes(p1.Type, p2.Type) {
   935  		return true
   936  	}
   937  
   938  	p1, off1 := baseAndOffset(p1)
   939  	p2, off2 := baseAndOffset(p2)
   940  	if isSamePtr(p1, p2) {
   941  		return !overlap(off1, n1, off2, n2)
   942  	}
   943  	// p1 and p2 are not the same, so if they are both OpAddrs then
   944  	// they point to different variables.
   945  	// If one pointer is on the stack and the other is an argument
   946  	// then they can't overlap.
   947  	switch p1.Op {
   948  	case OpAddr, OpLocalAddr:
   949  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   950  			return true
   951  		}
   952  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   953  	case OpArg, OpArgIntReg:
   954  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   955  			return true
   956  		}
   957  	case OpSP:
   958  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   959  	}
   960  	return false
   961  }
   962  
   963  // disjointTypes reports whether a memory region pointed to by a pointer of type
   964  // t1 does not overlap with a memory region pointed to by a pointer of type t2 --
   965  // based on type aliasing rules.
   966  func disjointTypes(t1 *types.Type, t2 *types.Type) bool {
   967  	// Unsafe pointer can alias with anything.
   968  	if t1.IsUnsafePtr() || t2.IsUnsafePtr() {
   969  		return false
   970  	}
   971  
   972  	if !t1.IsPtr() || !t2.IsPtr() {
   973  		panic("disjointTypes: one of arguments is not a pointer")
   974  	}
   975  
   976  	t1 = t1.Elem()
   977  	t2 = t2.Elem()
   978  
    979  // Not-in-heap types are not supported -- they are rare and unimportant; also,
    980  // the type.HasPointers check doesn't work correctly for them.
   981  	if t1.NotInHeap() || t2.NotInHeap() {
   982  		return false
   983  	}
   984  
   985  	isPtrShaped := func(t *types.Type) bool { return int(t.Size()) == types.PtrSize && t.HasPointers() }
   986  
   987  	// Pointers and non-pointers are disjoint (https://pkg.go.dev/unsafe#Pointer).
   988  	if (isPtrShaped(t1) && !t2.HasPointers()) ||
   989  		(isPtrShaped(t2) && !t1.HasPointers()) {
   990  		return true
   991  	}
   992  
   993  	return false
   994  }
   995  
   996  // moveSize returns the number of bytes an aligned MOV instruction moves.
   997  func moveSize(align int64, c *Config) int64 {
   998  	switch {
   999  	case align%8 == 0 && c.PtrSize == 8:
  1000  		return 8
  1001  	case align%4 == 0:
  1002  		return 4
  1003  	case align%2 == 0:
  1004  		return 2
  1005  	}
  1006  	return 1
  1007  }
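
        // Editor's illustrative examples (not part of the original source), assuming a
        // configuration c with c.PtrSize == 8:
        //
        //	moveSize(16, c) == 8 // 8-byte aligned, 64-bit pointers
        //	moveSize(6, c)  == 2 // only 2-byte alignment is guaranteed
        //	moveSize(3, c)  == 1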
  1008  
  1009  // mergePoint finds a block among a's blocks which dominates b and is itself
  1010  // dominated by all of a's blocks. Returns nil if it can't find one.
  1011  // Might return nil even if one does exist.
  1012  func mergePoint(b *Block, a ...*Value) *Block {
  1013  	// Walk backward from b looking for one of the a's blocks.
  1014  
  1015  	// Max distance
  1016  	d := 100
  1017  
  1018  	for d > 0 {
  1019  		for _, x := range a {
  1020  			if b == x.Block {
  1021  				goto found
  1022  			}
  1023  		}
  1024  		if len(b.Preds) > 1 {
  1025  			// Don't know which way to go back. Abort.
  1026  			return nil
  1027  		}
  1028  		b = b.Preds[0].b
  1029  		d--
  1030  	}
  1031  	return nil // too far away
  1032  found:
   1033  	// At this point, b is the block of the first value in a found by walking backwards.
   1034  	// If we return anything, it will be r (this block).
  1035  	r := b
  1036  
  1037  	// Keep going, counting the other a's that we find. They must all dominate r.
  1038  	na := 0
  1039  	for d > 0 {
  1040  		for _, x := range a {
  1041  			if b == x.Block {
  1042  				na++
  1043  			}
  1044  		}
  1045  		if na == len(a) {
  1046  			// Found all of a in a backwards walk. We can return r.
  1047  			return r
  1048  		}
  1049  		if len(b.Preds) > 1 {
  1050  			return nil
  1051  		}
  1052  		b = b.Preds[0].b
  1053  		d--
  1054  
  1055  	}
  1056  	return nil // too far away
  1057  }
  1058  
  1059  // clobber invalidates values. Returns true.
  1060  // clobber is used by rewrite rules to:
  1061  //
  1062  //	A) make sure the values are really dead and never used again.
  1063  //	B) decrement use counts of the values' args.
  1064  func clobber(vv ...*Value) bool {
  1065  	for _, v := range vv {
  1066  		v.reset(OpInvalid)
  1067  		// Note: leave v.Block intact.  The Block field is used after clobber.
  1068  	}
  1069  	return true
  1070  }
  1071  
  1072  // resetCopy resets v to be a copy of arg.
  1073  // Always returns true.
  1074  func resetCopy(v *Value, arg *Value) bool {
  1075  	v.reset(OpCopy)
  1076  	v.AddArg(arg)
  1077  	return true
  1078  }
  1079  
  1080  // clobberIfDead resets v when use count is 1. Returns true.
  1081  // clobberIfDead is used by rewrite rules to decrement
  1082  // use counts of v's args when v is dead and never used.
  1083  func clobberIfDead(v *Value) bool {
  1084  	if v.Uses == 1 {
  1085  		v.reset(OpInvalid)
  1086  	}
  1087  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
  1088  	return true
  1089  }
  1090  
  1091  // noteRule is an easy way to track if a rule is matched when writing
  1092  // new ones.  Make the rule of interest also conditional on
  1093  //
  1094  //	noteRule("note to self: rule of interest matched")
  1095  //
  1096  // and that message will print when the rule matches.
  1097  func noteRule(s string) bool {
  1098  	fmt.Println(s)
  1099  	return true
  1100  }
  1101  
  1102  // countRule increments Func.ruleMatches[key].
  1103  // If Func.ruleMatches is non-nil at the end
  1104  // of compilation, it will be printed to stdout.
   1105  // This is intended to make it easier to find which functions
   1106  // contain lots of rule matches when developing new rules.
  1107  func countRule(v *Value, key string) bool {
  1108  	f := v.Block.Func
  1109  	if f.ruleMatches == nil {
  1110  		f.ruleMatches = make(map[string]int)
  1111  	}
  1112  	f.ruleMatches[key]++
  1113  	return true
  1114  }
  1115  
  1116  // warnRule generates compiler debug output with string s when
  1117  // v is not in autogenerated code, cond is true and the rule has fired.
  1118  func warnRule(cond bool, v *Value, s string) bool {
  1119  	if pos := v.Pos; pos.Line() > 1 && cond {
  1120  		v.Block.Func.Warnl(pos, s)
  1121  	}
  1122  	return true
  1123  }
  1124  
  1125  // for a pseudo-op like (LessThan x), extract x.
  1126  func flagArg(v *Value) *Value {
  1127  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1128  		return nil
  1129  	}
  1130  	return v.Args[0]
  1131  }
  1132  
  1133  // arm64Negate finds the complement to an ARM64 condition code,
  1134  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1135  //
  1136  // For floating point, it's more subtle because NaN is unordered. We do
  1137  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1138  func arm64Negate(op Op) Op {
  1139  	switch op {
  1140  	case OpARM64LessThan:
  1141  		return OpARM64GreaterEqual
  1142  	case OpARM64LessThanU:
  1143  		return OpARM64GreaterEqualU
  1144  	case OpARM64GreaterThan:
  1145  		return OpARM64LessEqual
  1146  	case OpARM64GreaterThanU:
  1147  		return OpARM64LessEqualU
  1148  	case OpARM64LessEqual:
  1149  		return OpARM64GreaterThan
  1150  	case OpARM64LessEqualU:
  1151  		return OpARM64GreaterThanU
  1152  	case OpARM64GreaterEqual:
  1153  		return OpARM64LessThan
  1154  	case OpARM64GreaterEqualU:
  1155  		return OpARM64LessThanU
  1156  	case OpARM64Equal:
  1157  		return OpARM64NotEqual
  1158  	case OpARM64NotEqual:
  1159  		return OpARM64Equal
  1160  	case OpARM64LessThanF:
  1161  		return OpARM64NotLessThanF
  1162  	case OpARM64NotLessThanF:
  1163  		return OpARM64LessThanF
  1164  	case OpARM64LessEqualF:
  1165  		return OpARM64NotLessEqualF
  1166  	case OpARM64NotLessEqualF:
  1167  		return OpARM64LessEqualF
  1168  	case OpARM64GreaterThanF:
  1169  		return OpARM64NotGreaterThanF
  1170  	case OpARM64NotGreaterThanF:
  1171  		return OpARM64GreaterThanF
  1172  	case OpARM64GreaterEqualF:
  1173  		return OpARM64NotGreaterEqualF
  1174  	case OpARM64NotGreaterEqualF:
  1175  		return OpARM64GreaterEqualF
  1176  	default:
  1177  		panic("unreachable")
  1178  	}
  1179  }
  1180  
  1181  // arm64Invert evaluates (InvertFlags op), which
  1182  // is the same as altering the condition codes such
  1183  // that the same result would be produced if the arguments
  1184  // to the flag-generating instruction were reversed, e.g.
  1185  // (InvertFlags (CMP x y)) -> (CMP y x)
  1186  func arm64Invert(op Op) Op {
  1187  	switch op {
  1188  	case OpARM64LessThan:
  1189  		return OpARM64GreaterThan
  1190  	case OpARM64LessThanU:
  1191  		return OpARM64GreaterThanU
  1192  	case OpARM64GreaterThan:
  1193  		return OpARM64LessThan
  1194  	case OpARM64GreaterThanU:
  1195  		return OpARM64LessThanU
  1196  	case OpARM64LessEqual:
  1197  		return OpARM64GreaterEqual
  1198  	case OpARM64LessEqualU:
  1199  		return OpARM64GreaterEqualU
  1200  	case OpARM64GreaterEqual:
  1201  		return OpARM64LessEqual
  1202  	case OpARM64GreaterEqualU:
  1203  		return OpARM64LessEqualU
  1204  	case OpARM64Equal, OpARM64NotEqual:
  1205  		return op
  1206  	case OpARM64LessThanF:
  1207  		return OpARM64GreaterThanF
  1208  	case OpARM64GreaterThanF:
  1209  		return OpARM64LessThanF
  1210  	case OpARM64LessEqualF:
  1211  		return OpARM64GreaterEqualF
  1212  	case OpARM64GreaterEqualF:
  1213  		return OpARM64LessEqualF
  1214  	case OpARM64NotLessThanF:
  1215  		return OpARM64NotGreaterThanF
  1216  	case OpARM64NotGreaterThanF:
  1217  		return OpARM64NotLessThanF
  1218  	case OpARM64NotLessEqualF:
  1219  		return OpARM64NotGreaterEqualF
  1220  	case OpARM64NotGreaterEqualF:
  1221  		return OpARM64NotLessEqualF
  1222  	default:
  1223  		panic("unreachable")
  1224  	}
  1225  }
  1226  
  1227  // evaluate an ARM64 op against a flags value
  1228  // that is potentially constant; return 1 for true,
  1229  // -1 for false, and 0 for not constant.
  1230  func ccARM64Eval(op Op, flags *Value) int {
  1231  	fop := flags.Op
  1232  	if fop == OpARM64InvertFlags {
  1233  		return -ccARM64Eval(op, flags.Args[0])
  1234  	}
  1235  	if fop != OpARM64FlagConstant {
  1236  		return 0
  1237  	}
  1238  	fc := flagConstant(flags.AuxInt)
  1239  	b2i := func(b bool) int {
  1240  		if b {
  1241  			return 1
  1242  		}
  1243  		return -1
  1244  	}
  1245  	switch op {
  1246  	case OpARM64Equal:
  1247  		return b2i(fc.eq())
  1248  	case OpARM64NotEqual:
  1249  		return b2i(fc.ne())
  1250  	case OpARM64LessThan:
  1251  		return b2i(fc.lt())
  1252  	case OpARM64LessThanU:
  1253  		return b2i(fc.ult())
  1254  	case OpARM64GreaterThan:
  1255  		return b2i(fc.gt())
  1256  	case OpARM64GreaterThanU:
  1257  		return b2i(fc.ugt())
  1258  	case OpARM64LessEqual:
  1259  		return b2i(fc.le())
  1260  	case OpARM64LessEqualU:
  1261  		return b2i(fc.ule())
  1262  	case OpARM64GreaterEqual:
  1263  		return b2i(fc.ge())
  1264  	case OpARM64GreaterEqualU:
  1265  		return b2i(fc.uge())
  1266  	}
  1267  	return 0
  1268  }
  1269  
  1270  // logRule logs the use of the rule s. This will only be enabled if
  1271  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1272  func logRule(s string) {
  1273  	if ruleFile == nil {
  1274  		// Open a log file to write log to. We open in append
  1275  		// mode because all.bash runs the compiler lots of times,
  1276  		// and we want the concatenation of all of those logs.
  1277  		// This means, of course, that users need to rm the old log
  1278  		// to get fresh data.
  1279  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1280  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1281  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1282  		if err != nil {
  1283  			panic(err)
  1284  		}
  1285  		ruleFile = w
  1286  	}
  1287  	_, err := fmt.Fprintln(ruleFile, s)
  1288  	if err != nil {
  1289  		panic(err)
  1290  	}
  1291  }
  1292  
  1293  var ruleFile io.Writer
  1294  
  1295  func isConstZero(v *Value) bool {
  1296  	switch v.Op {
  1297  	case OpConstNil:
  1298  		return true
  1299  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1300  		return v.AuxInt == 0
  1301  	case OpStringMake, OpIMake, OpComplexMake:
  1302  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1])
  1303  	case OpSliceMake:
  1304  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1]) && isConstZero(v.Args[2])
  1305  	case OpStringPtr, OpStringLen, OpSlicePtr, OpSliceLen, OpSliceCap, OpITab, OpIData, OpComplexReal, OpComplexImag:
  1306  		return isConstZero(v.Args[0])
  1307  	}
  1308  	return false
  1309  }
  1310  
  1311  // reciprocalExact64 reports whether 1/c is exactly representable.
  1312  func reciprocalExact64(c float64) bool {
  1313  	b := math.Float64bits(c)
  1314  	man := b & (1<<52 - 1)
  1315  	if man != 0 {
  1316  		return false // not a power of 2, denormal, or NaN
  1317  	}
  1318  	exp := b >> 52 & (1<<11 - 1)
  1319  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1320  	// changes the exponent to 0x7fe-exp.
  1321  	switch exp {
  1322  	case 0:
  1323  		return false // ±0
  1324  	case 0x7ff:
  1325  		return false // ±inf
  1326  	case 0x7fe:
  1327  		return false // exponent is not representable
  1328  	default:
  1329  		return true
  1330  	}
  1331  }
  1332  
  1333  // reciprocalExact32 reports whether 1/c is exactly representable.
  1334  func reciprocalExact32(c float32) bool {
  1335  	b := math.Float32bits(c)
  1336  	man := b & (1<<23 - 1)
  1337  	if man != 0 {
  1338  		return false // not a power of 2, denormal, or NaN
  1339  	}
  1340  	exp := b >> 23 & (1<<8 - 1)
  1341  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1342  	// changes the exponent to 0xfe-exp.
  1343  	switch exp {
  1344  	case 0:
  1345  		return false // ±0
  1346  	case 0xff:
  1347  		return false // ±inf
  1348  	case 0xfe:
  1349  		return false // exponent is not representable
  1350  	default:
  1351  		return true
  1352  	}
  1353  }
  1354  
   1355  // isARMImmRot reports whether an immediate can be directly encoded into an ARM instruction.
  1356  func isARMImmRot(v uint32) bool {
  1357  	for i := 0; i < 16; i++ {
  1358  		if v&^0xff == 0 {
  1359  			return true
  1360  		}
  1361  		v = v<<2 | v>>30
  1362  	}
  1363  
  1364  	return false
  1365  }
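
        // Editor's illustrative examples (not part of the original source): an ARM
        // rotated immediate is an 8-bit value rotated right by an even amount, so:
        //
        //	isARMImmRot(0xff)       == true  // fits in 8 bits directly
        //	isARMImmRot(0xf000000f) == true  // 0xff rotated right by 4
        //	isARMImmRot(0x101)      == false // the set bits span 9 bits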
  1366  
  1367  // overlap reports whether the ranges given by the given offset and
  1368  // size pairs overlap.
  1369  func overlap(offset1, size1, offset2, size2 int64) bool {
  1370  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1371  		return true
  1372  	}
  1373  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1374  		return true
  1375  	}
  1376  	return false
  1377  }
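
        // Editor's illustrative examples (not part of the original source):
        //
        //	overlap(0, 8, 4, 8) == true  // [0,8) and [4,12) share bytes 4..7
        //	overlap(0, 4, 4, 4) == false // [0,4) and [4,8) are adjacent, not overlapping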
  1378  
   1379  // zeroUpper32Bits reports whether the value zeroes out the upper 32 bits of a 64-bit register.
   1380  // depth limits the recursion depth. In AMD64.rules, 3 is used as the limit
   1381  // because it catches the same number of cases as 4.
  1382  func zeroUpper32Bits(x *Value, depth int) bool {
  1383  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1384  		// If the value is signed, it might get re-sign-extended
  1385  		// during spill and restore. See issue 68227.
  1386  		return false
  1387  	}
  1388  	switch x.Op {
  1389  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1390  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1391  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1392  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1393  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1394  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1395  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1396  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1397  		OpAMD64SHLL, OpAMD64SHLLconst:
  1398  		return true
  1399  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1400  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1401  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1402  		return true
  1403  	case OpArg: // note: but not ArgIntReg
  1404  		// amd64 always loads args from the stack unsigned.
  1405  		// most other architectures load them sign/zero extended based on the type.
  1406  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1407  	case OpPhi, OpSelect0, OpSelect1:
   1408  		// Phis can use each other as arguments; instead of tracking visited values,
   1409  		// just limit the recursion depth.
  1410  		if depth <= 0 {
  1411  			return false
  1412  		}
  1413  		for i := range x.Args {
  1414  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1415  				return false
  1416  			}
  1417  		}
  1418  		return true
  1419  
  1420  	}
  1421  	return false
  1422  }
  1423  
  1424  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1425  func zeroUpper48Bits(x *Value, depth int) bool {
  1426  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1427  		return false
  1428  	}
  1429  	switch x.Op {
  1430  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1431  		return true
  1432  	case OpArg: // note: but not ArgIntReg
  1433  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1434  	case OpPhi, OpSelect0, OpSelect1:
   1435  		// Phis can use each other as arguments; instead of tracking visited values,
   1436  		// just limit the recursion depth.
  1437  		if depth <= 0 {
  1438  			return false
  1439  		}
  1440  		for i := range x.Args {
  1441  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1442  				return false
  1443  			}
  1444  		}
  1445  		return true
  1446  
  1447  	}
  1448  	return false
  1449  }
  1450  
  1451  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1452  func zeroUpper56Bits(x *Value, depth int) bool {
  1453  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1454  		return false
  1455  	}
  1456  	switch x.Op {
  1457  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1458  		return true
  1459  	case OpArg: // note: but not ArgIntReg
  1460  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1461  	case OpPhi, OpSelect0, OpSelect1:
   1462  		// Phis can use each other as arguments; instead of tracking visited values,
   1463  		// just limit the recursion depth.
  1464  		if depth <= 0 {
  1465  			return false
  1466  		}
  1467  		for i := range x.Args {
  1468  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1469  				return false
  1470  			}
  1471  		}
  1472  		return true
  1473  
  1474  	}
  1475  	return false
  1476  }
  1477  
  1478  func isInlinableMemclr(c *Config, sz int64) bool {
  1479  	if sz < 0 {
  1480  		return false
  1481  	}
  1482  	// TODO: expand this check to allow other architectures
  1483  	// see CL 454255 and issue 56997
  1484  	switch c.arch {
  1485  	case "amd64", "arm64":
  1486  		return true
  1487  	case "ppc64le", "ppc64", "loong64":
  1488  		return sz < 512
  1489  	}
  1490  	return false
  1491  }
  1492  
  1493  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1494  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1495  // safe, either because Move will do all of its loads before any of its stores, or
  1496  // because the arguments are known to be disjoint.
  1497  // This is used as a check for replacing memmove with Move ops.
  1498  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1499  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1500  	// Move ops may or may not be faster for large sizes depending on how the platform
  1501  	// lowers them, so we only perform this optimization on platforms that we know to
  1502  	// have fast Move ops.
  1503  	switch c.arch {
  1504  	case "amd64":
  1505  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1506  	case "arm64":
  1507  		return sz <= 64 || (sz <= 1024 && disjoint(dst, sz, src, sz))
  1508  	case "386":
  1509  		return sz <= 8
  1510  	case "s390x", "ppc64", "ppc64le":
  1511  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1512  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1513  		return sz <= 4
  1514  	}
  1515  	return false
  1516  }
  1517  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1518  	return isInlinableMemmove(dst, src, sz, c)
  1519  }
  1520  
  1521  // logLargeCopy logs the occurrence of a large copy.
  1522  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1523  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1524  func logLargeCopy(v *Value, s int64) bool {
  1525  	if s < 128 {
  1526  		return true
  1527  	}
  1528  	if logopt.Enabled() {
  1529  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1530  	}
  1531  	return true
  1532  }
  1533  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1534  	if s < 128 {
  1535  		return
  1536  	}
  1537  	if logopt.Enabled() {
  1538  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1539  	}
  1540  }
  1541  
  1542  // hasSmallRotate reports whether the architecture has rotate instructions
  1543  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1544  func hasSmallRotate(c *Config) bool {
  1545  	switch c.arch {
  1546  	case "amd64", "386":
  1547  		return true
  1548  	default:
  1549  		return false
  1550  	}
  1551  }
  1552  
  1553  func supportsPPC64PCRel() bool {
   1554  	// PCRel is currently supported only for >= power10 on linux.
   1555  	// Internal and external linking support this on ppc64le; internal linking supports it on ppc64.
  1556  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1557  }
  1558  
  1559  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1560  	if sh < 0 || sh >= sz {
  1561  		panic("PPC64 shift arg sh out of range")
  1562  	}
  1563  	if mb < 0 || mb >= sz {
  1564  		panic("PPC64 shift arg mb out of range")
  1565  	}
  1566  	if me < 0 || me >= sz {
  1567  		panic("PPC64 shift arg me out of range")
  1568  	}
  1569  	return int32(sh<<16 | mb<<8 | me)
  1570  }
  1571  
  1572  func GetPPC64Shiftsh(auxint int64) int64 {
  1573  	return int64(int8(auxint >> 16))
  1574  }
  1575  
  1576  func GetPPC64Shiftmb(auxint int64) int64 {
  1577  	return int64(int8(auxint >> 8))
  1578  }
  1579  
  1580  func GetPPC64Shiftme(auxint int64) int64 {
  1581  	return int64(int8(auxint))
  1582  }
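
        // Editor's worked example (not part of the original source): the three fields
        // are packed as sh<<16 | mb<<8 | me, so
        //
        //	aux := newPPC64ShiftAuxInt(3, 0, 28, 32) // aux == 0x3001c
        //	GetPPC64Shiftsh(int64(aux))              // == 3
        //	GetPPC64Shiftmb(int64(aux))              // == 0
        //	GetPPC64Shiftme(int64(aux))              // == 28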
  1583  
   1584  // isPPC64WordRotateMask reports whether this value can be encoded as a mask for
   1585  // an rlwinm-like operation.  Masks can also extend from the msb and wrap around
   1586  // to the lsb.  That is, the valid masks are 32-bit strings
   1587  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
   1588  //
   1589  // Note: This ignores the upper 32 bits of the input. When a
   1590  // zero-extended result is desired (e.g. a 64-bit result), the
   1591  // user must verify the upper 32 bits are 0 and the mask is
   1592  // contiguous (that is, non-wrapping).
  1593  func isPPC64WordRotateMask(v64 int64) bool {
  1594  	// Isolate the rightmost 1 bit (0 if there is none) and add it to v.
  1595  	v := uint32(v64)
  1596  	vp := (v & -v) + v
  1597  	// Likewise, for the wrapping case.
  1598  	vn := ^v
  1599  	vpn := (vn & -vn) + vn
  1600  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1601  }
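
// examplePPC64WordRotateMask is an illustrative sketch added for exposition; it
// is not part of the original rewrite.go. It shows which masks the helper
// accepts: a single contiguous run of ones, or a run that wraps from the lsb
// around to the msb, but not two separate runs.
func examplePPC64WordRotateMask() {
	if !isPPC64WordRotateMask(0x00FF0000) { // contiguous: 0..01..10..0
		panic("contiguous mask should be accepted")
	}
	if !isPPC64WordRotateMask(0xFF0000FF) { // wrapping: 1..10..01..1
		panic("wrapping mask should be accepted")
	}
	if isPPC64WordRotateMask(0x0F0F0000) { // two separate runs of ones
		panic("split mask should be rejected")
	}
}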
  1602  
  1603  // Test if this mask is a valid, contiguous bitmask which can be
  1604  // represented by a RLWNM mask and also clears the upper 32 bits
  1605  // of the register.
  1606  func isPPC64WordRotateMaskNonWrapping(v64 int64) bool {
  1607  	// Isolate the rightmost 1 bit (0 if there is none) and add it to v.
  1608  	v := uint32(v64)
  1609  	vp := (v & -v) + v
  1610  	return (v&vp == 0) && v != 0 && uint64(uint32(v64)) == uint64(v64)
  1611  }
  1612  
  1613  // Compress mask and shift into single value of the form
  1614  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1615  // be used to regenerate the input mask.
  1616  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1617  	var mb, me, mbn, men int
  1618  
  1619  	// Determine boundaries and then decode them
  1620  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1621  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1622  	} else if nbits == 32 {
  1623  		mb = bits.LeadingZeros32(uint32(mask))
  1624  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1625  		mbn = bits.LeadingZeros32(^uint32(mask))
  1626  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1627  	} else {
  1628  		mb = bits.LeadingZeros64(uint64(mask))
  1629  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1630  		mbn = bits.LeadingZeros64(^uint64(mask))
  1631  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1632  	}
  1633  	// Check for a wrapping mask (e.g. bits at 0 and 63).
  1634  	if mb == 0 && me == int(nbits) {
  1635  		// swap the inverted values
  1636  		mb, me = men, mbn
  1637  	}
  1638  
  1639  	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
  1640  }
  1641  
  1642  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1643  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1644  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
  1645  // operations can be combined. This function assumes the two opcodes can
  1646  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1647  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1648  	mb := s
  1649  	r := 64 - s
  1650  	// A larger mb is a smaller mask.
  1651  	if (encoded>>8)&0xFF < mb {
  1652  		encoded = (encoded &^ 0xFF00) | mb<<8
  1653  	}
  1654  	// The rotate is expected to be 0.
  1655  	if (encoded & 0xFF0000) != 0 {
  1656  		panic("non-zero rotate")
  1657  	}
  1658  	return encoded | r<<16
  1659  }
  1660  
  1661  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1662  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1663  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1664  	auxint := uint64(sauxint)
  1665  	rotate = int64((auxint >> 16) & 0xFF)
  1666  	mb = int64((auxint >> 8) & 0xFF)
  1667  	me = int64((auxint >> 0) & 0xFF)
  1668  	nbits := int64((auxint >> 24) & 0xFF)
  1669  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1670  	if mb > me {
  1671  		mask = ^mask
  1672  	}
  1673  	if nbits == 32 {
  1674  		mask = uint64(uint32(mask))
  1675  	}
  1676  
  1677  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1678  	// is inclusive.
  1679  	me = (me - 1) & (nbits - 1)
  1680  	return
  1681  }
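
// examplePPC64RotateMaskRoundTrip is an illustrative sketch added for
// exposition; it is not part of the original rewrite.go. It shows that
// DecodePPC64RotateMask recovers the rotate and mask that
// encodePPC64RotateMask packed into the auxint.
func examplePPC64RotateMaskRoundTrip() {
	enc := encodePPC64RotateMask(3, 0x000000F0, 32)
	rotate, _, _, mask := DecodePPC64RotateMask(enc)
	if rotate != 3 || mask != 0xF0 {
		panic("unexpected PPC64 rotate mask round trip")
	}
}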
  1682  
  1683  // This verifies that the mask is a set of
  1684  // consecutive bits including the least
  1685  // significant bit.
  1686  func isPPC64ValidShiftMask(v int64) bool {
  1687  	if (v != 0) && ((v+1)&v) == 0 {
  1688  		return true
  1689  	}
  1690  	return false
  1691  }
  1692  
  1693  func getPPC64ShiftMaskLength(v int64) int64 {
  1694  	return int64(bits.Len64(uint64(v)))
  1695  }
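
// examplePPC64ShiftMask is an illustrative sketch added for exposition; it is
// not part of the original rewrite.go. A valid shift mask is a run of ones
// that includes the least significant bit, and its length is the number of
// ones.
func examplePPC64ShiftMask() {
	if !isPPC64ValidShiftMask(0x7) || getPPC64ShiftMaskLength(0x7) != 3 {
		panic("0b111 should be a valid shift mask of length 3")
	}
	if isPPC64ValidShiftMask(0x6) { // 0b110 does not include the lsb
		panic("0b110 should be rejected")
	}
}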
  1696  
  1697  // Decompose a shift right into an equivalent rotate/mask,
  1698  // and return mask & m.
  1699  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1700  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1701  	return m & int64(smask)
  1702  }
  1703  
  1704  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1705  func mergePPC64AndSrwi(m, s int64) int64 {
  1706  	mask := mergePPC64RShiftMask(m, s, 32)
  1707  	if !isPPC64WordRotateMask(mask) {
  1708  		return 0
  1709  	}
  1710  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1711  }
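
// exampleMergePPC64AndSrwi is an illustrative sketch added for exposition; it
// is not part of the original rewrite.go. ANDing with 0xFF after a shift right
// by 8 selects one contiguous byte, so the pair folds into a single
// rotate-and-mask with rotate (32-8)&31 = 24 and mask 0xFF.
func exampleMergePPC64AndSrwi() {
	enc := mergePPC64AndSrwi(0xFF, 8)
	if enc == 0 {
		panic("AND 0xFF of SRWconst 8 should merge")
	}
	rotate, _, _, mask := DecodePPC64RotateMask(enc)
	if rotate != 24 || mask != 0xFF {
		panic("unexpected merged RLWINM encoding")
	}
}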
  1712  
  1713  // Combine (ANDconst [m] (SRDconst [s])) into (RLWINM [y]) or return 0
  1714  func mergePPC64AndSrdi(m, s int64) int64 {
  1715  	mask := mergePPC64RShiftMask(m, s, 64)
  1716  
  1717  	// Verify the rotate and mask result only uses the lower 32 bits.
  1718  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, -int(s))
  1719  	if rv&uint64(mask) != 0 {
  1720  		return 0
  1721  	}
  1722  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1723  		return 0
  1724  	}
  1725  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1726  }
  1727  
  1728  // Combine (ANDconst [m] (SLDconst [s])) into (RLWINM [y]) or return 0
  1729  func mergePPC64AndSldi(m, s int64) int64 {
  1730  	mask := -1 << s & m
  1731  
  1732  	// Verify the rotate and mask result only uses the lower 32 bits.
  1733  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, int(s))
  1734  	if rv&uint64(mask) != 0 {
  1735  		return 0
  1736  	}
  1737  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1738  		return 0
  1739  	}
  1740  	return encodePPC64RotateMask(s&31, mask, 32)
  1741  }
  1742  
  1743  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1744  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1745  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1746  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1747  	// For CLRLSLDI, it's more convenient to think of it as masking off the leading bits, then rotating left.
  1748  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1749  
  1750  	// Rewrite mask to apply after the final left shift.
  1751  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1752  
  1753  	r_1 := 32 - srw
  1754  	r_2 := GetPPC64Shiftsh(sld)
  1755  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1756  
  1757  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1758  		return 0
  1759  	}
  1760  	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
  1761  }
  1762  
  1763  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1764  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1765  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1766  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1767  	// For CLRLSLDI, it's more convenient to think of it as masking off the leading bits, then rotating left.
  1768  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1769  
  1770  	// Rewrite mask to apply after the final left shift.
  1771  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1772  
  1773  	r_1 := 64 - srd
  1774  	r_2 := GetPPC64Shiftsh(sld)
  1775  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1776  
  1777  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1778  		return 0
  1779  	}
  1780  	// This combine only works when selecting and shifting the lower 32 bits.
  1781  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1782  	if v1&mask_3 != 0 {
  1783  		return 0
  1784  	}
  1785  	return encodePPC64RotateMask(int64(r_3&31), int64(mask_3), 32)
  1786  }
  1787  
  1788  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1789  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1790  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1791  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1792  	// For CLRLSLDI, it's more convenient to think of it as masking off the leading bits, then rotating left.
  1793  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1794  
  1795  	// combine the masks, and adjust for the final left shift.
  1796  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1797  	r_2 := GetPPC64Shiftsh(int64(sld))
  1798  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1799  
  1800  	// Verify the result is still a valid bitmask of <= 32 bits.
  1801  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1802  		return 0
  1803  	}
  1804  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1805  }
  1806  
  1807  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1808  // or 0 if they cannot be merged.
  1809  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1810  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1811  	mask_out := (mask_rlw & uint64(mask))
  1812  
  1813  	// Verify the result is still a valid bitmask of <= 32 bits.
  1814  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1815  		return 0
  1816  	}
  1817  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1818  }
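
// exampleMergePPC64AndRlwinm is an illustrative sketch added for exposition;
// it is not part of the original rewrite.go. Applying ANDconst 0x0F00 to the
// result of a RLWINM whose mask is 0xFF00 only narrows the RLWINM mask, so the
// two fold into one RLWINM with mask 0x0F00 and the same rotate.
func exampleMergePPC64AndRlwinm() {
	rlw := encodePPC64RotateMask(0, 0xFF00, 32)
	merged := mergePPC64AndRlwinm(0x0F00, rlw)
	if merged == 0 {
		panic("expected the ANDconst to fold into the RLWINM")
	}
	rotate, _, _, mask := DecodePPC64RotateMask(merged)
	if rotate != 0 || mask != 0x0F00 {
		panic("unexpected folded RLWINM encoding")
	}
}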
  1819  
  1820  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1821  // result. Return rlw if it does, 0 otherwise.
  1822  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1823  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1824  	if mb > me {
  1825  		return 0
  1826  	}
  1827  	return rlw
  1828  }
  1829  
  1830  // Test if an AND feeding into a RLWINM can be merged. Return the encoded RLWINM constant,
  1831  // or 0 if they cannot be merged.
  1832  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1833  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1834  
  1835  	// Rotate the input mask, combine with the rlwnm mask, and test if it is still a valid rlwinm mask.
  1836  	r_mask := bits.RotateLeft32(mask, int(r))
  1837  
  1838  	mask_out := (mask_rlw & uint64(r_mask))
  1839  
  1840  	// Verify the result is still a valid bitmask of <= 32 bits.
  1841  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1842  		return 0
  1843  	}
  1844  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1845  }
  1846  
  1847  // Test if RLWINM feeding into SLDconst can be merged. Return the encoded RLWINM constant,
  1848  // or 0 if they cannot be merged.
  1849  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1850  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1851  	if mb > me || mb < sldi {
  1852  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1853  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1854  		return 0
  1855  	}
  1856  	// combine the masks, and adjust for the final left shift.
  1857  	mask_3 := mask_1 << sldi
  1858  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1859  
  1860  	// Verify the result is still a valid bitmask of <= 32 bits.
  1861  	if uint64(uint32(mask_3)) != mask_3 {
  1862  		return 0
  1863  	}
  1864  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1865  }
  1866  
  1867  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1868  // or return 0 if they cannot be combined.
  1869  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1870  	if sld > srw || srw >= 32 {
  1871  		return 0
  1872  	}
  1873  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1874  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1875  	mask := (mask_r & mask_l) << uint(sld)
  1876  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1877  }
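
// exampleMergePPC64SldiSrw is an illustrative sketch added for exposition; it
// is not part of the original rewrite.go. (SLDconst [2] (SRWconst [8] x))
// keeps bits 8..31 of the low word and shifts them left by 2, which is the
// rotate-and-mask RLWINM with rotate (32-8+2)&31 = 26 and mask 0x03FFFFFC.
func exampleMergePPC64SldiSrw() {
	enc := mergePPC64SldiSrw(2, 8)
	if enc == 0 {
		panic("SLDconst 2 of SRWconst 8 should merge")
	}
	rotate, _, _, mask := DecodePPC64RotateMask(enc)
	if rotate != 26 || mask != 0x03FFFFFC {
		panic("unexpected merged RLWINM encoding")
	}
}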
  1878  
  1879  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1880  // to (Select0 (opCC x y)) without having to explicitly fixup every user
  1881  // of op.
  1882  //
  1883  // E.g. consider the case:
  1884  // a = (ADD x y)
  1885  // b = (CMPconst [0] a)
  1886  // c = (OR a z)
  1887  //
  1888  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1889  // would produce:
  1890  // a  = (ADD x y)
  1891  // a' = (ADDCC x y)
  1892  // a” = (Select0 a')
  1893  // b  = (CMPconst [0] a”)
  1894  // c  = (OR a z)
  1895  //
  1896  // which makes it impossible to rewrite the second user. Instead the result
  1897  // of this conversion is:
  1898  // a' = (ADDCC x y)
  1899  // a  = (Select0 a')
  1900  // b  = (CMPconst [0] a)
  1901  // c  = (OR a z)
  1902  //
  1903  // Which makes it trivial to rewrite b using a lowering rule.
  1904  func convertPPC64OpToOpCC(op *Value) *Value {
  1905  	ccOpMap := map[Op]Op{
  1906  		OpPPC64ADD:      OpPPC64ADDCC,
  1907  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1908  		OpPPC64AND:      OpPPC64ANDCC,
  1909  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1910  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1911  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1912  		OpPPC64MULHDU:   OpPPC64MULHDUCC,
  1913  		OpPPC64NEG:      OpPPC64NEGCC,
  1914  		OpPPC64NOR:      OpPPC64NORCC,
  1915  		OpPPC64OR:       OpPPC64ORCC,
  1916  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1917  		OpPPC64SUB:      OpPPC64SUBCC,
  1918  		OpPPC64XOR:      OpPPC64XORCC,
  1919  	}
  1920  	b := op.Block
  1921  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1922  	opCC.AddArgs(op.Args...)
  1923  	op.reset(OpSelect0)
  1924  	op.AddArgs(opCC)
  1925  	return op
  1926  }
  1927  
  1928  // Try converting a RLDICL to ANDCC. If successful, return the mask; otherwise return 0.
  1929  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1930  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1931  	if r != 0 || mask&0xFFFF != mask {
  1932  		return 0
  1933  	}
  1934  	return int64(mask)
  1935  }
  1936  
  1937  // Convenience function to rotate a 32 bit constant value by another constant.
  1938  func rotateLeft32(v, rotate int64) int64 {
  1939  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1940  }
  1941  
  1942  func rotateRight64(v, rotate int64) int64 {
  1943  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1944  }
  1945  
  1946  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1947  func armBFAuxInt(lsb, width int64) arm64BitField {
  1948  	if lsb < 0 || lsb > 63 {
  1949  		panic("ARM(64) bit field lsb constant out of range")
  1950  	}
  1951  	if width < 1 || lsb+width > 64 {
  1952  		panic("ARM(64) bit field width constant out of range")
  1953  	}
  1954  	return arm64BitField(width | lsb<<8)
  1955  }
  1956  
  1957  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1958  func (bfc arm64BitField) lsb() int64 {
  1959  	return int64(uint64(bfc) >> 8)
  1960  }
  1961  
  1962  // returns the width part of the auxInt field of arm64 bitfield ops.
  1963  func (bfc arm64BitField) width() int64 {
  1964  	return int64(bfc) & 0xff
  1965  }
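
// exampleARM64BitField is an illustrative sketch added for exposition; it is
// not part of the original rewrite.go. It shows the auxInt packing used by the
// arm64 bitfield ops: the width in the low byte and the lsb in the bits above
// it.
func exampleARM64BitField() {
	bfc := armBFAuxInt(8, 16) // a field of width 16 starting at bit 8
	if bfc.lsb() != 8 || bfc.width() != 16 {
		panic("unexpected arm64 bitfield auxInt decoding")
	}
}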
  1966  
  1967  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1968  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1969  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1970  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1971  }
  1972  
  1973  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1974  func arm64BFWidth(mask, rshift int64) int64 {
  1975  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1976  	if shiftedMask == 0 {
  1977  		panic("ARM64 BF mask is zero")
  1978  	}
  1979  	return nto(shiftedMask)
  1980  }
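
// exampleARM64BFMask is an illustrative sketch added for exposition; it is not
// part of the original rewrite.go. Shifting the mask 0xFF0 right by 4 gives
// 0xFF, a contiguous run of 8 ones, so it is usable as a bitfield mask at
// lsb 8 and its width is 8.
func exampleARM64BFMask() {
	if !isARM64BFMask(8, 0xFF0, 4) {
		panic("0xFF0 >> 4 should be a valid bitfield mask at lsb 8")
	}
	if arm64BFWidth(0xFF0, 4) != 8 {
		panic("unexpected bitfield width")
	}
}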
  1981  
  1982  // registerizable reports whether t is a primitive type that fits in
  1983  // a register. It assumes float64 values will always fit into registers
  1984  // even if that isn't strictly true.
  1985  func registerizable(b *Block, typ *types.Type) bool {
  1986  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1987  		return true
  1988  	}
  1989  	if typ.IsInteger() {
  1990  		return typ.Size() <= b.Func.Config.RegSize
  1991  	}
  1992  	return false
  1993  }
  1994  
  1995  // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
  1996  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  1997  	f := v.Block.Func
  1998  	if !f.Config.Race {
  1999  		return false
  2000  	}
  2001  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  2002  		return false
  2003  	}
  2004  	for _, b := range f.Blocks {
  2005  		for _, v := range b.Values {
  2006  			switch v.Op {
  2007  			case OpStaticCall, OpStaticLECall:
  2008  				// The check for racefuncenter will also encounter racefuncexit and vice versa.
  2009  				// Allow calls to panic*
  2010  				s := v.Aux.(*AuxCall).Fn.String()
  2011  				switch s {
  2012  				case "runtime.racefuncenter", "runtime.racefuncexit",
  2013  					"runtime.panicdivide", "runtime.panicwrap",
  2014  					"runtime.panicshift":
  2015  					continue
  2016  				}
  2017  				// If we encountered any call, we need to keep racefunc*,
  2018  				// for accurate stacktraces.
  2019  				return false
  2020  			case OpPanicBounds, OpPanicExtend:
  2021  				// Note: these are panic generators that are ok (like the static calls above).
  2022  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  2023  				// We must keep the race functions if there are any other call types.
  2024  				return false
  2025  			}
  2026  		}
  2027  	}
  2028  	if isSameCall(sym, "runtime.racefuncenter") {
  2029  		// TODO REGISTER ABI this needs to be cleaned up.
  2030  		// If we're removing racefuncenter, remove its argument as well.
  2031  		if v.Args[0].Op != OpStore {
  2032  			if v.Op == OpStaticLECall {
  2033  				// there is no store, yet.
  2034  				return true
  2035  			}
  2036  			return false
  2037  		}
  2038  		mem := v.Args[0].Args[2]
  2039  		v.Args[0].reset(OpCopy)
  2040  		v.Args[0].AddArg(mem)
  2041  	}
  2042  	return true
  2043  }
  2044  
  2045  // symIsRO reports whether sym is a read-only global.
  2046  func symIsRO(sym Sym) bool {
  2047  	lsym := sym.(*obj.LSym)
  2048  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  2049  }
  2050  
  2051  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  2052  func symIsROZero(sym Sym) bool {
  2053  	lsym := sym.(*obj.LSym)
  2054  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  2055  		return false
  2056  	}
  2057  	for _, b := range lsym.P {
  2058  		if b != 0 {
  2059  			return false
  2060  		}
  2061  	}
  2062  	return true
  2063  }
  2064  
  2065  // isFixed32 returns true if the int32 at offset off in symbol sym
  2066  // is known and constant.
  2067  func isFixed32(c *Config, sym Sym, off int64) bool {
  2068  	return isFixed(c, sym, off, 4)
  2069  }
  2070  
  2071  // isFixed returns true if the range [off,off+size] of the symbol sym
  2072  // is known and constant.
  2073  func isFixed(c *Config, sym Sym, off, size int64) bool {
  2074  	lsym := sym.(*obj.LSym)
  2075  	if lsym.Extra == nil {
  2076  		return false
  2077  	}
  2078  	if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2079  		if off == 2*c.PtrSize && size == 4 {
  2080  			return true // type hash field
  2081  		}
  2082  	}
  2083  	return false
  2084  }
  2085  func fixed32(c *Config, sym Sym, off int64) int32 {
  2086  	lsym := sym.(*obj.LSym)
  2087  	if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2088  		if off == 2*c.PtrSize {
  2089  			return int32(types.TypeHash(ti.Type.(*types.Type)))
  2090  		}
  2091  	}
  2092  	base.Fatalf("fixed32 data not known for %s:%d", sym, off)
  2093  	return 0
  2094  }
  2095  
  2096  // isFixedSym returns true if the content of sym at the given offset
  2097  // is known and is the constant address of another symbol.
  2098  func isFixedSym(sym Sym, off int64) bool {
  2099  	lsym := sym.(*obj.LSym)
  2100  	switch {
  2101  	case lsym.Type == objabi.SRODATA:
  2102  		// itabs, dictionaries
  2103  	default:
  2104  		return false
  2105  	}
  2106  	for _, r := range lsym.R {
  2107  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2108  			return true
  2109  		}
  2110  	}
  2111  	return false
  2112  }
  2113  func fixedSym(f *Func, sym Sym, off int64) Sym {
  2114  	lsym := sym.(*obj.LSym)
  2115  	for _, r := range lsym.R {
  2116  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
  2117  			if strings.HasPrefix(r.Sym.Name, "type:") {
  2118  				// In case we're loading a type out of a dictionary, we need to record
  2119  				// that the containing function might put that type in an interface.
  2120  				// That information is currently recorded in relocations in the dictionary,
  2121  				// but if we perform this load at compile time then the dictionary
  2122  				// might be dead.
  2123  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2124  			} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  2125  				// Same, but if we're using an itab we need to record that the
  2126  				// itab._type might be put in an interface.
  2127  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2128  			}
  2129  			return r.Sym
  2130  		}
  2131  	}
  2132  	base.Fatalf("fixedSym data not known for %s:%d", sym, off)
  2133  	return nil
  2134  }
  2135  
  2136  // read8 reads one byte from the read-only global sym at offset off.
  2137  func read8(sym Sym, off int64) uint8 {
  2138  	lsym := sym.(*obj.LSym)
  2139  	if off >= int64(len(lsym.P)) || off < 0 {
  2140  		// Invalid index into the global sym.
  2141  		// This can happen in dead code, so we don't want to panic.
  2142  		// Just return any value, it will eventually get ignored.
  2143  		// See issue 29215.
  2144  		return 0
  2145  	}
  2146  	return lsym.P[off]
  2147  }
  2148  
  2149  // read16 reads two bytes from the read-only global sym at offset off.
  2150  func read16(sym Sym, off int64, byteorder binary.ByteOrder) uint16 {
  2151  	lsym := sym.(*obj.LSym)
  2152  	// lsym.P is written lazily.
  2153  	// Bytes requested after the end of lsym.P are 0.
  2154  	var src []byte
  2155  	if 0 <= off && off < int64(len(lsym.P)) {
  2156  		src = lsym.P[off:]
  2157  	}
  2158  	buf := make([]byte, 2)
  2159  	copy(buf, src)
  2160  	return byteorder.Uint16(buf)
  2161  }
  2162  
  2163  // read32 reads four bytes from the read-only global sym at offset off.
  2164  func read32(sym Sym, off int64, byteorder binary.ByteOrder) uint32 {
  2165  	lsym := sym.(*obj.LSym)
  2166  	var src []byte
  2167  	if 0 <= off && off < int64(len(lsym.P)) {
  2168  		src = lsym.P[off:]
  2169  	}
  2170  	buf := make([]byte, 4)
  2171  	copy(buf, src)
  2172  	return byteorder.Uint32(buf)
  2173  }
  2174  
  2175  // read64 reads eight bytes from the read-only global sym at offset off.
  2176  func read64(sym Sym, off int64, byteorder binary.ByteOrder) uint64 {
  2177  	lsym := sym.(*obj.LSym)
  2178  	var src []byte
  2179  	if 0 <= off && off < int64(len(lsym.P)) {
  2180  		src = lsym.P[off:]
  2181  	}
  2182  	buf := make([]byte, 8)
  2183  	copy(buf, src)
  2184  	return byteorder.Uint64(buf)
  2185  }
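
// exampleReadFromSym is an illustrative sketch added for exposition; it is not
// part of the original rewrite.go. The read helpers pull constant data out of
// a read-only symbol's contents (lsym.P), zero-filling any bytes past the end.
func exampleReadFromSym() {
	lsym := &obj.LSym{P: []byte{0x01, 0x02, 0x03}}
	if read8(lsym, 0) != 0x01 {
		panic("unexpected byte at offset 0")
	}
	if read16(lsym, 2, binary.LittleEndian) != 0x0003 { // second byte is past the end and reads as 0
		panic("unexpected zero-filled read")
	}
}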
  2186  
  2187  // sequentialAddresses reports true if it can prove that x + n == y
  2188  func sequentialAddresses(x, y *Value, n int64) bool {
  2189  	if x == y && n == 0 {
  2190  		return true
  2191  	}
  2192  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2193  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2194  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2195  		return true
  2196  	}
  2197  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2198  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2199  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2200  		return true
  2201  	}
  2202  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2203  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2204  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2205  		return true
  2206  	}
  2207  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2208  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2209  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2210  		return true
  2211  	}
  2212  	return false
  2213  }
  2214  
  2215  // flagConstant represents the result of a compile-time comparison.
  2216  // The sense of these flags does not necessarily represent the hardware's notion
  2217  // of a flags register - these are just a compile-time construct.
  2218  // We happen to match the semantics to those of arm/arm64.
  2219  // Note that these semantics differ from x86: the carry flag has the opposite
  2220  // sense on a subtraction!
  2221  //
  2222  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2223  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2224  //	 (because it does x + ^y + C).
  2225  //
  2226  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2227  type flagConstant uint8
  2228  
  2229  // N reports whether the result of an operation is negative (high bit set).
  2230  func (fc flagConstant) N() bool {
  2231  	return fc&1 != 0
  2232  }
  2233  
  2234  // Z reports whether the result of an operation is 0.
  2235  func (fc flagConstant) Z() bool {
  2236  	return fc&2 != 0
  2237  }
  2238  
  2239  // C reports whether an unsigned add overflowed (carry), or an
  2240  // unsigned subtract did not underflow (borrow).
  2241  func (fc flagConstant) C() bool {
  2242  	return fc&4 != 0
  2243  }
  2244  
  2245  // V reports whether a signed operation overflowed or underflowed.
  2246  func (fc flagConstant) V() bool {
  2247  	return fc&8 != 0
  2248  }
  2249  
  2250  func (fc flagConstant) eq() bool {
  2251  	return fc.Z()
  2252  }
  2253  func (fc flagConstant) ne() bool {
  2254  	return !fc.Z()
  2255  }
  2256  func (fc flagConstant) lt() bool {
  2257  	return fc.N() != fc.V()
  2258  }
  2259  func (fc flagConstant) le() bool {
  2260  	return fc.Z() || fc.lt()
  2261  }
  2262  func (fc flagConstant) gt() bool {
  2263  	return !fc.Z() && fc.ge()
  2264  }
  2265  func (fc flagConstant) ge() bool {
  2266  	return fc.N() == fc.V()
  2267  }
  2268  func (fc flagConstant) ult() bool {
  2269  	return !fc.C()
  2270  }
  2271  func (fc flagConstant) ule() bool {
  2272  	return fc.Z() || fc.ult()
  2273  }
  2274  func (fc flagConstant) ugt() bool {
  2275  	return !fc.Z() && fc.uge()
  2276  }
  2277  func (fc flagConstant) uge() bool {
  2278  	return fc.C()
  2279  }
  2280  
  2281  func (fc flagConstant) ltNoov() bool {
  2282  	return fc.lt() && !fc.V()
  2283  }
  2284  func (fc flagConstant) leNoov() bool {
  2285  	return fc.le() && !fc.V()
  2286  }
  2287  func (fc flagConstant) gtNoov() bool {
  2288  	return fc.gt() && !fc.V()
  2289  }
  2290  func (fc flagConstant) geNoov() bool {
  2291  	return fc.ge() && !fc.V()
  2292  }
  2293  
  2294  func (fc flagConstant) String() string {
  2295  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2296  }
  2297  
  2298  type flagConstantBuilder struct {
  2299  	N bool
  2300  	Z bool
  2301  	C bool
  2302  	V bool
  2303  }
  2304  
  2305  func (fcs flagConstantBuilder) encode() flagConstant {
  2306  	var fc flagConstant
  2307  	if fcs.N {
  2308  		fc |= 1
  2309  	}
  2310  	if fcs.Z {
  2311  		fc |= 2
  2312  	}
  2313  	if fcs.C {
  2314  		fc |= 4
  2315  	}
  2316  	if fcs.V {
  2317  		fc |= 8
  2318  	}
  2319  	return fc
  2320  }
  2321  
  2322  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2323  //  - the results of the C flag are different
  2324  //  - the results of the V flag when y==minint are different
  2325  
  2326  // addFlags64 returns the flags that would be set from computing x+y.
  2327  func addFlags64(x, y int64) flagConstant {
  2328  	var fcb flagConstantBuilder
  2329  	fcb.Z = x+y == 0
  2330  	fcb.N = x+y < 0
  2331  	fcb.C = uint64(x+y) < uint64(x)
  2332  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2333  	return fcb.encode()
  2334  }
  2335  
  2336  // subFlags64 returns the flags that would be set from computing x-y.
  2337  func subFlags64(x, y int64) flagConstant {
  2338  	var fcb flagConstantBuilder
  2339  	fcb.Z = x-y == 0
  2340  	fcb.N = x-y < 0
  2341  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2342  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2343  	return fcb.encode()
  2344  }
  2345  
  2346  // addFlags32 returns the flags that would be set from computing x+y.
  2347  func addFlags32(x, y int32) flagConstant {
  2348  	var fcb flagConstantBuilder
  2349  	fcb.Z = x+y == 0
  2350  	fcb.N = x+y < 0
  2351  	fcb.C = uint32(x+y) < uint32(x)
  2352  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2353  	return fcb.encode()
  2354  }
  2355  
  2356  // subFlags32 returns the flags that would be set from computing x-y.
  2357  func subFlags32(x, y int32) flagConstant {
  2358  	var fcb flagConstantBuilder
  2359  	fcb.Z = x-y == 0
  2360  	fcb.N = x-y < 0
  2361  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2362  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2363  	return fcb.encode()
  2364  }
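
// exampleSubFlags is an illustrative sketch added for exposition; it is not
// part of the original rewrite.go. Computing 1-2 sets N (negative result) and
// clears C (a borrow occurred, in the arm carry model), so both the signed and
// unsigned less-than predicates hold.
func exampleSubFlags() {
	fc := subFlags64(1, 2)
	if !fc.lt() || !fc.ult() || fc.eq() {
		panic("unexpected flags for 1-2")
	}
}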
  2365  
  2366  // logicFlags64 returns flags set to the sign/zeroness of x.
  2367  // C and V are set to false.
  2368  func logicFlags64(x int64) flagConstant {
  2369  	var fcb flagConstantBuilder
  2370  	fcb.Z = x == 0
  2371  	fcb.N = x < 0
  2372  	return fcb.encode()
  2373  }
  2374  
  2375  // logicFlags32 returns flags set to the sign/zeroness of x.
  2376  // C and V are set to false.
  2377  func logicFlags32(x int32) flagConstant {
  2378  	var fcb flagConstantBuilder
  2379  	fcb.Z = x == 0
  2380  	fcb.N = x < 0
  2381  	return fcb.encode()
  2382  }
  2383  
  2384  func makeJumpTableSym(b *Block) *obj.LSym {
  2385  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2386  	// The jump table symbol is accessed only from the function symbol.
  2387  	s.Set(obj.AttrStatic, true)
  2388  	return s
  2389  }
  2390  
  2391  // canRotate reports whether the architecture supports
  2392  // rotates of integer registers with the given number of bits.
  2393  func canRotate(c *Config, bits int64) bool {
  2394  	if bits > c.PtrSize*8 {
  2395  		// Don't rewrite to rotates bigger than the machine word.
  2396  		return false
  2397  	}
  2398  	switch c.arch {
  2399  	case "386", "amd64", "arm64", "loong64", "riscv64":
  2400  		return true
  2401  	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
  2402  		return bits >= 32
  2403  	default:
  2404  		return false
  2405  	}
  2406  }
  2407  
  2408  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2409  func isARM64bitcon(x uint64) bool {
  2410  	if x == 1<<64-1 || x == 0 {
  2411  		return false
  2412  	}
  2413  	// determine the period and sign-extend a unit to 64 bits
  2414  	switch {
  2415  	case x != x>>32|x<<32:
  2416  		// period is 64
  2417  		// nothing to do
  2418  	case x != x>>16|x<<48:
  2419  		// period is 32
  2420  		x = uint64(int64(int32(x)))
  2421  	case x != x>>8|x<<56:
  2422  		// period is 16
  2423  		x = uint64(int64(int16(x)))
  2424  	case x != x>>4|x<<60:
  2425  		// period is 8
  2426  		x = uint64(int64(int8(x)))
  2427  	default:
  2428  		// period is 4 or 2, always true
  2429  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2430  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2431  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2432  		// 0101, 1010             -- 01   rotate, repeat
  2433  		return true
  2434  	}
  2435  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2436  }
  2437  
  2438  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2439  func sequenceOfOnes(x uint64) bool {
  2440  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2441  	y += x
  2442  	return (y-1)&y == 0
  2443  }
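
// exampleARM64bitcon is an illustrative sketch added for exposition; it is not
// part of the original rewrite.go. 0x00FF00FF00FF00FF repeats a 16-bit unit
// whose low half is a run of ones, so it is encodable as an arm64 logical
// immediate; all-zeros and all-ones never are.
func exampleARM64bitcon() {
	if !isARM64bitcon(0x00FF00FF00FF00FF) {
		panic("repeating 0x00FF pattern should be encodable")
	}
	if isARM64bitcon(0) || isARM64bitcon(1<<64-1) {
		panic("0 and all-ones are not encodable")
	}
}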
  2444  
  2445  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
  2446  func isARM64addcon(v int64) bool {
  2447  	/* uimm12 or uimm24? */
  2448  	if v < 0 {
  2449  		return false
  2450  	}
  2451  	if (v & 0xFFF) == 0 {
  2452  		v >>= 12
  2453  	}
  2454  	return v <= 0xFFF
  2455  }
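
// exampleARM64addcon is an illustrative sketch added for exposition; it is not
// part of the original rewrite.go. ADD/SUB immediates are a 12-bit value,
// optionally shifted left by 12, so 0xFFF and 0xFFF000 are encodable but
// 0x1001 is not.
func exampleARM64addcon() {
	if !isARM64addcon(0xFFF) || !isARM64addcon(0xFFF000) {
		panic("uimm12 and uimm12<<12 should be encodable")
	}
	if isARM64addcon(0x1001) {
		panic("0x1001 needs more than 12 significant bits")
	}
}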
  2456  
  2457  // setPos sets the position of v to pos, then returns true.
  2458  // Useful for setting the result of a rewrite's position to
  2459  // something other than the default.
  2460  func setPos(v *Value, pos src.XPos) bool {
  2461  	v.Pos = pos
  2462  	return true
  2463  }
  2464  
  2465  // isNonNegative reports whether v is known to be greater or equal to zero.
  2466  // Note that this is pretty simplistic. The prove pass generates more detailed
  2467  // nonnegative information about values.
  2468  func isNonNegative(v *Value) bool {
  2469  	if !v.Type.IsInteger() {
  2470  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  2471  	}
  2472  	// TODO: return true if !v.Type.IsSigned()
  2473  	// SSA isn't type-safe enough to do that now (issue 37753).
  2474  	// The checks below depend only on the pattern of bits.
  2475  
  2476  	switch v.Op {
  2477  	case OpConst64:
  2478  		return v.AuxInt >= 0
  2479  
  2480  	case OpConst32:
  2481  		return int32(v.AuxInt) >= 0
  2482  
  2483  	case OpConst16:
  2484  		return int16(v.AuxInt) >= 0
  2485  
  2486  	case OpConst8:
  2487  		return int8(v.AuxInt) >= 0
  2488  
  2489  	case OpStringLen, OpSliceLen, OpSliceCap,
  2490  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  2491  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  2492  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  2493  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  2494  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  2495  		return true
  2496  
  2497  	case OpRsh64Ux64, OpRsh32Ux64:
  2498  		by := v.Args[1]
  2499  		return by.Op == OpConst64 && by.AuxInt > 0
  2500  
  2501  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  2502  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  2503  		return isNonNegative(v.Args[0])
  2504  
  2505  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2506  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  2507  
  2508  	case OpMod64, OpMod32, OpMod16, OpMod8,
  2509  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  2510  		OpOr64, OpOr32, OpOr16, OpOr8,
  2511  		OpXor64, OpXor32, OpXor16, OpXor8:
  2512  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  2513  
  2514  		// We could handle OpPhi here, but the improvements from doing
  2515  		// so are very minor, and it is neither simple nor cheap.
  2516  	}
  2517  	return false
  2518  }
  2519  
  2520  func rewriteStructLoad(v *Value) *Value {
  2521  	b := v.Block
  2522  	ptr := v.Args[0]
  2523  	mem := v.Args[1]
  2524  
  2525  	t := v.Type
  2526  	args := make([]*Value, t.NumFields())
  2527  	for i := range args {
  2528  		ft := t.FieldType(i)
  2529  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), ptr)
  2530  		args[i] = b.NewValue2(v.Pos, OpLoad, ft, addr, mem)
  2531  	}
  2532  
  2533  	v.reset(OpStructMake)
  2534  	v.AddArgs(args...)
  2535  	return v
  2536  }
  2537  
  2538  func rewriteStructStore(v *Value) *Value {
  2539  	b := v.Block
  2540  	dst := v.Args[0]
  2541  	x := v.Args[1]
  2542  	if x.Op != OpStructMake {
  2543  		base.Fatalf("invalid struct store: %v", x)
  2544  	}
  2545  	mem := v.Args[2]
  2546  
  2547  	t := x.Type
  2548  	for i, arg := range x.Args {
  2549  		ft := t.FieldType(i)
  2550  
  2551  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), dst)
  2552  		mem = b.NewValue3A(v.Pos, OpStore, types.TypeMem, typeToAux(ft), addr, arg, mem)
  2553  	}
  2554  
  2555  	return mem
  2556  }
  2557  
  2558  // isDirectType reports whether v represents a type
  2559  // (a *runtime._type) whose value is stored directly in an
  2560  // interface (i.e., is pointer or pointer-like).
  2561  func isDirectType(v *Value) bool {
  2562  	return isDirectType1(v)
  2563  }
  2564  
  2565  // v is a type
  2566  func isDirectType1(v *Value) bool {
  2567  	switch v.Op {
  2568  	case OpITab:
  2569  		return isDirectType2(v.Args[0])
  2570  	case OpAddr:
  2571  		lsym := v.Aux.(*obj.LSym)
  2572  		if lsym.Extra == nil {
  2573  			return false
  2574  		}
  2575  		if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2576  			return types.IsDirectIface(ti.Type.(*types.Type))
  2577  		}
  2578  	}
  2579  	return false
  2580  }
  2581  
  2582  // v is an empty interface
  2583  func isDirectType2(v *Value) bool {
  2584  	switch v.Op {
  2585  	case OpIMake:
  2586  		return isDirectType1(v.Args[0])
  2587  	}
  2588  	return false
  2589  }
  2590  
  2591  // isDirectIface reports whether v represents an itab
  2592  // (a *runtime._itab) for a type whose value is stored directly
  2593  // in an interface (i.e., is pointer or pointer-like).
  2594  func isDirectIface(v *Value) bool {
  2595  	return isDirectIface1(v, 9)
  2596  }
  2597  
  2598  // v is an itab
  2599  func isDirectIface1(v *Value, depth int) bool {
  2600  	if depth == 0 {
  2601  		return false
  2602  	}
  2603  	switch v.Op {
  2604  	case OpITab:
  2605  		return isDirectIface2(v.Args[0], depth-1)
  2606  	case OpAddr:
  2607  		lsym := v.Aux.(*obj.LSym)
  2608  		if lsym.Extra == nil {
  2609  			return false
  2610  		}
  2611  		if ii, ok := (*lsym.Extra).(*obj.ItabInfo); ok {
  2612  			return types.IsDirectIface(ii.Type.(*types.Type))
  2613  		}
  2614  	case OpConstNil:
  2615  		// We can treat this as direct, because if the itab is
  2616  		// nil, the data field must be nil also.
  2617  		return true
  2618  	}
  2619  	return false
  2620  }
  2621  
  2622  // v is an interface
  2623  func isDirectIface2(v *Value, depth int) bool {
  2624  	if depth == 0 {
  2625  		return false
  2626  	}
  2627  	switch v.Op {
  2628  	case OpIMake:
  2629  		return isDirectIface1(v.Args[0], depth-1)
  2630  	case OpPhi:
  2631  		for _, a := range v.Args {
  2632  			if !isDirectIface2(a, depth-1) {
  2633  				return false
  2634  			}
  2635  		}
  2636  		return true
  2637  	}
  2638  	return false
  2639  }
  2640  
  2641  func bitsAdd64(x, y, carry int64) (r struct{ sum, carry int64 }) {
  2642  	s, c := bits.Add64(uint64(x), uint64(y), uint64(carry))
  2643  	r.sum, r.carry = int64(s), int64(c)
  2644  	return
  2645  }
  2646  
  2647  func bitsMulU64(x, y int64) (r struct{ hi, lo int64 }) {
  2648  	hi, lo := bits.Mul64(uint64(x), uint64(y))
  2649  	r.hi, r.lo = int64(hi), int64(lo)
  2650  	return
  2651  }
  2652  func bitsMulU32(x, y int32) (r struct{ hi, lo int32 }) {
  2653  	hi, lo := bits.Mul32(uint32(x), uint32(y))
  2654  	r.hi, r.lo = int32(hi), int32(lo)
  2655  	return
  2656  }
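
// exampleBitsHelpers is an illustrative sketch added for exposition; it is not
// part of the original rewrite.go. The helpers just repackage math/bits
// results in signed form: adding 1 to all-ones wraps to 0 with a carry out,
// and multiplying 1<<32 by 1<<32 produces hi=1, lo=0.
func exampleBitsHelpers() {
	if r := bitsAdd64(-1, 1, 0); r.sum != 0 || r.carry != 1 {
		panic("unexpected add-with-carry result")
	}
	if r := bitsMulU64(1<<32, 1<<32); r.hi != 1 || r.lo != 0 {
		panic("unexpected widening multiply result")
	}
}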
  2657  
  2658  // flagify rewrites v which is (X ...) to (Select0 (Xflags ...)).
  2659  func flagify(v *Value) bool {
  2660  	var flagVersion Op
  2661  	switch v.Op {
  2662  	case OpAMD64ADDQconst:
  2663  		flagVersion = OpAMD64ADDQconstflags
  2664  	case OpAMD64ADDLconst:
  2665  		flagVersion = OpAMD64ADDLconstflags
  2666  	default:
  2667  		base.Fatalf("can't flagify op %s", v.Op)
  2668  	}
  2669  	inner := v.copyInto(v.Block)
  2670  	inner.Op = flagVersion
  2671  	inner.Type = types.NewTuple(v.Type, types.TypeFlags)
  2672  	v.reset(OpSelect0)
  2673  	v.AddArg(inner)
  2674  	return true
  2675  }
  2676  
  2677  // PanicBoundsC contains a constant for a bounds failure.
  2678  type PanicBoundsC struct {
  2679  	C int64
  2680  }
  2681  
  2682  // PanicBoundsCC contains 2 constants for a bounds failure.
  2683  type PanicBoundsCC struct {
  2684  	Cx int64
  2685  	Cy int64
  2686  }
  2687  
  2688  func (p PanicBoundsC) CanBeAnSSAAux() {
  2689  }
  2690  func (p PanicBoundsCC) CanBeAnSSAAux() {
  2691  }
  2692  
  2693  func auxToPanicBoundsC(i Aux) PanicBoundsC {
  2694  	return i.(PanicBoundsC)
  2695  }
  2696  func auxToPanicBoundsCC(i Aux) PanicBoundsCC {
  2697  	return i.(PanicBoundsCC)
  2698  }
  2699  func panicBoundsCToAux(p PanicBoundsC) Aux {
  2700  	return p
  2701  }
  2702  func panicBoundsCCToAux(p PanicBoundsCC) Aux {
  2703  	return p
  2704  }
  2705  
