prove.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/types"
     9  	"cmd/internal/src"
    10  	"fmt"
    11  	"math"
    12  	"math/bits"
    13  )
    14  
    15  type branch int
    16  
    17  const (
    18  	unknown branch = iota
    19  	positive
    20  	negative
    21  	// The outedges from a jump table are jumpTable0,
    22  	// jumpTable0+1, jumpTable0+2, etc. There could be an
    23  	// arbitrary number so we can't list them all here.
    24  	jumpTable0
    25  )
    26  
    27  func (b branch) String() string {
    28  	switch b {
    29  	case unknown:
    30  		return "unk"
    31  	case positive:
    32  		return "pos"
    33  	case negative:
    34  		return "neg"
    35  	default:
    36  		return fmt.Sprintf("jmp%d", b-jumpTable0)
    37  	}
    38  }
    39  
    40  // relation represents the set of possible relations between
    41  // pairs of variables (v, w). Without a priori knowledge the
    42  // mask is lt | eq | gt meaning v can be less than, equal to or
    43  // greater than w. When the execution path branches on the condition
    44  // `v op w` the set of relations is updated to exclude any
    45  // relation not possible due to `v op w` being true (or false).
    46  //
    47  // E.g.
    48  //
    49  //	r := relation(...)
    50  //
    51  //	if v < w {
    52  //	  newR := r & lt
    53  //	}
    54  //	if v >= w {
    55  //	  newR := r & (eq|gt)
    56  //	}
    57  //	if v != w {
    58  //	  newR := r & (lt|gt)
    59  //	}
    60  type relation uint
    61  
    62  const (
    63  	lt relation = 1 << iota
    64  	eq
    65  	gt
    66  )
    67  
    68  var relationStrings = [...]string{
    69  	0: "none", lt: "<", eq: "==", lt | eq: "<=",
    70  	gt: ">", gt | lt: "!=", gt | eq: ">=", gt | eq | lt: "any",
    71  }
    72  
    73  func (r relation) String() string {
    74  	if r < relation(len(relationStrings)) {
    75  		return relationStrings[r]
    76  	}
    77  	return fmt.Sprintf("relation(%d)", uint(r))
    78  }
    79  
    80  // domain represents the domain of a variable pair in which a set
    81  // of relations is known. For example, relations learned for unsigned
    82  // pairs cannot be transferred to signed pairs because the same bit
    83  // representation can mean something else.
    84  type domain uint
    85  
    86  const (
    87  	signed domain = 1 << iota
    88  	unsigned
    89  	pointer
    90  	boolean
    91  )
    92  
    93  var domainStrings = [...]string{
    94  	"signed", "unsigned", "pointer", "boolean",
    95  }
    96  
    97  func (d domain) String() string {
    98  	s := ""
    99  	for i, ds := range domainStrings {
   100  		if d&(1<<uint(i)) != 0 {
   101  			if len(s) != 0 {
   102  				s += "|"
   103  			}
   104  			s += ds
   105  			d &^= 1 << uint(i)
   106  		}
   107  	}
   108  	if d != 0 {
   109  		if len(s) != 0 {
   110  			s += "|"
   111  		}
   112  		s += fmt.Sprintf("0x%x", uint(d))
   113  	}
   114  	return s
   115  }
   116  
   117  // a limit records known upper and lower bounds for a value.
   118  //
   119  // If we have min>max or umin>umax, then this limit is
   120  // called "unsatisfiable". When we encounter such a limit, we
   121  // know that any code for which that limit applies is unreachable.
   122  // We don't particularly care how unsatisfiable limits propagate,
   123  // including becoming satisfiable, because any optimization
   124  // decisions based on those limits only apply to unreachable code.
   125  type limit struct {
   126  	min, max   int64  // min <= value <= max, signed
   127  	umin, umax uint64 // umin <= value <= umax, unsigned
   128  	// For booleans, we use 0==false, 1==true for both ranges
   129  	// For pointers, we use 0,0,0,0 for nil and minInt64,maxInt64,1,maxUint64 for nonnil
   130  }
   131  
   132  func (l limit) String() string {
   133  	return fmt.Sprintf("sm,SM,um,UM=%d,%d,%d,%d", l.min, l.max, l.umin, l.umax)
   134  }
   135  
   136  func (l limit) intersect(l2 limit) limit {
   137  	l.min = max(l.min, l2.min)
   138  	l.umin = max(l.umin, l2.umin)
   139  	l.max = min(l.max, l2.max)
   140  	l.umax = min(l.umax, l2.umax)
   141  	return l
   142  }
   143  
   144  func (l limit) signedMin(m int64) limit {
   145  	l.min = max(l.min, m)
   146  	return l
   147  }
   148  func (l limit) signedMax(m int64) limit {
   149  	l.max = min(l.max, m)
   150  	return l
   151  }
   152  func (l limit) signedMinMax(minimum, maximum int64) limit {
   153  	l.min = max(l.min, minimum)
   154  	l.max = min(l.max, maximum)
   155  	return l
   156  }
   157  
   158  func (l limit) unsignedMin(m uint64) limit {
   159  	l.umin = max(l.umin, m)
   160  	return l
   161  }
   162  func (l limit) unsignedMax(m uint64) limit {
   163  	l.umax = min(l.umax, m)
   164  	return l
   165  }
   166  func (l limit) unsignedMinMax(minimum, maximum uint64) limit {
   167  	l.umin = max(l.umin, minimum)
   168  	l.umax = min(l.umax, maximum)
   169  	return l
   170  }
   171  
   172  func (l limit) nonzero() bool {
   173  	return l.min > 0 || l.umin > 0 || l.max < 0
   174  }
   175  func (l limit) nonnegative() bool {
   176  	return l.min >= 0
   177  }
   178  func (l limit) unsat() bool {
   179  	return l.min > l.max || l.umin > l.umax
   180  }
   181  
   182  // If x and y can add without overflow or underflow
   183  // (using b bits), safeAdd returns x+y, true.
   184  // Otherwise, returns 0, false.
   185  func safeAdd(x, y int64, b uint) (int64, bool) {
   186  	s := x + y
   187  	if x >= 0 && y >= 0 && s < 0 {
   188  		return 0, false // 64-bit overflow
   189  	}
   190  	if x < 0 && y < 0 && s >= 0 {
   191  		return 0, false // 64-bit underflow
   192  	}
   193  	if !fitsInBits(s, b) {
   194  		return 0, false
   195  	}
   196  	return s, true
   197  }
   198  
   199  // same as safeAdd for unsigned arithmetic.
   200  func safeAddU(x, y uint64, b uint) (uint64, bool) {
   201  	s := x + y
   202  	if s < x || s < y {
   203  		return 0, false // 64-bit overflow
   204  	}
   205  	if !fitsInBitsU(s, b) {
   206  		return 0, false
   207  	}
   208  	return s, true
   209  }
   210  
   211  // same as safeAdd but for subtraction.
   212  func safeSub(x, y int64, b uint) (int64, bool) {
   213  	if y == math.MinInt64 {
   214  		if x == math.MaxInt64 {
   215  			return 0, false // 64-bit overflow
   216  		}
   217  		x++
   218  		y++
   219  	}
   220  	return safeAdd(x, -y, b)
   221  }
   222  
   223  // same as safeAddU but for subtraction.
   224  func safeSubU(x, y uint64, b uint) (uint64, bool) {
   225  	if x < y {
   226  		return 0, false // 64-bit underflow
   227  	}
   228  	s := x - y
   229  	if !fitsInBitsU(s, b) {
   230  		return 0, false
   231  	}
   232  	return s, true
   233  }
   234  
   235  // fitsInBits reports whether x fits in b bits (signed).
   236  func fitsInBits(x int64, b uint) bool {
   237  	if b == 64 {
   238  		return true
   239  	}
   240  	m := int64(-1) << (b - 1)
   241  	M := -m - 1
   242  	return x >= m && x <= M
   243  }
   244  
   245  // fitsInBitsU reports whether x fits in b bits (unsigned).
   246  func fitsInBitsU(x uint64, b uint) bool {
   247  	return x>>b == 0
   248  }
   249  
   250  // add returns the limit obtained by adding a value with limit l
   251  // to a value with limit l2. The result must fit in b bits.
   252  func (l limit) add(l2 limit, b uint) limit {
   253  	r := noLimit
   254  	min, minOk := safeAdd(l.min, l2.min, b)
   255  	max, maxOk := safeAdd(l.max, l2.max, b)
   256  	if minOk && maxOk {
   257  		r.min = min
   258  		r.max = max
   259  	}
   260  	umin, uminOk := safeAddU(l.umin, l2.umin, b)
   261  	umax, umaxOk := safeAddU(l.umax, l2.umax, b)
   262  	if uminOk && umaxOk {
   263  		r.umin = umin
   264  		r.umax = umax
   265  	}
   266  	return r
   267  }
   268  
   269  // same as add but for subtraction.
   270  func (l limit) sub(l2 limit, b uint) limit {
   271  	r := noLimit
   272  	min, minOk := safeSub(l.min, l2.max, b)
   273  	max, maxOk := safeSub(l.max, l2.min, b)
   274  	if minOk && maxOk {
   275  		r.min = min
   276  		r.max = max
   277  	}
   278  	umin, uminOk := safeSubU(l.umin, l2.umax, b)
   279  	umax, umaxOk := safeSubU(l.umax, l2.umin, b)
   280  	if uminOk && umaxOk {
   281  		r.umin = umin
   282  		r.umax = umax
   283  	}
   284  	return r
   285  }
   286  
   287  // same as add but for multiplication.
   288  func (l limit) mul(l2 limit, b uint) limit {
   289  	r := noLimit
   290  	umaxhi, umaxlo := bits.Mul64(l.umax, l2.umax)
   291  	if umaxhi == 0 && fitsInBitsU(umaxlo, b) {
   292  		r.umax = umaxlo
   293  		r.umin = l.umin * l2.umin
   294  		// Note: if the code containing this multiply is
   295  		// unreachable, then we may have umin>umax, and this
   296  		// multiply may overflow.  But that's ok for
   297  		// unreachable code. If this code is reachable, we
   298  		// know umin<=umax, so this multiply will not overflow
   299  		// because the max multiply didn't.
   300  	}
   301  	// Signed is harder, so don't bother. The only useful
   302  	// case is when we know both multiplicands are nonnegative,
   303  	// but that case is handled above because we would have then
   304  	// previously propagated signed info to the unsigned domain,
   305  	// and will propagate it back after the multiply.
   306  	return r
   307  }
   308  
   309  // Similar to add, but compute 1 << l if it fits without overflow in b bits.
   310  func (l limit) exp2(b uint) limit {
   311  	r := noLimit
   312  	if l.umax < uint64(b) {
   313  		r.umin = 1 << l.umin
   314  		r.umax = 1 << l.umax
   315  		// Same as above in mul, signed<->unsigned propagation
   316  		// will handle the signed case for us.
   317  	}
   318  	return r
   319  }
   320  
   321  // Similar to add, but computes the complement of the limit for bitsize b.
   322  func (l limit) com(b uint) limit {
   323  	switch b {
   324  	case 64:
   325  		return limit{
   326  			min:  ^l.max,
   327  			max:  ^l.min,
   328  			umin: ^l.umax,
   329  			umax: ^l.umin,
   330  		}
   331  	case 32:
   332  		return limit{
   333  			min:  int64(^int32(l.max)),
   334  			max:  int64(^int32(l.min)),
   335  			umin: uint64(^uint32(l.umax)),
   336  			umax: uint64(^uint32(l.umin)),
   337  		}
   338  	case 16:
   339  		return limit{
   340  			min:  int64(^int16(l.max)),
   341  			max:  int64(^int16(l.min)),
   342  			umin: uint64(^uint16(l.umax)),
   343  			umax: uint64(^uint16(l.umin)),
   344  		}
   345  	case 8:
   346  		return limit{
   347  			min:  int64(^int8(l.max)),
   348  			max:  int64(^int8(l.min)),
   349  			umin: uint64(^uint8(l.umax)),
   350  			umax: uint64(^uint8(l.umin)),
   351  		}
   352  	default:
   353  		panic("unreachable")
   354  	}
   355  }
   356  
   357  var noLimit = limit{math.MinInt64, math.MaxInt64, 0, math.MaxUint64}
   358  
   359  // a limitFact is a limit known for a particular value.
   360  type limitFact struct {
   361  	vid   ID
   362  	limit limit
   363  }
   364  
   365  // An ordering encodes facts like v < w.
   366  type ordering struct {
   367  	next *ordering // linked list of all known orderings for v.
   368  	// Note: v is implicit here, determined by which linked list it is in.
   369  	w *Value
   370  	d domain
   371  	r relation // one of ==,!=,<,<=,>,>=
   372  	// if d is boolean or pointer, r can only be ==, !=
   373  }
   374  
   375  // factsTable keeps track of relations between pairs of values.
   376  //
   377  // The fact table logic is sound, but incomplete. Outside of a few
   378  // special cases, it performs no deduction or arithmetic. While there
   379  // are known decision procedures for this, the ad hoc approach taken
   380  // by the facts table is effective for real code while remaining very
   381  // efficient.
   382  type factsTable struct {
   383  	// unsat is true if facts contains a contradiction.
   384  	//
   385  	// Note that the factsTable logic is incomplete, so if unsat
   386  	// is false, the assertions in factsTable could be satisfiable
   387  	// *or* unsatisfiable.
   388  	unsat      bool // true if facts contains a contradiction
   389  	unsatDepth int  // number of unsat checkpoints
   390  
   391  	// order* is a couple of partial order sets that record information
   392  	// about relations between SSA values in the signed and unsigned
   393  	// domain.
   394  	orderS *poset
   395  	orderU *poset
   396  
   397  	// orderings contains a list of known orderings between values.
   398  	// These lists are indexed by v.ID.
   399  	// We do not record transitive orderings. Only explicitly learned
   400  	// orderings are recorded. Transitive orderings can be obtained
   401  	// by walking along the individual orderings.
   402  	orderings map[ID]*ordering
   403  	// stack of IDs which have had an entry added in orderings.
   404  	// In addition, ID==0 are checkpoint markers.
   405  	orderingsStack []ID
   406  	orderingCache  *ordering // unused ordering records
   407  
   408  	// known lower and upper constant bounds on individual values.
   409  	limits       []limit     // indexed by value ID
   410  	limitStack   []limitFact // previous entries
   411  	recurseCheck []bool      // recursion detector for limit propagation
   412  
   413  	// For each slice s, a map from s to a len(s)/cap(s) value (if any)
   414  	// TODO: check if there are cases that matter where we have
   415  	// more than one len(s) for a slice. We could keep a list if necessary.
   416  	lens map[ID]*Value
   417  	caps map[ID]*Value
   418  }
   419  
   420  // checkpointBound is an invalid value used for checkpointing
   421  // and restoring factsTable.
   422  var checkpointBound = limitFact{}
   423  
   424  func newFactsTable(f *Func) *factsTable {
   425  	ft := &factsTable{}
   426  	ft.orderS = f.newPoset()
   427  	ft.orderU = f.newPoset()
   428  	ft.orderS.SetUnsigned(false)
   429  	ft.orderU.SetUnsigned(true)
   430  	ft.orderings = make(map[ID]*ordering)
   431  	ft.limits = f.Cache.allocLimitSlice(f.NumValues())
   432  	for _, b := range f.Blocks {
   433  		for _, v := range b.Values {
   434  			ft.limits[v.ID] = initLimit(v)
   435  		}
   436  	}
   437  	ft.limitStack = make([]limitFact, 4)
   438  	ft.recurseCheck = f.Cache.allocBoolSlice(f.NumValues())
   439  	return ft
   440  }
   441  
   442  // initLimitForNewValue initializes the limits for newly created values,
   443  // possibly needing to expand the limits slice. Currently used by
   444  // simplifyBlock when certain provably constant results are folded.
   445  func (ft *factsTable) initLimitForNewValue(v *Value) {
   446  	if int(v.ID) >= len(ft.limits) {
   447  		f := v.Block.Func
   448  		n := f.NumValues()
   449  		if cap(ft.limits) >= n {
   450  			ft.limits = ft.limits[:n]
   451  		} else {
   452  			old := ft.limits
   453  			ft.limits = f.Cache.allocLimitSlice(n)
   454  			copy(ft.limits, old)
   455  			f.Cache.freeLimitSlice(old)
   456  		}
   457  	}
   458  	ft.limits[v.ID] = initLimit(v)
   459  }
   460  
   461  // signedMin records the fact that we know v is at least
   462  // min in the signed domain.
   463  func (ft *factsTable) signedMin(v *Value, min int64) bool {
   464  	return ft.newLimit(v, limit{min: min, max: math.MaxInt64, umin: 0, umax: math.MaxUint64})
   465  }
   466  
   467  // signedMax records the fact that we know v is at most
   468  // max in the signed domain.
   469  func (ft *factsTable) signedMax(v *Value, max int64) bool {
   470  	return ft.newLimit(v, limit{min: math.MinInt64, max: max, umin: 0, umax: math.MaxUint64})
   471  }
   472  func (ft *factsTable) signedMinMax(v *Value, min, max int64) bool {
   473  	return ft.newLimit(v, limit{min: min, max: max, umin: 0, umax: math.MaxUint64})
   474  }
   475  
   476  // setNonNegative records the fact that v is known to be non-negative.
   477  func (ft *factsTable) setNonNegative(v *Value) bool {
   478  	return ft.signedMin(v, 0)
   479  }
   480  
   481  // unsignedMin records the fact that we know v is at least
   482  // min in the unsigned domain.
   483  func (ft *factsTable) unsignedMin(v *Value, min uint64) bool {
   484  	return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: math.MaxUint64})
   485  }
   486  
   487  // unsignedMax records the fact that we know v is at most
   488  // max in the unsigned domain.
   489  func (ft *factsTable) unsignedMax(v *Value, max uint64) bool {
   490  	return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: 0, umax: max})
   491  }
   492  func (ft *factsTable) unsignedMinMax(v *Value, min, max uint64) bool {
   493  	return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: max})
   494  }
   495  
   496  func (ft *factsTable) booleanFalse(v *Value) bool {
   497  	return ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
   498  }
   499  func (ft *factsTable) booleanTrue(v *Value) bool {
   500  	return ft.newLimit(v, limit{min: 1, max: 1, umin: 1, umax: 1})
   501  }
   502  func (ft *factsTable) pointerNil(v *Value) bool {
   503  	return ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
   504  }
   505  func (ft *factsTable) pointerNonNil(v *Value) bool {
   506  	l := noLimit
   507  	l.umin = 1
   508  	return ft.newLimit(v, l)
   509  }
   510  
   511  // newLimit adds new limiting information for v.
   512  // Returns true if the new limit added any new information.
   513  func (ft *factsTable) newLimit(v *Value, newLim limit) bool {
   514  	oldLim := ft.limits[v.ID]
   515  
   516  	// Merge old and new information.
   517  	lim := oldLim.intersect(newLim)
   518  
   519  	// signed <-> unsigned propagation
   520  	if lim.min >= 0 {
   521  		lim = lim.unsignedMinMax(uint64(lim.min), uint64(lim.max))
   522  	}
   523  	if fitsInBitsU(lim.umax, uint(8*v.Type.Size()-1)) {
   524  		lim = lim.signedMinMax(int64(lim.umin), int64(lim.umax))
   525  	}
   526  
   527  	if lim == oldLim {
   528  		return false // nothing new to record
   529  	}
   530  
   531  	if lim.unsat() {
   532  		r := !ft.unsat
   533  		ft.unsat = true
   534  		return r
   535  	}
   536  
   537  	// Check for recursion. This normally happens because in unsatisfiable
   538  	// cases we have a < b < a, and every update to a's limits returns
   539  	// here again with the limit increased by 2.
   540  	// Normally this is caught early by the orderS/orderU posets, but in
   541  	// cases where the comparisons jump between signed and unsigned domains,
   542  	// the posets will not notice.
   543  	if ft.recurseCheck[v.ID] {
   544  		// This should only happen for unsatisfiable cases. TODO: check
   545  		return false
   546  	}
   547  	ft.recurseCheck[v.ID] = true
   548  	defer func() {
   549  		ft.recurseCheck[v.ID] = false
   550  	}()
   551  
   552  	// Record undo information.
   553  	ft.limitStack = append(ft.limitStack, limitFact{v.ID, oldLim})
   554  	// Record new information.
   555  	ft.limits[v.ID] = lim
   556  	if v.Block.Func.pass.debug > 2 {
   557  		// TODO: pos is probably wrong. This is the position where v is defined,
   558  		// not the position where we learned the fact about it (which was
   559  		// probably some subsequent compare+branch).
   560  		v.Block.Func.Warnl(v.Pos, "new limit %s %s unsat=%v", v, lim.String(), ft.unsat)
   561  	}
   562  
   563  	// Propagate this new constant range to other values
   564  	// that we know are ordered with respect to this one.
   565  	// Note overflow/underflow in the arithmetic below is ok,
   566  	// it will just lead to imprecision (undetected unsatisfiability).
   567  	for o := ft.orderings[v.ID]; o != nil; o = o.next {
   568  		switch o.d {
   569  		case signed:
   570  			switch o.r {
   571  			case eq: // v == w
   572  				ft.signedMinMax(o.w, lim.min, lim.max)
   573  			case lt | eq: // v <= w
   574  				ft.signedMin(o.w, lim.min)
   575  			case lt: // v < w
   576  				ft.signedMin(o.w, lim.min+1)
   577  			case gt | eq: // v >= w
   578  				ft.signedMax(o.w, lim.max)
   579  			case gt: // v > w
   580  				ft.signedMax(o.w, lim.max-1)
   581  			case lt | gt: // v != w
   582  				if lim.min == lim.max { // v is a constant
   583  					c := lim.min
   584  					if ft.limits[o.w.ID].min == c {
   585  						ft.signedMin(o.w, c+1)
   586  					}
   587  					if ft.limits[o.w.ID].max == c {
   588  						ft.signedMax(o.w, c-1)
   589  					}
   590  				}
   591  			}
   592  		case unsigned:
   593  			switch o.r {
   594  			case eq: // v == w
   595  				ft.unsignedMinMax(o.w, lim.umin, lim.umax)
   596  			case lt | eq: // v <= w
   597  				ft.unsignedMin(o.w, lim.umin)
   598  			case lt: // v < w
   599  				ft.unsignedMin(o.w, lim.umin+1)
   600  			case gt | eq: // v >= w
   601  				ft.unsignedMax(o.w, lim.umax)
   602  			case gt: // v > w
   603  				ft.unsignedMax(o.w, lim.umax-1)
   604  			case lt | gt: // v != w
   605  				if lim.umin == lim.umax { // v is a constant
   606  					c := lim.umin
   607  					if ft.limits[o.w.ID].umin == c {
   608  						ft.unsignedMin(o.w, c+1)
   609  					}
   610  					if ft.limits[o.w.ID].umax == c {
   611  						ft.unsignedMax(o.w, c-1)
   612  					}
   613  				}
   614  			}
   615  		case boolean:
   616  			switch o.r {
   617  			case eq:
   618  				if lim.min == 0 && lim.max == 0 { // constant false
   619  					ft.booleanFalse(o.w)
   620  				}
   621  				if lim.min == 1 && lim.max == 1 { // constant true
   622  					ft.booleanTrue(o.w)
   623  				}
   624  			case lt | gt:
   625  				if lim.min == 0 && lim.max == 0 { // constant false
   626  					ft.booleanTrue(o.w)
   627  				}
   628  				if lim.min == 1 && lim.max == 1 { // constant true
   629  					ft.booleanFalse(o.w)
   630  				}
   631  			}
   632  		case pointer:
   633  			switch o.r {
   634  			case eq:
   635  				if lim.umax == 0 { // nil
   636  					ft.pointerNil(o.w)
   637  				}
   638  				if lim.umin > 0 { // non-nil
   639  					ft.pointerNonNil(o.w)
   640  				}
   641  			case lt | gt:
   642  				if lim.umax == 0 { // nil
   643  					ft.pointerNonNil(o.w)
   644  				}
   645  				// note: not equal to non-nil doesn't tell us anything.
   646  			}
   647  		}
   648  	}
   649  
   650  	// If this is new known constant for a boolean value,
   651  	// extract relation between its args. For example, if
   652  	// We learn v is false, and v is defined as a<b, then we learn a>=b.
   653  	if v.Type.IsBoolean() {
   654  		// If we reach here, it is because we have a more restrictive
   655  		// value for v than the default. The only two such values
   656  		// are constant true or constant false.
   657  		if lim.min != lim.max {
   658  			v.Block.Func.Fatalf("boolean not constant %v", v)
   659  		}
   660  		isTrue := lim.min == 1
   661  		if dr, ok := domainRelationTable[v.Op]; ok && v.Op != OpIsInBounds && v.Op != OpIsSliceInBounds {
   662  			d := dr.d
   663  			r := dr.r
   664  			if d == signed && ft.isNonNegative(v.Args[0]) && ft.isNonNegative(v.Args[1]) {
   665  				d |= unsigned
   666  			}
   667  			if !isTrue {
   668  				r ^= lt | gt | eq
   669  			}
   670  			// TODO: v.Block is wrong?
   671  			addRestrictions(v.Block, ft, d, v.Args[0], v.Args[1], r)
   672  		}
   673  		switch v.Op {
   674  		case OpIsNonNil:
   675  			if isTrue {
   676  				ft.pointerNonNil(v.Args[0])
   677  			} else {
   678  				ft.pointerNil(v.Args[0])
   679  			}
   680  		case OpIsInBounds, OpIsSliceInBounds:
   681  			// 0 <= a0 < a1 (or 0 <= a0 <= a1)
   682  			r := lt
   683  			if v.Op == OpIsSliceInBounds {
   684  				r |= eq
   685  			}
   686  			if isTrue {
   687  				// On the positive branch, we learn:
   688  				//   signed: 0 <= a0 < a1 (or 0 <= a0 <= a1)
   689  				//   unsigned:    a0 < a1 (or a0 <= a1)
   690  				ft.setNonNegative(v.Args[0])
   691  				ft.update(v.Block, v.Args[0], v.Args[1], signed, r)
   692  				ft.update(v.Block, v.Args[0], v.Args[1], unsigned, r)
   693  			} else {
   694  				// On the negative branch, we learn (0 > a0 ||
   695  				// a0 >= a1). In the unsigned domain, this is
   696  				// simply a0 >= a1 (which is the reverse of the
   697  				// positive branch, so nothing surprising).
   698  				// But in the signed domain, we can't express the ||
   699  				// condition, so check if a0 is non-negative instead,
   700  				// to be able to learn something.
   701  				r ^= lt | gt | eq // >= (index) or > (slice)
   702  				if ft.isNonNegative(v.Args[0]) {
   703  					ft.update(v.Block, v.Args[0], v.Args[1], signed, r)
   704  				}
   705  				ft.update(v.Block, v.Args[0], v.Args[1], unsigned, r)
   706  				// TODO: v.Block is wrong here
   707  			}
   708  		}
   709  	}
   710  
   711  	return true
   712  }
   713  
   714  func (ft *factsTable) addOrdering(v, w *Value, d domain, r relation) {
   715  	o := ft.orderingCache
   716  	if o == nil {
   717  		o = &ordering{}
   718  	} else {
   719  		ft.orderingCache = o.next
   720  	}
   721  	o.w = w
   722  	o.d = d
   723  	o.r = r
   724  	o.next = ft.orderings[v.ID]
   725  	ft.orderings[v.ID] = o
   726  	ft.orderingsStack = append(ft.orderingsStack, v.ID)
   727  }
   728  
   729  // update updates the set of relations between v and w in domain d
   730  // restricting it to r.
   731  func (ft *factsTable) update(parent *Block, v, w *Value, d domain, r relation) {
   732  	if parent.Func.pass.debug > 2 {
   733  		parent.Func.Warnl(parent.Pos, "parent=%s, update %s %s %s", parent, v, w, r)
   734  	}
   735  	// No need to do anything else if we already found unsat.
   736  	if ft.unsat {
   737  		return
   738  	}
   739  
   740  	// Self-fact. It's wasteful to register it into the facts
   741  	// table, so just note whether it's satisfiable
   742  	if v == w {
   743  		if r&eq == 0 {
   744  			ft.unsat = true
   745  		}
   746  		return
   747  	}
   748  
   749  	if d == signed || d == unsigned {
   750  		var ok bool
   751  		order := ft.orderS
   752  		if d == unsigned {
   753  			order = ft.orderU
   754  		}
   755  		switch r {
   756  		case lt:
   757  			ok = order.SetOrder(v, w)
   758  		case gt:
   759  			ok = order.SetOrder(w, v)
   760  		case lt | eq:
   761  			ok = order.SetOrderOrEqual(v, w)
   762  		case gt | eq:
   763  			ok = order.SetOrderOrEqual(w, v)
   764  		case eq:
   765  			ok = order.SetEqual(v, w)
   766  		case lt | gt:
   767  			ok = order.SetNonEqual(v, w)
   768  		default:
   769  			panic("unknown relation")
   770  		}
   771  		ft.addOrdering(v, w, d, r)
   772  		ft.addOrdering(w, v, d, reverseBits[r])
   773  
   774  		if !ok {
   775  			if parent.Func.pass.debug > 2 {
   776  				parent.Func.Warnl(parent.Pos, "unsat %s %s %s", v, w, r)
   777  			}
   778  			ft.unsat = true
   779  			return
   780  		}
   781  	}
   782  	if d == boolean || d == pointer {
   783  		for o := ft.orderings[v.ID]; o != nil; o = o.next {
   784  			if o.d == d && o.w == w {
   785  				// We already know a relationship between v and w.
   786  				// Either it is a duplicate, or it is a contradiction,
   787  				// as we only allow eq and lt|gt for these domains,
   788  				if o.r != r {
   789  					ft.unsat = true
   790  				}
   791  				return
   792  			}
   793  		}
   794  		// TODO: this does not do transitive equality.
   795  		// We could use a poset like above, but somewhat degenerate (==,!= only).
   796  		ft.addOrdering(v, w, d, r)
   797  		ft.addOrdering(w, v, d, r) // note: reverseBits unnecessary for eq and lt|gt.
   798  	}
   799  
   800  	// Extract new constant limits based on the comparison.
   801  	vLimit := ft.limits[v.ID]
   802  	wLimit := ft.limits[w.ID]
   803  	// Note: all the +1/-1 below could overflow/underflow. Either will
   804  	// still generate correct results, it will just lead to imprecision.
   805  	// In fact if there is overflow/underflow, the corresponding
   806  	// code is unreachable because the known range is outside the range
   807  	// of the value's type.
   808  	switch d {
   809  	case signed:
   810  		switch r {
   811  		case eq: // v == w
   812  			ft.signedMinMax(v, wLimit.min, wLimit.max)
   813  			ft.signedMinMax(w, vLimit.min, vLimit.max)
   814  		case lt: // v < w
   815  			ft.signedMax(v, wLimit.max-1)
   816  			ft.signedMin(w, vLimit.min+1)
   817  		case lt | eq: // v <= w
   818  			ft.signedMax(v, wLimit.max)
   819  			ft.signedMin(w, vLimit.min)
   820  		case gt: // v > w
   821  			ft.signedMin(v, wLimit.min+1)
   822  			ft.signedMax(w, vLimit.max-1)
   823  		case gt | eq: // v >= w
   824  			ft.signedMin(v, wLimit.min)
   825  			ft.signedMax(w, vLimit.max)
   826  		case lt | gt: // v != w
   827  			if vLimit.min == vLimit.max { // v is a constant
   828  				c := vLimit.min
   829  				if wLimit.min == c {
   830  					ft.signedMin(w, c+1)
   831  				}
   832  				if wLimit.max == c {
   833  					ft.signedMax(w, c-1)
   834  				}
   835  			}
   836  			if wLimit.min == wLimit.max { // w is a constant
   837  				c := wLimit.min
   838  				if vLimit.min == c {
   839  					ft.signedMin(v, c+1)
   840  				}
   841  				if vLimit.max == c {
   842  					ft.signedMax(v, c-1)
   843  				}
   844  			}
   845  		}
   846  	case unsigned:
   847  		switch r {
   848  		case eq: // v == w
   849  			ft.unsignedMinMax(v, wLimit.umin, wLimit.umax)
   850  			ft.unsignedMinMax(w, vLimit.umin, vLimit.umax)
   851  		case lt: // v < w
   852  			ft.unsignedMax(v, wLimit.umax-1)
   853  			ft.unsignedMin(w, vLimit.umin+1)
   854  		case lt | eq: // v <= w
   855  			ft.unsignedMax(v, wLimit.umax)
   856  			ft.unsignedMin(w, vLimit.umin)
   857  		case gt: // v > w
   858  			ft.unsignedMin(v, wLimit.umin+1)
   859  			ft.unsignedMax(w, vLimit.umax-1)
   860  		case gt | eq: // v >= w
   861  			ft.unsignedMin(v, wLimit.umin)
   862  			ft.unsignedMax(w, vLimit.umax)
   863  		case lt | gt: // v != w
   864  			if vLimit.umin == vLimit.umax { // v is a constant
   865  				c := vLimit.umin
   866  				if wLimit.umin == c {
   867  					ft.unsignedMin(w, c+1)
   868  				}
   869  				if wLimit.umax == c {
   870  					ft.unsignedMax(w, c-1)
   871  				}
   872  			}
   873  			if wLimit.umin == wLimit.umax { // w is a constant
   874  				c := wLimit.umin
   875  				if vLimit.umin == c {
   876  					ft.unsignedMin(v, c+1)
   877  				}
   878  				if vLimit.umax == c {
   879  					ft.unsignedMax(v, c-1)
   880  				}
   881  			}
   882  		}
   883  	case boolean:
   884  		switch r {
   885  		case eq: // v == w
   886  			if vLimit.min == 1 { // v is true
   887  				ft.booleanTrue(w)
   888  			}
   889  			if vLimit.max == 0 { // v is false
   890  				ft.booleanFalse(w)
   891  			}
   892  			if wLimit.min == 1 { // w is true
   893  				ft.booleanTrue(v)
   894  			}
   895  			if wLimit.max == 0 { // w is false
   896  				ft.booleanFalse(v)
   897  			}
   898  		case lt | gt: // v != w
   899  			if vLimit.min == 1 { // v is true
   900  				ft.booleanFalse(w)
   901  			}
   902  			if vLimit.max == 0 { // v is false
   903  				ft.booleanTrue(w)
   904  			}
   905  			if wLimit.min == 1 { // w is true
   906  				ft.booleanFalse(v)
   907  			}
   908  			if wLimit.max == 0 { // w is false
   909  				ft.booleanTrue(v)
   910  			}
   911  		}
   912  	case pointer:
   913  		switch r {
   914  		case eq: // v == w
   915  			if vLimit.umax == 0 { // v is nil
   916  				ft.pointerNil(w)
   917  			}
   918  			if vLimit.umin > 0 { // v is non-nil
   919  				ft.pointerNonNil(w)
   920  			}
   921  			if wLimit.umax == 0 { // w is nil
   922  				ft.pointerNil(v)
   923  			}
   924  			if wLimit.umin > 0 { // w is non-nil
   925  				ft.pointerNonNil(v)
   926  			}
   927  		case lt | gt: // v != w
   928  			if vLimit.umax == 0 { // v is nil
   929  				ft.pointerNonNil(w)
   930  			}
   931  			if wLimit.umax == 0 { // w is nil
   932  				ft.pointerNonNil(v)
   933  			}
   934  			// Note: the other direction doesn't work.
   935  			// Being not equal to a non-nil pointer doesn't
   936  			// make you (necessarily) a nil pointer.
   937  		}
   938  	}
   939  
   940  	// Derived facts below here are only about numbers.
   941  	if d != signed && d != unsigned {
   942  		return
   943  	}
   944  
   945  	// Additional facts we know given the relationship between len and cap.
   946  	//
   947  	// TODO: Since prove now derives transitive relations, it
   948  	// should be sufficient to learn that len(w) <= cap(w) at the
   949  	// beginning of prove where we look for all len/cap ops.
   950  	if v.Op == OpSliceLen && r&lt == 0 && ft.caps[v.Args[0].ID] != nil {
   951  		// len(s) > w implies cap(s) > w
   952  		// len(s) >= w implies cap(s) >= w
   953  		// len(s) == w implies cap(s) >= w
   954  		ft.update(parent, ft.caps[v.Args[0].ID], w, d, r|gt)
   955  	}
   956  	if w.Op == OpSliceLen && r&gt == 0 && ft.caps[w.Args[0].ID] != nil {
   957  		// same, length on the RHS.
   958  		ft.update(parent, v, ft.caps[w.Args[0].ID], d, r|lt)
   959  	}
   960  	if v.Op == OpSliceCap && r&gt == 0 && ft.lens[v.Args[0].ID] != nil {
   961  		// cap(s) < w implies len(s) < w
   962  		// cap(s) <= w implies len(s) <= w
   963  		// cap(s) == w implies len(s) <= w
   964  		ft.update(parent, ft.lens[v.Args[0].ID], w, d, r|lt)
   965  	}
   966  	if w.Op == OpSliceCap && r&lt == 0 && ft.lens[w.Args[0].ID] != nil {
   967  		// same, capacity on the RHS.
   968  		ft.update(parent, v, ft.lens[w.Args[0].ID], d, r|gt)
   969  	}
   970  
   971  	// Process fence-post implications.
   972  	//
   973  	// First, make the condition > or >=.
   974  	if r == lt || r == lt|eq {
   975  		v, w = w, v
   976  		r = reverseBits[r]
   977  	}
   978  	switch r {
   979  	case gt:
   980  		if x, delta := isConstDelta(v); x != nil && delta == 1 {
   981  			// x+1 > w  ⇒  x >= w
   982  			//
   983  			// This is useful for eliminating the
   984  			// growslice branch of append.
   985  			ft.update(parent, x, w, d, gt|eq)
   986  		} else if x, delta := isConstDelta(w); x != nil && delta == -1 {
   987  			// v > x-1  ⇒  v >= x
   988  			ft.update(parent, v, x, d, gt|eq)
   989  		}
   990  	case gt | eq:
   991  		if x, delta := isConstDelta(v); x != nil && delta == -1 {
   992  			// x-1 >= w && x > min  ⇒  x > w
   993  			//
   994  			// Useful for i > 0; s[i-1].
   995  			lim := ft.limits[x.ID]
   996  			if (d == signed && lim.min > opMin[v.Op]) || (d == unsigned && lim.umin > 0) {
   997  				ft.update(parent, x, w, d, gt)
   998  			}
   999  		} else if x, delta := isConstDelta(w); x != nil && delta == 1 {
  1000  			// v >= x+1 && x < max  ⇒  v > x
  1001  			lim := ft.limits[x.ID]
  1002  			if (d == signed && lim.max < opMax[w.Op]) || (d == unsigned && lim.umax < opUMax[w.Op]) {
  1003  				ft.update(parent, v, x, d, gt)
  1004  			}
  1005  		}
  1006  	}
  1007  
  1008  	// Process: x+delta > w (with delta constant)
  1009  	// Only signed domain for now (useful for accesses to slices in loops).
  1010  	if r == gt || r == gt|eq {
  1011  		if x, delta := isConstDelta(v); x != nil && d == signed {
  1012  			if parent.Func.pass.debug > 1 {
  1013  				parent.Func.Warnl(parent.Pos, "x+d %s w; x:%v %v delta:%v w:%v d:%v", r, x, parent.String(), delta, w.AuxInt, d)
  1014  			}
  1015  			underflow := true
  1016  			if delta < 0 {
  1017  				l := ft.limits[x.ID]
  1018  				if (x.Type.Size() == 8 && l.min >= math.MinInt64-delta) ||
  1019  					(x.Type.Size() == 4 && l.min >= math.MinInt32-delta) {
  1020  					underflow = false
  1021  				}
  1022  			}
  1023  			if delta < 0 && !underflow {
  1024  				// If delta < 0 and x+delta cannot underflow then x > x+delta (that is, x > v)
  1025  				ft.update(parent, x, v, signed, gt)
  1026  			}
  1027  			if !w.isGenericIntConst() {
  1028  				// If we know that x+delta > w but w is not constant, we can derive:
  1029  				//    if delta < 0 and x+delta cannot underflow, then x > w
  1030  				// This is useful for loops with bounds "len(slice)-K" (delta = -K)
  1031  				if delta < 0 && !underflow {
  1032  					ft.update(parent, x, w, signed, r)
  1033  				}
  1034  			} else {
  1035  				// With w,delta constants, we want to derive: x+delta > w  ⇒  x > w-delta
  1036  				//
  1037  				// We compute (using integers of the correct size):
  1038  				//    min = w - delta
  1039  				//    max = MaxInt - delta
  1040  				//
  1041  				// And we prove that:
  1042  				//    if min<max: min < x AND x <= max
  1043  				//    if min>max: min < x OR  x <= max
  1044  				//
  1045  				// This is always correct, even in case of overflow.
  1046  				//
  1047  				// If the initial fact is x+delta >= w instead, the derived conditions are:
  1048  				//    if min<max: min <= x AND x <= max
  1049  				//    if min>max: min <= x OR  x <= max
  1050  				//
  1051  				// Notice the conditions for max are still <=, as they handle overflows.
  1052  				var min, max int64
  1053  				switch x.Type.Size() {
  1054  				case 8:
  1055  					min = w.AuxInt - delta
  1056  					max = int64(^uint64(0)>>1) - delta
  1057  				case 4:
  1058  					min = int64(int32(w.AuxInt) - int32(delta))
  1059  					max = int64(int32(^uint32(0)>>1) - int32(delta))
  1060  				case 2:
  1061  					min = int64(int16(w.AuxInt) - int16(delta))
  1062  					max = int64(int16(^uint16(0)>>1) - int16(delta))
  1063  				case 1:
  1064  					min = int64(int8(w.AuxInt) - int8(delta))
  1065  					max = int64(int8(^uint8(0)>>1) - int8(delta))
  1066  				default:
  1067  					panic("unimplemented")
  1068  				}
  1069  
  1070  				if min < max {
  1071  					// Record that x > min and max >= x
  1072  					if r == gt {
  1073  						min++
  1074  					}
  1075  					ft.signedMinMax(x, min, max)
  1076  				} else {
  1077  					// We know that either x>min OR x<=max. factsTable cannot record OR conditions,
  1078  					// so let's see if we can already prove that one of them is false, in which case
  1079  					// the other must be true
  1080  					l := ft.limits[x.ID]
  1081  					if l.max <= min {
  1082  						if r&eq == 0 || l.max < min {
  1083  							// x>min (x>=min) is impossible, so it must be x<=max
  1084  							ft.signedMax(x, max)
  1085  						}
  1086  					} else if l.min > max {
  1087  						// x<=max is impossible, so it must be x>min
  1088  						if r == gt {
  1089  							min++
  1090  						}
  1091  						ft.signedMin(x, min)
  1092  					}
  1093  				}
  1094  			}
  1095  		}
  1096  	}
  1097  
  1098  	// Look through value-preserving extensions.
  1099  	// If the domain is appropriate for the pre-extension Type,
  1100  	// repeat the update with the pre-extension Value.
  1101  	if isCleanExt(v) {
  1102  		switch {
  1103  		case d == signed && v.Args[0].Type.IsSigned():
  1104  			fallthrough
  1105  		case d == unsigned && !v.Args[0].Type.IsSigned():
  1106  			ft.update(parent, v.Args[0], w, d, r)
  1107  		}
  1108  	}
  1109  	if isCleanExt(w) {
  1110  		switch {
  1111  		case d == signed && w.Args[0].Type.IsSigned():
  1112  			fallthrough
  1113  		case d == unsigned && !w.Args[0].Type.IsSigned():
  1114  			ft.update(parent, v, w.Args[0], d, r)
  1115  		}
  1116  	}
  1117  }
  1118  
  1119  var opMin = map[Op]int64{
  1120  	OpAdd64: math.MinInt64, OpSub64: math.MinInt64,
  1121  	OpAdd32: math.MinInt32, OpSub32: math.MinInt32,
  1122  }
  1123  
  1124  var opMax = map[Op]int64{
  1125  	OpAdd64: math.MaxInt64, OpSub64: math.MaxInt64,
  1126  	OpAdd32: math.MaxInt32, OpSub32: math.MaxInt32,
  1127  }
  1128  
  1129  var opUMax = map[Op]uint64{
  1130  	OpAdd64: math.MaxUint64, OpSub64: math.MaxUint64,
  1131  	OpAdd32: math.MaxUint32, OpSub32: math.MaxUint32,
  1132  }
  1133  
  1134  // isNonNegative reports whether v is known to be non-negative.
  1135  func (ft *factsTable) isNonNegative(v *Value) bool {
  1136  	return ft.limits[v.ID].min >= 0
  1137  }
  1138  
  1139  // checkpoint saves the current state of known relations.
  1140  // Called when descending on a branch.
  1141  func (ft *factsTable) checkpoint() {
  1142  	if ft.unsat {
  1143  		ft.unsatDepth++
  1144  	}
  1145  	ft.limitStack = append(ft.limitStack, checkpointBound)
  1146  	ft.orderS.Checkpoint()
  1147  	ft.orderU.Checkpoint()
  1148  	ft.orderingsStack = append(ft.orderingsStack, 0)
  1149  }
  1150  
  1151  // restore restores known relation to the state just
  1152  // before the previous checkpoint.
  1153  // Called when backing up on a branch.
  1154  func (ft *factsTable) restore() {
  1155  	if ft.unsatDepth > 0 {
  1156  		ft.unsatDepth--
  1157  	} else {
  1158  		ft.unsat = false
  1159  	}
  1160  	for {
  1161  		old := ft.limitStack[len(ft.limitStack)-1]
  1162  		ft.limitStack = ft.limitStack[:len(ft.limitStack)-1]
  1163  		if old.vid == 0 { // checkpointBound
  1164  			break
  1165  		}
  1166  		ft.limits[old.vid] = old.limit
  1167  	}
  1168  	ft.orderS.Undo()
  1169  	ft.orderU.Undo()
  1170  	for {
  1171  		id := ft.orderingsStack[len(ft.orderingsStack)-1]
  1172  		ft.orderingsStack = ft.orderingsStack[:len(ft.orderingsStack)-1]
  1173  		if id == 0 { // checkpoint marker
  1174  			break
  1175  		}
  1176  		o := ft.orderings[id]
  1177  		ft.orderings[id] = o.next
  1178  		o.next = ft.orderingCache
  1179  		ft.orderingCache = o
  1180  	}
  1181  }
  1182  
  1183  var (
  1184  	reverseBits = [...]relation{0, 4, 2, 6, 1, 5, 3, 7}
  1185  
  1186  	// maps what we learn when the positive branch is taken.
  1187  	// For example:
  1188  	//      OpLess8:   {signed, lt},
  1189  	//	v1 = (OpLess8 v2 v3).
  1190  	// If we learn that v1 is true, then we can deduce that v2<v3
  1191  	// in the signed domain.
  1192  	domainRelationTable = map[Op]struct {
  1193  		d domain
  1194  		r relation
  1195  	}{
  1196  		OpEq8:   {signed | unsigned, eq},
  1197  		OpEq16:  {signed | unsigned, eq},
  1198  		OpEq32:  {signed | unsigned, eq},
  1199  		OpEq64:  {signed | unsigned, eq},
  1200  		OpEqPtr: {pointer, eq},
  1201  		OpEqB:   {boolean, eq},
  1202  
  1203  		OpNeq8:   {signed | unsigned, lt | gt},
  1204  		OpNeq16:  {signed | unsigned, lt | gt},
  1205  		OpNeq32:  {signed | unsigned, lt | gt},
  1206  		OpNeq64:  {signed | unsigned, lt | gt},
  1207  		OpNeqPtr: {pointer, lt | gt},
  1208  		OpNeqB:   {boolean, lt | gt},
  1209  
  1210  		OpLess8:   {signed, lt},
  1211  		OpLess8U:  {unsigned, lt},
  1212  		OpLess16:  {signed, lt},
  1213  		OpLess16U: {unsigned, lt},
  1214  		OpLess32:  {signed, lt},
  1215  		OpLess32U: {unsigned, lt},
  1216  		OpLess64:  {signed, lt},
  1217  		OpLess64U: {unsigned, lt},
  1218  
  1219  		OpLeq8:   {signed, lt | eq},
  1220  		OpLeq8U:  {unsigned, lt | eq},
  1221  		OpLeq16:  {signed, lt | eq},
  1222  		OpLeq16U: {unsigned, lt | eq},
  1223  		OpLeq32:  {signed, lt | eq},
  1224  		OpLeq32U: {unsigned, lt | eq},
  1225  		OpLeq64:  {signed, lt | eq},
  1226  		OpLeq64U: {unsigned, lt | eq},
  1227  	}
  1228  )
  1229  
  1230  // cleanup returns the posets to the free list
  1231  func (ft *factsTable) cleanup(f *Func) {
  1232  	for _, po := range []*poset{ft.orderS, ft.orderU} {
  1233  		// Make sure it's empty as it should be. A non-empty poset
  1234  		// might cause errors and miscompilations if reused.
  1235  		if checkEnabled {
  1236  			if err := po.CheckEmpty(); err != nil {
  1237  				f.Fatalf("poset not empty after function %s: %v", f.Name, err)
  1238  			}
  1239  		}
  1240  		f.retPoset(po)
  1241  	}
  1242  	f.Cache.freeLimitSlice(ft.limits)
  1243  	f.Cache.freeBoolSlice(ft.recurseCheck)
  1244  }
  1245  
  1246  // prove removes redundant BlockIf branches that can be inferred
  1247  // from previous dominating comparisons.
  1248  //
  1249  // By far, the most common redundant pair are generated by bounds checking.
  1250  // For example for the code:
  1251  //
  1252  //	a[i] = 4
  1253  //	foo(a[i])
  1254  //
  1255  // The compiler will generate the following code:
  1256  //
  1257  //	if i >= len(a) {
  1258  //	    panic("not in bounds")
  1259  //	}
  1260  //	a[i] = 4
  1261  //	if i >= len(a) {
  1262  //	    panic("not in bounds")
  1263  //	}
  1264  //	foo(a[i])
  1265  //
  1266  // The second comparison i >= len(a) is clearly redundant because if the
  1267  // else branch of the first comparison is executed, we already know that i < len(a).
  1268  // The code for the second panic can be removed.
  1269  //
  1270  // prove works by finding contradictions and trimming branches whose
  1271  // conditions are unsatisfiable given the branches leading up to them.
  1272  // It tracks a "fact table" of branch conditions. For each branching
  1273  // block, it asserts the branch conditions that uniquely dominate that
  1274  // block, and then separately asserts the block's branch condition and
  1275  // its negation. If either leads to a contradiction, it can trim that
  1276  // successor.
  1277  func prove(f *Func) {
  1278  	// Find induction variables. Currently, findIndVars
  1279  	// is limited to one induction variable per block.
  1280  	var indVars map[*Block]indVar
  1281  	for _, v := range findIndVar(f) {
  1282  		ind := v.ind
  1283  		if len(ind.Args) != 2 {
  1284  			// the rewrite code assumes there is only ever two parents to loops
  1285  			panic("unexpected induction with too many parents")
  1286  		}
  1287  
  1288  		nxt := v.nxt
  1289  		if !(ind.Uses == 2 && // 2 used by comparison and next
  1290  			nxt.Uses == 1) { // 1 used by induction
  1291  			// ind or nxt is used inside the loop, add it for the facts table
  1292  			if indVars == nil {
  1293  				indVars = make(map[*Block]indVar)
  1294  			}
  1295  			indVars[v.entry] = v
  1296  			continue
  1297  		} else {
  1298  			// Since this induction variable is not used for anything but counting the iterations,
  1299  			// no point in putting it into the facts table.
  1300  		}
  1301  
  1302  		// try to rewrite to a downward counting loop checking against start if the
  1303  		// loop body does not depend on ind or nxt and end is known before the loop.
  1304  		// This reduces pressure on the register allocator because this does not need
  1305  		// to use end on each iteration anymore. We compare against the start constant instead.
  1306  		// That means this code:
  1307  		//
  1308  		//	loop:
  1309  		//		ind = (Phi (Const [x]) nxt),
  1310  		//		if ind < end
  1311  		//		then goto enter_loop
  1312  		//		else goto exit_loop
  1313  		//
  1314  		//	enter_loop:
  1315  		//		do something without using ind nor nxt
  1316  		//		nxt = inc + ind
  1317  		//		goto loop
  1318  		//
  1319  		//	exit_loop:
  1320  		//
  1321  		// is rewritten to:
  1322  		//
  1323  		//	loop:
  1324  		//		ind = (Phi end nxt)
  1325  		//		if (Const [x]) < ind
  1326  		//		then goto enter_loop
  1327  		//		else goto exit_loop
  1328  		//
  1329  		//	enter_loop:
  1330  		//		do something without using ind nor nxt
  1331  		//		nxt = ind - inc
  1332  		//		goto loop
  1333  		//
  1334  		//	exit_loop:
  1335  		//
  1336  		// this is better because it only requires to keep ind then nxt alive while looping,
  1337  		// while the original form keeps ind then nxt and end alive
  1338  		start, end := v.min, v.max
  1339  		if v.flags&indVarCountDown != 0 {
  1340  			start, end = end, start
  1341  		}
  1342  
  1343  		if !start.isGenericIntConst() {
  1344  			// if start is not a constant we would be winning nothing from inverting the loop
  1345  			continue
  1346  		}
  1347  		if end.isGenericIntConst() {
  1348  			// TODO: if both start and end are constants we should rewrite such that the comparison
  1349  			// is against zero and nxt is ++ or -- operation
  1350  			// That means:
  1351  			//	for i := 2; i < 11; i += 2 {
  1352  			// should be rewritten to:
  1353  			//	for i := 5; 0 < i; i-- {
  1354  			continue
  1355  		}
  1356  
  1357  		if end.Block == ind.Block {
  1358  			// we can't rewrite loops where the condition depends on the loop body
  1359  			// this simple check is forced to work because if this is true a Phi in ind.Block must exist
  1360  			continue
  1361  		}
  1362  
  1363  		check := ind.Block.Controls[0]
  1364  		// invert the check
  1365  		check.Args[0], check.Args[1] = check.Args[1], check.Args[0]
  1366  
  1367  		// swap start and end in the loop
  1368  		for i, v := range check.Args {
  1369  			if v != end {
  1370  				continue
  1371  			}
  1372  
  1373  			check.SetArg(i, start)
  1374  			goto replacedEnd
  1375  		}
  1376  		panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end))
  1377  	replacedEnd:
  1378  
  1379  		for i, v := range ind.Args {
  1380  			if v != start {
  1381  				continue
  1382  			}
  1383  
  1384  			ind.SetArg(i, end)
  1385  			goto replacedStart
  1386  		}
  1387  		panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end))
  1388  	replacedStart:
  1389  
  1390  		if nxt.Args[0] != ind {
  1391  			// unlike additions subtractions are not commutative so be sure we get it right
  1392  			nxt.Args[0], nxt.Args[1] = nxt.Args[1], nxt.Args[0]
  1393  		}
  1394  
  1395  		switch nxt.Op {
  1396  		case OpAdd8:
  1397  			nxt.Op = OpSub8
  1398  		case OpAdd16:
  1399  			nxt.Op = OpSub16
  1400  		case OpAdd32:
  1401  			nxt.Op = OpSub32
  1402  		case OpAdd64:
  1403  			nxt.Op = OpSub64
  1404  		case OpSub8:
  1405  			nxt.Op = OpAdd8
  1406  		case OpSub16:
  1407  			nxt.Op = OpAdd16
  1408  		case OpSub32:
  1409  			nxt.Op = OpAdd32
  1410  		case OpSub64:
  1411  			nxt.Op = OpAdd64
  1412  		default:
  1413  			panic("unreachable")
  1414  		}
  1415  
  1416  		if f.pass.debug > 0 {
  1417  			f.Warnl(ind.Pos, "Inverted loop iteration")
  1418  		}
  1419  	}
  1420  
  1421  	ft := newFactsTable(f)
  1422  	ft.checkpoint()
  1423  
  1424  	// Find length and capacity ops.
  1425  	for _, b := range f.Blocks {
  1426  		for _, v := range b.Values {
  1427  			if v.Uses == 0 {
  1428  				// We don't care about dead values.
  1429  				// (There can be some that are CSEd but not removed yet.)
  1430  				continue
  1431  			}
  1432  			switch v.Op {
  1433  			case OpSliceLen:
  1434  				if ft.lens == nil {
  1435  					ft.lens = map[ID]*Value{}
  1436  				}
  1437  				// Set all len Values for the same slice as equal in the poset.
  1438  				// The poset handles transitive relations, so Values related to
  1439  				// any OpSliceLen for this slice will be correctly related to others.
  1440  				if l, ok := ft.lens[v.Args[0].ID]; ok {
  1441  					ft.update(b, v, l, signed, eq)
  1442  				} else {
  1443  					ft.lens[v.Args[0].ID] = v
  1444  				}
  1445  			case OpSliceCap:
  1446  				if ft.caps == nil {
  1447  					ft.caps = map[ID]*Value{}
  1448  				}
  1449  				// Same as case OpSliceLen above, but for slice cap.
  1450  				if c, ok := ft.caps[v.Args[0].ID]; ok {
  1451  					ft.update(b, v, c, signed, eq)
  1452  				} else {
  1453  					ft.caps[v.Args[0].ID] = v
  1454  				}
  1455  			}
  1456  		}
  1457  	}
  1458  
  1459  	// current node state
  1460  	type walkState int
  1461  	const (
  1462  		descend walkState = iota
  1463  		simplify
  1464  	)
  1465  	// work maintains the DFS stack.
  1466  	type bp struct {
  1467  		block *Block    // current handled block
  1468  		state walkState // what's to do
  1469  	}
  1470  	work := make([]bp, 0, 256)
  1471  	work = append(work, bp{
  1472  		block: f.Entry,
  1473  		state: descend,
  1474  	})
  1475  
  1476  	idom := f.Idom()
  1477  	sdom := f.Sdom()
  1478  
  1479  	// DFS on the dominator tree.
  1480  	//
  1481  	// For efficiency, we consider only the dominator tree rather
  1482  	// than the entire flow graph. On the way down, we consider
  1483  	// incoming branches and accumulate conditions that uniquely
  1484  	// dominate the current block. If we discover a contradiction,
  1485  	// we can eliminate the entire block and all of its children.
  1486  	// On the way back up, we consider outgoing branches that
  1487  	// haven't already been considered. This way we consider each
  1488  	// branch condition only once.
  1489  	for len(work) > 0 {
  1490  		node := work[len(work)-1]
  1491  		work = work[:len(work)-1]
  1492  		parent := idom[node.block.ID]
  1493  		branch := getBranch(sdom, parent, node.block)
  1494  
  1495  		switch node.state {
  1496  		case descend:
  1497  			ft.checkpoint()
  1498  
  1499  			// Entering the block, add facts about the induction variable
  1500  			// that is bound to this block.
  1501  			if iv, ok := indVars[node.block]; ok {
  1502  				addIndVarRestrictions(ft, parent, iv)
  1503  			}
  1504  
  1505  			// Add results of reaching this block via a branch from
  1506  			// its immediate dominator (if any).
  1507  			if branch != unknown {
  1508  				addBranchRestrictions(ft, parent, branch)
  1509  			}
  1510  
  1511  			if ft.unsat {
  1512  				// node.block is unreachable.
  1513  				// Remove it and don't visit
  1514  				// its children.
  1515  				removeBranch(parent, branch)
  1516  				ft.restore()
  1517  				break
  1518  			}
  1519  			// Otherwise, we can now commit to
  1520  			// taking this branch. We'll restore
  1521  			// ft when we unwind.
  1522  
  1523  			// Add facts about the values in the current block.
  1524  			addLocalFacts(ft, node.block)
  1525  
  1526  			work = append(work, bp{
  1527  				block: node.block,
  1528  				state: simplify,
  1529  			})
  1530  			for s := sdom.Child(node.block); s != nil; s = sdom.Sibling(s) {
  1531  				work = append(work, bp{
  1532  					block: s,
  1533  					state: descend,
  1534  				})
  1535  			}
  1536  
  1537  		case simplify:
  1538  			simplifyBlock(sdom, ft, node.block)
  1539  			ft.restore()
  1540  		}
  1541  	}
  1542  
  1543  	ft.restore()
  1544  
  1545  	ft.cleanup(f)
  1546  }
  1547  
  1548  // initLimit sets initial constant limit for v.  This limit is based
  1549  // only on the operation itself, not any of its input arguments. This
  1550  // method is only used in two places, once when the prove pass startup
  1551  // and the other when a new ssa value is created, both for init. (unlike
  1552  // flowLimit, below, which computes additional constraints based on
  1553  // ranges of opcode arguments).
  1554  func initLimit(v *Value) limit {
  1555  	if v.Type.IsBoolean() {
  1556  		switch v.Op {
  1557  		case OpConstBool:
  1558  			b := v.AuxInt
  1559  			return limit{min: b, max: b, umin: uint64(b), umax: uint64(b)}
  1560  		default:
  1561  			return limit{min: 0, max: 1, umin: 0, umax: 1}
  1562  		}
  1563  	}
  1564  	if v.Type.IsPtrShaped() { // These are the types that EqPtr/NeqPtr operate on, except uintptr.
  1565  		switch v.Op {
  1566  		case OpConstNil:
  1567  			return limit{min: 0, max: 0, umin: 0, umax: 0}
  1568  		case OpAddr, OpLocalAddr: // TODO: others?
  1569  			l := noLimit
  1570  			l.umin = 1
  1571  			return l
  1572  		default:
  1573  			return noLimit
  1574  		}
  1575  	}
  1576  	if !v.Type.IsInteger() {
  1577  		return noLimit
  1578  	}
  1579  
  1580  	// Default limits based on type.
  1581  	bitsize := v.Type.Size() * 8
  1582  	lim := limit{min: -(1 << (bitsize - 1)), max: 1<<(bitsize-1) - 1, umin: 0, umax: 1<<bitsize - 1}
  1583  
  1584  	// Tighter limits on some opcodes.
  1585  	switch v.Op {
  1586  	// constants
  1587  	case OpConst64:
  1588  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(v.AuxInt), umax: uint64(v.AuxInt)}
  1589  	case OpConst32:
  1590  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint32(v.AuxInt)), umax: uint64(uint32(v.AuxInt))}
  1591  	case OpConst16:
  1592  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint16(v.AuxInt)), umax: uint64(uint16(v.AuxInt))}
  1593  	case OpConst8:
  1594  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint8(v.AuxInt)), umax: uint64(uint8(v.AuxInt))}
  1595  
  1596  	// extensions
  1597  	case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16:
  1598  		lim = lim.signedMinMax(0, 1<<8-1)
  1599  		lim = lim.unsignedMax(1<<8 - 1)
  1600  	case OpZeroExt16to64, OpZeroExt16to32:
  1601  		lim = lim.signedMinMax(0, 1<<16-1)
  1602  		lim = lim.unsignedMax(1<<16 - 1)
  1603  	case OpZeroExt32to64:
  1604  		lim = lim.signedMinMax(0, 1<<32-1)
  1605  		lim = lim.unsignedMax(1<<32 - 1)
  1606  	case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16:
  1607  		lim = lim.signedMinMax(math.MinInt8, math.MaxInt8)
  1608  	case OpSignExt16to64, OpSignExt16to32:
  1609  		lim = lim.signedMinMax(math.MinInt16, math.MaxInt16)
  1610  	case OpSignExt32to64:
  1611  		lim = lim.signedMinMax(math.MinInt32, math.MaxInt32)
  1612  
  1613  	// math/bits intrinsics
  1614  	case OpCtz64, OpBitLen64, OpPopCount64,
  1615  		OpCtz32, OpBitLen32, OpPopCount32,
  1616  		OpCtz16, OpBitLen16, OpPopCount16,
  1617  		OpCtz8, OpBitLen8, OpPopCount8:
  1618  		lim = lim.unsignedMax(uint64(v.Args[0].Type.Size() * 8))
  1619  
  1620  	// bool to uint8 conversion
  1621  	case OpCvtBoolToUint8:
  1622  		lim = lim.unsignedMax(1)
  1623  
  1624  	// length operations
  1625  	case OpStringLen, OpSliceLen, OpSliceCap:
  1626  		lim = lim.signedMin(0)
  1627  	}
  1628  
  1629  	// signed <-> unsigned propagation
  1630  	if lim.min >= 0 {
  1631  		lim = lim.unsignedMinMax(uint64(lim.min), uint64(lim.max))
  1632  	}
  1633  	if fitsInBitsU(lim.umax, uint(8*v.Type.Size()-1)) {
  1634  		lim = lim.signedMinMax(int64(lim.umin), int64(lim.umax))
  1635  	}
  1636  
  1637  	return lim
  1638  }
  1639  
  1640  // flowLimit updates the known limits of v in ft. Returns true if anything changed.
  1641  // flowLimit can use the ranges of input arguments.
  1642  //
  1643  // Note: this calculation only happens at the point the value is defined. We do not reevaluate
  1644  // it later. So for example:
  1645  //
  1646  //	v := x + y
  1647  //	if 0 <= x && x < 5 && 0 <= y && y < 5 { ... use v ... }
  1648  //
  1649  // we don't discover that the range of v is bounded in the conditioned
  1650  // block. We could recompute the range of v once we enter the block so
  1651  // we know that it is 0 <= v <= 8, but we don't have a mechanism to do
  1652  // that right now.
  1653  func (ft *factsTable) flowLimit(v *Value) bool {
  1654  	if !v.Type.IsInteger() {
  1655  		// TODO: boolean?
  1656  		return false
  1657  	}
  1658  
  1659  	// Additional limits based on opcode and argument.
  1660  	// No need to repeat things here already done in initLimit.
  1661  	switch v.Op {
  1662  
  1663  	// extensions
  1664  	case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16, OpZeroExt16to64, OpZeroExt16to32, OpZeroExt32to64:
  1665  		a := ft.limits[v.Args[0].ID]
  1666  		return ft.unsignedMinMax(v, a.umin, a.umax)
  1667  	case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16, OpSignExt16to64, OpSignExt16to32, OpSignExt32to64:
  1668  		a := ft.limits[v.Args[0].ID]
  1669  		return ft.signedMinMax(v, a.min, a.max)
  1670  	case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
  1671  		a := ft.limits[v.Args[0].ID]
  1672  		if a.umax <= 1<<(uint64(v.Type.Size())*8)-1 {
  1673  			return ft.unsignedMinMax(v, a.umin, a.umax)
  1674  		}
  1675  
  1676  	// math/bits
  1677  	case OpCtz64:
  1678  		a := ft.limits[v.Args[0].ID]
  1679  		if a.nonzero() {
  1680  			return ft.unsignedMax(v, uint64(bits.Len64(a.umax)-1))
  1681  		}
  1682  	case OpCtz32:
  1683  		a := ft.limits[v.Args[0].ID]
  1684  		if a.nonzero() {
  1685  			return ft.unsignedMax(v, uint64(bits.Len32(uint32(a.umax))-1))
  1686  		}
  1687  	case OpCtz16:
  1688  		a := ft.limits[v.Args[0].ID]
  1689  		if a.nonzero() {
  1690  			return ft.unsignedMax(v, uint64(bits.Len16(uint16(a.umax))-1))
  1691  		}
  1692  	case OpCtz8:
  1693  		a := ft.limits[v.Args[0].ID]
  1694  		if a.nonzero() {
  1695  			return ft.unsignedMax(v, uint64(bits.Len8(uint8(a.umax))-1))
  1696  		}
  1697  
  1698  	case OpPopCount64, OpPopCount32, OpPopCount16, OpPopCount8:
  1699  		a := ft.limits[v.Args[0].ID]
  1700  		changingBitsCount := uint64(bits.Len64(a.umax ^ a.umin))
  1701  		sharedLeadingMask := ^(uint64(1)<<changingBitsCount - 1)
  1702  		fixedBits := a.umax & sharedLeadingMask
  1703  		min := uint64(bits.OnesCount64(fixedBits))
  1704  		return ft.unsignedMinMax(v, min, min+changingBitsCount)
  1705  
  1706  	case OpBitLen64:
  1707  		a := ft.limits[v.Args[0].ID]
  1708  		return ft.unsignedMinMax(v,
  1709  			uint64(bits.Len64(a.umin)),
  1710  			uint64(bits.Len64(a.umax)))
  1711  	case OpBitLen32:
  1712  		a := ft.limits[v.Args[0].ID]
  1713  		return ft.unsignedMinMax(v,
  1714  			uint64(bits.Len32(uint32(a.umin))),
  1715  			uint64(bits.Len32(uint32(a.umax))))
  1716  	case OpBitLen16:
  1717  		a := ft.limits[v.Args[0].ID]
  1718  		return ft.unsignedMinMax(v,
  1719  			uint64(bits.Len16(uint16(a.umin))),
  1720  			uint64(bits.Len16(uint16(a.umax))))
  1721  	case OpBitLen8:
  1722  		a := ft.limits[v.Args[0].ID]
  1723  		return ft.unsignedMinMax(v,
  1724  			uint64(bits.Len8(uint8(a.umin))),
  1725  			uint64(bits.Len8(uint8(a.umax))))
  1726  
  1727  	// Masks.
  1728  
  1729  	// TODO: if y.umax and y.umin share a leading bit pattern, y also has that leading bit pattern.
  1730  	// we could compare the patterns of always set bits in a and b and learn more about minimum and maximum.
  1731  	// But I doubt this help any real world code.
  1732  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  1733  		// AND can only make the value smaller.
  1734  		a := ft.limits[v.Args[0].ID]
  1735  		b := ft.limits[v.Args[1].ID]
  1736  		return ft.unsignedMax(v, min(a.umax, b.umax))
  1737  	case OpOr64, OpOr32, OpOr16, OpOr8:
  1738  		// OR can only make the value bigger and can't flip bits proved to be zero in both inputs.
  1739  		a := ft.limits[v.Args[0].ID]
  1740  		b := ft.limits[v.Args[1].ID]
  1741  		return ft.unsignedMinMax(v,
  1742  			max(a.umin, b.umin),
  1743  			1<<bits.Len64(a.umax|b.umax)-1)
  1744  	case OpXor64, OpXor32, OpXor16, OpXor8:
  1745  		// XOR can't flip bits that are proved to be zero in both inputs.
  1746  		a := ft.limits[v.Args[0].ID]
  1747  		b := ft.limits[v.Args[1].ID]
  1748  		return ft.unsignedMax(v, 1<<bits.Len64(a.umax|b.umax)-1)
  1749  	case OpCom64, OpCom32, OpCom16, OpCom8:
  1750  		a := ft.limits[v.Args[0].ID]
  1751  		return ft.newLimit(v, a.com(uint(v.Type.Size())*8))
  1752  
  1753  	// Arithmetic.
  1754  	case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  1755  		a := ft.limits[v.Args[0].ID]
  1756  		b := ft.limits[v.Args[1].ID]
  1757  		return ft.newLimit(v, a.add(b, uint(v.Type.Size())*8))
  1758  	case OpSub64, OpSub32, OpSub16, OpSub8:
  1759  		a := ft.limits[v.Args[0].ID]
  1760  		b := ft.limits[v.Args[1].ID]
  1761  		sub := ft.newLimit(v, a.sub(b, uint(v.Type.Size())*8))
  1762  		mod := ft.detectSignedMod(v)
  1763  		return sub || mod
  1764  	case OpNeg64, OpNeg32, OpNeg16, OpNeg8:
  1765  		a := ft.limits[v.Args[0].ID]
  1766  		bitsize := uint(v.Type.Size()) * 8
  1767  		return ft.newLimit(v, a.com(bitsize).add(limit{min: 1, max: 1, umin: 1, umax: 1}, bitsize))
  1768  	case OpMul64, OpMul32, OpMul16, OpMul8:
  1769  		a := ft.limits[v.Args[0].ID]
  1770  		b := ft.limits[v.Args[1].ID]
  1771  		return ft.newLimit(v, a.mul(b, uint(v.Type.Size())*8))
  1772  	case OpLsh64x64, OpLsh64x32, OpLsh64x16, OpLsh64x8,
  1773  		OpLsh32x64, OpLsh32x32, OpLsh32x16, OpLsh32x8,
  1774  		OpLsh16x64, OpLsh16x32, OpLsh16x16, OpLsh16x8,
  1775  		OpLsh8x64, OpLsh8x32, OpLsh8x16, OpLsh8x8:
  1776  		a := ft.limits[v.Args[0].ID]
  1777  		b := ft.limits[v.Args[1].ID]
  1778  		bitsize := uint(v.Type.Size()) * 8
  1779  		return ft.newLimit(v, a.mul(b.exp2(bitsize), bitsize))
  1780  	case OpMod64, OpMod32, OpMod16, OpMod8:
  1781  		a := ft.limits[v.Args[0].ID]
  1782  		b := ft.limits[v.Args[1].ID]
  1783  		if !(a.nonnegative() && b.nonnegative()) {
  1784  			// TODO: we could handle signed limits but I didn't bother.
  1785  			break
  1786  		}
  1787  		fallthrough
  1788  	case OpMod64u, OpMod32u, OpMod16u, OpMod8u:
  1789  		a := ft.limits[v.Args[0].ID]
  1790  		b := ft.limits[v.Args[1].ID]
  1791  		// Underflow in the arithmetic below is ok, it gives to MaxUint64 which does nothing to the limit.
  1792  		return ft.unsignedMax(v, min(a.umax, b.umax-1))
  1793  	case OpDiv64, OpDiv32, OpDiv16, OpDiv8:
  1794  		a := ft.limits[v.Args[0].ID]
  1795  		b := ft.limits[v.Args[1].ID]
  1796  		if !(a.nonnegative() && b.nonnegative()) {
  1797  			// TODO: we could handle signed limits but I didn't bother.
  1798  			break
  1799  		}
  1800  		fallthrough
  1801  	case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u:
  1802  		a := ft.limits[v.Args[0].ID]
  1803  		b := ft.limits[v.Args[1].ID]
  1804  		lim := noLimit
  1805  		if b.umax > 0 {
  1806  			lim = lim.unsignedMin(a.umin / b.umax)
  1807  		}
  1808  		if b.umin > 0 {
  1809  			lim = lim.unsignedMax(a.umax / b.umin)
  1810  		}
  1811  		return ft.newLimit(v, lim)
  1812  
  1813  	case OpPhi:
  1814  		{
  1815  			// Work around for go.dev/issue/68857, look for min(x, y) and max(x, y).
  1816  			b := v.Block
  1817  			if len(b.Preds) != 2 {
  1818  				goto notMinNorMax
  1819  			}
  1820  			// FIXME: this code searches for the following losange pattern
  1821  			// because that what ssagen produce for min and max builtins:
  1822  			// conditionBlock → (firstBlock, secondBlock) → v.Block
  1823  			// there are three non losange equivalent constructions
  1824  			// we could match for, but I didn't bother:
  1825  			// conditionBlock → (v.Block, secondBlock → v.Block)
  1826  			// conditionBlock → (firstBlock → v.Block, v.Block)
  1827  			// conditionBlock → (v.Block, v.Block)
  1828  			firstBlock, secondBlock := b.Preds[0].b, b.Preds[1].b
  1829  			if firstBlock.Kind != BlockPlain || secondBlock.Kind != BlockPlain {
  1830  				goto notMinNorMax
  1831  			}
  1832  			if len(firstBlock.Preds) != 1 || len(secondBlock.Preds) != 1 {
  1833  				goto notMinNorMax
  1834  			}
  1835  			conditionBlock := firstBlock.Preds[0].b
  1836  			if conditionBlock != secondBlock.Preds[0].b {
  1837  				goto notMinNorMax
  1838  			}
  1839  			if conditionBlock.Kind != BlockIf {
  1840  				goto notMinNorMax
  1841  			}
  1842  
  1843  			less := conditionBlock.Controls[0]
  1844  			var unsigned bool
  1845  			switch less.Op {
  1846  			case OpLess64U, OpLess32U, OpLess16U, OpLess8U,
  1847  				OpLeq64U, OpLeq32U, OpLeq16U, OpLeq8U:
  1848  				unsigned = true
  1849  			case OpLess64, OpLess32, OpLess16, OpLess8,
  1850  				OpLeq64, OpLeq32, OpLeq16, OpLeq8:
  1851  			default:
  1852  				goto notMinNorMax
  1853  			}
  1854  			small, big := less.Args[0], less.Args[1]
  1855  			truev, falsev := v.Args[0], v.Args[1]
  1856  			if conditionBlock.Succs[0].b == secondBlock {
  1857  				truev, falsev = falsev, truev
  1858  			}
  1859  
  1860  			bigl, smalll := ft.limits[big.ID], ft.limits[small.ID]
  1861  			if truev == big {
  1862  				if falsev == small {
  1863  					// v := big if small <¿=? big else small
  1864  					if unsigned {
  1865  						maximum := max(bigl.umax, smalll.umax)
  1866  						minimum := max(bigl.umin, smalll.umin)
  1867  						return ft.unsignedMinMax(v, minimum, maximum)
  1868  					} else {
  1869  						maximum := max(bigl.max, smalll.max)
  1870  						minimum := max(bigl.min, smalll.min)
  1871  						return ft.signedMinMax(v, minimum, maximum)
  1872  					}
  1873  				} else {
  1874  					goto notMinNorMax
  1875  				}
  1876  			} else if truev == small {
  1877  				if falsev == big {
  1878  					// v := small if small <¿=? big else big
  1879  					if unsigned {
  1880  						maximum := min(bigl.umax, smalll.umax)
  1881  						minimum := min(bigl.umin, smalll.umin)
  1882  						return ft.unsignedMinMax(v, minimum, maximum)
  1883  					} else {
  1884  						maximum := min(bigl.max, smalll.max)
  1885  						minimum := min(bigl.min, smalll.min)
  1886  						return ft.signedMinMax(v, minimum, maximum)
  1887  					}
  1888  				} else {
  1889  					goto notMinNorMax
  1890  				}
  1891  			} else {
  1892  				goto notMinNorMax
  1893  			}
  1894  		}
  1895  	notMinNorMax:
  1896  
  1897  		// Compute the union of all the input phis.
  1898  		// Often this will convey no information, because the block
  1899  		// is not dominated by its predecessors and hence the
  1900  		// phi arguments might not have been processed yet. But if
  1901  		// the values are declared earlier, it may help. e.g., for
  1902  		//    v = phi(c3, c5)
  1903  		// where c3 = OpConst [3] and c5 = OpConst [5] are
  1904  		// defined in the entry block, we can derive [3,5]
  1905  		// as the limit for v.
  1906  		l := ft.limits[v.Args[0].ID]
  1907  		for _, a := range v.Args[1:] {
  1908  			l2 := ft.limits[a.ID]
  1909  			l.min = min(l.min, l2.min)
  1910  			l.max = max(l.max, l2.max)
  1911  			l.umin = min(l.umin, l2.umin)
  1912  			l.umax = max(l.umax, l2.umax)
  1913  		}
  1914  		return ft.newLimit(v, l)
  1915  	}
  1916  	return false
  1917  }
  1918  
  1919  // See if we can get any facts because v is the result of signed mod by a constant.
  1920  // The mod operation has already been rewritten, so we have to try and reconstruct it.
  1921  //
  1922  //	x % d
  1923  //
  1924  // is rewritten as
  1925  //
  1926  //	x - (x / d) * d
  1927  //
  1928  // furthermore, the divide itself gets rewritten. If d is a power of 2 (d == 1<<k), we do
  1929  //
  1930  //	(x / d) * d = ((x + adj) >> k) << k
  1931  //	            = (x + adj) & (-1<<k)
  1932  //
  1933  // with adj being an adjustment in case x is negative (see below).
  1934  // if d is not a power of 2, we do
  1935  //
  1936  //	x / d = ... TODO ...
  1937  func (ft *factsTable) detectSignedMod(v *Value) bool {
  1938  	if ft.detectSignedModByPowerOfTwo(v) {
  1939  		return true
  1940  	}
  1941  	// TODO: non-powers-of-2
  1942  	return false
  1943  }
  1944  func (ft *factsTable) detectSignedModByPowerOfTwo(v *Value) bool {
  1945  	// We're looking for:
  1946  	//
  1947  	//   x % d ==
  1948  	//   x - (x / d) * d
  1949  	//
  1950  	// which for d a power of 2, d == 1<<k, is done as
  1951  	//
  1952  	//   x - ((x + (x>>(w-1))>>>(w-k)) & (-1<<k))
  1953  	//
  1954  	// w = bit width of x.
  1955  	// (>> = signed shift, >>> = unsigned shift).
  1956  	// See ./_gen/generic.rules, search for "Signed divide by power of 2".
  1957  
  1958  	var w int64
  1959  	var addOp, andOp, constOp, sshiftOp, ushiftOp Op
  1960  	switch v.Op {
  1961  	case OpSub64:
  1962  		w = 64
  1963  		addOp = OpAdd64
  1964  		andOp = OpAnd64
  1965  		constOp = OpConst64
  1966  		sshiftOp = OpRsh64x64
  1967  		ushiftOp = OpRsh64Ux64
  1968  	case OpSub32:
  1969  		w = 32
  1970  		addOp = OpAdd32
  1971  		andOp = OpAnd32
  1972  		constOp = OpConst32
  1973  		sshiftOp = OpRsh32x64
  1974  		ushiftOp = OpRsh32Ux64
  1975  	case OpSub16:
  1976  		w = 16
  1977  		addOp = OpAdd16
  1978  		andOp = OpAnd16
  1979  		constOp = OpConst16
  1980  		sshiftOp = OpRsh16x64
  1981  		ushiftOp = OpRsh16Ux64
  1982  	case OpSub8:
  1983  		w = 8
  1984  		addOp = OpAdd8
  1985  		andOp = OpAnd8
  1986  		constOp = OpConst8
  1987  		sshiftOp = OpRsh8x64
  1988  		ushiftOp = OpRsh8Ux64
  1989  	default:
  1990  		return false
  1991  	}
  1992  
  1993  	x := v.Args[0]
  1994  	and := v.Args[1]
  1995  	if and.Op != andOp {
  1996  		return false
  1997  	}
  1998  	var add, mask *Value
  1999  	if and.Args[0].Op == addOp && and.Args[1].Op == constOp {
  2000  		add = and.Args[0]
  2001  		mask = and.Args[1]
  2002  	} else if and.Args[1].Op == addOp && and.Args[0].Op == constOp {
  2003  		add = and.Args[1]
  2004  		mask = and.Args[0]
  2005  	} else {
  2006  		return false
  2007  	}
  2008  	var ushift *Value
  2009  	if add.Args[0] == x {
  2010  		ushift = add.Args[1]
  2011  	} else if add.Args[1] == x {
  2012  		ushift = add.Args[0]
  2013  	} else {
  2014  		return false
  2015  	}
  2016  	if ushift.Op != ushiftOp {
  2017  		return false
  2018  	}
  2019  	if ushift.Args[1].Op != OpConst64 {
  2020  		return false
  2021  	}
  2022  	k := w - ushift.Args[1].AuxInt // Now we know k!
  2023  	d := int64(1) << k             // divisor
  2024  	sshift := ushift.Args[0]
  2025  	if sshift.Op != sshiftOp {
  2026  		return false
  2027  	}
  2028  	if sshift.Args[0] != x {
  2029  		return false
  2030  	}
  2031  	if sshift.Args[1].Op != OpConst64 || sshift.Args[1].AuxInt != w-1 {
  2032  		return false
  2033  	}
  2034  	if mask.AuxInt != -d {
  2035  		return false
  2036  	}
  2037  
  2038  	// All looks ok. x % d is at most +/- d-1.
  2039  	return ft.signedMinMax(v, -d+1, d-1)
  2040  }
  2041  
  2042  // getBranch returns the range restrictions added by p
  2043  // when reaching b. p is the immediate dominator of b.
  2044  func getBranch(sdom SparseTree, p *Block, b *Block) branch {
  2045  	if p == nil {
  2046  		return unknown
  2047  	}
  2048  	switch p.Kind {
  2049  	case BlockIf:
  2050  		// If p and p.Succs[0] are dominators it means that every path
  2051  		// from entry to b passes through p and p.Succs[0]. We care that
  2052  		// no path from entry to b passes through p.Succs[1]. If p.Succs[0]
  2053  		// has one predecessor then (apart from the degenerate case),
  2054  		// there is no path from entry that can reach b through p.Succs[1].
  2055  		// TODO: how about p->yes->b->yes, i.e. a loop in yes.
  2056  		if sdom.IsAncestorEq(p.Succs[0].b, b) && len(p.Succs[0].b.Preds) == 1 {
  2057  			return positive
  2058  		}
  2059  		if sdom.IsAncestorEq(p.Succs[1].b, b) && len(p.Succs[1].b.Preds) == 1 {
  2060  			return negative
  2061  		}
  2062  	case BlockJumpTable:
  2063  		// TODO: this loop can lead to quadratic behavior, as
  2064  		// getBranch can be called len(p.Succs) times.
  2065  		for i, e := range p.Succs {
  2066  			if sdom.IsAncestorEq(e.b, b) && len(e.b.Preds) == 1 {
  2067  				return jumpTable0 + branch(i)
  2068  			}
  2069  		}
  2070  	}
  2071  	return unknown
  2072  }
  2073  
  2074  // addIndVarRestrictions updates the factsTables ft with the facts
  2075  // learned from the induction variable indVar which drives the loop
  2076  // starting in Block b.
  2077  func addIndVarRestrictions(ft *factsTable, b *Block, iv indVar) {
  2078  	d := signed
  2079  	if ft.isNonNegative(iv.min) && ft.isNonNegative(iv.max) {
  2080  		d |= unsigned
  2081  	}
  2082  
  2083  	if iv.flags&indVarMinExc == 0 {
  2084  		addRestrictions(b, ft, d, iv.min, iv.ind, lt|eq)
  2085  	} else {
  2086  		addRestrictions(b, ft, d, iv.min, iv.ind, lt)
  2087  	}
  2088  
  2089  	if iv.flags&indVarMaxInc == 0 {
  2090  		addRestrictions(b, ft, d, iv.ind, iv.max, lt)
  2091  	} else {
  2092  		addRestrictions(b, ft, d, iv.ind, iv.max, lt|eq)
  2093  	}
  2094  }
  2095  
  2096  // addBranchRestrictions updates the factsTables ft with the facts learned when
  2097  // branching from Block b in direction br.
  2098  func addBranchRestrictions(ft *factsTable, b *Block, br branch) {
  2099  	c := b.Controls[0]
  2100  	switch {
  2101  	case br == negative:
  2102  		ft.booleanFalse(c)
  2103  	case br == positive:
  2104  		ft.booleanTrue(c)
  2105  	case br >= jumpTable0:
  2106  		idx := br - jumpTable0
  2107  		val := int64(idx)
  2108  		if v, off := isConstDelta(c); v != nil {
  2109  			// Establish the bound on the underlying value we're switching on,
  2110  			// not on the offset-ed value used as the jump table index.
  2111  			c = v
  2112  			val -= off
  2113  		}
  2114  		ft.newLimit(c, limit{min: val, max: val, umin: uint64(val), umax: uint64(val)})
  2115  	default:
  2116  		panic("unknown branch")
  2117  	}
  2118  }
  2119  
  2120  // addRestrictions updates restrictions from the immediate
  2121  // dominating block (p) using r.
  2122  func addRestrictions(parent *Block, ft *factsTable, t domain, v, w *Value, r relation) {
  2123  	if t == 0 {
  2124  		// Trivial case: nothing to do.
  2125  		// Should not happen, but just in case.
  2126  		return
  2127  	}
  2128  	for i := domain(1); i <= t; i <<= 1 {
  2129  		if t&i == 0 {
  2130  			continue
  2131  		}
  2132  		ft.update(parent, v, w, i, r)
  2133  	}
  2134  }
  2135  
  2136  func unsignedAddOverflows(a, b uint64, t *types.Type) bool {
  2137  	switch t.Size() {
  2138  	case 8:
  2139  		return a+b < a
  2140  	case 4:
  2141  		return a+b > math.MaxUint32
  2142  	case 2:
  2143  		return a+b > math.MaxUint16
  2144  	case 1:
  2145  		return a+b > math.MaxUint8
  2146  	default:
  2147  		panic("unreachable")
  2148  	}
  2149  }
  2150  
  2151  func signedAddOverflowsOrUnderflows(a, b int64, t *types.Type) bool {
  2152  	r := a + b
  2153  	switch t.Size() {
  2154  	case 8:
  2155  		return (a >= 0 && b >= 0 && r < 0) || (a < 0 && b < 0 && r >= 0)
  2156  	case 4:
  2157  		return r < math.MinInt32 || math.MaxInt32 < r
  2158  	case 2:
  2159  		return r < math.MinInt16 || math.MaxInt16 < r
  2160  	case 1:
  2161  		return r < math.MinInt8 || math.MaxInt8 < r
  2162  	default:
  2163  		panic("unreachable")
  2164  	}
  2165  }
  2166  
  2167  func unsignedSubUnderflows(a, b uint64) bool {
  2168  	return a < b
  2169  }
  2170  
  2171  func addLocalFacts(ft *factsTable, b *Block) {
  2172  	// Propagate constant ranges among values in this block.
  2173  	// We do this before the second loop so that we have the
  2174  	// most up-to-date constant bounds for isNonNegative calls.
  2175  	for {
  2176  		changed := false
  2177  		for _, v := range b.Values {
  2178  			changed = ft.flowLimit(v) || changed
  2179  		}
  2180  		if !changed {
  2181  			break
  2182  		}
  2183  	}
  2184  
  2185  	// Add facts about individual operations.
  2186  	for _, v := range b.Values {
  2187  		// FIXME(go.dev/issue/68857): this loop only set up limits properly when b.Values is in topological order.
  2188  		// flowLimit can also depend on limits given by this loop which right now is not handled.
  2189  		switch v.Op {
  2190  		case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  2191  			x := ft.limits[v.Args[0].ID]
  2192  			y := ft.limits[v.Args[1].ID]
  2193  			if !unsignedAddOverflows(x.umax, y.umax, v.Type) {
  2194  				r := gt
  2195  				if !x.nonzero() {
  2196  					r |= eq
  2197  				}
  2198  				ft.update(b, v, v.Args[1], unsigned, r)
  2199  				r = gt
  2200  				if !y.nonzero() {
  2201  					r |= eq
  2202  				}
  2203  				ft.update(b, v, v.Args[0], unsigned, r)
  2204  			}
  2205  			if x.min >= 0 && !signedAddOverflowsOrUnderflows(x.max, y.max, v.Type) {
  2206  				r := gt
  2207  				if !x.nonzero() {
  2208  					r |= eq
  2209  				}
  2210  				ft.update(b, v, v.Args[1], signed, r)
  2211  			}
  2212  			if y.min >= 0 && !signedAddOverflowsOrUnderflows(x.max, y.max, v.Type) {
  2213  				r := gt
  2214  				if !y.nonzero() {
  2215  					r |= eq
  2216  				}
  2217  				ft.update(b, v, v.Args[0], signed, r)
  2218  			}
  2219  			if x.max <= 0 && !signedAddOverflowsOrUnderflows(x.min, y.min, v.Type) {
  2220  				r := lt
  2221  				if !x.nonzero() {
  2222  					r |= eq
  2223  				}
  2224  				ft.update(b, v, v.Args[1], signed, r)
  2225  			}
  2226  			if y.max <= 0 && !signedAddOverflowsOrUnderflows(x.min, y.min, v.Type) {
  2227  				r := lt
  2228  				if !y.nonzero() {
  2229  					r |= eq
  2230  				}
  2231  				ft.update(b, v, v.Args[0], signed, r)
  2232  			}
  2233  		case OpSub64, OpSub32, OpSub16, OpSub8:
  2234  			x := ft.limits[v.Args[0].ID]
  2235  			y := ft.limits[v.Args[1].ID]
  2236  			if !unsignedSubUnderflows(x.umin, y.umax) {
  2237  				r := lt
  2238  				if !y.nonzero() {
  2239  					r |= eq
  2240  				}
  2241  				ft.update(b, v, v.Args[0], unsigned, r)
  2242  			}
  2243  			// FIXME: we could also do signed facts but the overflow checks are much trickier and I don't need it yet.
  2244  		case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2245  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2246  			ft.update(b, v, v.Args[1], unsigned, lt|eq)
  2247  			if ft.isNonNegative(v.Args[0]) {
  2248  				ft.update(b, v, v.Args[0], signed, lt|eq)
  2249  			}
  2250  			if ft.isNonNegative(v.Args[1]) {
  2251  				ft.update(b, v, v.Args[1], signed, lt|eq)
  2252  			}
  2253  		case OpOr64, OpOr32, OpOr16, OpOr8:
  2254  			// TODO: investigate how to always add facts without much slowdown, see issue #57959
  2255  			//ft.update(b, v, v.Args[0], unsigned, gt|eq)
  2256  			//ft.update(b, v, v.Args[1], unsigned, gt|eq)
  2257  		case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u,
  2258  			OpRsh8Ux64, OpRsh8Ux32, OpRsh8Ux16, OpRsh8Ux8,
  2259  			OpRsh16Ux64, OpRsh16Ux32, OpRsh16Ux16, OpRsh16Ux8,
  2260  			OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8,
  2261  			OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8:
  2262  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2263  		case OpMod64u, OpMod32u, OpMod16u, OpMod8u:
  2264  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2265  			// Note: we have to be careful that this doesn't imply
  2266  			// that the modulus is >0, which isn't true until *after*
  2267  			// the mod instruction executes (and thus panics if the
  2268  			// modulus is 0). See issue 67625.
  2269  			ft.update(b, v, v.Args[1], unsigned, lt)
  2270  		case OpStringLen:
  2271  			if v.Args[0].Op == OpStringMake {
  2272  				ft.update(b, v, v.Args[0].Args[1], signed, eq)
  2273  			}
  2274  		case OpSliceLen:
  2275  			if v.Args[0].Op == OpSliceMake {
  2276  				ft.update(b, v, v.Args[0].Args[1], signed, eq)
  2277  			}
  2278  		case OpSliceCap:
  2279  			if v.Args[0].Op == OpSliceMake {
  2280  				ft.update(b, v, v.Args[0].Args[2], signed, eq)
  2281  			}
  2282  		case OpPhi:
  2283  			addLocalFactsPhi(ft, v)
  2284  		}
  2285  	}
  2286  }
  2287  
  2288  func addLocalFactsPhi(ft *factsTable, v *Value) {
  2289  	// Look for phis that implement min/max.
  2290  	//   z:
  2291  	//      c = Less64 x y (or other Less/Leq operation)
  2292  	//      If c -> bx by
  2293  	//   bx: <- z
  2294  	//       -> b ...
  2295  	//   by: <- z
  2296  	//      -> b ...
  2297  	//   b: <- bx by
  2298  	//      v = Phi x y
  2299  	// Then v is either min or max of x,y.
  2300  	// If it is the min, then we deduce v <= x && v <= y.
  2301  	// If it is the max, then we deduce v >= x && v >= y.
  2302  	// The min case is useful for the copy builtin, see issue 16833.
  2303  	if len(v.Args) != 2 {
  2304  		return
  2305  	}
  2306  	b := v.Block
  2307  	x := v.Args[0]
  2308  	y := v.Args[1]
  2309  	bx := b.Preds[0].b
  2310  	by := b.Preds[1].b
  2311  	var z *Block // branch point
  2312  	switch {
  2313  	case bx == by: // bx == by == z case
  2314  		z = bx
  2315  	case by.uniquePred() == bx: // bx == z case
  2316  		z = bx
  2317  	case bx.uniquePred() == by: // by == z case
  2318  		z = by
  2319  	case bx.uniquePred() == by.uniquePred():
  2320  		z = bx.uniquePred()
  2321  	}
  2322  	if z == nil || z.Kind != BlockIf {
  2323  		return
  2324  	}
  2325  	c := z.Controls[0]
  2326  	if len(c.Args) != 2 {
  2327  		return
  2328  	}
  2329  	var isMin bool // if c, a less-than comparison, is true, phi chooses x.
  2330  	if bx == z {
  2331  		isMin = b.Preds[0].i == 0
  2332  	} else {
  2333  		isMin = bx.Preds[0].i == 0
  2334  	}
  2335  	if c.Args[0] == x && c.Args[1] == y {
  2336  		// ok
  2337  	} else if c.Args[0] == y && c.Args[1] == x {
  2338  		// Comparison is reversed from how the values are listed in the Phi.
  2339  		isMin = !isMin
  2340  	} else {
  2341  		// Not comparing x and y.
  2342  		return
  2343  	}
  2344  	var dom domain
  2345  	switch c.Op {
  2346  	case OpLess64, OpLess32, OpLess16, OpLess8, OpLeq64, OpLeq32, OpLeq16, OpLeq8:
  2347  		dom = signed
  2348  	case OpLess64U, OpLess32U, OpLess16U, OpLess8U, OpLeq64U, OpLeq32U, OpLeq16U, OpLeq8U:
  2349  		dom = unsigned
  2350  	default:
  2351  		return
  2352  	}
  2353  	var rel relation
  2354  	if isMin {
  2355  		rel = lt | eq
  2356  	} else {
  2357  		rel = gt | eq
  2358  	}
  2359  	ft.update(b, v, x, dom, rel)
  2360  	ft.update(b, v, y, dom, rel)
  2361  }
  2362  
  2363  var ctzNonZeroOp = map[Op]Op{OpCtz8: OpCtz8NonZero, OpCtz16: OpCtz16NonZero, OpCtz32: OpCtz32NonZero, OpCtz64: OpCtz64NonZero}
  2364  var mostNegativeDividend = map[Op]int64{
  2365  	OpDiv16: -1 << 15,
  2366  	OpMod16: -1 << 15,
  2367  	OpDiv32: -1 << 31,
  2368  	OpMod32: -1 << 31,
  2369  	OpDiv64: -1 << 63,
  2370  	OpMod64: -1 << 63}
  2371  
  2372  // simplifyBlock simplifies some constant values in b and evaluates
  2373  // branches to non-uniquely dominated successors of b.
  2374  func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) {
  2375  	for _, v := range b.Values {
  2376  		switch v.Op {
  2377  		case OpSlicemask:
  2378  			// Replace OpSlicemask operations in b with constants where possible.
  2379  			x, delta := isConstDelta(v.Args[0])
  2380  			if x == nil {
  2381  				break
  2382  			}
  2383  			// slicemask(x + y)
  2384  			// if x is larger than -y (y is negative), then slicemask is -1.
  2385  			lim := ft.limits[x.ID]
  2386  			if lim.umin > uint64(-delta) {
  2387  				if v.Args[0].Op == OpAdd64 {
  2388  					v.reset(OpConst64)
  2389  				} else {
  2390  					v.reset(OpConst32)
  2391  				}
  2392  				if b.Func.pass.debug > 0 {
  2393  					b.Func.Warnl(v.Pos, "Proved slicemask not needed")
  2394  				}
  2395  				v.AuxInt = -1
  2396  			}
  2397  		case OpCtz8, OpCtz16, OpCtz32, OpCtz64:
  2398  			// On some architectures, notably amd64, we can generate much better
  2399  			// code for CtzNN if we know that the argument is non-zero.
  2400  			// Capture that information here for use in arch-specific optimizations.
  2401  			x := v.Args[0]
  2402  			lim := ft.limits[x.ID]
  2403  			if lim.umin > 0 || lim.min > 0 || lim.max < 0 {
  2404  				if b.Func.pass.debug > 0 {
  2405  					b.Func.Warnl(v.Pos, "Proved %v non-zero", v.Op)
  2406  				}
  2407  				v.Op = ctzNonZeroOp[v.Op]
  2408  			}
  2409  		case OpRsh8x8, OpRsh8x16, OpRsh8x32, OpRsh8x64,
  2410  			OpRsh16x8, OpRsh16x16, OpRsh16x32, OpRsh16x64,
  2411  			OpRsh32x8, OpRsh32x16, OpRsh32x32, OpRsh32x64,
  2412  			OpRsh64x8, OpRsh64x16, OpRsh64x32, OpRsh64x64:
  2413  			// Check whether, for a >> b, we know that a is non-negative
  2414  			// and b is all of a's bits except the MSB. If so, a is shifted to zero.
  2415  			bits := 8 * v.Args[0].Type.Size()
  2416  			if v.Args[1].isGenericIntConst() && v.Args[1].AuxInt >= bits-1 && ft.isNonNegative(v.Args[0]) {
  2417  				if b.Func.pass.debug > 0 {
  2418  					b.Func.Warnl(v.Pos, "Proved %v shifts to zero", v.Op)
  2419  				}
  2420  				switch bits {
  2421  				case 64:
  2422  					v.reset(OpConst64)
  2423  				case 32:
  2424  					v.reset(OpConst32)
  2425  				case 16:
  2426  					v.reset(OpConst16)
  2427  				case 8:
  2428  					v.reset(OpConst8)
  2429  				default:
  2430  					panic("unexpected integer size")
  2431  				}
  2432  				v.AuxInt = 0
  2433  				break // Be sure not to fallthrough - this is no longer OpRsh.
  2434  			}
  2435  			// If the Rsh hasn't been replaced with 0, still check if it is bounded.
  2436  			fallthrough
  2437  		case OpLsh8x8, OpLsh8x16, OpLsh8x32, OpLsh8x64,
  2438  			OpLsh16x8, OpLsh16x16, OpLsh16x32, OpLsh16x64,
  2439  			OpLsh32x8, OpLsh32x16, OpLsh32x32, OpLsh32x64,
  2440  			OpLsh64x8, OpLsh64x16, OpLsh64x32, OpLsh64x64,
  2441  			OpRsh8Ux8, OpRsh8Ux16, OpRsh8Ux32, OpRsh8Ux64,
  2442  			OpRsh16Ux8, OpRsh16Ux16, OpRsh16Ux32, OpRsh16Ux64,
  2443  			OpRsh32Ux8, OpRsh32Ux16, OpRsh32Ux32, OpRsh32Ux64,
  2444  			OpRsh64Ux8, OpRsh64Ux16, OpRsh64Ux32, OpRsh64Ux64:
  2445  			// Check whether, for a << b, we know that b
  2446  			// is strictly less than the number of bits in a.
  2447  			by := v.Args[1]
  2448  			lim := ft.limits[by.ID]
  2449  			bits := 8 * v.Args[0].Type.Size()
  2450  			if lim.umax < uint64(bits) || (lim.max < bits && ft.isNonNegative(by)) {
  2451  				v.AuxInt = 1 // see shiftIsBounded
  2452  				if b.Func.pass.debug > 0 && !by.isGenericIntConst() {
  2453  					b.Func.Warnl(v.Pos, "Proved %v bounded", v.Op)
  2454  				}
  2455  			}
  2456  		case OpDiv16, OpDiv32, OpDiv64, OpMod16, OpMod32, OpMod64:
  2457  			// On amd64 and 386 fix-up code can be avoided if we know
  2458  			//  the divisor is not -1 or the dividend > MinIntNN.
  2459  			// Don't modify AuxInt on other architectures,
  2460  			// as that can interfere with CSE.
  2461  			// TODO: add other architectures?
  2462  			if b.Func.Config.arch != "386" && b.Func.Config.arch != "amd64" {
  2463  				break
  2464  			}
  2465  			divr := v.Args[1]
  2466  			divrLim := ft.limits[divr.ID]
  2467  			divd := v.Args[0]
  2468  			divdLim := ft.limits[divd.ID]
  2469  			if divrLim.max < -1 || divrLim.min > -1 || divdLim.min > mostNegativeDividend[v.Op] {
  2470  				// See DivisionNeedsFixUp in rewrite.go.
  2471  				// v.AuxInt = 1 means we have proved both that the divisor is not -1
  2472  				// and that the dividend is not the most negative integer,
  2473  				// so we do not need to add fix-up code.
  2474  				v.AuxInt = 1
  2475  				if b.Func.pass.debug > 0 {
  2476  					b.Func.Warnl(v.Pos, "Proved %v does not need fix-up", v.Op)
  2477  				}
  2478  			}
  2479  		}
  2480  		// Fold provable constant results.
  2481  		// Helps in cases where we reuse a value after branching on its equality.
  2482  		for i, arg := range v.Args {
  2483  			lim := ft.limits[arg.ID]
  2484  			var constValue int64
  2485  			switch {
  2486  			case lim.min == lim.max:
  2487  				constValue = lim.min
  2488  			case lim.umin == lim.umax:
  2489  				constValue = int64(lim.umin)
  2490  			default:
  2491  				continue
  2492  			}
  2493  			switch arg.Op {
  2494  			case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConstNil:
  2495  				continue
  2496  			}
  2497  			typ := arg.Type
  2498  			f := b.Func
  2499  			var c *Value
  2500  			switch {
  2501  			case typ.IsBoolean():
  2502  				c = f.ConstBool(typ, constValue != 0)
  2503  			case typ.IsInteger() && typ.Size() == 1:
  2504  				c = f.ConstInt8(typ, int8(constValue))
  2505  			case typ.IsInteger() && typ.Size() == 2:
  2506  				c = f.ConstInt16(typ, int16(constValue))
  2507  			case typ.IsInteger() && typ.Size() == 4:
  2508  				c = f.ConstInt32(typ, int32(constValue))
  2509  			case typ.IsInteger() && typ.Size() == 8:
  2510  				c = f.ConstInt64(typ, constValue)
  2511  			case typ.IsPtrShaped():
  2512  				if constValue == 0 {
  2513  					c = f.ConstNil(typ)
  2514  				} else {
  2515  					// Not sure how this might happen, but if it
  2516  					// does, just skip it.
  2517  					continue
  2518  				}
  2519  			default:
  2520  				// Not sure how this might happen, but if it
  2521  				// does, just skip it.
  2522  				continue
  2523  			}
  2524  			v.SetArg(i, c)
  2525  			ft.initLimitForNewValue(c)
  2526  			if b.Func.pass.debug > 1 {
  2527  				b.Func.Warnl(v.Pos, "Proved %v's arg %d (%v) is constant %d", v, i, arg, constValue)
  2528  			}
  2529  		}
  2530  	}
  2531  
  2532  	if b.Kind != BlockIf {
  2533  		return
  2534  	}
  2535  
  2536  	// Consider outgoing edges from this block.
  2537  	parent := b
  2538  	for i, branch := range [...]branch{positive, negative} {
  2539  		child := parent.Succs[i].b
  2540  		if getBranch(sdom, parent, child) != unknown {
  2541  			// For edges to uniquely dominated blocks, we
  2542  			// already did this when we visited the child.
  2543  			continue
  2544  		}
  2545  		// For edges to other blocks, this can trim a branch
  2546  		// even if we couldn't get rid of the child itself.
  2547  		ft.checkpoint()
  2548  		addBranchRestrictions(ft, parent, branch)
  2549  		unsat := ft.unsat
  2550  		ft.restore()
  2551  		if unsat {
  2552  			// This branch is impossible, so remove it
  2553  			// from the block.
  2554  			removeBranch(parent, branch)
  2555  			// No point in considering the other branch.
  2556  			// (It *is* possible for both to be
  2557  			// unsatisfiable since the fact table is
  2558  			// incomplete. We could turn this into a
  2559  			// BlockExit, but it doesn't seem worth it.)
  2560  			break
  2561  		}
  2562  	}
  2563  }
  2564  
  2565  func removeBranch(b *Block, branch branch) {
  2566  	c := b.Controls[0]
  2567  	if b.Func.pass.debug > 0 {
  2568  		verb := "Proved"
  2569  		if branch == positive {
  2570  			verb = "Disproved"
  2571  		}
  2572  		if b.Func.pass.debug > 1 {
  2573  			b.Func.Warnl(b.Pos, "%s %s (%s)", verb, c.Op, c)
  2574  		} else {
  2575  			b.Func.Warnl(b.Pos, "%s %s", verb, c.Op)
  2576  		}
  2577  	}
  2578  	if c != nil && c.Pos.IsStmt() == src.PosIsStmt && c.Pos.SameFileAndLine(b.Pos) {
  2579  		// attempt to preserve statement marker.
  2580  		b.Pos = b.Pos.WithIsStmt()
  2581  	}
  2582  	if branch == positive || branch == negative {
  2583  		b.Kind = BlockFirst
  2584  		b.ResetControls()
  2585  		if branch == positive {
  2586  			b.swapSuccessors()
  2587  		}
  2588  	} else {
  2589  		// TODO: figure out how to remove an entry from a jump table
  2590  	}
  2591  }
  2592  
  2593  // isConstDelta returns non-nil if v is equivalent to w+delta (signed).
  2594  func isConstDelta(v *Value) (w *Value, delta int64) {
  2595  	cop := OpConst64
  2596  	switch v.Op {
  2597  	case OpAdd32, OpSub32:
  2598  		cop = OpConst32
  2599  	case OpAdd16, OpSub16:
  2600  		cop = OpConst16
  2601  	case OpAdd8, OpSub8:
  2602  		cop = OpConst8
  2603  	}
  2604  	switch v.Op {
  2605  	case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  2606  		if v.Args[0].Op == cop {
  2607  			return v.Args[1], v.Args[0].AuxInt
  2608  		}
  2609  		if v.Args[1].Op == cop {
  2610  			return v.Args[0], v.Args[1].AuxInt
  2611  		}
  2612  	case OpSub64, OpSub32, OpSub16, OpSub8:
  2613  		if v.Args[1].Op == cop {
  2614  			aux := v.Args[1].AuxInt
  2615  			if aux != -aux { // Overflow; too bad
  2616  				return v.Args[0], -aux
  2617  			}
  2618  		}
  2619  	}
  2620  	return nil, 0
  2621  }
  2622  
  2623  // isCleanExt reports whether v is the result of a value-preserving
  2624  // sign or zero extension.
  2625  func isCleanExt(v *Value) bool {
  2626  	switch v.Op {
  2627  	case OpSignExt8to16, OpSignExt8to32, OpSignExt8to64,
  2628  		OpSignExt16to32, OpSignExt16to64, OpSignExt32to64:
  2629  		// signed -> signed is the only value-preserving sign extension
  2630  		return v.Args[0].Type.IsSigned() && v.Type.IsSigned()
  2631  
  2632  	case OpZeroExt8to16, OpZeroExt8to32, OpZeroExt8to64,
  2633  		OpZeroExt16to32, OpZeroExt16to64, OpZeroExt32to64:
  2634  		// unsigned -> signed/unsigned are value-preserving zero extensions
  2635  		return !v.Args[0].Type.IsSigned()
  2636  	}
  2637  	return false
  2638  }
  2639
View as plain text