Source file src/cmd/compile/internal/ssa/memcombine.go

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/types"
    10  	"cmd/internal/src"
    11  	"cmp"
    12  	"slices"
    13  )
    14  
    15  // memcombine combines smaller loads and stores into larger ones.
    16  // We ensure this generates good code for encoding/binary operations.
    17  // It may help other cases also.
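        // For example, a 4-byte little-endian load written as
        //
        //	uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
        //
        // (the pattern used by encoding/binary) can typically be combined into a
        // single 4-byte load, with a byte swap added on big-endian targets, and
        // the matching byte-by-byte stores into a single 4-byte store.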
    18  func memcombine(f *Func) {
    19  	// This optimization requires that the architecture has
    20  	// unaligned loads and unaligned stores.
    21  	if !f.Config.unalignedOK {
    22  		return
    23  	}
    24  
    25  	memcombineLoads(f)
    26  	memcombineStores(f)
    27  }
    28  
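        // memcombineLoads combines small loads whose results are ORed together,
        // possibly after zero extension and shifting, into a single wider load.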
    29  func memcombineLoads(f *Func) {
    30  	// Find "OR trees" to start with.
    31  	mark := f.newSparseSet(f.NumValues())
    32  	defer f.retSparseSet(mark)
    33  	var order []*Value
    34  
    35  	// Mark all values that are the argument of an OR.
    36  	for _, b := range f.Blocks {
    37  		for _, v := range b.Values {
    38  			if v.Op == OpOr16 || v.Op == OpOr32 || v.Op == OpOr64 {
    39  				mark.add(v.Args[0].ID)
    40  				mark.add(v.Args[1].ID)
    41  			}
    42  		}
    43  	}
    44  	for _, b := range f.Blocks {
    45  		order = order[:0]
    46  		for _, v := range b.Values {
    47  			if v.Op != OpOr16 && v.Op != OpOr32 && v.Op != OpOr64 {
    48  				continue
    49  			}
    50  			if mark.contains(v.ID) {
    51  				// marked - means it is not the root of an OR tree
    52  				continue
    53  			}
    54  			// Add the OR tree rooted at v to the order.
    55  			// We use BFS here, but any walk that puts roots before leaves would work.
    56  			i := len(order)
    57  			order = append(order, v)
    58  			for ; i < len(order); i++ {
    59  				x := order[i]
    60  				for j := 0; j < 2; j++ {
    61  					a := x.Args[j]
    62  					if a.Op == OpOr16 || a.Op == OpOr32 || a.Op == OpOr64 {
    63  						order = append(order, a)
    64  					}
    65  				}
    66  			}
    67  		}
    68  		for _, v := range order {
    69  			max := f.Config.RegSize
    70  			switch v.Op {
    71  			case OpOr64:
    72  			case OpOr32:
    73  				max = 4
    74  			case OpOr16:
    75  				max = 2
    76  			default:
    77  				continue
    78  			}
    79  			for n := max; n > 1; n /= 2 {
    80  				if combineLoads(v, n) {
    81  					break
    82  				}
    83  			}
    84  		}
    85  	}
    86  }
    87  
    88  // A BaseAddress represents the address ptr+idx, where
    89  // ptr is a pointer type and idx is an integer type.
    90  // idx may be nil, in which case it is treated as 0.
    91  type BaseAddress struct {
    92  	ptr *Value
    93  	idx *Value
    94  }
    95  
    96  // splitPtr returns the base address of ptr and any
    97  // constant offset from that base.
    98  // BaseAddress{ptr,nil},0 is always a valid result, but splitPtr
    99  // tries to peel away as many constants into off as possible.
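        // For example, for an address of the form (OffPtr [8] (AddPtr p i)),
        // splitPtr returns BaseAddress{ptr: p, idx: i}, 8.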
   100  func splitPtr(ptr *Value) (BaseAddress, int64) {
   101  	var idx *Value
   102  	var off int64
   103  	for {
   104  		if ptr.Op == OpOffPtr {
   105  			off += ptr.AuxInt
   106  			ptr = ptr.Args[0]
   107  		} else if ptr.Op == OpAddPtr {
   108  			if idx != nil {
   109  				// We have two or more indexing values.
   110  				// Pick the first one we found.
   111  				return BaseAddress{ptr: ptr, idx: idx}, off
   112  			}
   113  			idx = ptr.Args[1]
   114  			if idx.Op == OpAdd32 || idx.Op == OpAdd64 {
   115  				if idx.Args[0].Op == OpConst32 || idx.Args[0].Op == OpConst64 {
   116  					off += idx.Args[0].AuxInt
   117  					idx = idx.Args[1]
   118  				} else if idx.Args[1].Op == OpConst32 || idx.Args[1].Op == OpConst64 {
   119  					off += idx.Args[1].AuxInt
   120  					idx = idx.Args[0]
   121  				}
   122  			}
   123  			ptr = ptr.Args[0]
   124  		} else {
   125  			return BaseAddress{ptr: ptr, idx: idx}, off
   126  		}
   127  	}
   128  }
   129  
   130  func combineLoads(root *Value, n int64) bool {
   131  	orOp := root.Op
   132  	var shiftOp Op
   133  	switch orOp {
   134  	case OpOr64:
   135  		shiftOp = OpLsh64x64
   136  	case OpOr32:
   137  		shiftOp = OpLsh32x64
   138  	case OpOr16:
   139  		shiftOp = OpLsh16x64
   140  	default:
   141  		return false
   142  	}
   143  
   144  	// Find n values that are ORed together with the above op.
   145  	a := make([]*Value, 0, 8)
   146  	a = append(a, root)
   147  	for i := 0; i < len(a) && int64(len(a)) < n; i++ {
   148  		v := a[i]
   149  		if v.Uses != 1 && v != root {
   150  			// Something in this subtree is used somewhere else.
   151  			return false
   152  		}
   153  		if v.Op == orOp {
   154  			a[i] = v.Args[0]
   155  			a = append(a, v.Args[1])
   156  			i--
   157  		}
   158  	}
   159  	if int64(len(a)) != n {
   160  		return false
   161  	}
   162  
   163  	// Check the first entry to see what ops we're looking for.
   164  	// All the entries should be of the form shift(extend(load)), maybe with no shift.
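        	// For example, one entry of a 4-byte combine might look like
        	// (Lsh32x64 (ZeroExt8to32 (Load ptr mem)) (Const64 [8])).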
   165  	v := a[0]
   166  	if v.Op == shiftOp {
   167  		v = v.Args[0]
   168  	}
   169  	var extOp Op
   170  	if orOp == OpOr64 && (v.Op == OpZeroExt8to64 || v.Op == OpZeroExt16to64 || v.Op == OpZeroExt32to64) ||
   171  		orOp == OpOr32 && (v.Op == OpZeroExt8to32 || v.Op == OpZeroExt16to32) ||
   172  		orOp == OpOr16 && v.Op == OpZeroExt8to16 {
   173  		extOp = v.Op
   174  		v = v.Args[0]
   175  	} else {
   176  		return false
   177  	}
   178  	if v.Op != OpLoad {
   179  		return false
   180  	}
   181  	base, _ := splitPtr(v.Args[0])
   182  	mem := v.Args[1]
   183  	size := v.Type.Size()
   184  
   185  	if root.Block.Func.Config.arch == "S390X" {
   186  		// s390x can't handle unaligned accesses to global variables.
   187  		if base.ptr.Op == OpAddr {
   188  			return false
   189  		}
   190  	}
   191  
   192  	// Check all the entries, extract useful info.
   193  	type LoadRecord struct {
   194  		load   *Value
   195  		offset int64 // offset of load address from base
   196  		shift  int64
   197  	}
   198  	r := make([]LoadRecord, n, 8)
   199  	for i := int64(0); i < n; i++ {
   200  		v := a[i]
   201  		if v.Uses != 1 {
   202  			return false
   203  		}
   204  		shift := int64(0)
   205  		if v.Op == shiftOp {
   206  			if v.Args[1].Op != OpConst64 {
   207  				return false
   208  			}
   209  			shift = v.Args[1].AuxInt
   210  			v = v.Args[0]
   211  			if v.Uses != 1 {
   212  				return false
   213  			}
   214  		}
   215  		if v.Op != extOp {
   216  			return false
   217  		}
   218  		load := v.Args[0]
   219  		if load.Op != OpLoad {
   220  			return false
   221  		}
   222  		if load.Uses != 1 {
   223  			return false
   224  		}
   225  		if load.Args[1] != mem {
   226  			return false
   227  		}
   228  		p, off := splitPtr(load.Args[0])
   229  		if p != base {
   230  			return false
   231  		}
   232  		r[i] = LoadRecord{load: load, offset: off, shift: shift}
   233  	}
   234  
   235  	// Sort in memory address order.
   236  	slices.SortFunc(r, func(a, b LoadRecord) int {
   237  		return cmp.Compare(a.offset, b.offset)
   238  	})
   239  
   240  	// Check that we have contiguous offsets.
   241  	for i := int64(0); i < n; i++ {
   242  		if r[i].offset != r[0].offset+i*size {
   243  			return false
   244  		}
   245  	}
   246  
   247  	// Check for reads in little-endian or big-endian order.
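        	// For a little-endian read the shift grows with the address
        	// (shift0, shift0+8*size, ...); for a big-endian read it shrinks.
        	// For example, byte loads shifted by 0, 8, 16 and 24 in increasing
        	// address order form a little-endian 4-byte read.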
   248  	shift0 := r[0].shift
   249  	isLittleEndian := true
   250  	for i := int64(0); i < n; i++ {
   251  		if r[i].shift != shift0+i*size*8 {
   252  			isLittleEndian = false
   253  			break
   254  		}
   255  	}
   256  	isBigEndian := true
   257  	for i := int64(0); i < n; i++ {
   258  		if r[i].shift != shift0-i*size*8 {
   259  			isBigEndian = false
   260  			break
   261  		}
   262  	}
   263  	if !isLittleEndian && !isBigEndian {
   264  		return false
   265  	}
   266  
   267  	// Find a place to put the new load.
   268  	// This is tricky, because it has to be at a point where
   269  	// its memory argument is live. We can't just put it in root.Block.
   270  	// We use the block of the latest load.
   271  	loads := make([]*Value, n, 8)
   272  	for i := int64(0); i < n; i++ {
   273  		loads[i] = r[i].load
   274  	}
   275  	loadBlock := mergePoint(root.Block, loads...)
   276  	if loadBlock == nil {
   277  		return false
   278  	}
   279  	// Find a source position to use.
   280  	pos := src.NoXPos
   281  	for _, load := range loads {
   282  		if load.Block == loadBlock {
   283  			pos = load.Pos
   284  			break
   285  		}
   286  	}
   287  	if pos == src.NoXPos {
   288  		return false
   289  	}
   290  
   291  	// Check to see if we need a byte swap after loading.
   292  	needSwap := isLittleEndian && root.Block.Func.Config.BigEndian ||
   293  		isBigEndian && !root.Block.Func.Config.BigEndian
   294  	if needSwap && (size != 1 || !root.Block.Func.Config.haveByteSwap(n)) {
   295  		return false
   296  	}
   297  
   298  	// This is the commit point.
   299  
   300  	// First, issue load at lowest address.
   301  	v = loadBlock.NewValue2(pos, OpLoad, sizeType(n*size), r[0].load.Args[0], mem)
   302  
   303  	// Byte swap if needed.
   304  	if needSwap {
   305  		v = byteSwap(loadBlock, pos, v)
   306  	}
   307  
   308  	// Extend if needed.
   309  	if n*size < root.Type.Size() {
   310  		v = zeroExtend(loadBlock, pos, v, n*size, root.Type.Size())
   311  	}
   312  
   313  	// Shift if needed.
   314  	if isLittleEndian && shift0 != 0 {
   315  		v = leftShift(loadBlock, pos, v, shift0)
   316  	}
   317  	if isBigEndian && shift0-(n-1)*size*8 != 0 {
   318  		v = leftShift(loadBlock, pos, v, shift0-(n-1)*size*8)
   319  	}
   320  
   321  	// Install with (Copy v).
   322  	root.reset(OpCopy)
   323  	root.AddArg(v)
   324  
   325  	// Clobber the loads, just to prevent additional work being done on
   326  	// subtrees (which are now unreachable).
   327  	for i := int64(0); i < n; i++ {
   328  		clobber(r[i].load)
   329  	}
   330  	return true
   331  }
   332  
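        // memcombineStores combines adjacent small stores within a block into a
        // single wider store, e.g. four consecutive 1-byte stores into one
        // 4-byte store.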
   333  func memcombineStores(f *Func) {
   334  	mark := f.newSparseSet(f.NumValues())
   335  	defer f.retSparseSet(mark)
   336  	var order []*Value
   337  
   338  	for _, b := range f.Blocks {
   339  		// Mark all stores which are not last in a store sequence.
   340  		mark.clear()
   341  		for _, v := range b.Values {
   342  			if v.Op == OpStore {
   343  				mark.add(v.MemoryArg().ID)
   344  			}
   345  		}
   346  
   347  		// pick an order for visiting stores such that
   348  		// later stores come earlier in the ordering.
   349  		order = order[:0]
   350  		for _, v := range b.Values {
   351  			if v.Op != OpStore {
   352  				continue
   353  			}
   354  			if mark.contains(v.ID) {
   355  				continue // not last in a chain of stores
   356  			}
   357  			for {
   358  				order = append(order, v)
   359  				v = v.Args[2]
   360  				if v.Block != b || v.Op != OpStore {
   361  					break
   362  				}
   363  			}
   364  		}
   365  
   366  		// Look for combining opportunities at each store in queue order.
   367  		for _, v := range order {
   368  			if v.Op != OpStore { // already rewritten
   369  				continue
   370  			}
   371  
   372  			size := v.Aux.(*types.Type).Size()
   373  			if size >= f.Config.RegSize || size == 0 {
   374  				continue
   375  			}
   376  
   377  			combineStores(v)
   378  		}
   379  	}
   380  }
   381  
   382  // combineStores tries to combine the stores ending in root.
   383  func combineStores(root *Value) {
   384  	// Helper functions.
   385  	maxRegSize := root.Block.Func.Config.RegSize
   386  	type StoreRecord struct {
   387  		store  *Value
   388  		offset int64
   389  		size   int64
   390  	}
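        	// getShiftBase returns the value whose truncated and/or shifted pieces
        	// the first two stores write, or nil if there is no such common value.
        	// For example, if the stores write byte(x) and byte(x>>8), it returns x.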
   391  	getShiftBase := func(a []StoreRecord) *Value {
   392  		x := a[0].store.Args[1]
   393  		y := a[1].store.Args[1]
   394  		switch x.Op {
   395  		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
   396  			x = x.Args[0]
   397  		default:
   398  			return nil
   399  		}
   400  		switch y.Op {
   401  		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
   402  			y = y.Args[0]
   403  		default:
   404  			return nil
   405  		}
   406  		var x2 *Value
   407  		switch x.Op {
   408  		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
   409  			x2 = x.Args[0]
   410  		default:
   411  		}
   412  		var y2 *Value
   413  		switch y.Op {
   414  		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
   415  			y2 = y.Args[0]
   416  		default:
   417  		}
   418  		if y2 == x {
   419  			// a shift of x and x itself.
   420  			return x
   421  		}
   422  		if x2 == y {
   423  			// a shift of y and y itself.
   424  			return y
   425  		}
   426  		if x2 == y2 {
   427  			// 2 shifts both of the same argument.
   428  			return x2
   429  		}
   430  		return nil
   431  	}
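        	// isShiftBase reports whether the value stored by v is a truncation of
        	// base, possibly after a right shift of base.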
   432  	isShiftBase := func(v, base *Value) bool {
   433  		val := v.Args[1]
   434  		switch val.Op {
   435  		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
   436  			val = val.Args[0]
   437  		default:
   438  			return false
   439  		}
   440  		if val == base {
   441  			return true
   442  		}
   443  		switch val.Op {
   444  		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
   445  			val = val.Args[0]
   446  		default:
   447  			return false
   448  		}
   449  		return val == base
   450  	}
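        	// shift returns the constant right-shift amount (in bits) applied to
        	// base in the value stored by v, 0 if base is stored unshifted, or -1
        	// if the stored value doesn't have the expected trunc(shift(base)) shape.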
   451  	shift := func(v, base *Value) int64 {
   452  		val := v.Args[1]
   453  		switch val.Op {
   454  		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
   455  			val = val.Args[0]
   456  		default:
   457  			return -1
   458  		}
   459  		if val == base {
   460  			return 0
   461  		}
   462  		switch val.Op {
   463  		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
   464  			val = val.Args[1]
   465  		default:
   466  			return -1
   467  		}
   468  		if val.Op != OpConst64 {
   469  			return -1
   470  		}
   471  		return val.AuxInt
   472  	}
   473  
   474  	// Gather n stores to look at. Check easy conditions we require.
   475  	allMergeable := make([]StoreRecord, 0, 8)
   476  	rbase, roff := splitPtr(root.Args[0])
   477  	if root.Block.Func.Config.arch == "S390X" {
   478  		// s390x can't handle unaligned accesses to global variables.
   479  		if rbase.ptr.Op == OpAddr {
   480  			return
   481  		}
   482  	}
   483  	allMergeable = append(allMergeable, StoreRecord{root, roff, root.Aux.(*types.Type).Size()})
   484  	allMergeableSize := root.Aux.(*types.Type).Size()
   485  	// TODO: this loop strictly requires stores to chain together in memory.
   486  	// maybe we can break this constraint and match more patterns.
   487  	for i, x := 1, root.Args[2]; i < 8; i, x = i+1, x.Args[2] {
   488  		if x.Op != OpStore {
   489  			break
   490  		}
   491  		if x.Block != root.Block {
   492  			break
   493  		}
   494  		if x.Uses != 1 { // Note: root can have more than one use.
   495  			break
   496  		}
   497  		xSize := x.Aux.(*types.Type).Size()
   498  		if xSize == 0 {
   499  			break
   500  		}
   501  		if xSize > maxRegSize-allMergeableSize {
   502  			break
   503  		}
   504  		base, off := splitPtr(x.Args[0])
   505  		if base != rbase {
   506  			break
   507  		}
   508  		allMergeable = append(allMergeable, StoreRecord{x, off, xSize})
   509  		allMergeableSize += xSize
   510  	}
   511  	if len(allMergeable) <= 1 {
   512  		return
   513  	}
   514  	// Record the prefixes whose total size is exactly a register size (2, 4, or 8 bytes).
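        	// For example, stores of sizes 1, 1, 2 and 4 (in chain order) produce
        	// candidate sets of total size 2 (the first two stores), 4 (the first
        	// three) and 8 (all four).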
   515  	mergeableSet := map[int64][]StoreRecord{}
   516  	for i, size := 0, int64(0); i < len(allMergeable); i++ {
   517  		size += allMergeable[i].size
   518  		for _, bucketSize := range []int64{8, 4, 2} {
   519  			if size == bucketSize {
   520  				mergeableSet[size] = slices.Clone(allMergeable[:i+1])
   521  				break
   522  			}
   523  		}
   524  	}
   525  	var a []StoreRecord
   526  	var aTotalSize int64
   527  	var mem *Value
   528  	var pos src.XPos
   529  	// Pick the largest mergeable set.
   530  	for _, s := range []int64{8, 4, 2} {
   531  		candidate := mergeableSet[s]
   532  		// TODO: a refactoring might be more efficient:
   533  		// Find a bunch of stores that are all adjacent and then decide how big a chunk of
   534  		// those sequential stores to combine.
   535  		if len(candidate) >= 2 {
   536  			// Before we sort, grab the memory arg the result should have.
   537  			mem = candidate[len(candidate)-1].store.Args[2]
   538  			// Also grab position of first store (last in array = first in memory order).
   539  			pos = candidate[len(candidate)-1].store.Pos
   540  			// Sort stores in increasing address order.
   541  			slices.SortFunc(candidate, func(sr1, sr2 StoreRecord) int {
   542  				return cmp.Compare(sr1.offset, sr2.offset)
   543  			})
   544  			// Check that everything is written to sequential locations.
   545  			sequential := true
   546  			for i := 1; i < len(candidate); i++ {
   547  				if candidate[i].offset != candidate[i-1].offset+candidate[i-1].size {
   548  					sequential = false
   549  					break
   550  				}
   551  			}
   552  			if sequential {
   553  				a = candidate
   554  				aTotalSize = s
   555  				break
   556  			}
   557  		}
   558  	}
   559  	if len(a) <= 1 {
   560  		return
   561  	}
   562  	// Memory location we're going to write at (the lowest one).
   563  	ptr := a[0].store.Args[0]
   564  
   565  	// Check for constant stores.
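        	// If every stored value is a constant, the stores are folded below into
        	// a single store of one combined constant: e.g. on a little-endian
        	// target, storing 1 at p[0] and 2 at p[1] becomes one 2-byte store of 0x0201.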
   566  	isConst := true
   567  	for i := range a {
   568  		switch a[i].store.Args[1].Op {
   569  		case OpConst32, OpConst16, OpConst8, OpConstBool:
   570  		default:
   571  			isConst = false
   572  		}
   573  		if !isConst {
   574  			break
   575  		}
   576  	}
   577  	if isConst {
   578  		// Modify root to do all the stores.
   579  		var c int64
   580  		for i := range a {
   581  			mask := int64(1)<<(8*a[i].size) - 1
   582  			s := 8 * (a[i].offset - a[0].offset)
   583  			if root.Block.Func.Config.BigEndian {
   584  				s = (aTotalSize-a[i].size)*8 - s
   585  			}
   586  			c |= (a[i].store.Args[1].AuxInt & mask) << s
   587  		}
   588  		var cv *Value
   589  		switch aTotalSize {
   590  		case 2:
   591  			cv = root.Block.Func.ConstInt16(types.Types[types.TUINT16], int16(c))
   592  		case 4:
   593  			cv = root.Block.Func.ConstInt32(types.Types[types.TUINT32], int32(c))
   594  		case 8:
   595  			cv = root.Block.Func.ConstInt64(types.Types[types.TUINT64], c)
   596  		}
   597  
   598  		// Move all the stores to the root.
   599  		for i := range a {
   600  			v := a[i].store
   601  			if v == root {
   602  				v.Aux = cv.Type // widen store type
   603  				v.Pos = pos
   604  				v.SetArg(0, ptr)
   605  				v.SetArg(1, cv)
   606  				v.SetArg(2, mem)
   607  			} else {
   608  				clobber(v)
   609  				v.Type = types.Types[types.TBOOL] // erase memory type
   610  			}
   611  		}
   612  		return
   613  	}
   614  
   615  	// Check for consecutive loads as the source of the stores.
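        	// For example, byte copies such as dst[0] = src[0]; dst[1] = src[1]
        	// can become a single 2-byte load feeding a single 2-byte store.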
   616  	var loadMem *Value
   617  	var loadBase BaseAddress
   618  	var loadIdx int64
   619  	for i := range a {
   620  		load := a[i].store.Args[1]
   621  		if load.Op != OpLoad {
   622  			loadMem = nil
   623  			break
   624  		}
   625  		if load.Uses != 1 {
   626  			loadMem = nil
   627  			break
   628  		}
   629  		if load.Type.IsPtr() {
   630  			// Don't combine stores containing a pointer, as we need
   631  			// a write barrier for those. This can't currently happen,
   632  			// but might in the future if we ever have another
   633  			// 8-byte-reg/4-byte-ptr architecture like amd64p32.
   634  			loadMem = nil
   635  			break
   636  		}
   637  		mem := load.Args[1]
   638  		base, idx := splitPtr(load.Args[0])
   639  		if loadMem == nil {
   640  			// First one we found
   641  			loadMem = mem
   642  			loadBase = base
   643  			loadIdx = idx
   644  			continue
   645  		}
   646  		if base != loadBase || mem != loadMem {
   647  			loadMem = nil
   648  			break
   649  		}
   650  		if idx != loadIdx+(a[i].offset-a[0].offset) {
   651  			loadMem = nil
   652  			break
   653  		}
   654  	}
   655  	if loadMem != nil {
   656  		// Modify the first load to do a larger load instead.
   657  		load := a[0].store.Args[1]
   658  		switch aTotalSize {
   659  		case 2:
   660  			load.Type = types.Types[types.TUINT16]
   661  		case 4:
   662  			load.Type = types.Types[types.TUINT32]
   663  		case 8:
   664  			load.Type = types.Types[types.TUINT64]
   665  		}
   666  
   667  		// Modify root to do the store.
   668  		for i := range a {
   669  			v := a[i].store
   670  			if v == root {
   671  				v.Aux = load.Type // widen store type
   672  				v.Pos = pos
   673  				v.SetArg(0, ptr)
   674  				v.SetArg(1, load)
   675  				v.SetArg(2, mem)
   676  			} else {
   677  				clobber(v)
   678  				v.Type = types.Types[types.TBOOL] // erase memory type
   679  			}
   680  		}
   681  		return
   682  	}
   683  
   684  	// Check that all the shift/trunc are of the same base value.
   685  	shiftBase := getShiftBase(a)
   686  	if shiftBase == nil {
   687  		return
   688  	}
   689  	for i := range a {
   690  		if !isShiftBase(a[i].store, shiftBase) {
   691  			return
   692  		}
   693  	}
   694  
   695  	// Check for writes in little-endian or big-endian order.
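        	// As with loads: for a little-endian write the shift grows with the
        	// store offset (e.g. byte(x), byte(x>>8), ... at increasing addresses);
        	// for a big-endian write it shrinks.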
   696  	isLittleEndian := true
   697  	shift0 := shift(a[0].store, shiftBase)
   698  	for i := 1; i < len(a); i++ {
   699  		if shift(a[i].store, shiftBase) != shift0+(a[i].offset-a[0].offset)*8 {
   700  			isLittleEndian = false
   701  			break
   702  		}
   703  	}
   704  	isBigEndian := true
   705  	shiftedSize := int64(0)
   706  	for i := 1; i < len(a); i++ {
   707  		shiftedSize += a[i].size
   708  		if shift(a[i].store, shiftBase) != shift0-shiftedSize*8 {
   709  			isBigEndian = false
   710  			break
   711  		}
   712  	}
   713  	if !isLittleEndian && !isBigEndian {
   714  		return
   715  	}
   716  
   717  	// Check to see if we need byte swap before storing.
   718  	needSwap := isLittleEndian && root.Block.Func.Config.BigEndian ||
   719  		isBigEndian && !root.Block.Func.Config.BigEndian
   720  	if needSwap && (int64(len(a)) != aTotalSize || !root.Block.Func.Config.haveByteSwap(aTotalSize)) {
   721  		return
   722  	}
   723  
   724  	// This is the commit point.
   725  
   726  	// Modify root to do all the stores.
   727  	sv := shiftBase
   728  	if isLittleEndian && shift0 != 0 {
   729  		sv = rightShift(root.Block, root.Pos, sv, shift0)
   730  	}
   731  	shiftedSize = int64(aTotalSize - a[0].size)
   732  	if isBigEndian && shift0-shiftedSize*8 != 0 {
   733  		sv = rightShift(root.Block, root.Pos, sv, shift0-shiftedSize*8)
   734  	}
   735  	if sv.Type.Size() > aTotalSize {
   736  		sv = truncate(root.Block, root.Pos, sv, sv.Type.Size(), aTotalSize)
   737  	}
   738  	if needSwap {
   739  		sv = byteSwap(root.Block, root.Pos, sv)
   740  	}
   741  
   742  	// Move all the stores to the root.
   743  	for i := range a {
   744  		v := a[i].store
   745  		if v == root {
   746  			v.Aux = sv.Type // widen store type
   747  			v.Pos = pos
   748  			v.SetArg(0, ptr)
   749  			v.SetArg(1, sv)
   750  			v.SetArg(2, mem)
   751  		} else {
   752  			clobber(v)
   753  			v.Type = types.Types[types.TBOOL] // erase memory type
   754  		}
   755  	}
   756  }
   757  
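        // sizeType returns the unsigned integer type of the given size in bytes.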
   758  func sizeType(size int64) *types.Type {
   759  	switch size {
   760  	case 8:
   761  		return types.Types[types.TUINT64]
   762  	case 4:
   763  		return types.Types[types.TUINT32]
   764  	case 2:
   765  		return types.Types[types.TUINT16]
   766  	default:
   767  		base.Fatalf("bad size %d\n", size)
   768  		return nil
   769  	}
   770  }
   771  
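        // truncate returns v, of size from (in bytes), truncated to size to.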
   772  func truncate(b *Block, pos src.XPos, v *Value, from, to int64) *Value {
   773  	switch from*10 + to {
   774  	case 82:
   775  		return b.NewValue1(pos, OpTrunc64to16, types.Types[types.TUINT16], v)
   776  	case 84:
   777  		return b.NewValue1(pos, OpTrunc64to32, types.Types[types.TUINT32], v)
   778  	case 42:
   779  		return b.NewValue1(pos, OpTrunc32to16, types.Types[types.TUINT16], v)
   780  	default:
   781  		base.Fatalf("bad sizes %d %d\n", from, to)
   782  		return nil
   783  	}
   784  }
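        // zeroExtend returns v, of size from (in bytes), zero-extended to size to.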
   785  func zeroExtend(b *Block, pos src.XPos, v *Value, from, to int64) *Value {
   786  	switch from*10 + to {
   787  	case 24:
   788  		return b.NewValue1(pos, OpZeroExt16to32, types.Types[types.TUINT32], v)
   789  	case 28:
   790  		return b.NewValue1(pos, OpZeroExt16to64, types.Types[types.TUINT64], v)
   791  	case 48:
   792  		return b.NewValue1(pos, OpZeroExt32to64, types.Types[types.TUINT64], v)
   793  	default:
   794  		base.Fatalf("bad sizes %d %d\n", from, to)
   795  		return nil
   796  	}
   797  }
   798  
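        // leftShift returns v shifted left by shift bits.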
   799  func leftShift(b *Block, pos src.XPos, v *Value, shift int64) *Value {
   800  	s := b.Func.ConstInt64(types.Types[types.TUINT64], shift)
   801  	size := v.Type.Size()
   802  	switch size {
   803  	case 8:
   804  		return b.NewValue2(pos, OpLsh64x64, v.Type, v, s)
   805  	case 4:
   806  		return b.NewValue2(pos, OpLsh32x64, v.Type, v, s)
   807  	case 2:
   808  		return b.NewValue2(pos, OpLsh16x64, v.Type, v, s)
   809  	default:
   810  		base.Fatalf("bad size %d\n", size)
   811  		return nil
   812  	}
   813  }
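        // rightShift returns v shifted right by shift bits, with zero fill.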
   814  func rightShift(b *Block, pos src.XPos, v *Value, shift int64) *Value {
   815  	s := b.Func.ConstInt64(types.Types[types.TUINT64], shift)
   816  	size := v.Type.Size()
   817  	switch size {
   818  	case 8:
   819  		return b.NewValue2(pos, OpRsh64Ux64, v.Type, v, s)
   820  	case 4:
   821  		return b.NewValue2(pos, OpRsh32Ux64, v.Type, v, s)
   822  	case 2:
   823  		return b.NewValue2(pos, OpRsh16Ux64, v.Type, v, s)
   824  	default:
   825  		base.Fatalf("bad size %d\n", size)
   826  		return nil
   827  	}
   828  }
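        // byteSwap returns v with the order of its bytes reversed.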
   829  func byteSwap(b *Block, pos src.XPos, v *Value) *Value {
   830  	switch v.Type.Size() {
   831  	case 8:
   832  		return b.NewValue1(pos, OpBswap64, v.Type, v)
   833  	case 4:
   834  		return b.NewValue1(pos, OpBswap32, v.Type, v)
   835  	case 2:
   836  		return b.NewValue1(pos, OpBswap16, v.Type, v)
   837  
   838  	default:
   839  		v.Fatalf("bad size %d\n", v.Type.Size())
   840  		return nil
   841  	}
   842  }
   843  
