// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/base"
	"cmd/compile/internal/types"
	"cmd/internal/src"
	"cmp"
	"slices"
)

// memcombine combines smaller loads and stores into larger ones.
// We ensure this generates good code for encoding/binary operations.
// It may help other cases also.
func memcombine(f *Func) {
	// This optimization requires that the architecture has
	// unaligned loads and unaligned stores.
	if !f.Config.unalignedOK {
		return
	}

	memcombineLoads(f)
	memcombineStores(f)
}

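// memcombineLoads looks for OR trees that assemble a wider value out of
// narrower zero-extended loads and, when possible, rewrites each tree as a
// single wider load. An illustrative example of the kind of pattern
// targeted (little-endian byte order shown):
//
//	uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
//
// which can become a single 32-bit load of p (plus a byte swap on
// big-endian targets that provide one).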
func memcombineLoads(f *Func) {
	// Find "OR trees" to start with.
	mark := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(mark)
	var order []*Value

	// Mark all values that are arguments of an OR; they cannot be
	// the root of an OR tree.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op == OpOr16 || v.Op == OpOr32 || v.Op == OpOr64 {
				mark.add(v.Args[0].ID)
				mark.add(v.Args[1].ID)
			}
		}
	}
	for _, b := range f.Blocks {
		order = order[:0]
		for _, v := range b.Values {
			if v.Op != OpOr16 && v.Op != OpOr32 && v.Op != OpOr64 {
				continue
			}
			if mark.contains(v.ID) {
				// Not a root; it will be processed as part of the
				// OR tree it belongs to.
				continue
			}
			// Add the OR tree rooted at v to the order, roots before
			// leaves (breadth-first walk).
			i := len(order)
			order = append(order, v)
			for ; i < len(order); i++ {
				x := order[i]
				for j := 0; j < 2; j++ {
					a := x.Args[j]
					if a.Op == OpOr16 || a.Op == OpOr32 || a.Op == OpOr64 {
						order = append(order, a)
					}
				}
			}
		}
		// Process the trees, trying the largest combine width first.
		for _, v := range order {
			max := f.Config.RegSize
			switch v.Op {
			case OpOr64:
			case OpOr32:
				max = 4
			case OpOr16:
				max = 2
			default:
				continue
			}
			for n := max; n > 1; n /= 2 {
				if combineLoads(v, n) {
					break
				}
			}
		}
	}
}

// A BaseAddress represents the address ptr+idx, where
// ptr is a pointer type and idx is an integer type.
// idx may be nil, in which case it is treated as 0.
type BaseAddress struct {
	ptr *Value
	idx *Value
}

// splitPtr returns the base address of ptr and any
// constant offset from that base.
// BaseAddress{ptr,nil},0 is always a valid result, but splitPtr
// tries to peel away as many constants into off as possible.
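//
// For example (illustrative): an address built as OffPtr[8](AddPtr(p, i))
// splits into BaseAddress{ptr: p, idx: i} with offset 8, so loads from
// OffPtr[8](AddPtr(p, i)) and OffPtr[9](AddPtr(p, i)) share a BaseAddress
// and differ only in their constant offsets.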
func splitPtr(ptr *Value) (BaseAddress, int64) {
	var idx *Value
	var off int64
	for {
		if ptr.Op == OpOffPtr {
			off += ptr.AuxInt
			ptr = ptr.Args[0]
		} else if ptr.Op == OpAddPtr {
			if idx != nil {
				// We have two or more indexing values.
				// Pick the first one we found.
				return BaseAddress{ptr: ptr, idx: idx}, off
			}
			idx = ptr.Args[1]
			if idx.Op == OpAdd32 || idx.Op == OpAdd64 {
				if idx.Args[0].Op == OpConst32 || idx.Args[0].Op == OpConst64 {
					off += idx.Args[0].AuxInt
					idx = idx.Args[1]
				} else if idx.Args[1].Op == OpConst32 || idx.Args[1].Op == OpConst64 {
					off += idx.Args[1].AuxInt
					idx = idx.Args[0]
				}
			}
			ptr = ptr.Args[0]
		} else {
			return BaseAddress{ptr: ptr, idx: idx}, off
		}
	}
}

// combineLoads tries to rewrite the OR tree rooted at root, which ORs
// together n shifted, zero-extended loads, as a single wider load.
// It reports whether it succeeded.
func combineLoads(root *Value, n int64) bool {
	orOp := root.Op
	var shiftOp Op
	switch orOp {
	case OpOr64:
		shiftOp = OpLsh64x64
	case OpOr32:
		shiftOp = OpLsh32x64
	case OpOr16:
		shiftOp = OpLsh16x64
	default:
		return false
	}

	// Find n values that are ORed together with the above op.
	a := make([]*Value, 0, 8)
	a = append(a, root)
	for i := 0; i < len(a) && int64(len(a)) < n; i++ {
		v := a[i]
		if v.Uses != 1 && v != root {
			// Something in this subtree is used elsewhere,
			// so we can't clobber it.
			return false
		}
		if v.Op == orOp { // expand this OR into its two arguments
			a[i] = v.Args[0]
			a = append(a, v.Args[1])
			i--
		}
	}
	if int64(len(a)) != n {
		// We didn't find enough values being ORed together.
		return false
	}

	// Check that the first entry is a zero-extended load and determine
	// the extension op used. All entries must use the same extension
	// (and thus load the same width).
	v := a[0]
	if v.Op == shiftOp {
		v = v.Args[0]
	}
	var extOp Op
	if orOp == OpOr64 && (v.Op == OpZeroExt8to64 || v.Op == OpZeroExt16to64 || v.Op == OpZeroExt32to64) ||
		orOp == OpOr32 && (v.Op == OpZeroExt8to32 || v.Op == OpZeroExt16to32) ||
		orOp == OpOr16 && v.Op == OpZeroExt8to16 {
		extOp = v.Op
		v = v.Args[0]
	} else {
		return false
	}
	if v.Op != OpLoad {
		return false
	}
	base, _ := splitPtr(v.Args[0])
	mem := v.Args[1]
	size := v.Type.Size()

	if root.Block.Func.Config.arch == "S390X" {
		// s390x can't handle unaligned accesses to global variables.
		if base.ptr.Op == OpAddr {
			return false
		}
	}

	// Check all the entries and record each load's offset and shift.
	type LoadRecord struct {
		load   *Value
		offset int64 // offset of load address from base
		shift  int64
	}
	r := make([]LoadRecord, n, 8)
	for i := int64(0); i < n; i++ {
		v := a[i]
		if v.Uses != 1 {
			return false
		}
		shift := int64(0)
		if v.Op == shiftOp {
			if v.Args[1].Op != OpConst64 {
				return false
			}
			shift = v.Args[1].AuxInt
			v = v.Args[0]
			if v.Uses != 1 {
				return false
			}
		}
		if v.Op != extOp {
			return false
		}
		load := v.Args[0]
		if load.Op != OpLoad {
			return false
		}
		if load.Uses != 1 {
			return false
		}
		if load.Args[1] != mem {
			return false
		}
		p, off := splitPtr(load.Args[0])
		if p != base {
			return false
		}
		r[i] = LoadRecord{load: load, offset: off, shift: shift}
	}

	// Sort in memory address order.
	slices.SortFunc(r, func(a, b LoadRecord) int {
		return cmp.Compare(a.offset, b.offset)
	})

	// Check that we have contiguous offsets.
	for i := int64(0); i < n; i++ {
		if r[i].offset != r[0].offset+i*size {
			return false
		}
	}

	// Check for ordered shifts: they must either increase by 8*size per
	// entry (little-endian result) or decrease by 8*size per entry
	// (big-endian result).
	shift0 := r[0].shift
	isLittleEndian := true
	for i := int64(0); i < n; i++ {
		if r[i].shift != shift0+i*size*8 {
			isLittleEndian = false
			break
		}
	}
	isBigEndian := true
	for i := int64(0); i < n; i++ {
		if r[i].shift != shift0-i*size*8 {
			isBigEndian = false
			break
		}
	}
	if !isLittleEndian && !isBigEndian {
		return false
	}
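
	// For example (illustrative), when combining four 1-byte loads into a
	// 4-byte load: shifts of 0, 8, 16, 24 at increasing addresses describe
	// a little-endian result, while shifts of 24, 16, 8, 0 describe a
	// big-endian one.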

	// Find a block to put the new load in. It has to be a point where
	// the memory argument mem is still live, so we can't simply use
	// root.Block; mergePoint picks a suitable block among the blocks
	// of the original loads.
	loads := make([]*Value, n, 8)
	for i := int64(0); i < n; i++ {
		loads[i] = r[i].load
	}
	loadBlock := mergePoint(root.Block, loads...)
	if loadBlock == nil {
		return false
	}
	// Find a source position to use.
	pos := src.NoXPos
	for _, load := range loads {
		if load.Block == loadBlock {
			pos = load.Pos
			break
		}
	}
	if pos == src.NoXPos {
		return false
	}

	// Check endianness: do we need a byte swap to get the desired result?
	needSwap := isLittleEndian && root.Block.Func.Config.BigEndian ||
		isBigEndian && !root.Block.Func.Config.BigEndian
	if needSwap && (size != 1 || !root.Block.Func.Config.haveByteSwap(n)) {
		return false
	}

	// This is the commit point.

	// First, issue the load at the lowest address.
	v = loadBlock.NewValue2(pos, OpLoad, sizeType(n*size), r[0].load.Args[0], mem)

	// Byte swap if needed.
	if needSwap {
		v = byteSwap(loadBlock, pos, v)
	}

	// Extend if needed.
	if n*size < root.Type.Size() {
		v = zeroExtend(loadBlock, pos, v, n*size, root.Type.Size())
	}

	// Shift if needed.
	if isLittleEndian && shift0 != 0 {
		v = leftShift(loadBlock, pos, v, shift0)
	}
	if isBigEndian && shift0-(n-1)*size*8 != 0 {
		v = leftShift(loadBlock, pos, v, shift0-(n-1)*size*8)
	}

	// Install the result.
	root.reset(OpCopy)
	root.AddArg(v)

	// Clobber the loads, just to prevent additional work from being done
	// on subtrees of this tree.
	for i := int64(0); i < n; i++ {
		clobber(r[i].load)
	}
	return true
}

func memcombineStores(f *Func) {
	mark := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(mark)
	var order []*Value

	for _, b := range f.Blocks {
		// Mark all stores which are not last in a store sequence.
		mark.clear()
		for _, v := range b.Values {
			if v.Op == OpStore {
				mark.add(v.MemoryArg().ID)
			}
		}

		// Pick an order for visiting stores such that
		// later stores come earlier in the ordering.
		order = order[:0]
		for _, v := range b.Values {
			if v.Op != OpStore {
				continue
			}
			if mark.contains(v.ID) {
				continue // not last in a chain of stores
			}
			for {
				order = append(order, v)
				v = v.Args[2]
				if v.Block != b || v.Op != OpStore {
					break
				}
			}
		}

		// Look for combining opportunities at each store in queue order.
		for _, v := range order {
			if v.Op != OpStore { // already rewritten
				continue
			}

			size := v.Aux.(*types.Type).Size()
			if size >= f.Config.RegSize || size == 0 {
				continue
			}

			combineStores(v)
		}
	}
}

// combineStores tries to combine the store root with earlier stores in the
// same block into a single wider store.
func combineStores(root *Value) {
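	// An illustrative example of the main pattern targeted here: four
	// 1-byte stores that scatter the bytes of a 32-bit value x,
	//
	//	p[0] = byte(x)
	//	p[1] = byte(x >> 8)
	//	p[2] = byte(x >> 16)
	//	p[3] = byte(x >> 24)
	//
	// can be replaced by a single 4-byte store of x (byte-swapped first on
	// a big-endian target that has a byte-swap instruction). Stores of
	// constants and stores copying a contiguous chunk of memory are
	// handled as well.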

	maxRegSize := root.Block.Func.Config.RegSize // widest store we will create
	type StoreRecord struct {
		store  *Value
		offset int64
		size   int64
	}
	// getShiftBase returns the value from which both of the first two
	// stores take their stored bytes (by truncating, and possibly
	// right-shifting, that value), or nil if there is no such common value.
	getShiftBase := func(a []StoreRecord) *Value {
		x := a[0].store.Args[1]
		y := a[1].store.Args[1]
		switch x.Op {
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
			x = x.Args[0]
		default:
			return nil
		}
		switch y.Op {
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
			y = y.Args[0]
		default:
			return nil
		}
		var x2 *Value
		switch x.Op {
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
			x2 = x.Args[0]
		default:
		}
		var y2 *Value
		switch y.Op {
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
			y2 = y.Args[0]
		default:
		}
		if y2 == x {
			// y is a shift of x. x is the shift base.
			return x
		}
		if x2 == y {
			// x is a shift of y. y is the shift base.
			return y
		}
		if x2 == y2 {
			// x and y are both shifts of the same value.
			return x2
		}
		return nil
	}
	// isShiftBase reports whether the value stored by v is base, or a
	// right shift of base, truncated to the store's width.
	isShiftBase := func(v, base *Value) bool {
		val := v.Args[1]
		switch val.Op {
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
			val = val.Args[0]
		default:
			return false
		}
		if val == base {
			return true
		}
		switch val.Op {
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
			val = val.Args[0]
		default:
			return false
		}
		return val == base
	}
	// shift returns the constant shift amount (in bits) applied to base
	// before v stores it, or -1 if it can't be determined.
	shift := func(v, base *Value) int64 {
		val := v.Args[1]
		switch val.Op {
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
			val = val.Args[0]
		default:
			return -1
		}
		if val == base {
			return 0
		}
		switch val.Op {
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
			val = val.Args[1]
		default:
			return -1
		}
		if val.Op != OpConst64 {
			return -1
		}
		return val.AuxInt
	}

	// Gather the stores that might be mergeable with root. They must be
	// earlier in the same block, form an unbroken chain of memory states,
	// and store to addresses with the same base as root.
	allMergeable := make([]StoreRecord, 0, 8)
	rbase, roff := splitPtr(root.Args[0])
	if root.Block.Func.Config.arch == "S390X" {
		// s390x can't handle unaligned accesses to global variables.
		if rbase.ptr.Op == OpAddr {
			return
		}
	}
	allMergeable = append(allMergeable, StoreRecord{root, roff, root.Aux.(*types.Type).Size()})
	allMergeableSize := root.Aux.(*types.Type).Size()

	// Walk up the chain of stores feeding root's memory argument,
	// stopping at anything that can't participate in the merge.
	for i, x := 1, root.Args[2]; i < 8; i, x = i+1, x.Args[2] {
		if x.Op != OpStore {
			break
		}
		if x.Block != root.Block {
			break
		}
		if x.Uses != 1 {
			break
		}
		xSize := x.Aux.(*types.Type).Size()
		if xSize == 0 {
			break
		}
		if xSize > maxRegSize-allMergeableSize {
			break
		}
		base, off := splitPtr(x.Args[0])
		if base != rbase {
			break
		}
		allMergeable = append(allMergeable, StoreRecord{x, off, xSize})
		allMergeableSize += xSize
	}
	if len(allMergeable) <= 1 {
		return
	}

	// Bucket the prefixes of allMergeable (root plus the following earlier
	// stores, in memory-chain order) whose combined size is exactly 2, 4,
	// or 8 bytes.
	mergeableSet := map[int64][]StoreRecord{}
	for i, size := 0, int64(0); i < len(allMergeable); i++ {
		size += allMergeable[i].size
		for _, bucketSize := range []int64{8, 4, 2} {
			if size == bucketSize {
				mergeableSet[size] = slices.Clone(allMergeable[:i+1])
				break
			}
		}
	}
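	// For example (illustrative), if allMergeable holds stores of sizes
	// 1, 1, 2, and 4 bytes, the buckets are 2 (first two stores),
	// 4 (first three), and 8 (all four).
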
	var a []StoreRecord
	var aTotalSize int64
	var mem *Value
	var pos src.XPos
	// Prefer to merge as many bytes as possible: try the 8-byte bucket
	// first, then 4, then 2.
	for _, s := range []int64{8, 4, 2} {
		candidate := mergeableSet[s]
		// We have a set of stores of the right total size; check that
		// they write to contiguous addresses.
		if len(candidate) >= 2 {
			// The merged store takes its memory argument from the earliest
			// store in program order (the last element of candidate), and
			// reuses its source position.
			mem = candidate[len(candidate)-1].store.Args[2]
			pos = candidate[len(candidate)-1].store.Pos

			// Sort by increasing address offset.
			slices.SortFunc(candidate, func(sr1, sr2 StoreRecord) int {
				return cmp.Compare(sr1.offset, sr2.offset)
			})

			sequential := true
			for i := 1; i < len(candidate); i++ {
				if candidate[i].offset != candidate[i-1].offset+candidate[i-1].size {
					sequential = false
					break
				}
			}
			if sequential {
				a = candidate
				aTotalSize = s
				break
			}
		}
	}
	if len(a) <= 1 {
		return
	}

	// ptr is the address of the lowest store, where the combined store
	// will be written.
	ptr := a[0].store.Args[0]

	// Check whether all the stored values are constants.
	isConst := true
	for i := range a {
		switch a[i].store.Args[1].Op {
		case OpConst32, OpConst16, OpConst8, OpConstBool:
		default:
			isConst = false
		}
		if !isConst {
			break
		}
	}
	if isConst {
		// Modify root to do all the stores.
		var c int64
		for i := range a {
			mask := int64(1)<<(8*a[i].size) - 1
			s := 8 * (a[i].offset - a[0].offset)
			if root.Block.Func.Config.BigEndian {
				s = (aTotalSize-a[i].size)*8 - s
			}
			c |= (a[i].store.Args[1].AuxInt & mask) << s
		}
		var cv *Value
		switch aTotalSize {
		case 2:
			cv = root.Block.Func.ConstInt16(types.Types[types.TUINT16], int16(c))
		case 4:
			cv = root.Block.Func.ConstInt32(types.Types[types.TUINT32], int32(c))
		case 8:
			cv = root.Block.Func.ConstInt64(types.Types[types.TUINT64], c)
		}

		// Replace root with the wide constant store and mark the other
		// stores as no longer used.
		for i := range a {
			v := a[i].store
			if v == root {
				v.Aux = cv.Type
				v.Pos = pos
				v.SetArg(0, ptr)
				v.SetArg(1, cv)
				v.SetArg(2, mem)
			} else {
				clobber(v)
				v.Type = types.Types[types.TBOOL] // erase memory type
			}
		}
		return
	}
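
	// For example (illustrative, little-endian): the two constant stores
	//	p[0] = 0x12
	//	p[1] = 0x34
	// are rewritten above as a single 16-bit store of the constant 0x3412.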

	// Check whether all the stored values are loads from adjacent memory
	// locations (a memmove-like copy).
	var loadMem *Value
	var loadBase BaseAddress
	var loadIdx int64
	for i := range a {
		load := a[i].store.Args[1]
		if load.Op != OpLoad {
			loadMem = nil
			break
		}
		if load.Uses != 1 {
			loadMem = nil
			break
		}
		if load.Type.IsPtr() {
			// Don't combine stores containing a pointer, as we need
			// a write barrier for those. This can't currently happen,
			// but might in the future if we ever have another
			// 8-byte-reg/4-byte-ptr architecture like amd64p32.
			loadMem = nil
			break
		}
		mem := load.Args[1]
		base, idx := splitPtr(load.Args[0])
		if loadMem == nil {
			// First one we found.
			loadMem = mem
			loadBase = base
			loadIdx = idx
			continue
		}
		if base != loadBase || mem != loadMem {
			loadMem = nil
			break
		}
		if idx != loadIdx+(a[i].offset-a[0].offset) {
			loadMem = nil
			break
		}
	}
	if loadMem != nil {
		// Widen the load feeding the lowest-address store so that it
		// loads the whole chunk.
		load := a[0].store.Args[1]
		switch aTotalSize {
		case 2:
			load.Type = types.Types[types.TUINT16]
		case 4:
			load.Type = types.Types[types.TUINT32]
		case 8:
			load.Type = types.Types[types.TUINT64]
		}

		// Replace root with the wide store of the combined load and mark
		// the other stores as no longer used.
		for i := range a {
			v := a[i].store
			if v == root {
				v.Aux = load.Type
				v.Pos = pos
				v.SetArg(0, ptr)
				v.SetArg(1, load)
				v.SetArg(2, mem)
			} else {
				clobber(v)
				v.Type = types.Types[types.TBOOL] // erase memory type
			}
		}
		return
	}
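
	// For example (illustrative): the byte-by-byte copy
	//	p[0] = q[0]
	//	p[1] = q[1]
	//	p[2] = q[2]
	//	p[3] = q[3]
	// is handled by the case above, which widens the first load and
	// replaces the four stores with a single 4-byte store of it.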

	// Check that all the stored values are truncations (possibly after a
	// right shift) of a single common value.
	shiftBase := getShiftBase(a)
	if shiftBase == nil {
		return
	}
	for i := range a {
		if !isShiftBase(a[i].store, shiftBase) {
			return
		}
	}

	// Check for writes in little-endian or big-endian order.
	isLittleEndian := true
	shift0 := shift(a[0].store, shiftBase)
	for i := 1; i < len(a); i++ {
		if shift(a[i].store, shiftBase) != shift0+(a[i].offset-a[0].offset)*8 {
			isLittleEndian = false
			break
		}
	}
	isBigEndian := true
	shiftedSize := int64(0)
	for i := 1; i < len(a); i++ {
		shiftedSize += a[i].size
		if shift(a[i].store, shiftBase) != shift0-shiftedSize*8 {
			isBigEndian = false
			break
		}
	}
	if !isLittleEndian && !isBigEndian {
		return
	}

	// Check whether we need to swap the byte order before storing.
	needSwap := isLittleEndian && root.Block.Func.Config.BigEndian ||
		isBigEndian && !root.Block.Func.Config.BigEndian
	if needSwap && (int64(len(a)) != aTotalSize || !root.Block.Func.Config.haveByteSwap(aTotalSize)) {
		return
	}

	// This is the commit point.

	// Build the value to store: shift, truncate, and byte-swap shiftBase
	// as needed.
	sv := shiftBase
	if isLittleEndian && shift0 != 0 {
		sv = rightShift(root.Block, root.Pos, sv, shift0)
	}
	shiftedSize = int64(aTotalSize - a[0].size)
	if isBigEndian && shift0-shiftedSize*8 != 0 {
		sv = rightShift(root.Block, root.Pos, sv, shift0-shiftedSize*8)
	}
	if sv.Type.Size() > aTotalSize {
		sv = truncate(root.Block, root.Pos, sv, sv.Type.Size(), aTotalSize)
	}
	if needSwap {
		sv = byteSwap(root.Block, root.Pos, sv)
	}

	// Replace root with the wide store and mark the other stores as no
	// longer used.
	for i := range a {
		v := a[i].store
		if v == root {
			v.Aux = sv.Type
			v.Pos = pos
			v.SetArg(0, ptr)
			v.SetArg(1, sv)
			v.SetArg(2, mem)
		} else {
			clobber(v)
			v.Type = types.Types[types.TBOOL] // erase memory type
		}
	}
}

// sizeType returns the unsigned integer type of the given width in bytes.
func sizeType(size int64) *types.Type {
	switch size {
	case 8:
		return types.Types[types.TUINT64]
	case 4:
		return types.Types[types.TUINT32]
	case 2:
		return types.Types[types.TUINT16]
	default:
		base.Fatalf("bad size %d\n", size)
		return nil
	}
}

// truncate truncates the value v, of width from bytes, to a width of to bytes.
func truncate(b *Block, pos src.XPos, v *Value, from, to int64) *Value {
	switch from*10 + to {
	case 82:
		return b.NewValue1(pos, OpTrunc64to16, types.Types[types.TUINT16], v)
	case 84:
		return b.NewValue1(pos, OpTrunc64to32, types.Types[types.TUINT32], v)
	case 42:
		return b.NewValue1(pos, OpTrunc32to16, types.Types[types.TUINT16], v)
	default:
		base.Fatalf("bad sizes %d %d\n", from, to)
		return nil
	}
}

// zeroExtend zero-extends the value v, of width from bytes, to a width of to bytes.
func zeroExtend(b *Block, pos src.XPos, v *Value, from, to int64) *Value {
	switch from*10 + to {
	case 24:
		return b.NewValue1(pos, OpZeroExt16to32, types.Types[types.TUINT32], v)
	case 28:
		return b.NewValue1(pos, OpZeroExt16to64, types.Types[types.TUINT64], v)
	case 48:
		return b.NewValue1(pos, OpZeroExt32to64, types.Types[types.TUINT64], v)
	default:
		base.Fatalf("bad sizes %d %d\n", from, to)
		return nil
	}
}

// leftShift returns the value v shifted left by shift bits.
func leftShift(b *Block, pos src.XPos, v *Value, shift int64) *Value {
	s := b.Func.ConstInt64(types.Types[types.TUINT64], shift)
	size := v.Type.Size()
	switch size {
	case 8:
		return b.NewValue2(pos, OpLsh64x64, v.Type, v, s)
	case 4:
		return b.NewValue2(pos, OpLsh32x64, v.Type, v, s)
	case 2:
		return b.NewValue2(pos, OpLsh16x64, v.Type, v, s)
	default:
		base.Fatalf("bad size %d\n", size)
		return nil
	}
}

// rightShift returns the value v shifted right (unsigned) by shift bits.
func rightShift(b *Block, pos src.XPos, v *Value, shift int64) *Value {
	s := b.Func.ConstInt64(types.Types[types.TUINT64], shift)
	size := v.Type.Size()
	switch size {
	case 8:
		return b.NewValue2(pos, OpRsh64Ux64, v.Type, v, s)
	case 4:
		return b.NewValue2(pos, OpRsh32Ux64, v.Type, v, s)
	case 2:
		return b.NewValue2(pos, OpRsh16Ux64, v.Type, v, s)
	default:
		base.Fatalf("bad size %d\n", size)
		return nil
	}
}

// byteSwap reverses the byte order of v.
func byteSwap(b *Block, pos src.XPos, v *Value) *Value {
	switch v.Type.Size() {
	case 8:
		return b.NewValue1(pos, OpBswap64, v.Type, v)
	case 4:
		return b.NewValue1(pos, OpBswap32, v.Type, v)
	case 2:
		return b.NewValue1(pos, OpBswap16, v.Type, v)
	default:
		v.Fatalf("bad size %d\n", v.Type.Size())
		return nil
	}
}