src/runtime/mgcmark_greenteagc.go
// Green Tea garbage collector: span-based marking and scanning.
//
// This file implements the data structures and scan loops used when spans
// carry their own inline mark and scan bits: span scan ownership, the local
// and global span work queues, and the small-object span scanning routines.

package runtime

import (
	"internal/cpu"
	"internal/goarch"
	"internal/runtime/atomic"
	"internal/runtime/gc"
	"internal/runtime/sys"
	"unsafe"
)

// doubleCheckGreenTea enables additional, relatively expensive consistency
// checks in the span scanning code paths below.
const doubleCheckGreenTea = false

// spanInlineMarkBits is the mark bitmap, scan bitmap, and scan ownership
// state stored inline at the end of a span that uses span-based scanning.
// The mark bits record which objects are live; the scan bits record which
// live objects have already been scanned.
type spanInlineMarkBits struct {
	scans [63]uint8         // scanned bits.
	owned spanScanOwnership // see spanScanOwnership below.
	marks [63]uint8         // mark bits.
	class spanClass         // size class of the span, including the noscan bit.
}
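
// Note: unsafe.Sizeof(spanInlineMarkBits{}) is 128 bytes (63+1+63+1), so the
// structure occupies the final 128 bytes of the span's page. The scans and
// marks arrays cover up to 63*8 = 504 objects; spanSetScans and
// moveInlineMarks below read them a word at a time and mask off the trailing
// owned/class byte that shares the last word with each array.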

// spanScanOwnership indicates whether some thread has acquired the right to
// scan a span, and whether additional objects were marked in the span while
// it was held.
type spanScanOwnership uint8

const (
	spanScanUnowned  spanScanOwnership = 0         // No marks have queued this span.
	spanScanOneMark                    = 1 << iota // A single mark queued the span for scanning.
	spanScanManyMark                               // Additional marks arrived while the span was queued.
)
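
// The ownership byte acts as a tiny saturating state machine: the first
// marker to move it away from spanScanUnowned (via tryAcquire) owns the span
// and queues it for scanning; later markers only fold in spanScanManyMark.
// When the owner calls release and sees that only spanScanOneMark was ever
// set, it knows a single object needs scanning and can take the sparse path
// in scanSpan; otherwise it rescans the whole span's mark bits.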

func (o *spanScanOwnership) load() spanScanOwnership {
	return spanScanOwnership(atomic.Load8((*uint8)(unsafe.Pointer(o))))
}

func (o *spanScanOwnership) or(v spanScanOwnership) spanScanOwnership {
	// atomic.Or8 (used elsewhere in this file) does not return the previous
	// value, but this code needs the value of the ownership byte from before
	// the OR. So operate on the aligned 32-bit word containing the byte with
	// atomic.Or32, which does return the old value, and shift the operand
	// into the byte's position within that word.
	o32 := (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(o)) &^ 0b11))
	off := (uintptr(unsafe.Pointer(o)) & 0b11) * 8
	if goarch.BigEndian {
		off = 32 - off - 8
	}
	return spanScanOwnership(atomic.Or32(o32, uint32(v)<<off) >> off)
}

func (imb *spanInlineMarkBits) init(class spanClass, needzero bool) {
	if imb == nil {
		// Sanity check: imb is derived from a span's base address and should
		// never be nil.
		throw("runtime: span inline mark bits nil?")
	}
	if needzero {
		// The caller indicated that the memory backing these bits may be
		// dirty; clear the whole structure before use.
		memclrNoHeapPointers(unsafe.Pointer(imb), unsafe.Sizeof(spanInlineMarkBits{}))
	}
	imb.class = class
}

// tryAcquire attempts to acquire the right to queue this span for scanning.
// It reports whether the caller acquired ownership; if not, the span is
// already queued and its owner will observe spanScanManyMark.
func (imb *spanInlineMarkBits) tryAcquire() bool {
	switch imb.owned.load() {
	case spanScanUnowned:
		// Try to take ownership with a single mark.
		if imb.owned.or(spanScanOneMark) == spanScanUnowned {
			return true
		}
		// Another thread got there just ahead of us. Fall through and fold
		// in spanScanManyMark, since at least two objects are now marked.
		fallthrough
	case spanScanOneMark:
		// We may lose to a racing release here, in which case or returns
		// spanScanUnowned and we acquire ownership after all.
		return imb.owned.or(spanScanManyMark) == spanScanUnowned
	}
	return false
}

// release relinquishes scan ownership of the span and returns the ownership
// state in place at the time of release. The caller uses this to decide
// between the sparse (single-object) and dense (whole-span) scanning paths
// in scanSpan.
func (imb *spanInlineMarkBits) release() spanScanOwnership {
	return spanScanOwnership(atomic.Xchg8((*uint8)(unsafe.Pointer(&imb.owned)), uint8(spanScanUnowned)))
}

// spanInlineMarkBitsFromBase returns the spanInlineMarkBits stored at the end
// of the span whose base address is base.
func spanInlineMarkBitsFromBase(base uintptr) *spanInlineMarkBits {
	return (*spanInlineMarkBits)(unsafe.Pointer(base + gc.PageSize - unsafe.Sizeof(spanInlineMarkBits{})))
}

// initInlineMarkBits initializes the inline mark bits for a span that uses them.
func (s *mspan) initInlineMarkBits() {
	if doubleCheckGreenTea && !gcUsesSpanInlineMarkBits(s.elemsize) {
		throw("expected span with inline mark bits")
	}
	s.inlineMarkBits().init(s.spanclass, s.needzero != 0)
}

// moveInlineMarks merges the span's inline mark bits into dst and resets the
// inline bits for the next GC cycle.
func (s *mspan) moveInlineMarks(dst *gcBits) {
	if doubleCheckGreenTea && !gcUsesSpanInlineMarkBits(s.elemsize) {
		throw("expected span with inline mark bits")
	}
	bytes := divRoundUp(uintptr(s.nelems), 8)
	imb := s.inlineMarkBits()
	imbMarks := (*gc.ObjMask)(unsafe.Pointer(&imb.marks))
	for i := uintptr(0); i < bytes; i += goarch.PtrSize {
		marks := bswapIfBigEndian(imbMarks[i/goarch.PtrSize])
		if i/goarch.PtrSize == uintptr(len(imb.marks)+1)/goarch.PtrSize-1 {
			marks &^= 0xff << ((goarch.PtrSize - 1) * 8) // mask out the class byte
		}
		*(*uintptr)(unsafe.Pointer(dst.bytep(i))) |= bswapIfBigEndian(marks)
	}
	if doubleCheckGreenTea && !s.spanclass.noscan() && imb.marks != imb.scans {
		throw("marks don't match scans for span with pointer")
	}

	// Reset the inline bits. The backing memory was written to during the
	// cycle, so it needs zeroing.
	imb.init(s.spanclass, true)
}

// inlineMarkBits returns the inline mark bits for a span that has them.
func (s *mspan) inlineMarkBits() *spanInlineMarkBits {
	if doubleCheckGreenTea && !gcUsesSpanInlineMarkBits(s.elemsize) {
		throw("expected span with inline mark bits")
	}
	return spanInlineMarkBitsFromBase(s.base())
}

func (s *mspan) markBitsForIndex(objIndex uintptr) (bits markBits) {
	if gcUsesSpanInlineMarkBits(s.elemsize) {
		bits.bytep = &s.inlineMarkBits().marks[objIndex/8]
	} else {
		bits.bytep = s.gcmarkBits.bytep(objIndex / 8)
	}
	bits.mask = uint8(1) << (objIndex % 8)
	bits.index = objIndex
	return
}

func (s *mspan) markBitsForBase() markBits {
	if gcUsesSpanInlineMarkBits(s.elemsize) {
		return markBits{&s.inlineMarkBits().marks[0], uint8(1), 0}
	}
	return markBits{&s.gcmarkBits.x, uint8(1), 0}
}

// scannedBitsForIndex returns a markBits representing the scanned bit for
// objIndex in the span's inline mark bits.
func (s *mspan) scannedBitsForIndex(objIndex uintptr) markBits {
	return markBits{&s.inlineMarkBits().scans[objIndex/8], uint8(1) << (objIndex % 8), objIndex}
}

// gcUsesSpanInlineMarkBits reports whether spans with this element size use
// inline mark bits (and, for pointer-bearing classes, span-based scanning).
// Only spans that also store their heap bits inline qualify, and objects must
// be at least 16 bytes.
func gcUsesSpanInlineMarkBits(size uintptr) bool {
	return heapBitsInSpan(size) && size >= 16
}
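
// For example, assuming the usual 8 KiB span page (gc.PageSize = 8192): a
// noscan span of 16-byte objects has (8192-128)/16 = 504 usable slots, which
// exactly fills the 63-byte (504-bit) inline mark array; pointer-bearing
// spans also reserve space for inline heap bits and hold slightly fewer
// objects, and larger size classes hold correspondingly fewer still.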

// tryDeferToSpanScan checks whether p points into a span that uses inline
// mark bits and, if so, marks the object and (if the span is pointer-bearing
// and unowned) queues the span for scanning. It reports whether the pointer
// was handled, in which case the caller must not also grey the object.
func tryDeferToSpanScan(p uintptr, gcw *gcWork) bool {
	if useCheckmark {
		return false
	}

	// Check that p is in a span that uses inline mark bits, using the
	// per-arena bitmap of such pages.
	ha := heapArenaOf(p)
	if ha == nil {
		return false
	}
	pageIdx := ((p / pageSize) / 8) % uintptr(len(ha.pageInUse))
	pageMask := byte(1 << ((p / pageSize) % 8))
	if ha.pageUseSpanInlineMarkBits[pageIdx]&pageMask == 0 {
		return false
	}

	// Find the object's index from the span metadata at the end of the page.
	base := alignDown(p, gc.PageSize)
	q := spanInlineMarkBitsFromBase(base)
	objIndex := uint16((uint64(p-base) * uint64(gc.SizeClassToDivMagic[q.class.sizeclass()])) >> 32)

	// Set the mark bit; nothing more to do if it was already set.
	idx, mask := objIndex/8, uint8(1)<<(objIndex%8)
	if atomic.Load8(&q.marks[idx])&mask != 0 {
		return true
	}
	atomic.Or8(&q.marks[idx], mask)

	// Fast-track noscan objects: marking is all that is needed.
	if q.class.noscan() {
		gcw.bytesMarked += uint64(gc.SizeClassToSize[q.class.sizeclass()])
		return true
	}

	// Queue the span for scanning if we acquired scan ownership.
	if q.tryAcquire() {
		if gcw.spanq.put(makeObjPtr(base, objIndex)) {
			if gcphase == _GCmark {
				gcw.mayNeedWorker = true
			}
			gcw.flushedWork = true
		}
	}
	return true
}
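
// Callers that may encounter pointers into inline-mark spans follow the
// pattern used by the scan loops later in this file: try the span-based path
// first, and fall back to the classic per-object path only when it does not
// apply. A minimal sketch:
//
//	if !tryDeferToSpanScan(p, gcw) {
//		if obj, span, objIndex := findObject(p, 0, 0); obj != 0 {
//			greyobject(obj, 0, 0, span, gcw, objIndex)
//		}
//	}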

// tryGetSpan attempts to get a span to scan, first from the local queue and,
// if slow is set, from the global queue or by stealing from other Ps.
func (w *gcWork) tryGetSpan(slow bool) objptr {
	if s := w.spanq.get(); s != 0 {
		return s
	}

	if slow {
		// Check the global span queue.
		if s := work.spanq.get(w); s != 0 {
			return s
		}

		// Attempt to steal spans from other Ps.
		return spanQueueSteal(w)
	}
	return 0
}

// spanQueue is the global, lock-protected queue of spans to scan, with a
// lock-free fast path for checking emptiness.
type spanQueue struct {
	avail atomic.Bool // cheap emptiness check that avoids taking the lock
	_     cpu.CacheLinePad
	lock  mutex
	q     mSpanQueue
}

func (q *spanQueue) empty() bool {
	return !q.avail.Load()
}

func (q *spanQueue) size() int {
	return q.q.n
}

// putBatch adds a whole batch of spans to the global queue.
func (q *spanQueue) putBatch(batch []objptr) {
	var list mSpanQueue
	for _, p := range batch {
		s := spanOfUnchecked(p.spanBase())
		s.scanIdx = p.objIndex()
		list.push(s)
	}

	lock(&q.lock)
	if q.q.n == 0 {
		q.avail.Store(true)
	}
	q.q.takeAll(&list)
	unlock(&q.lock)
}

// get grabs a batch of spans from the global queue, refills gcw's local span
// queue with all but one of them, and returns that remaining span to scan,
// or 0 if the queue is empty.
func (q *spanQueue) get(gcw *gcWork) objptr {
	if q.empty() {
		return 0
	}
	lock(&q.lock)
	if q.q.n == 0 {
		unlock(&q.lock)
		return 0
	}
	// Take roughly 1/gomaxprocs of the queue, capped at half the local ring.
	n := q.q.n/int(gomaxprocs) + 1
	if n > q.q.n {
		n = q.q.n
	}
	if max := len(gcw.spanq.ring) / 2; n > max {
		n = max
	}
	newQ := q.q.popN(n)
	if q.q.n == 0 {
		q.avail.Store(false)
	}
	unlock(&q.lock)

	s := newQ.pop()
	for newQ.n > 0 {
		s := newQ.pop()
		gcw.spanq.put(makeObjPtr(s.base(), s.scanIdx))
	}
	return makeObjPtr(s.base(), s.scanIdx)
}
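
// For instance, with 8 Ps, a 2000-entry global queue, and the 256-entry
// local ring below, one call takes min(2000/8+1, 2000, 256/2) = 128 spans:
// one is returned for immediate scanning and 127 are pushed onto the
// caller's local ring. (Illustrative numbers only.)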

// localSpanQueue is a P-local, fixed-size ring of spans queued for scanning.
// The owning P appends at the tail; the owner and stealing Ps remove from
// the head using CAS, and overflow spills half of the ring to the global
// queue.
type localSpanQueue struct {
	head atomic.Uint32
	tail atomic.Uint32
	ring [256]objptr
}

// put adds s to the local queue, spilling half of the queue to the global
// queue if the ring is full. It reports whether anything was flushed to the
// global queue.
func (q *localSpanQueue) put(s objptr) (flushed bool) {
	for {
		h := q.head.Load()
		t := q.tail.Load()
		if t-h < uint32(len(q.ring)) {
			q.ring[t%uint32(len(q.ring))] = s
			q.tail.Store(t + 1)
			return false
		}
		if q.putSlow(s, h, t) {
			return true
		}
		// The queue is no longer full; retry the fast path.
	}
}
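
// put and putSlow mirror the scheduler's runqput/runqputslow: the common
// case is a plain store and tail bump, and only a full ring takes the slow
// path, which moves half of the pending work (plus the new item) to the
// global spanQueue in a single batch.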

// putSlow moves half of the local queue, plus s, to the global span queue.
// It reports false if it lost a race with a concurrent stealer, in which
// case the caller should retry the fast path.
func (q *localSpanQueue) putSlow(s objptr, h, t uint32) bool {
	var batch [len(q.ring)/2 + 1]objptr

	// First, grab half of the spans in the queue.
	n := t - h
	n = n / 2
	if n != uint32(len(q.ring)/2) {
		throw("localSpanQueue.putSlow: queue is not full")
	}
	for i := uint32(0); i < n; i++ {
		batch[i] = q.ring[(h+i)%uint32(len(q.ring))]
	}
	if !q.head.CompareAndSwap(h, h+n) { // a stealer removed entries first
		return false
	}
	batch[n] = s

	work.spanq.putBatch(batch[:])
	return true
}

// get removes and returns one span from the local queue, or 0 if the queue
// is empty. It may race with concurrent stealers, hence the CAS loop.
func (q *localSpanQueue) get() objptr {
	for {
		h := q.head.Load()
		t := q.tail.Load()
		if t == h {
			return 0
		}
		s := q.ring[h%uint32(len(q.ring))]
		if q.head.CompareAndSwap(h, h+1) {
			return s
		}
	}
}

func (q *localSpanQueue) empty() bool {
	h := q.head.Load()
	t := q.tail.Load()
	return t == h
}

// stealFrom takes half of q2's queued spans, appends all but one of them to
// q1, and returns the remaining one, or 0 if q2 was empty.
func (q1 *localSpanQueue) stealFrom(q2 *localSpanQueue) objptr {
	writeHead := q1.tail.Load()

	// Grab half of q2's spans.
	var n uint32
	for {
		h := q2.head.Load()
		t := q2.tail.Load()
		n = t - h
		n = n - n/2
		if n == 0 {
			return 0
		}
		if n > uint32(len(q2.ring)/2) { // read inconsistent h and t
			continue
		}
		for i := uint32(0); i < n; i++ {
			c := q2.ring[(h+i)%uint32(len(q2.ring))]
			q1.ring[(writeHead+i)%uint32(len(q1.ring))] = c
		}
		if q2.head.CompareAndSwap(h, h+n) {
			break
		}
	}
	// Return the last span copied directly and publish the rest.
	n--
	c := q1.ring[(writeHead+n)%uint32(len(q1.ring))]
	if n == 0 {
		return c
	}
	h := q1.head.Load()
	if writeHead-h+n >= uint32(len(q1.ring)) {
		throw("localSpanQueue.stealFrom: queue overflow")
	}
	q1.tail.Store(writeHead + n)
	return c
}
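
// As with the scheduler's runqsteal, the thief copies entries into its own
// ring before the CAS that removes them from the victim, so a failed CAS
// costs only wasted copies. The last stolen span is returned directly so the
// caller can begin scanning without touching its own queue again.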

// drain moves the entire contents of the local queue to the global queue.
// It reports whether any spans were moved.
func (q *localSpanQueue) drain() bool {
	var batch [len(q.ring)]objptr

	var n uint32
	for {
		var h uint32
		for {
			h = q.head.Load()
			t := q.tail.Load()
			n = t - h
			if n == 0 {
				return false
			}
			if n <= uint32(len(q.ring)) {
				break
			}
			// Read an inconsistent h and t; try again.
		}
		for i := uint32(0); i < n; i++ {
			batch[i] = q.ring[(h+i)%uint32(len(q.ring))]
		}
		if q.head.CompareAndSwap(h, h+n) { // commits removal of the copied entries
			break
		}
	}
	if !q.empty() {
		throw("drained local span queue, but not empty")
	}

	work.spanq.putBatch(batch[:n])
	return true
}

// spanQueueSteal attempts to steal a span from another P's local span queue.
// It returns 0 if no spans could be stolen.
func spanQueueSteal(gcw *gcWork) objptr {
	pp := getg().m.p.ptr()

	for enum := stealOrder.start(cheaprand()); !enum.done(); enum.next() {
		p2 := allp[enum.position()]
		if pp == p2 {
			continue
		}
		if s := gcw.spanq.stealFrom(&p2.gcw.spanq); s != 0 {
			return s
		}
	}
	return 0
}

// objptr is a span base address and the index of an object within that span,
// packed into a single word.
type objptr uintptr

// makeObjPtr packs a span base address and an object index into an objptr.
func makeObjPtr(spanBase uintptr, objIndex uint16) objptr {
	if doubleCheckGreenTea && spanBase&((1<<gc.PageShift)-1) != 0 {
		throw("created objptr with address that is incorrectly aligned")
	}
	return objptr(spanBase | uintptr(objIndex))
}

func (p objptr) spanBase() uintptr {
	return uintptr(p) &^ ((1 << gc.PageShift) - 1)
}

func (p objptr) objIndex() uint16 {
	return uint16(p) & ((1 << gc.PageShift) - 1)
}
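
// Because span bases are aligned to gc.PageSize, the low gc.PageShift bits of
// the base are always zero and can carry the object index instead. With the
// usual 8 KiB pages (gc.PageShift = 13), an inline-mark span holds at most
// 504 objects, so the index always fits below 1<<13.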

// scanSpan scans the marked-but-unscanned objects in the span described by
// p, greying the objects they point to.
func scanSpan(p objptr, gcw *gcWork) {
	spanBase := p.spanBase()
	imb := spanInlineMarkBitsFromBase(spanBase)
	spanclass := imb.class
	if spanclass.noscan() {
		throw("noscan object in scanSpan")
	}
	elemsize := uintptr(gc.SizeClassToSize[spanclass.sizeclass()])

	// Release the span and look at its ownership state at that moment.
	if imb.release() == spanScanOneMark {
		// Sparse path: only one object was marked while the span was queued,
		// namely the one whose index is encoded in p. Scan just that object.
		objIndex := p.objIndex()
		bytep := &imb.scans[objIndex/8]
		mask := uint8(1) << (objIndex % 8)
		if atomic.Load8(bytep)&mask != 0 {
			return
		}
		atomic.Or8(bytep, mask)
		gcw.bytesMarked += uint64(elemsize)
		if debug.gctrace > 1 {
			gcw.stats[spanclass.sizeclass()].spansSparseScanned++
			gcw.stats[spanclass.sizeclass()].spanObjsSparseScanned++
		}
		b := spanBase + uintptr(objIndex)*elemsize
		scanObjectSmall(spanBase, b, elemsize, gcw)
		return
	}

	// Dense path: compute the number of objects in the span from its usable
	// size, then grey and scan every marked-but-unscanned object.
	divMagic := uint64(gc.SizeClassToDivMagic[spanclass.sizeclass()])
	usableSpanSize := uint64(gc.PageSize - unsafe.Sizeof(spanInlineMarkBits{}))
	if !spanclass.noscan() {
		usableSpanSize -= gc.PageSize / goarch.PtrSize / 8
	}
	nelems := uint16((usableSpanSize * divMagic) >> 32)

	// Grey objects and return if there's nothing else to do.
	var toScan gc.ObjMask
	objsMarked := spanSetScans(spanBase, nelems, imb, &toScan)
	if objsMarked == 0 {
		return
	}
	gcw.bytesMarked += uint64(objsMarked) * uint64(elemsize)
	if debug.gctrace > 1 {
		gcw.stats[spanclass.sizeclass()].spansDenseScanned++
		gcw.stats[spanclass.sizeclass()].spanObjsDenseScanned += uint64(objsMarked)
	}
	scanObjectsSmall(spanBase, elemsize, nelems, gcw, &toScan)
}

// spanSetScans sets any unset scan bits whose corresponding mark bit is set,
// records the newly-greyed objects in toScan, and returns how many objects
// it greyed. It also records in the arena's pageMarks bitmap that this span
// contains marked objects.
func spanSetScans(spanBase uintptr, nelems uint16, imb *spanInlineMarkBits, toScan *gc.ObjMask) int {
	arena, pageIdx, pageMask := pageIndexOf(spanBase)
	if arena.pageMarks[pageIdx]&pageMask == 0 {
		atomic.Or8(&arena.pageMarks[pageIdx], pageMask)
	}

	bytes := divRoundUp(uintptr(nelems), 8)
	objsMarked := 0

	// Process the mark and scan bits a word at a time: the objects to grey
	// are exactly those with the mark bit set but the scan bit clear.
	imbMarks := (*gc.ObjMask)(unsafe.Pointer(&imb.marks))
	imbScans := (*gc.ObjMask)(unsafe.Pointer(&imb.scans))

	for i := uintptr(0); i < bytes; i += goarch.PtrSize {
		scans := atomic.Loaduintptr(&imbScans[i/goarch.PtrSize])
		marks := imbMarks[i/goarch.PtrSize]
		scans = bswapIfBigEndian(scans)
		marks = bswapIfBigEndian(marks)
		if i/goarch.PtrSize == uintptr(len(imb.marks)+1)/goarch.PtrSize-1 {
			// The last word overlaps the owned and class bytes; ignore them.
			scans &^= 0xff << ((goarch.PtrSize - 1) * 8)
			marks &^= 0xff << ((goarch.PtrSize - 1) * 8)
		}
		toGrey := marks &^ scans
		toScan[i/goarch.PtrSize] = toGrey

		// Set the scan bits for everything we are about to grey.
		if toGrey != 0 {
			toGrey = bswapIfBigEndian(toGrey)
			if goarch.PtrSize == 4 {
				atomic.Or32((*uint32)(unsafe.Pointer(&imbScans[i/goarch.PtrSize])), uint32(toGrey))
			} else {
				atomic.Or64((*uint64)(unsafe.Pointer(&imbScans[i/goarch.PtrSize])), uint64(toGrey))
			}
		}
		objsMarked += sys.OnesCount64(uint64(toGrey))
	}
	return objsMarked
}

// scanObjectSmall scans the single small object at address b within the span
// at spanBase, greying any objects it points to.
func scanObjectSmall(spanBase, b, objSize uintptr, gcw *gcWork) {
	ptrBits := heapBitsSmallForAddrInline(spanBase, b, objSize)
	gcw.heapScanWork += int64(sys.Len64(uint64(ptrBits)) * goarch.PtrSize)
	nptrs := 0
	n := sys.OnesCount64(uint64(ptrBits))
	for range n {
		k := sys.TrailingZeros64(uint64(ptrBits))
		ptrBits &^= 1 << k
		addr := b + uintptr(k)*goarch.PtrSize

		// Prefetch the memory we are about to load from below.
		sys.Prefetch(addr)

		// Queue the pointer slot for dereferencing below.
		gcw.ptrBuf[nptrs] = addr
		nptrs++
	}

	// Process all the pointer slots found.
	for _, p := range gcw.ptrBuf[:nptrs] {
		p = *(*uintptr)(unsafe.Pointer(p))
		if p == 0 {
			continue
		}
		if !tryDeferToSpanScan(p, gcw) {
			if obj, span, objIndex := findObject(p, 0, 0); obj != 0 {
				greyobject(obj, 0, 0, span, gcw, objIndex)
			}
		}
	}
}

// scanObjectsSmall scans every object in the span at base whose bit is set
// in scans, greying any objects they point to. elems bounds the number of
// objects in the span.
func scanObjectsSmall(base, objSize uintptr, elems uint16, gcw *gcWork, scans *gc.ObjMask) {
	nptrs := 0
	for i, bits := range scans {
		if i*(goarch.PtrSize*8) > int(elems) {
			break
		}
		n := sys.OnesCount64(uint64(bits))
		for range n {
			j := sys.TrailingZeros64(uint64(bits))
			bits &^= 1 << j

			b := base + uintptr(i*(goarch.PtrSize*8)+j)*objSize
			ptrBits := heapBitsSmallForAddrInline(base, b, objSize)
			gcw.heapScanWork += int64(sys.Len64(uint64(ptrBits)) * goarch.PtrSize)

			n := sys.OnesCount64(uint64(ptrBits))
			for range n {
				k := sys.TrailingZeros64(uint64(ptrBits))
				ptrBits &^= 1 << k
				addr := b + uintptr(k)*goarch.PtrSize

				// Prefetch the memory we are about to load from below.
				sys.Prefetch(addr)

				// Queue the pointer slot for dereferencing below.
				gcw.ptrBuf[nptrs] = addr
				nptrs++
			}
		}
	}

	// Process all the pointer slots found.
	for _, p := range gcw.ptrBuf[:nptrs] {
		p = *(*uintptr)(unsafe.Pointer(p))
		if p == 0 {
			continue
		}
		if !tryDeferToSpanScan(p, gcw) {
			if obj, span, objIndex := findObject(p, 0, 0); obj != 0 {
				greyobject(obj, 0, 0, span, gcw, objIndex)
			}
		}
	}
}

// heapBitsSmallForAddrInline loads the heap pointer bitmap covering the
// object at addr in a small-object span whose heap bits are stored inline at
// the end of the span.
func heapBitsSmallForAddrInline(spanBase, addr, elemsize uintptr) uintptr {
	hbitsBase, _ := spanHeapBitsRange(spanBase, gc.PageSize, elemsize)
	hbits := (*byte)(unsafe.Pointer(hbitsBase))

	// The object's pointer/scalar bits occupy elemsize/goarch.PtrSize
	// consecutive bits of the bitmap, starting at the object's word offset
	// within the span. That run lies either entirely within one uintptr word
	// of the bitmap or straddles two adjacent words; read and splice
	// accordingly.
	i := (addr - spanBase) / goarch.PtrSize / ptrBits
	j := (addr - spanBase) / goarch.PtrSize % ptrBits
	bits := elemsize / goarch.PtrSize
	word0 := (*uintptr)(unsafe.Pointer(addb(hbits, goarch.PtrSize*(i+0))))
	word1 := (*uintptr)(unsafe.Pointer(addb(hbits, goarch.PtrSize*(i+1))))

	var read uintptr
	if j+bits > ptrBits {
		// Two reads.
		bits0 := ptrBits - j
		bits1 := bits - bits0
		read = *word0 >> j
		read |= (*word1 & ((1 << bits1) - 1)) << bits0
	} else {
		// One read.
		read = (*word0 >> j) & ((1 << bits) - 1)
	}
	return read
}
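
// For example, on a 64-bit platform (ptrBits = 64) with elemsize = 48, each
// object owns 48/8 = 6 bits of the bitmap. An object starting at word offset
// 62 of the span has j = 62 and needs bits 62..67, so it takes the two-read
// path: 2 bits from the top of word i and 4 bits from the bottom of word
// i+1. Objects whose 6 bits fall within a single word take the one-read path.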

// sizeClassScanStats records, per size class, how many spans and objects
// were scanned, broken down by the dense (whole-span) and sparse
// (single-object) strategies.
type sizeClassScanStats struct {
	spansDenseScanned     uint64
	spanObjsDenseScanned  uint64
	spansSparseScanned    uint64
	spanObjsSparseScanned uint64
	sparseObjsScanned     uint64
}

func dumpScanStats() {
	var (
		spansDenseScanned     uint64
		spanObjsDenseScanned  uint64
		spansSparseScanned    uint64
		spanObjsSparseScanned uint64
		sparseObjsScanned     uint64
	)
	for _, stats := range memstats.lastScanStats {
		spansDenseScanned += stats.spansDenseScanned
		spanObjsDenseScanned += stats.spanObjsDenseScanned
		spansSparseScanned += stats.spansSparseScanned
		spanObjsSparseScanned += stats.spanObjsSparseScanned
		sparseObjsScanned += stats.sparseObjsScanned
	}
	totalObjs := sparseObjsScanned + spanObjsSparseScanned + spanObjsDenseScanned
	totalSpans := spansSparseScanned + spansDenseScanned
	print("scan: total ", sparseObjsScanned, "+", spanObjsSparseScanned, "+", spanObjsDenseScanned, "=", totalObjs, " objs")
	print(", ", spansSparseScanned, "+", spansDenseScanned, "=", totalSpans, " spans\n")
	for i, stats := range memstats.lastScanStats {
		if stats == (sizeClassScanStats{}) {
			continue
		}
		totalObjs := stats.sparseObjsScanned + stats.spanObjsSparseScanned + stats.spanObjsDenseScanned
		totalSpans := stats.spansSparseScanned + stats.spansDenseScanned
		if i == 0 {
			print("scan: class L ")
		} else {
			print("scan: class ", gc.SizeClassToSize[i], "B ")
		}
		print(stats.sparseObjsScanned, "+", stats.spanObjsSparseScanned, "+", stats.spanObjsDenseScanned, "=", totalObjs, " objs")
		print(", ", stats.spansSparseScanned, "+", stats.spansDenseScanned, "=", totalSpans, " spans\n")
	}
}

func (w *gcWork) flushScanStats(dst *[gc.NumSizeClasses]sizeClassScanStats) {
	for i := range w.stats {
		dst[i].spansDenseScanned += w.stats[i].spansDenseScanned
		dst[i].spanObjsDenseScanned += w.stats[i].spanObjsDenseScanned
		dst[i].spansSparseScanned += w.stats[i].spansSparseScanned
		dst[i].spanObjsSparseScanned += w.stats[i].spanObjsSparseScanned
		dst[i].sparseObjsScanned += w.stats[i].sparseObjsScanned
	}
	clear(w.stats[:])
}