Source file src/internal/runtime/gc/scan/scan_test.go

// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package scan_test

import (
	"fmt"
	"internal/cpu"
	"internal/goarch"
	"internal/runtime/gc"
	"internal/runtime/gc/scan"
	"math/bits"
	"math/rand/v2"
	"slices"
	"sync"
	"testing"
	"unsafe"
)

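// scanFunc is the common signature of the ScanSpanPacked implementations
// under test: scan one page of memory at mem, write the pointers found in
// marked objects to the buffer at bufp, and return how many pointers were
// written.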
type scanFunc func(mem unsafe.Pointer, bufp *uintptr, objMarks *gc.ObjMask, sizeClass uintptr, ptrMask *gc.PtrMask) (count int32)

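// testScanSpanPacked cross-checks scanF against the portable reference
// implementation, scan.ScanSpanPackedReference, over every packed size class
// and a sliding window of marked objects. The memory comes from makeMem, a
// test helper (defined elsewhere in this package) that is assumed to return
// a page-aligned buffer covering the requested number of pages plus a
// release function.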
func testScanSpanPacked(t *testing.T, scanF scanFunc) {
	scanR := scan.ScanSpanPackedReference

	// Construct a fake memory
	mem, free := makeMem(t, 1)
	defer free()
	for i := range mem {
		// Use values > gc.PageSize because a scan function can discard
		// pointers smaller than this.
		mem[i] = uintptr(int(gc.PageSize) + i + 1)
	}

	// Construct a random pointer mask
	rnd := rand.New(rand.NewPCG(42, 42))
	var ptrs gc.PtrMask
	for i := range ptrs {
		ptrs[i] = uintptr(rnd.Uint64())
	}

	bufF := make([]uintptr, gc.PageWords)
	bufR := make([]uintptr, gc.PageWords)
	testObjs(t, func(t *testing.T, sizeClass int, objs *gc.ObjMask) {
		nF := scanF(unsafe.Pointer(&mem[0]), &bufF[0], objs, uintptr(sizeClass), &ptrs)
		nR := scanR(unsafe.Pointer(&mem[0]), &bufR[0], objs, uintptr(sizeClass), &ptrs)

		if nR != nF {
			t.Errorf("want %d count, got %d", nR, nF)
		} else if !slices.Equal(bufF[:nF], bufR[:nR]) {
			t.Errorf("want scanned pointers %d, got %d", bufR[:nR], bufF[:nF])
		}
	})
}

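// testObjs invokes f once per size class whose pointer/scalar metadata is
// packed (sizes up to gc.MinSizeForMallocHeader), sliding a small pattern of
// marked objects across the whole span to exercise boundary conditions in
// the mark mask.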
func testObjs(t *testing.T, f func(t *testing.T, sizeClass int, objMask *gc.ObjMask)) {
	for sizeClass := range gc.NumSizeClasses {
		if sizeClass == 0 {
			continue
		}
		size := uintptr(gc.SizeClassToSize[sizeClass])
		if size > gc.MinSizeForMallocHeader {
			break // Pointer/scalar metadata is not packed for larger sizes.
		}
		t.Run(fmt.Sprintf("size=%d", size), func(t *testing.T) {
			// Scan a few objects near i to test boundary conditions.
			const objMask = 0x101
			nObj := uintptr(gc.SizeClassToNPages[sizeClass]) * gc.PageSize / size
			for i := range nObj - uintptr(bits.Len(objMask)-1) {
				t.Run(fmt.Sprintf("objs=0x%x<<%d", objMask, i), func(t *testing.T) {
					var objs gc.ObjMask
					objs[i/goarch.PtrBits] = objMask << (i % goarch.PtrBits)
					f(t, sizeClass, &objs)
				})
			}
		})
	}
}

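// dataCacheSizes reports the per-level data cache sizes exactly once (via
// sync.OnceValue), printing each level so benchmark results can be read
// relative to cache capacity.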
var dataCacheSizes = sync.OnceValue(func() []uintptr {
	cs := cpu.DataCacheSizes()
	for i, c := range cs {
		fmt.Printf("# L%d cache: %d (%d Go pages)\n", i+1, c, c/gc.PageSize)
	}
	return cs
})

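// BenchmarkScanSpanPacked measures each ScanSpanPacked implementation across
// cache-sized heaps, size classes, and mark densities. An illustrative
// invocation (run from within a Go source checkout; the exact flags are only
// a suggestion):
//
//	go test -run='^$' -bench=ScanSpanPacked internal/runtime/gc/scan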
func BenchmarkScanSpanPacked(b *testing.B) {
	benchmarkCacheSizes(b, benchmarkScanSpanPackedAllSizeClasses)
}

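// benchmarkCacheSizes runs fn with heap sizes chosen to fit comfortably
// inside each cache level (3/4 of the level's capacity), plus a tiny
// one-page heap and one sized to overflow the last-level cache (3/2 of its
// capacity), so results show how scanning behaves as the working set moves
// down the memory hierarchy.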
func benchmarkCacheSizes(b *testing.B, fn func(b *testing.B, heapPages int)) {
	cacheSizes := dataCacheSizes()
	b.Run("cache=tiny/pages=1", func(b *testing.B) {
		fn(b, 1)
	})
	for i, cacheBytes := range cacheSizes {
		pages := int(cacheBytes*3/4) / gc.PageSize
		b.Run(fmt.Sprintf("cache=L%d/pages=%d", i+1, pages), func(b *testing.B) {
			fn(b, pages)
		})
	}
	if len(cacheSizes) == 0 {
		return
	}
	ramPages := int(cacheSizes[len(cacheSizes)-1]*3/2) / gc.PageSize
	b.Run(fmt.Sprintf("cache=ram/pages=%d", ramPages), func(b *testing.B) {
		fn(b, ramPages)
	})
}

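// benchmarkScanSpanPackedAllSizeClasses runs the scan benchmark at the given
// heap size for every size class with packed pointer/scalar metadata.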
func benchmarkScanSpanPackedAllSizeClasses(b *testing.B, nPages int) {
	for sc := range gc.NumSizeClasses {
		if sc == 0 {
			continue
		}
		size := gc.SizeClassToSize[sc]
		if size >= gc.MinSizeForMallocHeader {
			break
		}
		b.Run(fmt.Sprintf("sizeclass=%d", sc), func(b *testing.B) {
			benchmarkScanSpanPacked(b, nPages, sc)
		})
	}
}

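// benchmarkScanSpanPacked benchmarks the reference, portable Go, and (when
// available) platform-specific implementations over an nPages heap of the
// given size class, sweeping the fraction of marked objects from 0% to 100%
// and visiting pages in a random order.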
func benchmarkScanSpanPacked(b *testing.B, nPages int, sizeClass int) {
	rnd := rand.New(rand.NewPCG(42, 42))

	// Construct a fake memory
	mem, free := makeMem(b, nPages)
	defer free()
	for i := range mem {
		// Use values > gc.PageSize because a scan function can discard
		// pointers smaller than this.
		mem[i] = uintptr(int(gc.PageSize) + i + 1)
	}

	// Construct a random pointer mask
	ptrs := make([]gc.PtrMask, nPages)
	for i := range ptrs {
		for j := range ptrs[i] {
			ptrs[i][j] = uintptr(rnd.Uint64())
		}
	}

	// Visit the pages in a random order
	pageOrder := rnd.Perm(nPages)

	// Create the scan buffer.
	buf := make([]uintptr, gc.PageWords)

	// Sweep from 0 marks to all marks. We'll use the same marks for each page
	// because I don't think that predictability matters.
	objBytes := uintptr(gc.SizeClassToSize[sizeClass])
	nObj := gc.PageSize / objBytes
	markOrder := rnd.Perm(int(nObj))
	const steps = 11
	for i := 0; i < steps; i++ {
		frac := float64(i) / float64(steps-1)
		// Set frac marks.
		nMarks := int(float64(len(markOrder))*frac + 0.5)
		var objMarks gc.ObjMask
		for _, mark := range markOrder[:nMarks] {
			objMarks[mark/goarch.PtrBits] |= 1 << (mark % goarch.PtrBits)
		}
		greyClusters := 0
		for page := range ptrs {
			greyClusters += countGreyClusters(sizeClass, &objMarks, &ptrs[page])
		}

		// Report MB/s of how much memory they're actually hitting. This assumes
		// 64 byte cache lines (TODO: Should it assume 128 byte cache lines?)
		// and expands each access to the whole cache line. This is useful for
		// comparing against memory bandwidth.
		//
		// TODO: Add a benchmark that just measures single core memory bandwidth
		// for comparison. (See runtime memcpy benchmarks.)
		//
		// TODO: Should there be a separate measure where we don't expand to
		// cache lines?
		avgBytes := int64(greyClusters) * int64(cpu.CacheLineSize) / int64(len(ptrs))

		b.Run(fmt.Sprintf("pct=%d", int(100*frac)), func(b *testing.B) {
			b.Run("impl=Reference", func(b *testing.B) {
				b.SetBytes(avgBytes)
				for i := range b.N {
					page := pageOrder[i%len(pageOrder)]
					scan.ScanSpanPackedReference(unsafe.Pointer(&mem[gc.PageWords*page]), &buf[0], &objMarks, uintptr(sizeClass), &ptrs[page])
				}
			})
			b.Run("impl=Go", func(b *testing.B) {
				b.SetBytes(avgBytes)
				for i := range b.N {
					page := pageOrder[i%len(pageOrder)]
					scan.ScanSpanPackedGo(unsafe.Pointer(&mem[gc.PageWords*page]), &buf[0], &objMarks, uintptr(sizeClass), &ptrs[page])
				}
			})
			if scan.HasFastScanSpanPacked() {
				b.Run("impl=Platform", func(b *testing.B) {
					b.SetBytes(avgBytes)
					for i := range b.N {
						page := pageOrder[i%len(pageOrder)]
						scan.ScanSpanPacked(unsafe.Pointer(&mem[gc.PageWords*page]), &buf[0], &objMarks, uintptr(sizeClass), &ptrs[page])
					}
				})
			}
		})
	}
}

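// countGreyClusters counts how many distinct clusters of pointer words a
// full scan of one page would touch given the object marks; a cluster is a
// small aligned group of words approximating a cache line (64 bytes on
// 64-bit platforms). The count drives the bytes/op figure reported by the
// benchmarks above.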
func countGreyClusters(sizeClass int, objMarks *gc.ObjMask, ptrMask *gc.PtrMask) int {
	clusters := 0
	lastCluster := -1

	expandBy := uintptr(gc.SizeClassToSize[sizeClass]) / goarch.PtrSize
	for word := range gc.PageWords {
		objI := uintptr(word) / expandBy
		if objMarks[objI/goarch.PtrBits]&(1<<(objI%goarch.PtrBits)) == 0 {
			continue
		}
		if ptrMask[word/goarch.PtrBits]&(1<<(word%goarch.PtrBits)) == 0 {
			continue
		}
		c := word * 8 / goarch.PtrBits
		if c != lastCluster {
			lastCluster = c
			clusters++
		}
	}
	return clusters
}

func BenchmarkScanMaxBandwidth(b *testing.B) {
	// Measure the theoretical "maximum" bandwidth of scanning by reproducing
	// the memory access pattern of a full page scan, but using memcpy as the
	// kernel instead of scanning.
	benchmarkCacheSizes(b, func(b *testing.B, heapPages int) {
		mem, free := makeMem(b, heapPages)
		defer free()
		for i := range mem {
			mem[i] = uintptr(int(gc.PageSize) + i + 1)
		}
		buf := make([]uintptr, gc.PageWords)

		// Visit the pages in a random order
		rnd := rand.New(rand.NewPCG(42, 42))
		pageOrder := rnd.Perm(heapPages)

		b.SetBytes(int64(gc.PageSize))

		b.ResetTimer()
		for i := range b.N {
			page := pageOrder[i%len(pageOrder)]
			copy(buf, mem[gc.PageWords*page:])
		}
	})
}
