Source file src/runtime/testdata/testprog/schedmetrics.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"internal/testenv"
    11  	"log"
    12  	"os"
    13  	"runtime"
    14  	"runtime/debug"
    15  	"runtime/metrics"
    16  	"strings"
    17  	"sync/atomic"
    18  	"syscall"
    19  	"time"
    20  )
    21  
// init registers the SchedMetrics entry point with the testprog
// harness so the runtime tests can invoke it by name as a
// subprocess command.
func init() {
	register("SchedMetrics", SchedMetrics)
}
    25  
// Tests runtime/metrics.Read for various scheduler metrics.
//
// Implemented in testprog to prevent other tests from polluting
// the metrics.
func SchedMetrics() {
	// Indices into the sample array below. numSamples must stay
	// last: it doubles as the array length.
	const (
		notInGo = iota
		runnable
		running
		waiting
		created
		threads
		numSamples
	)
	var s [numSamples]metrics.Sample
	s[notInGo].Name = "/sched/goroutines/not-in-go:goroutines"
	s[runnable].Name = "/sched/goroutines/runnable:goroutines"
	s[running].Name = "/sched/goroutines/running:goroutines"
	s[waiting].Name = "/sched/goroutines/waiting:goroutines"
	s[created].Name = "/sched/goroutines-created:goroutines"
	s[threads].Name = "/sched/threads/total:threads"

	// All log output is buffered and only dumped to stderr on
	// failure (see the end of this function); a passing run prints
	// just "OK".
	var failed bool
	var out bytes.Buffer
	logger := log.New(&out, "", 0)
	indent := 0
	// logf writes one formatted line at the current indent level.
	logf := func(s string, a ...any) {
		var prefix strings.Builder
		for range indent {
			prefix.WriteString("\t")
		}
		logger.Printf(prefix.String()+s, a...)
	}
	// errorf logs like logf and marks the whole test as failed.
	errorf := func(s string, a ...any) {
		logf(s, a...)
		failed = true
	}
	// run executes a named sub-check with its log output indented
	// one level deeper.
	run := func(name string, f func()) {
		logf("=== Checking %q", name)
		indent++
		f()
		indent--
	}
	// logMetrics dumps the current value of every sample.
	logMetrics := func(s []metrics.Sample) {
		for i := range s {
			logf("%s: %d", s[i].Name, s[i].Value.Uint64())
		}
	}

	initialGMP := runtime.GOMAXPROCS(-1)
	logf("Initial GOMAXPROCS=%d", initialGMP)

	// generalSlack is the amount of goroutines we allow ourselves to be
	// off by in any given category due to background system
	// goroutines. This excludes GC goroutines.
	generalSlack := uint64(4)

	// waitingSlack is the max number of blocked goroutines controlled
	// by the runtime that we'll allow for. This includes GC goroutines
	// as well as finalizer and cleanup goroutines.
	waitingSlack := generalSlack + uint64(2*initialGMP)

	// threadsSlack is the maximum number of threads left over
	// from the runtime (sysmon, the template thread, etc.)
	// Certain build modes may also cause the creation of additional
	// threads through frequent scheduling, like mayMoreStackPreempt.
	// A slack of 5 is arbitrary but appears to be enough to cover
	// the leftovers plus any inflation from scheduling-heavy build
	// modes. We then also add initialGMP to this slack, since we're
	// about to call runtime.GC, and in the worst case this will
	// spin up GOMAXPROCS new threads to run those workers.
	threadsSlack := 5 + uint64(initialGMP)

	// Make sure GC isn't running, since GC workers interfere with
	// expected counts. (The deferred call restores the original
	// GC percent on exit.)
	defer debug.SetGCPercent(debug.SetGCPercent(-1))
	runtime.GC()

	// check fails the test if s's value lies outside [min, max].
	check := func(s *metrics.Sample, min, max uint64) {
		val := s.Value.Uint64()
		if val < min {
			errorf("%s too low; %d < %d", s.Name, val, min)
		}
		if val > max {
			errorf("%s too high; %d > %d", s.Name, val, max)
		}
	}
	// checkEq fails the test unless s's value is exactly value.
	checkEq := func(s *metrics.Sample, value uint64) {
		check(s, value, value)
	}
	// spinUntil polls f until it reports true. There is no internal
	// timeout; a condition that never becomes true relies on the
	// test harness timing out the whole program.
	spinUntil := func(f func() bool) bool {
		for {
			if f() {
				return true
			}
			time.Sleep(50 * time.Millisecond)
		}
	}

	// Check base values.
	run("base", func() {
		defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
		metrics.Read(s[:])
		logMetrics(s[:])
		check(&s[notInGo], 0, generalSlack)
		check(&s[runnable], 0, generalSlack)
		checkEq(&s[running], 1)
		check(&s[waiting], 0, waitingSlack)
	})

	// Snapshot the created-goroutines counter; later checks are
	// expressed relative to this baseline.
	metrics.Read(s[:])
	createdAfterBase := s[created].Value.Uint64()

	// Force Running count to be high. We'll use these goroutines
	// for Runnable, too.
	const count = 10
	var ready, exit atomic.Uint32
	for range count {
		go func() {
			ready.Add(1)
			for exit.Load() == 0 {
				// Spin to get us and keep us running, but check
				// the exit condition so we exit out early if we're
				// done.
				start := time.Now()
				for time.Since(start) < 10*time.Millisecond && exit.Load() == 0 {
				}
				runtime.Gosched()
			}
		}()
	}
	// Wait until all count goroutines have started spinning.
	for ready.Load() < count {
		runtime.Gosched()
	}

	// Be careful. We've entered a dangerous state for platforms
	// that do not return back to the underlying system unless all
	// goroutines are blocked, like js/wasm, since we have a bunch
	// of runnable goroutines all spinning. We cannot write anything
	// out.
	if testenv.HasParallelism() {
		run("created", func() {
			metrics.Read(s[:])
			logMetrics(s[:])
			checkEq(&s[created], createdAfterBase+count)
		})
		run("running", func() {
			defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(count + 4))
			// It can take a little bit for the scheduler to
			// distribute the goroutines to Ps, so retry until
			// we see the count we expect or the test times out.
			spinUntil(func() bool {
				metrics.Read(s[:])
				return s[running].Value.Uint64() >= count
			})
			logMetrics(s[:])
			check(&s[running], count, count+4)
			check(&s[threads], count, count+4+threadsSlack)
		})

		// Force runnable count to be high.
		run("runnable", func() {
			defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
			metrics.Read(s[:])
			logMetrics(s[:])
			checkEq(&s[running], 1)
			check(&s[runnable], count-1, count+generalSlack)
		})

		// Done with the running/runnable goroutines.
		exit.Store(1)
	} else {
		// Read metrics and then exit all the other goroutines,
		// so that system calls may proceed.
		metrics.Read(s[:])

		// Done with the running/runnable goroutines.
		exit.Store(1)

		// Now we can check our invariants.
		run("created", func() {
			// Look for count-1 goroutines because we read metrics
			// *before* run goroutine was created for this sub-test.
			checkEq(&s[created], createdAfterBase+count-1)
		})
		run("running", func() {
			logMetrics(s[:])
			checkEq(&s[running], 1)
			checkEq(&s[threads], 1)
		})
		run("runnable", func() {
			logMetrics(s[:])
			check(&s[runnable], count-1, count+generalSlack)
		})
	}

	// Force not-in-go count to be high. This is a little tricky since
	// we try really hard not to let things block in system calls.
	// We have to drop to the syscall package to do this reliably.
	run("not-in-go", func() {
		// Block a bunch of goroutines on an OS pipe.
		pr, pw, err := pipe()
		if err != nil {
			switch runtime.GOOS {
			case "js", "wasip1":
				// Pipe creation can fail on these platforms;
				// tolerate it and skip the check.
				logf("creating pipe: %v", err)
				return
			}
			panic(fmt.Sprintf("creating pipe: %v", err))
		}
		for i := 0; i < count; i++ {
			go syscall.Read(pr, make([]byte, 1))
		}

		// Let the goroutines block.
		spinUntil(func() bool {
			metrics.Read(s[:])
			return s[notInGo].Value.Uint64() >= count
		})
		logMetrics(s[:])
		check(&s[notInGo], count, count+generalSlack)

		// Closing the write end first unblocks the readers;
		// then release the read end.
		syscall.Close(pw)
		syscall.Close(pr)
	})

	run("waiting", func() {
		// Force waiting count to be high.
		const waitingCount = 1000
		stop := make(chan bool)
		for i := 0; i < waitingCount; i++ {
			go func() { <-stop }()
		}

		// Let the goroutines block.
		spinUntil(func() bool {
			metrics.Read(s[:])
			return s[waiting].Value.Uint64() >= waitingCount
		})
		logMetrics(s[:])
		check(&s[waiting], waitingCount, waitingCount+waitingSlack)

		close(stop)
	})

	// Dump the buffered log only on failure; otherwise report "OK",
	// which the driving test matches against.
	if failed {
		fmt.Fprintln(os.Stderr, out.String())
		os.Exit(1)
	} else {
		fmt.Fprintln(os.Stderr, "OK")
	}
}
   278  

View as plain text