Source file src/runtime/goroutineleakprofile_test.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime_test
     6  
     7  import (
     8  	"fmt"
     9  	"internal/testenv"
    10  	"os"
    11  	"regexp"
    12  	"strings"
    13  	"testing"
    14  )
    15  
    16  func TestGoroutineLeakProfile(t *testing.T) {
    17  	// Some tests have false negatives under mayMoreStackPreempt and mayMoreStackMove.
    18  	// This may be a test-only issue in that they're just sensitive to scheduling, but it
    19  	// needs more investigation.
    20  	for _, cfg := range []string{"mayMoreStackPreempt", "mayMoreStackMove"} {
    21  		if strings.Contains(os.Getenv("GOFLAGS"), cfg) {
    22  			testenv.SkipFlaky(t, 75729)
    23  		}
    24  	}
    25  
    26  	// Goroutine leak test case.
    27  	//
    28  	// Test cases can be configured with test name, the name of the entry point function,
    29  	// a set of expected leaks identified by regular expressions, and the number of times
    30  	// the test should be repeated.
    31  	//
    32  	// Repeated runs reduce flakiness in some tests.
    33  	type testCase struct {
    34  		name          string
    35  		simple        bool
    36  		repetitions   int
    37  		expectedLeaks map[*regexp.Regexp]bool
    38  
    39  		// flakyLeaks are goroutine leaks that are too flaky to be reliably detected.
    40  		// Still, they might pop up every once in a while. The test will pass regardless
    41  		// if they occur or nor, as they are not unexpected.
    42  		//
    43  		// Note that all flaky leaks are true positives, i.e. real goroutine leaks,
    44  		// and it is only their detection that is unreliable due to scheduling
    45  		// non-determinism.
    46  		flakyLeaks map[*regexp.Regexp]struct{}
    47  	}
    48  
    49  	// makeAnyTest is a short-hand for creating test cases.
    50  	// Each of the leaks in the list is identified by a regular expression.
    51  	// If a leak is flaky, it is added to the flakyLeaks map.
    52  	makeAnyTest := func(name string, flaky bool, repetitions int, leaks ...string) testCase {
    53  		tc := testCase{
    54  			name:          name,
    55  			expectedLeaks: make(map[*regexp.Regexp]bool, len(leaks)),
    56  			flakyLeaks:    make(map[*regexp.Regexp]struct{}, len(leaks)),
    57  			// Make sure the test is repeated at least once.
    58  			repetitions: repetitions | 1,
    59  		}
    60  
    61  		for _, leak := range leaks {
    62  			if !flaky {
    63  				tc.expectedLeaks[regexp.MustCompile(leak)] = false
    64  			} else {
    65  				tc.flakyLeaks[regexp.MustCompile(leak)] = struct{}{}
    66  			}
    67  		}
    68  
    69  		return tc
    70  	}
    71  
    72  	// makeTest is a short-hand for creating non-flaky test cases.
    73  	makeTest := func(name string, leaks ...string) testCase {
    74  		tcase := makeAnyTest(name, false, 2, leaks...)
    75  		tcase.simple = true
    76  		return tcase
    77  	}
    78  
    79  	// makeFlakyTest is a short-hand for creating flaky test cases.
    80  	makeFlakyTest := func(name string, leaks ...string) testCase {
    81  		if testing.Short() {
    82  			return makeAnyTest(name, true, 2, leaks...)
    83  		}
    84  		return makeAnyTest(name, true, 10, leaks...)
    85  	}
    86  
    87  	goroutineHeader := regexp.MustCompile(`goroutine \d+ \[`)
    88  
    89  	// extractLeaks takes the output of a test and splits it into a
    90  	// list of strings denoting goroutine leaks.
    91  	//
    92  	// If the input is:
    93  	//
    94  	// goroutine 1 [wait reason (leaked)]:
    95  	// main.leaked()
    96  	// 	./testdata/testgoroutineleakprofile/foo.go:37 +0x100
    97  	// created by main.main()
    98  	// 	./testdata/testgoroutineleakprofile/main.go:10 +0x20
    99  	//
   100  	// goroutine 2 [wait reason (leaked)]:
   101  	// main.leaked2()
   102  	// 	./testdata/testgoroutineleakprofile/foo.go:37 +0x100
   103  	// created by main.main()
   104  	// 	./testdata/testgoroutineleakprofile/main.go:10 +0x20
   105  	//
   106  	// The output is (as a list of strings):
   107  	//
   108  	// leaked() [wait reason]
   109  	// leaked2() [wait reason]
   110  	extractLeaks := func(output string) []string {
   111  		stacks := strings.Split(output, "\n\ngoroutine")
   112  		var leaks []string
   113  		for _, stack := range stacks {
   114  			lines := strings.Split(stack, "\n")
   115  			if len(lines) < 5 {
   116  				// Expecting at least the following lines (where n=len(lines)-1):
   117  				//
   118  				// [0] goroutine n [wait reason (leaked)]
   119  				// ...
   120  				// [n-3] bottom.leak.frame(...)
   121  				// [n-2]  ./bottom/leak/frame/source.go:line
   122  				// [n-1] created by go.instruction()
   123  				// [n] 	  ./go/instruction/source.go:line
   124  				continue
   125  			}
   126  
   127  			if !strings.Contains(lines[0], "(leaked)") {
   128  				// Ignore non-leaked goroutines.
   129  				continue
   130  			}
   131  
   132  			// Get the wait reason from the goroutine header.
   133  			header := lines[0]
   134  			waitReason := goroutineHeader.ReplaceAllString(header, "[")
   135  			waitReason = strings.ReplaceAll(waitReason, " (leaked)", "")
   136  
   137  			// Get the function name from the stack trace (should be two lines above `created by`).
   138  			var funcName string
   139  			for i := len(lines) - 1; i >= 0; i-- {
   140  				if strings.Contains(lines[i], "created by") {
   141  					funcName = strings.TrimPrefix(lines[i-2], "main.")
   142  					break
   143  				}
   144  			}
   145  			if funcName == "" {
   146  				t.Fatalf("failed to extract function name from stack trace: %s", lines)
   147  			}
   148  
   149  			leaks = append(leaks, funcName+" "+waitReason)
   150  		}
   151  		return leaks
   152  	}
   153  
   154  	// Micro tests involve very simple leaks for each type of concurrency primitive operation.
   155  	microTests := []testCase{
   156  		makeTest("NilRecv",
   157  			`NilRecv\.func1\(.* \[chan receive \(nil chan\)\]`,
   158  		),
   159  		makeTest("NilSend",
   160  			`NilSend\.func1\(.* \[chan send \(nil chan\)\]`,
   161  		),
   162  		makeTest("SelectNoCases",
   163  			`SelectNoCases\.func1\(.* \[select \(no cases\)\]`,
   164  		),
   165  		makeTest("ChanRecv",
   166  			`ChanRecv\.func1\(.* \[chan receive\]`,
   167  		),
   168  		makeTest("ChanSend",
   169  			`ChanSend\.func1\(.* \[chan send\]`,
   170  		),
   171  		makeTest("Select",
   172  			`Select\.func1\(.* \[select\]`,
   173  		),
   174  		makeTest("WaitGroup",
   175  			`WaitGroup\.func1\(.* \[sync\.WaitGroup\.Wait\]`,
   176  		),
   177  		makeTest("MutexStack",
   178  			`MutexStack\.func1\(.* \[sync\.Mutex\.Lock\]`,
   179  		),
   180  		makeTest("MutexHeap",
   181  			`MutexHeap\.func1.1\(.* \[sync\.Mutex\.Lock\]`,
   182  		),
   183  		makeTest("Cond",
   184  			`Cond\.func1\(.* \[sync\.Cond\.Wait\]`,
   185  		),
   186  		makeTest("RWMutexRLock",
   187  			`RWMutexRLock\.func1\(.* \[sync\.RWMutex\.RLock\]`,
   188  		),
   189  		makeTest("RWMutexLock",
   190  			`RWMutexLock\.func1\(.* \[sync\.(RW)?Mutex\.Lock\]`,
   191  		),
   192  		makeTest("Mixed",
   193  			`Mixed\.func1\(.* \[sync\.WaitGroup\.Wait\]`,
   194  			`Mixed\.func1.1\(.* \[chan send\]`,
   195  		),
   196  		makeTest("NoLeakGlobal"),
   197  	}
   198  
   199  	// Stress tests are flaky and we do not strictly care about their output.
   200  	// They are only intended to stress the goroutine leak detector and profiling
   201  	// infrastructure in interesting ways.
   202  	stressTestCases := []testCase{
   203  		makeFlakyTest("SpawnGC",
   204  			`spawnGC.func1\(.* \[chan receive\]`,
   205  		),
   206  		makeTest("DaisyChain"),
   207  	}
   208  
   209  	// Common goroutine leak patterns.
   210  	// Extracted from "Unveiling and Vanquishing Goroutine Leaks in Enterprise Microservices: A Dynamic Analysis Approach"
   211  	// doi:10.1109/CGO57630.2024.10444835
   212  	patternTestCases := []testCase{
   213  		makeTest("NoCloseRange",
   214  			`noCloseRange\(.* \[chan send\]`,
   215  			`noCloseRange\.func1\(.* \[chan receive\]`,
   216  		),
   217  		makeTest("MethodContractViolation",
   218  			`worker\.Start\.func1\(.* \[select\]`,
   219  		),
   220  		makeTest("DoubleSend",
   221  			`DoubleSend\.func3\(.* \[chan send\]`,
   222  		),
   223  		makeTest("EarlyReturn",
   224  			`earlyReturn\.func1\(.* \[chan send\]`,
   225  		),
   226  		makeTest("NCastLeak",
   227  			`nCastLeak\.func1\(.* \[chan send\]`,
   228  			`NCastLeak\.func2\(.* \[chan receive\]`,
   229  		),
   230  		makeTest("Timeout",
   231  			// (vsaioc): Timeout is *theoretically* flaky, but the
   232  			// pseudo-random choice for select case branches makes it
   233  			// practically impossible for it to fail.
   234  			`timeout\.func1\(.* \[chan send\]`,
   235  		),
   236  	}
   237  
   238  	// GoKer tests from "GoBench: A Benchmark Suite of Real-World Go Concurrency Bugs".
   239  	// Refer to testdata/testgoroutineleakprofile/goker/README.md.
   240  	//
   241  	// This list is curated for tests that are not excessively flaky.
   242  	// Some tests are also excluded because they are redundant.
   243  	//
   244  	// TODO(vsaioc): Some of these might be removable (their patterns may overlap).
   245  	gokerTestCases := []testCase{
   246  		makeFlakyTest("Cockroach584",
   247  			`Cockroach584\.func2\(.* \[sync\.Mutex\.Lock\]`,
   248  		),
   249  		makeFlakyTest("Cockroach1055",
   250  			`Cockroach1055\.func2\(.* \[chan receive\]`,
   251  			`Cockroach1055\.func2\.2\(.* \[sync\.WaitGroup\.Wait\]`,
   252  			`Cockroach1055\.func2\.1\(.* \[chan receive\]`,
   253  			`Cockroach1055\.func2\.1\(.* \[sync\.Mutex\.Lock\]`,
   254  		),
   255  		makeFlakyTest("Cockroach1462",
   256  			`\(\*Stopper_cockroach1462\)\.RunWorker\.func1\(.* \[chan send\]`,
   257  			`Cockroach1462\.func2\(.* \[sync\.WaitGroup\.Wait\]`,
   258  		),
   259  		makeFlakyTest("Cockroach2448",
   260  			`\(\*Store_cockroach2448\)\.processRaft\(.* \[select\]`,
   261  			`\(\*state_cockroach2448\)\.start\(.* \[select\]`,
   262  		),
   263  		makeFlakyTest("Cockroach3710",
   264  			`\(\*Store_cockroach3710\)\.ForceRaftLogScanAndProcess\(.* \[sync\.RWMutex\.RLock\]`,
   265  			`\(\*Store_cockroach3710\)\.processRaft\.func1\(.* \[sync\.RWMutex\.Lock\]`,
   266  		),
   267  		makeFlakyTest("Cockroach6181",
   268  			`testRangeCacheCoalescedRequests_cockroach6181\(.* \[sync\.WaitGroup\.Wait\]`,
   269  			`testRangeCacheCoalescedRequests_cockroach6181\.func1\.1\(.* \[sync\.(RW)?Mutex\.Lock\]`,
   270  			`testRangeCacheCoalescedRequests_cockroach6181\.func1\.1\(.* \[sync\.RWMutex\.RLock\]`,
   271  		),
   272  		makeTest("Cockroach7504",
   273  			`Cockroach7504\.func2\.1.* \[sync\.Mutex\.Lock\]`,
   274  			`Cockroach7504\.func2\.2.* \[sync\.Mutex\.Lock\]`,
   275  		),
   276  		makeFlakyTest("Cockroach9935",
   277  			`\(\*loggingT_cockroach9935\)\.outputLogEntry\(.* \[sync\.Mutex\.Lock\]`,
   278  		),
   279  		makeFlakyTest("Cockroach10214",
   280  			`\(*Store_cockroach10214\)\.sendQueuedHeartbeats\(.* \[sync\.Mutex\.Lock\]`,
   281  			`\(*Replica_cockroach10214\)\.tick\(.* \[sync\.Mutex\.Lock\]`,
   282  		),
   283  		makeFlakyTest("Cockroach10790",
   284  			`\(\*Replica_cockroach10790\)\.beginCmds\.func1\(.* \[chan receive\]`,
   285  		),
   286  		makeTest("Cockroach13197",
   287  			`\(\*Tx_cockroach13197\)\.awaitDone\(.* \[chan receive\]`,
   288  		),
   289  		makeTest("Cockroach13755",
   290  			`\(\*Rows_cockroach13755\)\.awaitDone\(.* \[chan receive\]`,
   291  		),
   292  		makeFlakyTest("Cockroach16167",
   293  			`Cockroach16167\.func2\(.* \[sync\.RWMutex\.RLock\]`,
   294  			`\(\*Executor_cockroach16167\)\.Start\(.* \[sync\.RWMutex\.Lock\]`,
   295  		),
   296  		makeFlakyTest("Cockroach18101",
   297  			`restore_cockroach18101\.func1\(.* \[chan send\]`,
   298  		),
   299  		makeTest("Cockroach24808",
   300  			`Cockroach24808\.func2\(.* \[chan send\]`,
   301  		),
   302  		makeTest("Cockroach25456",
   303  			`Cockroach25456\.func2\(.* \[chan receive\]`,
   304  		),
   305  		makeTest("Cockroach35073",
   306  			`Cockroach35073\.func2.1\(.* \[chan send\]`,
   307  			`Cockroach35073\.func2\(.* \[chan send\]`,
   308  		),
   309  		makeTest("Cockroach35931",
   310  			`Cockroach35931\.func2\(.* \[chan send\]`,
   311  		),
   312  		makeTest("Etcd5509",
   313  			`Etcd5509\.func2\(.* \[sync\.RWMutex\.Lock\]`,
   314  		),
   315  		makeTest("Etcd6708",
   316  			`Etcd6708\.func2\(.* \[sync\.RWMutex\.RLock\]`,
   317  		),
   318  		makeFlakyTest("Etcd6857",
   319  			`\(\*node_etcd6857\)\.Status\(.* \[chan send\]`,
   320  		),
   321  		makeFlakyTest("Etcd6873",
   322  			`\(\*watchBroadcasts_etcd6873\)\.stop\(.* \[chan receive\]`,
   323  			`newWatchBroadcasts_etcd6873\.func1\(.* \[sync\.Mutex\.Lock\]`,
   324  		),
   325  		makeFlakyTest("Etcd7492",
   326  			`Etcd7492\.func2\(.* \[sync\.WaitGroup\.Wait\]`,
   327  			`Etcd7492\.func2\.1\(.* \[chan send\]`,
   328  			`\(\*simpleTokenTTLKeeper_etcd7492\)\.run\(.* \[sync\.Mutex\.Lock\]`,
   329  		),
   330  		makeFlakyTest("Etcd7902",
   331  			`doRounds_etcd7902\.func1\(.* \[chan receive\]`,
   332  			`doRounds_etcd7902\.func1\(.* \[sync\.Mutex\.Lock\]`,
   333  			`runElectionFunc_etcd7902\(.* \[sync\.WaitGroup\.Wait\]`,
   334  		),
   335  		makeTest("Etcd10492",
   336  			`Etcd10492\.func2\(.* \[sync\.Mutex\.Lock\]`,
   337  		),
   338  		makeTest("Grpc660",
   339  			`\(\*benchmarkClient_grpc660\)\.doCloseLoopUnary\.func1\(.* \[chan send\]`,
   340  		),
   341  		makeFlakyTest("Grpc795",
   342  			`\(\*Server_grpc795\)\.Serve\(.* \[sync\.Mutex\.Lock\]`,
   343  			`testServerGracefulStopIdempotent_grpc795\(.* \[sync\.Mutex\.Lock\]`,
   344  		),
   345  		makeTest("Grpc862",
   346  			`DialContext_grpc862\.func2\(.* \[chan receive\]`),
   347  		makeTest("Grpc1275",
   348  			`testInflightStreamClosing_grpc1275\.func1\(.* \[chan receive\]`),
   349  		makeTest("Grpc1424",
   350  			`DialContext_grpc1424\.func1\(.* \[chan receive\]`),
   351  		makeFlakyTest("Grpc1460",
   352  			`\(\*http2Client_grpc1460\)\.keepalive\(.* \[chan receive\]`,
   353  			`\(\*http2Client_grpc1460\)\.NewStream\(.* \[sync\.Mutex\.Lock\]`,
   354  		),
   355  		makeFlakyTest("Grpc3017",
   356  			// grpc/3017 involves a goroutine leak that also simultaneously engages many GC assists.
   357  			`Grpc3017\.func2\(.* \[chan receive\]`,
   358  			`Grpc3017\.func2\.1\(.* \[sync\.Mutex\.Lock\]`,
   359  			`\(\*lbCacheClientConn_grpc3017\)\.RemoveSubConn\.func1\(.* \[sync\.Mutex\.Lock\]`,
   360  		),
   361  		makeFlakyTest("Hugo3251",
   362  			`Hugo3251\.func2\(.* \[sync\.WaitGroup\.Wait\]`,
   363  			`Hugo3251\.func2\.1\(.* \[sync\.Mutex\.Lock\]`,
   364  			`Hugo3251\.func2\.1\(.* \[sync\.RWMutex\.RLock\]`,
   365  		),
   366  		makeFlakyTest("Hugo5379",
   367  			`\(\*Page_hugo5379\)\.initContent\.func1\.1\(.* \[sync\.Mutex\.Lock\]`,
   368  			`pageRenderer_hugo5379\(.* \[sync\.Mutex\.Lock\]`,
   369  			`Hugo5379\.func2\(.* \[sync\.WaitGroup\.Wait\]`,
   370  		),
   371  		makeFlakyTest("Istio16224",
   372  			`Istio16224\.func2\(.* \[sync\.Mutex\.Lock\]`,
   373  			`\(\*controller_istio16224\)\.Run\(.* \[chan send\]`,
   374  			`\(\*controller_istio16224\)\.Run\(.* \[chan receive\]`,
   375  		),
   376  		makeFlakyTest("Istio17860",
   377  			`\(\*agent_istio17860\)\.runWait\(.* \[chan send\]`,
   378  		),
   379  		makeFlakyTest("Istio18454",
   380  			`\(\*Worker_istio18454\)\.Start\.func1\(.* \[chan receive\]`,
   381  			`\(\*Worker_istio18454\)\.Start\.func1\(.* \[chan send\]`,
   382  		),
   383  		// NOTE(vsaioc):
   384  		// Kubernetes/1321 is excluded due to a race condition in the original program
   385  		// that may, in extremely rare cases, lead to nil pointer dereference crashes.
   386  		// (Reproducible even with regular GC). Only kept here for posterity.
   387  		//
   388  		// makeTest(testCase{name: "Kubernetes1321"},
   389  		// 	`NewMux_kubernetes1321\.gowrap1\(.* \[chan send\]`,
   390  		// 	`testMuxWatcherClose_kubernetes1321\(.* \[sync\.Mutex\.Lock\]`),
   391  		makeTest("Kubernetes5316",
   392  			`finishRequest_kubernetes5316\.func1\(.* \[chan send\]`,
   393  		),
   394  		makeFlakyTest("Kubernetes6632",
   395  			`\(\*idleAwareFramer_kubernetes6632\)\.monitor\(.* \[sync\.Mutex\.Lock\]`,
   396  			`\(\*idleAwareFramer_kubernetes6632\)\.WriteFrame\(.* \[chan send\]`,
   397  		),
   398  		makeFlakyTest("Kubernetes10182",
   399  			`\(\*statusManager_kubernetes10182\)\.Start\.func1\(.* \[sync\.Mutex\.Lock\]`,
   400  			`\(\*statusManager_kubernetes10182\)\.SetPodStatus\(.* \[chan send\]`,
   401  		),
   402  		makeFlakyTest("Kubernetes11298",
   403  			`After_kubernetes11298\.func1\(.* \[chan receive\]`,
   404  			`After_kubernetes11298\.func1\(.* \[sync\.Cond\.Wait\]`,
   405  			`Kubernetes11298\.func2\(.* \[chan receive\]`,
   406  		),
   407  		makeFlakyTest("Kubernetes13135",
   408  			`Util_kubernetes13135\(.* \[sync\.Mutex\.Lock\]`,
   409  			`\(\*WatchCache_kubernetes13135\)\.Add\(.* \[sync\.Mutex\.Lock\]`,
   410  		),
   411  		makeTest("Kubernetes25331",
   412  			`\(\*watchChan_kubernetes25331\)\.run\(.* \[chan send\]`,
   413  		),
   414  		makeFlakyTest("Kubernetes26980",
   415  			`Kubernetes26980\.func2\(.* \[chan receive\]`,
   416  			`Kubernetes26980\.func2\.1\(.* \[sync\.Mutex\.Lock\]`,
   417  			`\(\*processorListener_kubernetes26980\)\.pop\(.* \[chan receive\]`,
   418  		),
   419  		makeFlakyTest("Kubernetes30872",
   420  			`\(\*DelayingDeliverer_kubernetes30872\)\.StartWithHandler\.func1\(.* \[sync\.Mutex\.Lock\]`,
   421  			`\(\*Controller_kubernetes30872\)\.Run\(.* \[sync\.Mutex\.Lock\]`,
   422  			`\(\*NamespaceController_kubernetes30872\)\.Run\.func1\(.* \[sync\.Mutex\.Lock\]`,
   423  		),
   424  		makeTest("Kubernetes38669",
   425  			`\(\*cacheWatcher_kubernetes38669\)\.process\(.* \[chan send\]`,
   426  		),
   427  		makeFlakyTest("Kubernetes58107",
   428  			`\(\*ResourceQuotaController_kubernetes58107\)\.worker\(.* \[sync\.Cond\.Wait\]`,
   429  			`\(\*ResourceQuotaController_kubernetes58107\)\.worker\(.* \[sync\.RWMutex\.RLock\]`,
   430  			`\(\*ResourceQuotaController_kubernetes58107\)\.Sync\(.* \[sync\.RWMutex\.Lock\]`,
   431  		),
   432  		makeFlakyTest("Kubernetes62464",
   433  			`\(\*manager_kubernetes62464\)\.reconcileState\(.* \[sync\.RWMutex\.RLock\]`,
   434  			`\(\*staticPolicy_kubernetes62464\)\.RemoveContainer\(.* \[sync\.(RW)?Mutex\.Lock\]`,
   435  		),
   436  		makeFlakyTest("Kubernetes70277",
   437  			`Kubernetes70277\.func2\(.* \[chan receive\]`,
   438  		),
   439  		makeFlakyTest("Moby4951",
   440  			`\(\*DeviceSet_moby4951\)\.DeleteDevice\(.* \[sync\.Mutex\.Lock\]`,
   441  		),
   442  		makeTest("Moby7559",
   443  			`\(\*UDPProxy_moby7559\)\.Run\(.* \[sync\.Mutex\.Lock\]`,
   444  		),
   445  		makeTest("Moby17176",
   446  			`testDevmapperLockReleasedDeviceDeletion_moby17176\.func1\(.* \[sync\.Mutex\.Lock\]`,
   447  		),
   448  		makeFlakyTest("Moby21233",
   449  			`\(\*Transfer_moby21233\)\.Watch\.func1\(.* \[chan send\]`,
   450  			`\(\*Transfer_moby21233\)\.Watch\.func1\(.* \[select\]`,
   451  			`testTransfer_moby21233\(.* \[chan receive\]`,
   452  		),
   453  		makeTest("Moby25348",
   454  			`\(\*Manager_moby25348\)\.init\(.* \[sync\.WaitGroup\.Wait\]`,
   455  		),
   456  		makeFlakyTest("Moby27782",
   457  			`\(\*JSONFileLogger_moby27782\)\.readLogs\(.* \[sync\.Cond\.Wait\]`,
   458  			`\(\*Watcher_moby27782\)\.readEvents\(.* \[select\]`,
   459  		),
   460  		makeFlakyTest("Moby28462",
   461  			`monitor_moby28462\(.* \[sync\.Mutex\.Lock\]`,
   462  			`\(\*Daemon_moby28462\)\.StateChanged\(.* \[chan send\]`,
   463  		),
   464  		makeTest("Moby30408",
   465  			`Moby30408\.func2\(.* \[chan receive\]`,
   466  			`testActive_moby30408\.func1\(.* \[sync\.Cond\.Wait\]`,
   467  		),
   468  		makeFlakyTest("Moby33781",
   469  			`monitor_moby33781\.func1\(.* \[chan send\]`,
   470  		),
   471  		makeFlakyTest("Moby36114",
   472  			`\(\*serviceVM_moby36114\)\.hotAddVHDsAtStart\(.* \[sync\.Mutex\.Lock\]`,
   473  		),
   474  		makeFlakyTest("Serving2137",
   475  			`\(\*Breaker_serving2137\)\.concurrentRequest\.func1\(.* \[chan send\]`,
   476  			`\(\*Breaker_serving2137\)\.concurrentRequest\.func1\(.* \[sync\.Mutex\.Lock\]`,
   477  			`Serving2137\.func2\(.* \[chan receive\]`,
   478  		),
   479  		makeTest("Syncthing4829",
   480  			`Syncthing4829\.func2\(.* \[sync\.RWMutex\.RLock\]`,
   481  		),
   482  		makeTest("Syncthing5795",
   483  			`\(\*rawConnection_syncthing5795\)\.dispatcherLoop\(.* \[chan receive\]`,
   484  			`Syncthing5795\.func2.* \[chan receive\]`,
   485  		),
   486  	}
   487  
   488  	// Combine all test cases into a single list.
   489  	testCases := append(microTests, stressTestCases...)
   490  	testCases = append(testCases, patternTestCases...)
   491  
   492  	runTests := func(exepath string, testCases []testCase) {
   493  
   494  		// Build the test program once.
   495  		exe, err := buildTestProg(t, exepath)
   496  		if err != nil {
   497  			t.Fatal(fmt.Sprintf("building testgoroutineleakprofile failed: %v", err))
   498  		}
   499  
   500  		for _, tcase := range testCases {
   501  			t.Run(tcase.name, func(t *testing.T) {
   502  				t.Parallel()
   503  
   504  				cmdEnv := []string{
   505  					"GODEBUG=asyncpreemptoff=1",
   506  					"GOEXPERIMENT=goroutineleakprofile",
   507  				}
   508  
   509  				if tcase.simple {
   510  					// If the test is simple, set GOMAXPROCS=1 in order to better
   511  					// control the behavior of the scheduler.
   512  					cmdEnv = append(cmdEnv, "GOMAXPROCS=1")
   513  				}
   514  
   515  				var output string
   516  				for i := 0; i < tcase.repetitions; i++ {
   517  					// Run program for one repetition and get runOutput trace.
   518  					runOutput, err := runBuiltTestProgErr(t, exe, tcase.name, cmdEnv...)
   519  					if len(runOutput) == 0 {
   520  						t.Errorf("Test %s produced no output. Is the goroutine leak profile collected?", tcase.name)
   521  					}
   522  					// Test cases must not end in a non-zero exit code, or otherwise experience a failure to
   523  					// actually execute.
   524  					if err != nil {
   525  						t.Errorf("unexpected failure\noutput:\n%s\n\n", runOutput)
   526  					}
   527  
   528  					output += runOutput + "\n\n"
   529  				}
   530  
   531  				// Extract all the goroutine leaks
   532  				foundLeaks := extractLeaks(output)
   533  
   534  				// If the test case was not expected to produce leaks, but some were reported,
   535  				// stop the test immediately. Zero tolerance policy for false positives.
   536  				if len(tcase.expectedLeaks)+len(tcase.flakyLeaks) == 0 && len(foundLeaks) > 0 {
   537  					t.Errorf("output:\n%s\n\ngoroutines leaks detected in case with no leaks", output)
   538  				}
   539  
   540  				unexpectedLeaks := make([]string, 0, len(foundLeaks))
   541  
   542  				// Parse every leak and check if it is expected (maybe as a flaky leak).
   543  			leaks:
   544  				for _, leak := range foundLeaks {
   545  					// Check if the leak is expected.
   546  					// If it is, check whether it has been encountered before.
   547  					var foundNew bool
   548  					var leakPattern *regexp.Regexp
   549  
   550  					for expectedLeak, ok := range tcase.expectedLeaks {
   551  						if expectedLeak.MatchString(leak) {
   552  							if !ok {
   553  								foundNew = true
   554  							}
   555  
   556  							leakPattern = expectedLeak
   557  							break
   558  						}
   559  					}
   560  
   561  					if foundNew {
   562  						// Only bother writing if we found a new leak.
   563  						tcase.expectedLeaks[leakPattern] = true
   564  					}
   565  
   566  					if leakPattern == nil {
   567  						// We are dealing with a leak not marked as expected.
   568  						// Check if it is a flaky leak.
   569  						for flakyLeak := range tcase.flakyLeaks {
   570  							if flakyLeak.MatchString(leak) {
   571  								// The leak is flaky. Carry on to the next line.
   572  								continue leaks
   573  							}
   574  						}
   575  
   576  						unexpectedLeaks = append(unexpectedLeaks, leak)
   577  					}
   578  				}
   579  
   580  				missingLeakStrs := make([]string, 0, len(tcase.expectedLeaks))
   581  				for expectedLeak, found := range tcase.expectedLeaks {
   582  					if !found {
   583  						missingLeakStrs = append(missingLeakStrs, expectedLeak.String())
   584  					}
   585  				}
   586  
   587  				var errors []error
   588  				if len(unexpectedLeaks) > 0 {
   589  					errors = append(errors, fmt.Errorf("unexpected goroutine leaks:\n%s\n", strings.Join(unexpectedLeaks, "\n")))
   590  				}
   591  				if len(missingLeakStrs) > 0 {
   592  					errors = append(errors, fmt.Errorf("missing expected leaks:\n%s\n", strings.Join(missingLeakStrs, ", ")))
   593  				}
   594  				if len(errors) > 0 {
   595  					t.Fatalf("Failed with the following errors:\n%s\n\noutput:\n%s", errors, output)
   596  				}
   597  			})
   598  		}
   599  	}
   600  
   601  	runTests("testgoroutineleakprofile", testCases)
   602  	runTests("testgoroutineleakprofile/goker", gokerTestCases)
   603  }
   604  

View as plain text