Source file src/runtime/malloc_stubs.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file contains stub functions that are not meant to be called directly,
     6  // but that will be assembled together using the inlining logic in runtime/_mkmalloc
     7  // to produce a full mallocgc function that's specialized for a span class
      8  // or a specific size in the case of the tiny allocator.
     9  //
    10  // To generate the specialized mallocgc functions, do 'go run .' inside runtime/_mkmalloc.
    11  //
    12  // To assemble a mallocgc function, the mallocStub function is cloned, and the call to
    13  // inlinedMalloc is replaced with the inlined body of smallScanNoHeaderStub,
    14  // smallNoScanStub or tinyStub, depending on the parameters being specialized.
    15  //
    16  // The size_ (for the tiny case) and elemsize_, sizeclass_, and noscanint_ (for all three cases)
    17  // identifiers are replaced with the value of the parameter in the specialized case.
    18  // The nextFreeFastStub, nextFreeFastTiny, heapSetTypeNoHeaderStub, and writeHeapBitsSmallStub
    19  // functions are also inlined by _mkmalloc.
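         //
         // For illustration only (a simplified sketch of the idea, not actual generator
         // output): to specialize for, say, size class 2 (16-byte elements) in the noscan
         // case, the generator conceptually clones mallocStub, replaces the call
         //
         //	x, elemsize := inlinedMalloc(size, typ, needzero)
         //
         // with the body of smallNoScanStub, and substitutes 16 for elemsize_, 2 for
         // sizeclass_, and 1 for noscanint_, producing a function such as
         // mallocgcSmallNoScanSC2 (referenced below).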
    20  
    21  package runtime
    22  
    23  import (
    24  	"internal/goarch"
    25  	"internal/goexperiment"
    26  	"internal/runtime/sys"
    27  	"unsafe"
    28  )
    29  
    30  // These identifiers will all be replaced by the inliner. So their values don't
    31  // really matter: they just need to be set so that the stub functions, which
     32  // will never be used on their own, can compile. elemsize_ can't be set to
    33  // zero because we divide by it in nextFreeFastTiny, and the compiler would
    34  // complain about a division by zero. Its replaced value will always be greater
    35  // than zero.
    36  const elemsize_ = 8
    37  const sizeclass_ = 0
    38  const noscanint_ = 0
    39  const size_ = 0
    40  const isTiny_ = false
    41  
    42  func malloc0(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
    43  	if doubleCheckMalloc {
    44  		if gcphase == _GCmarktermination {
    45  			throw("mallocgc called with gcphase == _GCmarktermination")
    46  		}
    47  	}
    48  
    49  	// Short-circuit zero-sized allocation requests.
    50  	return unsafe.Pointer(&zerobase)
    51  }
    52  
    53  func mallocPanic(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
    54  	panic("not defined for sizeclass")
    55  }
    56  
     57  // WARNING: mallocStub does not do any work for sanitizers, so callers need
     58  // to steer out of this code path early if sanitizers are enabled.
    59  func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
    60  
    61  	if isTiny_ {
     62  		// Secret data must avoid the tiny allocator, since it might keep
     63  		// co-located values alive longer and prevent timely zeroing.
     64  		//
     65  		// Call directly into the noscan allocator instead.
     66  		// See go.dev/issue/76356.
    67  		gp := getg()
    68  		if goexperiment.RuntimeSecret && gp.secret > 0 {
    69  			return mallocgcSmallNoScanSC2(size, typ, needzero)
    70  		}
    71  	}
    72  	if doubleCheckMalloc {
    73  		if gcphase == _GCmarktermination {
    74  			throw("mallocgc called with gcphase == _GCmarktermination")
    75  		}
    76  	}
    77  
    78  	// It's possible for any malloc to trigger sweeping, which may in
    79  	// turn queue finalizers. Record this dynamic lock edge.
    80  	// N.B. Compiled away if lockrank experiment is not enabled.
    81  	lockRankMayQueueFinalizer()
    82  
    83  	// Pre-malloc debug hooks.
    84  	if debug.malloc {
    85  		if x := preMallocgcDebug(size, typ); x != nil {
    86  			return x
    87  		}
    88  	}
    89  
    90  	// Assist the GC if needed. (On the reuse path, we currently compensate for this;
    91  	// changes here might require changes there.)
    92  	if gcBlackenEnabled != 0 {
    93  		deductAssistCredit(size)
    94  	}
    95  
    96  	// Actually do the allocation.
    97  	x, elemsize := inlinedMalloc(size, typ, needzero)
    98  
    99  	if !isTiny_ {
   100  		gp := getg()
   101  		if goexperiment.RuntimeSecret && gp.secret > 0 {
   102  			// Mark any object allocated while in secret mode as secret.
   103  			// This ensures we zero it immediately when freeing it.
   104  			addSecret(x, size)
   105  		}
   106  	}
   107  
    108  	// Notify valgrind, if enabled.
    109  	// Unlike the other sanitizers, valgrind instrumentation is done here in the
    110  	// runtime so that the compiler does not need to know about valgrind.
   111  	if valgrindenabled {
   112  		valgrindMalloc(x, size)
   113  	}
   114  
   115  	// Adjust our GC assist debt to account for internal fragmentation.
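         	// The requested size was already charged via deductAssistCredit above;
         	// here we charge the remaining elemsize - size bytes of the allocation slot.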
   116  	if gcBlackenEnabled != 0 && elemsize != 0 {
   117  		if assistG := getg().m.curg; assistG != nil {
   118  			assistG.gcAssistBytes -= int64(elemsize - size)
   119  		}
   120  	}
   121  
   122  	// Post-malloc debug hooks.
   123  	if debug.malloc {
   124  		postMallocgcDebug(x, elemsize, typ)
   125  	}
   126  	return x
   127  }
   128  
   129  // inlinedMalloc will never be called. It is defined just so that the compiler can compile
   130  // the mallocStub function, which will also never be called, but instead used as a template
   131  // to generate a size-specialized malloc function. The call to inlinedMalloc in mallocStub
   132  // will be replaced with the inlined body of smallScanNoHeaderStub, smallNoScanStub, or tinyStub
   133  // when generating the size-specialized malloc function. See the comment at the top of this
   134  // file for more information.
   135  func inlinedMalloc(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
   136  	return unsafe.Pointer(uintptr(0)), 0
   137  }
   138  
   139  func doubleCheckSmallScanNoHeader(size uintptr, typ *_type, mp *m) {
   140  	if mp.mallocing != 0 {
   141  		throw("malloc deadlock")
   142  	}
   143  	if mp.gsignal == getg() {
   144  		throw("malloc during signal")
   145  	}
   146  	if typ == nil || !typ.Pointers() {
   147  		throw("noscan allocated in scan-only path")
   148  	}
   149  	if !heapBitsInSpan(size) {
    150  		throw("heap bits not in span for non-header-only path")
   151  	}
   152  }
   153  
   154  func smallScanNoHeaderStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
   155  	const sizeclass = sizeclass_
   156  	const elemsize = elemsize_
   157  
   158  	// Set mp.mallocing to keep from being preempted by GC.
   159  	mp := acquirem()
   160  	if doubleCheckMalloc {
   161  		doubleCheckSmallScanNoHeader(size, typ, mp)
   162  	}
   163  	mp.mallocing = 1
   164  
   165  	checkGCTrigger := false
   166  	c := getMCache(mp)
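         	// A span class packs the size class together with a noscan bit:
         	// spc == sizeclass<<1 | noscan, where noscan is 1 when the span's objects
         	// contain no pointers.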
   167  	const spc = spanClass(sizeclass<<1) | spanClass(noscanint_)
   168  	span := c.alloc[spc]
   169  	v := nextFreeFastStub(span)
   170  	if v == 0 {
   171  		v, span, checkGCTrigger = c.nextFree(spc)
   172  	}
   173  	x := unsafe.Pointer(v)
   174  	if span.needzero != 0 {
   175  		memclrNoHeapPointers(x, elemsize)
   176  	}
   177  	if goarch.PtrSize == 8 && sizeclass == 1 {
   178  		// initHeapBits already set the pointer bits for the 8-byte sizeclass
   179  		// on 64-bit platforms.
   180  		c.scanAlloc += 8
   181  	} else {
   182  		dataSize := size // make the inliner happy
   183  		x := uintptr(x)
   184  		scanSize := heapSetTypeNoHeaderStub(x, dataSize, typ, span)
   185  		c.scanAlloc += scanSize
   186  	}
   187  
   188  	// Ensure that the stores above that initialize x to
   189  	// type-safe memory and set the heap bits occur before
   190  	// the caller can make x observable to the garbage
   191  	// collector. Otherwise, on weakly ordered machines,
   192  	// the garbage collector could follow a pointer to x,
   193  	// but see uninitialized memory or stale heap bits.
   194  	publicationBarrier()
   195  
   196  	if writeBarrier.enabled {
   197  		// Allocate black during GC.
   198  		// All slots hold nil so no scanning is needed.
   199  		// This may be racing with GC so do it atomically if there can be
   200  		// a race marking the bit.
   201  		gcmarknewobject(span, uintptr(x))
   202  	} else {
   203  		// Track the last free index before the mark phase. This field
   204  		// is only used by the garbage collector. During the mark phase
   205  		// this is used by the conservative scanner to filter out objects
   206  		// that are both free and recently-allocated. It's safe to do that
   207  		// because we allocate-black if the GC is enabled. The conservative
   208  		// scanner produces pointers out of thin air, so without additional
   209  		// synchronization it might otherwise observe a partially-initialized
   210  		// object, which could crash the program.
   211  		span.freeIndexForScan = span.freeindex
   212  	}
   213  
   214  	// Note cache c only valid while m acquired; see #47302
   215  	//
   216  	// N.B. Use the full size because that matches how the GC
   217  	// will update the mem profile on the "free" side.
   218  	//
   219  	// TODO(mknyszek): We should really count the header as part
   220  	// of gc_sys or something. The code below just pretends it is
   221  	// internal fragmentation and matches the GC's accounting by
   222  	// using the whole allocation slot.
   223  	c.nextSample -= int64(elemsize)
   224  	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
   225  		profilealloc(mp, x, elemsize)
   226  	}
   227  	mp.mallocing = 0
   228  	releasem(mp)
   229  
   230  	if checkGCTrigger {
   231  		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
   232  			gcStart(t)
   233  		}
   234  	}
   235  
   236  	return x, elemsize
   237  }
   238  
   239  func doubleCheckSmallNoScan(typ *_type, mp *m) {
   240  	if mp.mallocing != 0 {
   241  		throw("malloc deadlock")
   242  	}
   243  	if mp.gsignal == getg() {
   244  		throw("malloc during signal")
   245  	}
   246  	if typ != nil && typ.Pointers() {
   247  		throw("expected noscan type for noscan alloc")
   248  	}
   249  }
   250  
   251  func smallNoScanStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
    252  	// TODO(matloob): Add functionality to mkmalloc to allow us to inline non-constant
    253  	// sizeclass_ and elemsize_ values (instead, set them to the expressions that look up the
    254  	// size class and elemsize). We'd also need to teach mkmalloc that values derived from these
    255  	// (specifically spc below) should turn into vars. This would allow us to generate
    256  	// mallocgcSmallNoScan itself, so that its code cannot diverge from the generated functions.
   257  	const sizeclass = sizeclass_
   258  	const elemsize = elemsize_
   259  
   260  	// Set mp.mallocing to keep from being preempted by GC.
   261  	mp := acquirem()
   262  	if doubleCheckMalloc {
   263  		doubleCheckSmallNoScan(typ, mp)
   264  	}
   265  	mp.mallocing = 1
   266  
   267  	checkGCTrigger := false
   268  	c := getMCache(mp)
   269  	const spc = spanClass(sizeclass<<1) | spanClass(noscanint_)
   270  	span := c.alloc[spc]
   271  
   272  	// First, check for a reusable object.
   273  	if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
   274  		// We have a reusable object, use it.
   275  		v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
   276  		mp.mallocing = 0
   277  		releasem(mp)
   278  
   279  		// TODO(thepudds): note that the generated return path is essentially duplicated
   280  		// by the generator. For example, see the two postMallocgcDebug calls and
   281  		// related duplicated code on the return path currently in the generated
    282  	// mallocgcSmallNoScanSC2 function. One set of those corresponds to this
   283  		// return here. We might be able to de-duplicate the generated return path
   284  		// by updating the generator, perhaps by jumping to a shared return or similar.
   285  		return v, elemsize
   286  	}
   287  
   288  	v := nextFreeFastStub(span)
   289  	if v == 0 {
   290  		v, span, checkGCTrigger = c.nextFree(spc)
   291  	}
   292  	x := unsafe.Pointer(v)
   293  	if needzero && span.needzero != 0 {
   294  		memclrNoHeapPointers(x, elemsize)
   295  	}
   296  
   297  	// Ensure that the stores above that initialize x to
   298  	// type-safe memory and set the heap bits occur before
   299  	// the caller can make x observable to the garbage
   300  	// collector. Otherwise, on weakly ordered machines,
   301  	// the garbage collector could follow a pointer to x,
   302  	// but see uninitialized memory or stale heap bits.
   303  	publicationBarrier()
   304  
   305  	if writeBarrier.enabled {
   306  		// Allocate black during GC.
   307  		// All slots hold nil so no scanning is needed.
   308  		// This may be racing with GC so do it atomically if there can be
   309  		// a race marking the bit.
   310  		gcmarknewobject(span, uintptr(x))
   311  	} else {
   312  		// Track the last free index before the mark phase. This field
   313  		// is only used by the garbage collector. During the mark phase
   314  		// this is used by the conservative scanner to filter out objects
   315  		// that are both free and recently-allocated. It's safe to do that
   316  		// because we allocate-black if the GC is enabled. The conservative
   317  		// scanner produces pointers out of thin air, so without additional
   318  		// synchronization it might otherwise observe a partially-initialized
   319  		// object, which could crash the program.
   320  		span.freeIndexForScan = span.freeindex
   321  	}
   322  
   323  	// Note cache c only valid while m acquired; see #47302
   324  	//
   325  	// N.B. Use the full size because that matches how the GC
   326  	// will update the mem profile on the "free" side.
   327  	//
   328  	// TODO(mknyszek): We should really count the header as part
   329  	// of gc_sys or something. The code below just pretends it is
   330  	// internal fragmentation and matches the GC's accounting by
   331  	// using the whole allocation slot.
   332  	c.nextSample -= int64(elemsize)
   333  	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
   334  		profilealloc(mp, x, elemsize)
   335  	}
   336  	mp.mallocing = 0
   337  	releasem(mp)
   338  
   339  	if checkGCTrigger {
   340  		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
   341  			gcStart(t)
   342  		}
   343  	}
   344  	return x, elemsize
   345  }
   346  
   347  func doubleCheckTiny(size uintptr, typ *_type, mp *m) {
   348  	if mp.mallocing != 0 {
   349  		throw("malloc deadlock")
   350  	}
   351  	if mp.gsignal == getg() {
   352  		throw("malloc during signal")
   353  	}
   354  	if typ != nil && typ.Pointers() {
   355  		throw("expected noscan for tiny alloc")
   356  	}
   357  }
   358  
   359  func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
   360  	const constsize = size_
   361  	const elemsize = elemsize_
   362  
   363  	// Set mp.mallocing to keep from being preempted by GC.
   364  	mp := acquirem()
   365  	if doubleCheckMalloc {
   366  		doubleCheckTiny(constsize, typ, mp)
   367  	}
   368  	mp.mallocing = 1
   369  
   370  	// Tiny allocator.
   371  	//
    372  	// The tiny allocator combines several tiny allocation requests
    373  	// into a single memory block. The resulting memory block
    374  	// is freed when all subobjects are unreachable. The subobjects
    375  	// must be noscan (have no pointers); this ensures that
    376  	// the amount of potentially wasted memory is bounded.
    377  	//
    378  	// The size of the memory block used for combining (maxTinySize) is tunable.
    379  	// The current setting is 16 bytes, which corresponds to 2x worst-case memory
    380  	// wastage (when all but one of the subobjects are unreachable).
    381  	// 8 bytes would result in no wastage at all, but provides fewer
    382  	// opportunities for combining.
    383  	// 32 bytes provides more opportunities for combining,
    384  	// but can lead to 4x worst-case wastage.
    385  	// The best-case saving is 8x regardless of block size.
    386  	//
    387  	// Objects obtained from the tiny allocator must not be freed explicitly.
    388  	// So when an object will be freed explicitly, we ensure that
    389  	// its size >= maxTinySize.
    390  	//
    391  	// SetFinalizer has a special case for objects potentially coming
    392  	// from the tiny allocator; in such a case it allows setting finalizers
    393  	// for an inner byte of a memory block.
    394  	//
    395  	// The main targets of the tiny allocator are small strings and
    396  	// standalone escaping variables. On a json benchmark
    397  	// the allocator reduces the number of allocations by ~12% and
    398  	// reduces heap size by ~20%.
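         	//
         	// For example (illustrative sizes): three successive 5-byte noscan
         	// allocations can share one 16-byte tiny block at offsets 0, 5, and 10
         	// (a 5-byte size needs no extra alignment below); the block is freed only
         	// once all three objects are unreachable.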
   399  	c := getMCache(mp)
   400  	off := c.tinyoffset
   401  	// Align tiny pointer for required (conservative) alignment.
   402  	if constsize&7 == 0 {
   403  		off = alignUp(off, 8)
   404  	} else if goarch.PtrSize == 4 && constsize == 12 {
   405  		// Conservatively align 12-byte objects to 8 bytes on 32-bit
   406  		// systems so that objects whose first field is a 64-bit
   407  		// value is aligned to 8 bytes and does not cause a fault on
   408  		// atomic access. See issue 37262.
   409  		// TODO(mknyszek): Remove this workaround if/when issue 36606
   410  		// is resolved.
   411  		off = alignUp(off, 8)
   412  	} else if constsize&3 == 0 {
   413  		off = alignUp(off, 4)
   414  	} else if constsize&1 == 0 {
   415  		off = alignUp(off, 2)
   416  	}
   417  	if off+constsize <= maxTinySize && c.tiny != 0 {
   418  		// The object fits into existing tiny block.
   419  		x := unsafe.Pointer(c.tiny + off)
   420  		c.tinyoffset = off + constsize
   421  		c.tinyAllocs++
   422  		mp.mallocing = 0
   423  		releasem(mp)
   424  		return x, 0
   425  	}
   426  	// Allocate a new maxTinySize block.
   427  	checkGCTrigger := false
   428  	span := c.alloc[tinySpanClass]
   429  	v := nextFreeFastTiny(span)
   430  	if v == 0 {
   431  		v, span, checkGCTrigger = c.nextFree(tinySpanClass)
   432  	}
   433  	x := unsafe.Pointer(v)
   434  	(*[2]uint64)(x)[0] = 0 // Always zero
   435  	(*[2]uint64)(x)[1] = 0
   436  	// See if we need to replace the existing tiny block with the new one
   437  	// based on amount of remaining free space.
   438  	if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) {
   439  		// Note: disabled when race detector is on, see comment near end of this function.
   440  		c.tiny = uintptr(x)
   441  		c.tinyoffset = constsize
   442  	}
   443  
   444  	// Ensure that the stores above that initialize x to
   445  	// type-safe memory and set the heap bits occur before
   446  	// the caller can make x observable to the garbage
   447  	// collector. Otherwise, on weakly ordered machines,
   448  	// the garbage collector could follow a pointer to x,
   449  	// but see uninitialized memory or stale heap bits.
   450  	publicationBarrier()
   451  
   452  	if writeBarrier.enabled {
   453  		// Allocate black during GC.
   454  		// All slots hold nil so no scanning is needed.
   455  		// This may be racing with GC so do it atomically if there can be
   456  		// a race marking the bit.
   457  		gcmarknewobject(span, uintptr(x))
   458  	} else {
   459  		// Track the last free index before the mark phase. This field
   460  		// is only used by the garbage collector. During the mark phase
   461  		// this is used by the conservative scanner to filter out objects
   462  		// that are both free and recently-allocated. It's safe to do that
   463  		// because we allocate-black if the GC is enabled. The conservative
   464  		// scanner produces pointers out of thin air, so without additional
   465  		// synchronization it might otherwise observe a partially-initialized
   466  		// object, which could crash the program.
   467  		span.freeIndexForScan = span.freeindex
   468  	}
   469  
   470  	// Note cache c only valid while m acquired; see #47302
   471  	//
   472  	// N.B. Use the full size because that matches how the GC
   473  	// will update the mem profile on the "free" side.
   474  	//
   475  	// TODO(mknyszek): We should really count the header as part
   476  	// of gc_sys or something. The code below just pretends it is
   477  	// internal fragmentation and matches the GC's accounting by
   478  	// using the whole allocation slot.
   479  	c.nextSample -= int64(elemsize)
   480  	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
   481  		profilealloc(mp, x, elemsize)
   482  	}
   483  	mp.mallocing = 0
   484  	releasem(mp)
   485  
   486  	if checkGCTrigger {
   487  		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
   488  			gcStart(t)
   489  		}
   490  	}
   491  
   492  	if raceenabled {
   493  		// Pad tinysize allocations so they are aligned with the end
   494  		// of the tinyalloc region. This ensures that any arithmetic
   495  		// that goes off the top end of the object will be detectable
   496  		// by checkptr (issue 38872).
   497  		// Note that we disable tinyalloc when raceenabled for this to work.
   498  		// TODO: This padding is only performed when the race detector
   499  		// is enabled. It would be nice to enable it if any package
   500  		// was compiled with checkptr, but there's no easy way to
   501  		// detect that (especially at compile time).
   502  		// TODO: enable this padding for all allocations, not just
   503  		// tinyalloc ones. It's tricky because of pointer maps.
   504  		// Maybe just all noscan objects?
   505  		x = add(x, elemsize-constsize)
   506  	}
   507  	return x, elemsize
   508  }
   509  
    510  // TODO(matloob): Should we let the Go compiler inline this instead of using mkmalloc?
    511  // We won't be able to use elemsize_, but that's probably OK.
   512  func nextFreeFastTiny(span *mspan) gclinkptr {
   513  	const nbytes = 8192
   514  	const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / elemsize_)
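         	// Because the generator replaces elemsize_ with a constant, nelems is a
         	// compile-time constant here (computed for an 8192-byte span), unlike the
         	// span.nelems field used in nextFreeFastStub below.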
   515  	var nextFreeFastResult gclinkptr
   516  	if span.allocCache != 0 {
   517  		theBit := sys.TrailingZeros64(span.allocCache) // Is there a free object in the allocCache?
   518  		result := span.freeindex + uint16(theBit)
   519  		if result < nelems {
   520  			freeidx := result + 1
   521  			if !(freeidx%64 == 0 && freeidx != nelems) {
   522  				span.allocCache >>= uint(theBit + 1)
   523  				span.freeindex = freeidx
   524  				span.allocCount++
   525  				nextFreeFastResult = gclinkptr(uintptr(result)*elemsize_ + span.base())
   526  			}
   527  		}
   528  	}
   529  	return nextFreeFastResult
   530  }
   531  
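         // For example (illustrative values): if span.freeindex is 12 and span.allocCache
         // is ...1000 in binary, TrailingZeros64 returns 3 and the next free object has
         // index 15; the fast path shifts the cache right by 4, advances freeindex to 16,
         // and returns span.base() + 15*elemsize_. The freeidx%64 check falls back to the
         // slow path (c.nextFree) when the 64-bit cache window is exhausted, unless this
         // was the span's last object.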
   532  func nextFreeFastStub(span *mspan) gclinkptr {
   533  	var nextFreeFastResult gclinkptr
   534  	if span.allocCache != 0 {
   535  		theBit := sys.TrailingZeros64(span.allocCache) // Is there a free object in the allocCache?
   536  		result := span.freeindex + uint16(theBit)
   537  		if result < span.nelems {
   538  			freeidx := result + 1
   539  			if !(freeidx%64 == 0 && freeidx != span.nelems) {
   540  				span.allocCache >>= uint(theBit + 1)
   541  				span.freeindex = freeidx
   542  				span.allocCount++
   543  				nextFreeFastResult = gclinkptr(uintptr(result)*elemsize_ + span.base())
   544  			}
   545  		}
   546  	}
   547  	return nextFreeFastResult
   548  }
   549  
   550  func heapSetTypeNoHeaderStub(x, dataSize uintptr, typ *_type, span *mspan) uintptr {
   551  	if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(elemsize_)) {
   552  		throw("tried to write heap bits, but no heap bits in span")
   553  	}
   554  	scanSize := writeHeapBitsSmallStub(span, x, dataSize, typ)
   555  	if doubleCheckHeapSetType {
   556  		doubleCheckHeapType(x, dataSize, typ, nil, span)
   557  	}
   558  	return scanSize
   559  }
   560  
   561  // writeHeapBitsSmallStub writes the heap bits for small objects whose ptr/scalar data is
   562  // stored as a bitmap at the end of the span.
   563  //
   564  // Assumes dataSize is <= ptrBits*goarch.PtrSize. x must be a pointer into the span.
   565  // heapBitsInSpan(dataSize) must be true. dataSize must be >= typ.Size_.
   566  //
   567  //go:nosplit
   568  func writeHeapBitsSmallStub(span *mspan, x, dataSize uintptr, typ *_type) uintptr {
   569  	// The objects here are always really small, so a single load is sufficient.
   570  	src0 := readUintptr(getGCMask(typ))
   571  
   572  	const elemsize = elemsize_
   573  
   574  	// Create repetitions of the bitmap if we have a small slice backing store.
   575  	scanSize := typ.PtrBytes
   576  	src := src0
   577  	if typ.Size_ == goarch.PtrSize {
   578  		src = (1 << (dataSize / goarch.PtrSize)) - 1
   579  	} else {
   580  		// N.B. We rely on dataSize being an exact multiple of the type size.
    581  		// The alternative is to be defensive and mask out src to the length
    582  		// of dataSize. Relying on the exact multiple saves one masking operation.
   583  		if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 {
   584  			throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_")
   585  		}
   586  		for i := typ.Size_; i < dataSize; i += typ.Size_ {
   587  			src |= src0 << (i / goarch.PtrSize)
   588  			scanSize += typ.Size_
   589  		}
   590  	}
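         	// For example (illustrative values): for a 16-byte element type whose
         	// pointer bitmap src0 is 0b01 (pointer word first, scalar word second), a
         	// 48-byte slice backing store repeats the bitmap at word offsets 2 and 4,
         	// giving src == 0b010101.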
   591  
    592  	// Since we never write more than one uintptr's worth of bits, we do either
    593  	// one or two writes.
   594  	dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize)
   595  	dst := unsafe.Pointer(dstBase)
   596  	o := (x - span.base()) / goarch.PtrSize
   597  	i := o / ptrBits
   598  	j := o % ptrBits
   599  	const bits uintptr = elemsize / goarch.PtrSize
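         	// o is the object's pointer-word offset from the span base, i selects the
         	// bitmap word to write, j is the bit offset within that word, and bits is
         	// the number of bitmap bits covered by one object.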
   600  	// In the if statement below, we have to do two uintptr writes if the bits
   601  	// we need to write straddle across two different memory locations. But if
   602  	// the number of bits we're writing divides evenly into the number of bits
   603  	// in the uintptr we're writing, this can never happen. Since bitsIsPowerOfTwo
   604  	// is a compile-time constant in the generated code, in the case where the size is
   605  	// a power of two less than or equal to ptrBits, the compiler can remove the
   606  	// 'two writes' branch of the if statement and always do only one write without
   607  	// the check.
   608  	const bitsIsPowerOfTwo = bits&(bits-1) == 0
   609  	if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) {
   610  		// Two writes.
   611  		bits0 := ptrBits - j
   612  		bits1 := bits - bits0
   613  		dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize))
   614  		dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize))
   615  		*dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j)
   616  		*dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0)
   617  	} else {
   618  		// One write.
   619  		dst := (*uintptr)(add(dst, i*goarch.PtrSize))
    620  		*dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) // We take the min so this compiles on 32-bit platforms; if bits > ptrBits we always take the other branch.
   621  	}
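         	// For example (illustrative, 64-bit): with elemsize 24, bits == 3 is not a
         	// power of two, so an object whose bit offset j is 62 straddles two bitmap
         	// words (j+bits == 65 > ptrBits) and needs both writes. With elemsize 16,
         	// bits == 2 is a power of two, so every object's bits land in a single word
         	// and the generated code keeps only the one-write branch.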
   622  
   623  	const doubleCheck = false
   624  	if doubleCheck {
   625  		writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ)
   626  	}
   627  	return scanSize
   628  }
   629  
   630  func writeHeapBitsDoubleCheck(span *mspan, x, dataSize, src, src0, i, j, bits uintptr, typ *_type) {
   631  	srcRead := span.heapBitsSmallForAddr(x)
   632  	if srcRead != src {
   633  		print("runtime: x=", hex(x), " i=", i, " j=", j, " bits=", bits, "\n")
   634  		print("runtime: dataSize=", dataSize, " typ.Size_=", typ.Size_, " typ.PtrBytes=", typ.PtrBytes, "\n")
   635  		print("runtime: src0=", hex(src0), " src=", hex(src), " srcRead=", hex(srcRead), "\n")
   636  		throw("bad pointer bits written for small object")
   637  	}
   638  }
   639  
