level2.go

     1  // Copyright 2026 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package flate
     6  
     7  const (
     8  	l2TableBits = 17               // Bits used in level 2 table
     9  	l2TableSize = 1 << l2TableBits // Size of the level 2 table
    10  )
    11  
    12  // Level 2 uses a similar algorithm to level 1, but with a larger table.
    13  type fastEncL2 struct {
    14  	fastGen
    15  	table [l2TableSize]tableEntry
    16  }
    17  
    18  func (e *fastEncL2) encode(dst *tokens, src []byte) {
    19  	const (
    20  		inputMargin            = 12 - 1
    21  		minNonLiteralBlockSize = 1 + 1 + inputMargin
    22  		hashBytes              = 5
    23  	)
    24  
    25  	// Protect against e.cur wraparound.
    26  	for e.cur >= bufferReset {
    27  		if len(e.hist) == 0 {
    28  			clear(e.table[:])
    29  			e.cur = maxMatchOffset
    30  			break
    31  		}
    32  		// Shift down everything in the table that isn't already too far away.
    33  		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
    34  		for i := range e.table[:] {
    35  			v := e.table[i].offset
    36  			if v <= minOff {
    37  				v = 0
    38  			} else {
    39  				v = v - e.cur + maxMatchOffset
    40  			}
    41  			e.table[i].offset = v
    42  		}
    43  		e.cur = maxMatchOffset
    44  	}
    45  
    46  	s := e.addBlock(src)
    47  
    48  	if len(src) < minNonLiteralBlockSize {
    49  		// We do not fill the token table.
    50  		// This will be picked up by caller.
    51  		dst.n = uint16(len(src))
    52  		return
    53  	}
    54  
    55  	// Override src
    56  	src = e.hist
    57  
    58  	// nextEmit is where in src the next emitLiterals should start from.
    59  	nextEmit := s
    60  
    61  	// sLimit is when to stop looking for offset/length copies. The inputMargin
    62  	// lets us use a fast path for emitLiterals in the main loop, while we are
    63  	// looking for copies.
    64  	sLimit := int32(len(src) - inputMargin)
    65  
    66  	cv := loadLE64(src, s)
    67  	for {
    68  		// When should we start skipping if we haven't found matches in a long while.
    69  		const skipLog = 5
    70  		const doEvery = 2
    71  
    72  		nextS := s
    73  		var candidate tableEntry
    74  		for {
    75  			nextHash := hashLen(cv, l2TableBits, hashBytes)
    76  			s = nextS
    77  			nextS = s + doEvery + (s-nextEmit)>>skipLog
    78  			if nextS > sLimit {
    79  				goto emitRemainder
    80  			}
    81  			candidate = e.table[nextHash]
    82  			now := loadLE64(src, nextS)
    83  			e.table[nextHash] = tableEntry{offset: s + e.cur}
    84  			nextHash = hashLen(now, l2TableBits, hashBytes)
    85  
    86  			offset := s - (candidate.offset - e.cur)
    87  			if offset < maxMatchOffset && uint32(cv) == loadLE32(src, candidate.offset-e.cur) {
    88  				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
    89  				break
    90  			}
    91  
    92  			// Do one right away...
    93  			cv = now
    94  			s = nextS
    95  			nextS++
    96  			candidate = e.table[nextHash]
    97  			now >>= 8
    98  			e.table[nextHash] = tableEntry{offset: s + e.cur}
    99  
   100  			offset = s - (candidate.offset - e.cur)
   101  			if offset < maxMatchOffset && uint32(cv) == loadLE32(src, candidate.offset-e.cur) {
   102  				break
   103  			}
   104  			cv = now
   105  		}
   106  
   107  		// A 4-byte match has been found. We'll later see if more than 4 bytes match.
   108  		for {
   109  			// Extend the 4-byte match as long as possible.
   110  			t := candidate.offset - e.cur
   111  			l := e.matchLenLong(int(s+4), int(t+4), src) + 4
   112  
   113  			// Extend backwards
   114  			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
   115  				s--
   116  				t--
   117  				l++
   118  			}
   119  			if nextEmit < s {
   120  				for _, v := range src[nextEmit:s] {
   121  					dst.tokens[dst.n] = token(v)
   122  					dst.litHist[v]++
   123  					dst.n++
   124  				}
   125  			}
   126  
   127  			dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
   128  			s += l
   129  			nextEmit = s
   130  			if nextS >= s {
   131  				s = nextS + 1
   132  			}
   133  
   134  			if s >= sLimit {
   135  				// Index first pair after match end.
   136  				if int(s+l+8) < len(src) {
   137  					cv := loadLE64(src, s)
   138  					e.table[hashLen(cv, l2TableBits, hashBytes)] = tableEntry{offset: s + e.cur}
   139  				}
   140  				goto emitRemainder
   141  			}
   142  
   143  			// Store every second hash in-between, but offset by 1.
   144  			for i := s - l + 2; i < s-5; i += 7 {
   145  				x := loadLE64(src, i)
   146  				nextHash := hashLen(x, l2TableBits, hashBytes)
   147  				e.table[nextHash] = tableEntry{offset: e.cur + i}
   148  				// Skip one
   149  				x >>= 16
   150  				nextHash = hashLen(x, l2TableBits, hashBytes)
   151  				e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
   152  				// Skip one
   153  				x >>= 16
   154  				nextHash = hashLen(x, l2TableBits, hashBytes)
   155  				e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
   156  			}
   157  
   158  			// We could immediately start working at s now, but to improve
   159  			// compression we first update the hash table at s-2 to s. If
   160  			// another emitCopy is not our next move, also calculate nextHash
   161  			// at s+1.
   162  			x := loadLE64(src, s-2)
   163  			o := e.cur + s - 2
   164  			prevHash := hashLen(x, l2TableBits, hashBytes)
   165  			prevHash2 := hashLen(x>>8, l2TableBits, hashBytes)
   166  			e.table[prevHash] = tableEntry{offset: o}
   167  			e.table[prevHash2] = tableEntry{offset: o + 1}
   168  			currHash := hashLen(x>>16, l2TableBits, hashBytes)
   169  			candidate = e.table[currHash]
   170  			e.table[currHash] = tableEntry{offset: o + 2}
   171  
   172  			offset := s - (candidate.offset - e.cur)
   173  			if offset > maxMatchOffset || uint32(x>>16) != loadLE32(src, candidate.offset-e.cur) {
   174  				cv = x >> 24
   175  				s++
   176  				break
   177  			}
   178  		}
   179  	}
   180  
   181  emitRemainder:
   182  	if int(nextEmit) < len(src) {
   183  		// If nothing was added, don't encode literals.
   184  		if dst.n == 0 {
   185  			return
   186  		}
   187  
   188  		emitLiterals(dst, src[nextEmit:])
   189  	}
   190  }
   191
View as plain text