Source file src/compress/flate/level6.go

     1  // Copyright 2026 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package flate
     6  
     7  // Level 6 extends level 5, but does "repeat offset" check,
     8  // as well as adding more hash entries to the tables.
     9  type fastEncL6 struct {
    10  	fastGen
    11  	table  [tableSize]tableEntry
    12  	bTable [tableSize]tableEntryPrev
    13  }
    14  
    15  func (e *fastEncL6) encode(dst *tokens, src []byte) {
    16  	const (
    17  		inputMargin            = 12 - 1
    18  		minNonLiteralBlockSize = 1 + 1 + inputMargin
    19  		hashShortBytes         = 4
    20  	)
    21  
    22  	// Protect against e.cur wraparound.
    23  	for e.cur >= bufferReset {
    24  		if len(e.hist) == 0 {
    25  			clear(e.table[:])
    26  			clear(e.bTable[:])
    27  			e.cur = maxMatchOffset
    28  			break
    29  		}
    30  		// Shift down everything in the table that isn't already too far away.
    31  		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
    32  		for i := range e.table[:] {
    33  			v := e.table[i].offset
    34  			if v <= minOff {
    35  				v = 0
    36  			} else {
    37  				v = v - e.cur + maxMatchOffset
    38  			}
    39  			e.table[i].offset = v
    40  		}
    41  		for i := range e.bTable[:] {
    42  			v := e.bTable[i]
    43  			if v.cur.offset <= minOff {
    44  				v.cur.offset = 0
    45  				v.prev.offset = 0
    46  			} else {
    47  				v.cur.offset = v.cur.offset - e.cur + maxMatchOffset
    48  				if v.prev.offset <= minOff {
    49  					v.prev.offset = 0
    50  				} else {
    51  					v.prev.offset = v.prev.offset - e.cur + maxMatchOffset
    52  				}
    53  			}
    54  			e.bTable[i] = v
    55  		}
    56  		e.cur = maxMatchOffset
    57  	}
    58  
    59  	s := e.addBlock(src)
    60  
    61  	if len(src) < minNonLiteralBlockSize {
    62  		// We do not fill the token table.
    63  		// This will be picked up by caller.
    64  		dst.n = uint16(len(src))
    65  		return
    66  	}
    67  
    68  	// Override src
    69  	src = e.hist
    70  
    71  	// nextEmit is where in src the next emitLiterals should start from.
    72  	nextEmit := s
    73  
    74  	// sLimit is when to stop looking for offset/length copies. The inputMargin
    75  	// lets us use a fast path for emitLiterals in the main loop, while we are
    76  	// looking for copies.
    77  	sLimit := int32(len(src) - inputMargin)
    78  
    79  	cv := loadLE64(src, s)
    80  	// Repeat MUST be > 1 and within range
    81  	repeat := int32(1)
    82  	for {
    83  		const skipLog = 7
    84  		const doEvery = 1
    85  
    86  		nextS := s
    87  		var l int32
    88  		var t int32
    89  		for {
    90  			nextHashS := hashLen(cv, tableBits, hashShortBytes)
    91  			nextHashL := hashLen(cv, tableBits, hashLongBytes)
    92  			s = nextS
    93  			nextS = s + doEvery + (s-nextEmit)>>skipLog
    94  			if nextS > sLimit {
    95  				goto emitRemainder
    96  			}
    97  			// Fetch a short+long candidate
    98  			sCandidate := e.table[nextHashS]
    99  			lCandidate := e.bTable[nextHashL]
   100  			next := loadLE64(src, nextS)
   101  			entry := tableEntry{offset: s + e.cur}
   102  			e.table[nextHashS] = entry
   103  			eLong := &e.bTable[nextHashL]
   104  			eLong.cur, eLong.prev = entry, eLong.cur
   105  
   106  			// Calculate hashes of 'next'
   107  			nextHashS = hashLen(next, tableBits, hashShortBytes)
   108  			nextHashL = hashLen(next, tableBits, hashLongBytes)
   109  
   110  			t = lCandidate.cur.offset - e.cur
   111  			if s-t < maxMatchOffset {
   112  				if uint32(cv) == loadLE32(src, t) {
   113  					// Long candidate matches at least 4 bytes.
   114  
   115  					// Store the next match
   116  					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
   117  					eLong := &e.bTable[nextHashL]
   118  					eLong.cur, eLong.prev = tableEntry{offset: nextS + e.cur}, eLong.cur
   119  
   120  					// Check the previous long candidate as well.
   121  					t2 := lCandidate.prev.offset - e.cur
   122  					if s-t2 < maxMatchOffset && uint32(cv) == loadLE32(src, t2) {
   123  						l = e.matchLenLimited(int(s+4), int(t+4), src) + 4
   124  						ml1 := e.matchLenLimited(int(s+4), int(t2+4), src) + 4
   125  						if ml1 > l {
   126  							t = t2
   127  							l = ml1
   128  							break
   129  						}
   130  					}
   131  					break
   132  				}
   133  				// Current value did not match, but check if previous long value does.
   134  				t = lCandidate.prev.offset - e.cur
   135  				if s-t < maxMatchOffset && uint32(cv) == loadLE32(src, t) {
   136  					// Store the next match
   137  					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
   138  					eLong := &e.bTable[nextHashL]
   139  					eLong.cur, eLong.prev = tableEntry{offset: nextS + e.cur}, eLong.cur
   140  					break
   141  				}
   142  			}
   143  
   144  			t = sCandidate.offset - e.cur
   145  			if s-t < maxMatchOffset && uint32(cv) == loadLE32(src, t) {
   146  				// Found a 4 match...
   147  				l = e.matchLenLimited(int(s+4), int(t+4), src) + 4
   148  
   149  				// Look up next long candidate (at nextS)
   150  				lCandidate = e.bTable[nextHashL]
   151  
   152  				// Store the next match
   153  				e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
   154  				eLong := &e.bTable[nextHashL]
   155  				eLong.cur, eLong.prev = tableEntry{offset: nextS + e.cur}, eLong.cur
   156  
   157  				// Check repeat at s + repOff
   158  				const repOff = 1
   159  				t2 := s - repeat + repOff
   160  				if loadLE32(src, t2) == uint32(cv>>(8*repOff)) {
   161  					ml := e.matchLenLimited(int(s+4+repOff), int(t2+4), src) + 4
   162  					if ml > l {
   163  						t = t2
   164  						l = ml
   165  						s += repOff
   166  						// Not worth checking more.
   167  						break
   168  					}
   169  				}
   170  
   171  				// If the next long is a candidate, use that...
   172  				t2 = lCandidate.cur.offset - e.cur
   173  				if nextS-t2 < maxMatchOffset {
   174  					if loadLE32(src, t2) == uint32(next) {
   175  						ml := e.matchLenLimited(int(nextS+4), int(t2+4), src) + 4
   176  						if ml > l {
   177  							t = t2
   178  							s = nextS
   179  							l = ml
   180  							// This is ok, but check previous as well.
   181  						}
   182  					}
   183  					// If the previous long is a candidate, use that...
   184  					t2 = lCandidate.prev.offset - e.cur
   185  					if nextS-t2 < maxMatchOffset && loadLE32(src, t2) == uint32(next) {
   186  						ml := e.matchLenLimited(int(nextS+4), int(t2+4), src) + 4
   187  						if ml > l {
   188  							t = t2
   189  							s = nextS
   190  							l = ml
   191  							break
   192  						}
   193  					}
   194  				}
   195  				break
   196  			}
   197  			cv = next
   198  		}
   199  
   200  		// Extend the 4-byte match as long as possible.
   201  		if l == 0 {
   202  			l = e.matchLenLong(int(s+4), int(t+4), src) + 4
   203  		} else if l == maxMatchLength {
   204  			l += e.matchLenLong(int(s+l), int(t+l), src)
   205  		}
   206  
   207  		// Try to locate a better match by checking the end-of-match...
   208  		if sAt := s + l; sAt < sLimit {
   209  			// Allow some bytes at the beginning to mismatch.
   210  			// Sweet spot is 2/3 bytes depending on input.
   211  			// 3 is only a little better when it is but sometimes a lot worse.
   212  			// The skipped bytes are tested in extend backwards,
   213  			// and still picked up as part of the match if they do.
   214  			const skipBeginning = 2
   215  			eLong := &e.bTable[hashLen(loadLE64(src, sAt), tableBits, hashLongBytes)]
   216  			// Test current
   217  			t2 := eLong.cur.offset - e.cur - l + skipBeginning
   218  			s2 := s + skipBeginning
   219  			off := s2 - t2
   220  			if off < maxMatchOffset {
   221  				if off > 0 && t2 >= 0 {
   222  					if l2 := e.matchLenLong(int(s2), int(t2), src); l2 > l {
   223  						t = t2
   224  						l = l2
   225  						s = s2
   226  					}
   227  				}
   228  				// Test previous entry:
   229  				t2 = eLong.prev.offset - e.cur - l + skipBeginning
   230  				off := s2 - t2
   231  				if off > 0 && off < maxMatchOffset && t2 >= 0 {
   232  					if l2 := e.matchLenLong(int(s2), int(t2), src); l2 > l {
   233  						t = t2
   234  						l = l2
   235  						s = s2
   236  					}
   237  				}
   238  			}
   239  		}
   240  
   241  		// Extend backwards
   242  		for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
   243  			s--
   244  			t--
   245  			l++
   246  		}
   247  		if nextEmit < s {
   248  			for _, v := range src[nextEmit:s] {
   249  				dst.tokens[dst.n] = token(v)
   250  				dst.litHist[v]++
   251  				dst.n++
   252  			}
   253  		}
   254  
   255  		dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
   256  		repeat = s - t
   257  		s += l
   258  		nextEmit = s
   259  		if nextS >= s {
   260  			s = nextS + 1
   261  		}
   262  
   263  		if s >= sLimit {
   264  			// Index after match end.
   265  			for i := nextS + 1; i < int32(len(src))-8; i += 2 {
   266  				cv := loadLE64(src, i)
   267  				e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: i + e.cur}
   268  				eLong := &e.bTable[hashLen(cv, tableBits, hashLongBytes)]
   269  				eLong.cur, eLong.prev = tableEntry{offset: i + e.cur}, eLong.cur
   270  			}
   271  			goto emitRemainder
   272  		}
   273  
   274  		// Store every long hash in-between and every second short.
   275  		for i := nextS + 1; i < s-1; i += 2 {
   276  			cv := loadLE64(src, i)
   277  			t := tableEntry{offset: i + e.cur}
   278  			t2 := tableEntry{offset: t.offset + 1}
   279  			eLong := &e.bTable[hashLen(cv, tableBits, hashLongBytes)]
   280  			eLong2 := &e.bTable[hashLen(cv>>8, tableBits, hashLongBytes)]
   281  			e.table[hashLen(cv, tableBits, hashShortBytes)] = t
   282  			eLong.cur, eLong.prev = t, eLong.cur
   283  			eLong2.cur, eLong2.prev = t2, eLong2.cur
   284  		}
   285  		cv = loadLE64(src, s)
   286  	}
   287  
   288  emitRemainder:
   289  	if int(nextEmit) < len(src) {
   290  		// If nothing was added, don't encode literals.
   291  		if dst.n == 0 {
   292  			return
   293  		}
   294  
   295  		emitLiterals(dst, src[nextEmit:])
   296  	}
   297  }
   298  

View as plain text