blob: 4613724e9d1417600609ccd78c017a1f5d7da5c4 [file] [log] [blame]
khenaidood948f772021-08-11 17:49:24 -04001// Copyright 2019+ Klaus Post. All rights reserved.
2// License information can be found in the LICENSE file.
3// Based on work by Yann Collet, released under BSD License.
4
5package zstd
6
7import (
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +05308 "bytes"
khenaidood948f772021-08-11 17:49:24 -04009 "fmt"
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053010
11 "github.com/klauspost/compress"
khenaidood948f772021-08-11 17:49:24 -040012)
13
14const (
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053015 bestLongTableBits = 22 // Bits used in the long match table
khenaidood948f772021-08-11 17:49:24 -040016 bestLongTableSize = 1 << bestLongTableBits // Size of the table
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053017 bestLongLen = 8 // Bytes used for table hash
khenaidood948f772021-08-11 17:49:24 -040018
19 // Note: Increasing the short table bits or making the hash shorter
20 // can actually lead to compression degradation since it will 'steal' more from the
21 // long match table and match offsets are quite big.
22 // This greatly depends on the type of input.
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053023 bestShortTableBits = 18 // Bits used in the short match table
khenaidood948f772021-08-11 17:49:24 -040024 bestShortTableSize = 1 << bestShortTableBits // Size of the table
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053025 bestShortLen = 4 // Bytes used for table hash
26
khenaidood948f772021-08-11 17:49:24 -040027)
28
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053029type match struct {
30 offset int32
31 s int32
32 length int32
33 rep int32
34 est int32
35}
36
Abhay Kumara2ae5992025-11-10 14:02:24 +000037const highScore = maxMatchLen * 8
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053038
39// estBits will estimate output bits from predefined tables.
40func (m *match) estBits(bitsPerByte int32) {
41 mlc := mlCode(uint32(m.length - zstdMinMatch))
42 var ofc uint8
43 if m.rep < 0 {
44 ofc = ofCode(uint32(m.s-m.offset) + 3)
45 } else {
Abhay Kumara2ae5992025-11-10 14:02:24 +000046 ofc = ofCode(uint32(m.rep) & 3)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053047 }
48 // Cost, excluding
49 ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
50
51 // Add cost of match encoding...
52 m.est = int32(ofTT.outBits + mlTT.outBits)
53 m.est += int32(ofTT.deltaNbBits>>16 + mlTT.deltaNbBits>>16)
54 // Subtract savings compared to literal encoding...
55 m.est -= (m.length * bitsPerByte) >> 10
56 if m.est > 0 {
57 // Unlikely gain..
58 m.length = 0
59 m.est = highScore
60 }
61}
62
khenaidood948f772021-08-11 17:49:24 -040063// bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
64// The long match table contains the previous entry with the same hash,
65// effectively making it a "chain" of length 2.
66// When we find a long match we choose between the two values and select the longest.
67// When we find a short match, after checking the long, we check if we can find a long at n+1
68// and that it is longer (lazy matching).
69type bestFastEncoder struct {
70 fastBase
71 table [bestShortTableSize]prevEntry
72 longTable [bestLongTableSize]prevEntry
73 dictTable []prevEntry
74 dictLongTable []prevEntry
75}
76
77// Encode improves compression...
78func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
79 const (
80 // Input margin is the number of bytes we read (8)
81 // and the maximum we will read ahead (2)
82 inputMargin = 8 + 4
83 minNonLiteralBlockSize = 16
84 )
85
86 // Protect against e.cur wraparound.
Abhay Kumara2ae5992025-11-10 14:02:24 +000087 for e.cur >= e.bufferReset-int32(len(e.hist)) {
khenaidood948f772021-08-11 17:49:24 -040088 if len(e.hist) == 0 {
Abhay Kumara2ae5992025-11-10 14:02:24 +000089 e.table = [bestShortTableSize]prevEntry{}
90 e.longTable = [bestLongTableSize]prevEntry{}
khenaidood948f772021-08-11 17:49:24 -040091 e.cur = e.maxMatchOff
92 break
93 }
94 // Shift down everything in the table that isn't already too far away.
95 minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
96 for i := range e.table[:] {
97 v := e.table[i].offset
98 v2 := e.table[i].prev
99 if v < minOff {
100 v = 0
101 v2 = 0
102 } else {
103 v = v - e.cur + e.maxMatchOff
104 if v2 < minOff {
105 v2 = 0
106 } else {
107 v2 = v2 - e.cur + e.maxMatchOff
108 }
109 }
110 e.table[i] = prevEntry{
111 offset: v,
112 prev: v2,
113 }
114 }
115 for i := range e.longTable[:] {
116 v := e.longTable[i].offset
117 v2 := e.longTable[i].prev
118 if v < minOff {
119 v = 0
120 v2 = 0
121 } else {
122 v = v - e.cur + e.maxMatchOff
123 if v2 < minOff {
124 v2 = 0
125 } else {
126 v2 = v2 - e.cur + e.maxMatchOff
127 }
128 }
129 e.longTable[i] = prevEntry{
130 offset: v,
131 prev: v2,
132 }
133 }
134 e.cur = e.maxMatchOff
135 break
136 }
137
Abhay Kumara2ae5992025-11-10 14:02:24 +0000138 // Add block to history
khenaidood948f772021-08-11 17:49:24 -0400139 s := e.addBlock(src)
140 blk.size = len(src)
Abhay Kumara2ae5992025-11-10 14:02:24 +0000141
142 // Check RLE first
143 if len(src) > zstdMinMatch {
144 ml := matchLen(src[1:], src)
145 if ml == len(src)-1 {
146 blk.literals = append(blk.literals, src[0])
147 blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3})
148 return
149 }
150 }
151
khenaidood948f772021-08-11 17:49:24 -0400152 if len(src) < minNonLiteralBlockSize {
153 blk.extraLits = len(src)
154 blk.literals = blk.literals[:len(src)]
155 copy(blk.literals, src)
156 return
157 }
158
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530159 // Use this to estimate literal cost.
160 // Scaled by 10 bits.
161 bitsPerByte := int32((compress.ShannonEntropyBits(src) * 1024) / len(src))
162 // Huffman can never go < 1 bit/byte
163 if bitsPerByte < 1024 {
164 bitsPerByte = 1024
165 }
166
khenaidood948f772021-08-11 17:49:24 -0400167 // Override src
168 src = e.hist
169 sLimit := int32(len(src)) - inputMargin
170 const kSearchStrength = 10
171
172 // nextEmit is where in src the next emitLiteral should start from.
173 nextEmit := s
khenaidood948f772021-08-11 17:49:24 -0400174
175 // Relative offsets
176 offset1 := int32(blk.recentOffsets[0])
177 offset2 := int32(blk.recentOffsets[1])
178 offset3 := int32(blk.recentOffsets[2])
179
180 addLiterals := func(s *seq, until int32) {
181 if until == nextEmit {
182 return
183 }
184 blk.literals = append(blk.literals, src[nextEmit:until]...)
185 s.litLen = uint32(until - nextEmit)
186 }
khenaidood948f772021-08-11 17:49:24 -0400187
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530188 if debugEncoder {
khenaidood948f772021-08-11 17:49:24 -0400189 println("recent offsets:", blk.recentOffsets)
190 }
191
192encodeLoop:
193 for {
194 // We allow the encoder to optionally turn off repeat offsets across blocks
195 canRepeat := len(blk.sequences) > 2
196
197 if debugAsserts && canRepeat && offset1 == 0 {
198 panic("offset0 was 0")
199 }
200
Abhay Kumara2ae5992025-11-10 14:02:24 +0000201 const goodEnough = 250
202
203 cv := load6432(src, s)
khenaidood948f772021-08-11 17:49:24 -0400204
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530205 nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
206 nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
khenaidood948f772021-08-11 17:49:24 -0400207 candidateL := e.longTable[nextHashL]
208 candidateS := e.table[nextHashS]
209
Abhay Kumara2ae5992025-11-10 14:02:24 +0000210 // Set m to a match at offset if it looks like that will improve compression.
211 improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
212 delta := s - offset
213 if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first {
214 return
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530215 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000216 // Try to quick reject if we already have a long match.
217 if m.length > 16 {
218 left := len(src) - int(m.s+m.length)
219 // If we are too close to the end, keep as is.
220 if left <= 0 {
221 return
222 }
223 checkLen := m.length - (s - m.s) - 8
224 if left > 2 && checkLen > 4 {
225 // Check 4 bytes, 4 bytes from the end of the current match.
226 a := load3232(src, offset+checkLen)
227 b := load3232(src, s+checkLen)
228 if a != b {
229 return
230 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530231 }
232 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000233 l := 4 + e.matchlen(s+4, offset+4, src)
234 if m.rep <= 0 {
235 // Extend candidate match backwards as far as possible.
236 // Do not extend repeats as we can assume they are optimal
237 // and offsets change if s == nextEmit.
238 tMin := s - e.maxMatchOff
239 if tMin < 0 {
240 tMin = 0
241 }
242 for offset > tMin && s > nextEmit && src[offset-1] == src[s-1] && l < maxMatchLength {
243 s--
244 offset--
245 l++
246 }
247 }
248 if debugAsserts {
249 if offset >= s {
250 panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff))
251 }
252 if !bytes.Equal(src[s:s+l], src[offset:offset+l]) {
253 panic(fmt.Sprintf("second match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
254 }
255 }
256 cand := match{offset: offset, s: s, length: l, rep: rep}
257 cand.estBits(bitsPerByte)
258 if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
259 *m = cand
260 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530261 }
262
Abhay Kumara2ae5992025-11-10 14:02:24 +0000263 best := match{s: s, est: highScore}
264 improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
265 improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
266 improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
267 improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530268
khenaidood948f772021-08-11 17:49:24 -0400269 if canRepeat && best.length < goodEnough {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000270 if s == nextEmit {
271 // Check repeats straight after a match.
272 improve(&best, s-offset2, s, uint32(cv), 1|4)
273 improve(&best, s-offset3, s, uint32(cv), 2|4)
274 if offset1 > 1 {
275 improve(&best, s-(offset1-1), s, uint32(cv), 3|4)
276 }
277 }
278
279 // If either no match or a non-repeat match, check at + 1
280 if best.rep <= 0 {
281 cv32 := uint32(cv >> 8)
282 spp := s + 1
283 improve(&best, spp-offset1, spp, cv32, 1)
284 improve(&best, spp-offset2, spp, cv32, 2)
285 improve(&best, spp-offset3, spp, cv32, 3)
286 if best.rep < 0 {
287 cv32 = uint32(cv >> 24)
288 spp += 2
289 improve(&best, spp-offset1, spp, cv32, 1)
290 improve(&best, spp-offset2, spp, cv32, 2)
291 improve(&best, spp-offset3, spp, cv32, 3)
292 }
khenaidood948f772021-08-11 17:49:24 -0400293 }
294 }
295 // Load next and check...
296 e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
297 e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
Abhay Kumara2ae5992025-11-10 14:02:24 +0000298 index0 := s + 1
khenaidood948f772021-08-11 17:49:24 -0400299
300 // Look far ahead, unless we have a really long match already...
301 if best.length < goodEnough {
302 // No match found, move forward on input, no need to check forward...
303 if best.length < 4 {
304 s += 1 + (s-nextEmit)>>(kSearchStrength-1)
305 if s >= sLimit {
306 break encodeLoop
307 }
khenaidood948f772021-08-11 17:49:24 -0400308 continue
309 }
310
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530311 candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
Abhay Kumara2ae5992025-11-10 14:02:24 +0000312 cv = load6432(src, s+1)
313 cv2 := load6432(src, s+2)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530314 candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
315 candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
khenaidood948f772021-08-11 17:49:24 -0400316
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530317 // Short at s+1
Abhay Kumara2ae5992025-11-10 14:02:24 +0000318 improve(&best, candidateS.offset-e.cur, s+1, uint32(cv), -1)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530319 // Long at s+1, s+2
Abhay Kumara2ae5992025-11-10 14:02:24 +0000320 improve(&best, candidateL.offset-e.cur, s+1, uint32(cv), -1)
321 improve(&best, candidateL.prev-e.cur, s+1, uint32(cv), -1)
322 improve(&best, candidateL2.offset-e.cur, s+2, uint32(cv2), -1)
323 improve(&best, candidateL2.prev-e.cur, s+2, uint32(cv2), -1)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530324 if false {
325 // Short at s+3.
326 // Too often worse...
Abhay Kumara2ae5992025-11-10 14:02:24 +0000327 improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+3, uint32(cv2>>8), -1)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530328 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000329
330 // Start check at a fixed offset to allow for a few mismatches.
331 // For this compression level 2 yields the best results.
332 // We cannot do this if we have already indexed this position.
333 const skipBeginning = 2
334 if best.s > s-skipBeginning {
335 // See if we can find a better match by checking where the current best ends.
336 // Use that offset to see if we can find a better full match.
337 if sAt := best.s + best.length; sAt < sLimit {
338 nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
339 candidateEnd := e.longTable[nextHashL]
340
341 if off := candidateEnd.offset - e.cur - best.length + skipBeginning; off >= 0 {
342 improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
343 if off := candidateEnd.prev - e.cur - best.length + skipBeginning; off >= 0 {
344 improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
345 }
khenaidood948f772021-08-11 17:49:24 -0400346 }
khenaidood948f772021-08-11 17:49:24 -0400347 }
348 }
349 }
350
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530351 if debugAsserts {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000352 if best.offset >= best.s {
353 panic(fmt.Sprintf("best.offset > s: %d >= %d", best.offset, best.s))
354 }
355 if best.s < nextEmit {
356 panic(fmt.Sprintf("s %d < nextEmit %d", best.s, nextEmit))
357 }
358 if best.offset < s-e.maxMatchOff {
359 panic(fmt.Sprintf("best.offset < s-e.maxMatchOff: %d < %d", best.offset, s-e.maxMatchOff))
360 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530361 if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
362 panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
363 }
364 }
365
khenaidood948f772021-08-11 17:49:24 -0400366 // We have a match, we can store the forward value
Abhay Kumara2ae5992025-11-10 14:02:24 +0000367 s = best.s
khenaidood948f772021-08-11 17:49:24 -0400368 if best.rep > 0 {
khenaidood948f772021-08-11 17:49:24 -0400369 var seq seq
370 seq.matchLen = uint32(best.length - zstdMinMatch)
Abhay Kumara2ae5992025-11-10 14:02:24 +0000371 addLiterals(&seq, best.s)
khenaidood948f772021-08-11 17:49:24 -0400372
Abhay Kumara2ae5992025-11-10 14:02:24 +0000373 // Repeat. If bit 4 is set, this is a non-lit repeat.
374 seq.offset = uint32(best.rep & 3)
khenaidood948f772021-08-11 17:49:24 -0400375 if debugSequences {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000376 println("repeat sequence", seq, "next s:", best.s, "off:", best.s-best.offset)
khenaidood948f772021-08-11 17:49:24 -0400377 }
378 blk.sequences = append(blk.sequences, seq)
379
Abhay Kumara2ae5992025-11-10 14:02:24 +0000380 // Index old s + 1 -> s - 1
khenaidood948f772021-08-11 17:49:24 -0400381 s = best.s + best.length
khenaidood948f772021-08-11 17:49:24 -0400382 nextEmit = s
khenaidood948f772021-08-11 17:49:24 -0400383
khenaidood948f772021-08-11 17:49:24 -0400384 // Index skipped...
Abhay Kumara2ae5992025-11-10 14:02:24 +0000385 end := s
386 if s > sLimit+4 {
387 end = sLimit + 4
388 }
khenaidood948f772021-08-11 17:49:24 -0400389 off := index0 + e.cur
Abhay Kumara2ae5992025-11-10 14:02:24 +0000390 for index0 < end {
khenaidood948f772021-08-11 17:49:24 -0400391 cv0 := load6432(src, index0)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530392 h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
393 h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
khenaidood948f772021-08-11 17:49:24 -0400394 e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
395 e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
396 off++
397 index0++
398 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000399
khenaidood948f772021-08-11 17:49:24 -0400400 switch best.rep {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000401 case 2, 4 | 1:
khenaidood948f772021-08-11 17:49:24 -0400402 offset1, offset2 = offset2, offset1
Abhay Kumara2ae5992025-11-10 14:02:24 +0000403 case 3, 4 | 2:
khenaidood948f772021-08-11 17:49:24 -0400404 offset1, offset2, offset3 = offset3, offset1, offset2
Abhay Kumara2ae5992025-11-10 14:02:24 +0000405 case 4 | 3:
406 offset1, offset2, offset3 = offset1-1, offset1, offset2
khenaidood948f772021-08-11 17:49:24 -0400407 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000408 if s >= sLimit {
409 if debugEncoder {
410 println("repeat ended", s, best.length)
411 }
412 break encodeLoop
413 }
khenaidood948f772021-08-11 17:49:24 -0400414 continue
415 }
416
417 // A 4-byte match has been found. Update recent offsets.
418 // We'll later see if more than 4 bytes.
khenaidood948f772021-08-11 17:49:24 -0400419 t := best.offset
420 offset1, offset2, offset3 = s-t, offset1, offset2
421
422 if debugAsserts && s <= t {
423 panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
424 }
425
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530426 if debugAsserts && int(offset1) > len(src) {
khenaidood948f772021-08-11 17:49:24 -0400427 panic("invalid offset")
428 }
429
khenaidood948f772021-08-11 17:49:24 -0400430 // Write our sequence
431 var seq seq
Abhay Kumara2ae5992025-11-10 14:02:24 +0000432 l := best.length
khenaidood948f772021-08-11 17:49:24 -0400433 seq.litLen = uint32(s - nextEmit)
434 seq.matchLen = uint32(l - zstdMinMatch)
435 if seq.litLen > 0 {
436 blk.literals = append(blk.literals, src[nextEmit:s]...)
437 }
438 seq.offset = uint32(s-t) + 3
439 s += l
440 if debugSequences {
441 println("sequence", seq, "next s:", s)
442 }
443 blk.sequences = append(blk.sequences, seq)
444 nextEmit = s
Abhay Kumara2ae5992025-11-10 14:02:24 +0000445
446 // Index old s + 1 -> s - 1 or sLimit
447 end := s
448 if s > sLimit-4 {
449 end = sLimit - 4
khenaidood948f772021-08-11 17:49:24 -0400450 }
451
Abhay Kumara2ae5992025-11-10 14:02:24 +0000452 off := index0 + e.cur
453 for index0 < end {
khenaidood948f772021-08-11 17:49:24 -0400454 cv0 := load6432(src, index0)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530455 h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
456 h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
khenaidood948f772021-08-11 17:49:24 -0400457 e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
458 e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
459 index0++
Abhay Kumara2ae5992025-11-10 14:02:24 +0000460 off++
khenaidood948f772021-08-11 17:49:24 -0400461 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000462 if s >= sLimit {
463 break encodeLoop
khenaidood948f772021-08-11 17:49:24 -0400464 }
465 }
466
467 if int(nextEmit) < len(src) {
468 blk.literals = append(blk.literals, src[nextEmit:]...)
469 blk.extraLits = len(src) - int(nextEmit)
470 }
471 blk.recentOffsets[0] = uint32(offset1)
472 blk.recentOffsets[1] = uint32(offset2)
473 blk.recentOffsets[2] = uint32(offset3)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530474 if debugEncoder {
khenaidood948f772021-08-11 17:49:24 -0400475 println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
476 }
477}
478
479// EncodeNoHist will encode a block with no history and no following blocks.
480// Most notable difference is that src will not be copied for history and
481// we do not need to check for max match length.
482func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
483 e.ensureHist(len(src))
484 e.Encode(blk, src)
485}
486
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530487// Reset will reset and set a dictionary if not nil
khenaidood948f772021-08-11 17:49:24 -0400488func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
489 e.resetBase(d, singleBlock)
490 if d == nil {
491 return
492 }
493 // Init or copy dict table
494 if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
495 if len(e.dictTable) != len(e.table) {
496 e.dictTable = make([]prevEntry, len(e.table))
497 }
498 end := int32(len(d.content)) - 8 + e.maxMatchOff
499 for i := e.maxMatchOff; i < end; i += 4 {
500 const hashLog = bestShortTableBits
501
502 cv := load6432(d.content, i-e.maxMatchOff)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530503 nextHash := hashLen(cv, hashLog, bestShortLen) // 0 -> 4
504 nextHash1 := hashLen(cv>>8, hashLog, bestShortLen) // 1 -> 5
505 nextHash2 := hashLen(cv>>16, hashLog, bestShortLen) // 2 -> 6
506 nextHash3 := hashLen(cv>>24, hashLog, bestShortLen) // 3 -> 7
khenaidood948f772021-08-11 17:49:24 -0400507 e.dictTable[nextHash] = prevEntry{
508 prev: e.dictTable[nextHash].offset,
509 offset: i,
510 }
511 e.dictTable[nextHash1] = prevEntry{
512 prev: e.dictTable[nextHash1].offset,
513 offset: i + 1,
514 }
515 e.dictTable[nextHash2] = prevEntry{
516 prev: e.dictTable[nextHash2].offset,
517 offset: i + 2,
518 }
519 e.dictTable[nextHash3] = prevEntry{
520 prev: e.dictTable[nextHash3].offset,
521 offset: i + 3,
522 }
523 }
524 e.lastDictID = d.id
525 }
526
527 // Init or copy dict table
528 if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
529 if len(e.dictLongTable) != len(e.longTable) {
530 e.dictLongTable = make([]prevEntry, len(e.longTable))
531 }
532 if len(d.content) >= 8 {
533 cv := load6432(d.content, 0)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530534 h := hashLen(cv, bestLongTableBits, bestLongLen)
khenaidood948f772021-08-11 17:49:24 -0400535 e.dictLongTable[h] = prevEntry{
536 offset: e.maxMatchOff,
537 prev: e.dictLongTable[h].offset,
538 }
539
540 end := int32(len(d.content)) - 8 + e.maxMatchOff
541 off := 8 // First to read
542 for i := e.maxMatchOff + 1; i < end; i++ {
543 cv = cv>>8 | (uint64(d.content[off]) << 56)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530544 h := hashLen(cv, bestLongTableBits, bestLongLen)
khenaidood948f772021-08-11 17:49:24 -0400545 e.dictLongTable[h] = prevEntry{
546 offset: i,
547 prev: e.dictLongTable[h].offset,
548 }
549 off++
550 }
551 }
552 e.lastDictID = d.id
553 }
554 // Reset table to initial state
555 copy(e.longTable[:], e.dictLongTable)
556
557 e.cur = e.maxMatchOff
558 // Reset table to initial state
559 copy(e.table[:], e.dictTable)
560}