// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	dFastLongTableBits = 17                      // Bits used in the long match table
	dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
	dFastLongTableMask = dFastLongTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
	dFastLongLen       = 8                       // Bytes used for table hash

	dLongTableShardCnt  = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
	dLongTableShardSize = dFastLongTableSize / tableShardCnt        // Size of an individual shard

	dFastShortTableBits = tableBits                // Bits used in the short match table
	dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
	dFastShortTableMask = dFastShortTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
	dFastShortLen       = 5                        // Bytes used for table hash

)

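// doubleFastEncoder implements the "double fast" strategy: it keeps two hash
// tables, a long table hashing dFastLongLen (8) bytes and a short table
// hashing dFastShortLen (5) bytes, so every position can be probed for both a
// long and a short candidate match.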
type doubleFastEncoder struct {
	fastEncoder
	longTable [dFastLongTableSize]tableEntry
}

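// doubleFastEncoderDict is the dictionary-aware variant. It also keeps the
// long table built from the dictionary contents (dictLongTable) and tracks
// which shards of the working long table have been overwritten, so Reset can
// restore only the dirty parts.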
type doubleFastEncoderDict struct {
	fastEncoderDict
	longTable           [dFastLongTableSize]tableEntry
	dictLongTable       []tableEntry
	longTableShardDirty [dLongTableShardCnt]bool
}

// Encode mimics functionality in zstd_dfast.c
func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
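	// Table entries store absolute positions (s + e.cur). When e.cur gets close
	// to bufferReset, the entries are rebased against the current history, or
	// the tables are simply cleared if there is no history, so the int32
	// offsets cannot overflow.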
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			e.table = [dFastShortTableSize]tableEntry{}
			e.longTable = [dFastLongTableSize]tableEntry{}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.longTable[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

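			// Hash the next bytes for both tables (8 bytes for the long table,
			// 5 for the short one), fetch both candidates, and store the
			// current position before deciding which candidate, if any, to use.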
			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry

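			// First try a match at s+1 against the most recent offset (repeat
			// code 0). Repeat matches are cheap to encode, so one is taken
			// immediately when the 4 bytes at repIndex line up.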
			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)

						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes, we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes, we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
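			// The longer we go without emitting a match, the bigger the step,
			// so incompressible stretches are skipped quickly (tuned by
			// kSearchStrength).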
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
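		// Offset values 1-3 are reserved for repeat codes, so a new match
		// offset is stored as (s-t)+3.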
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

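		// Populate both tables with positions inside the match just emitted;
		// this keeps the tables reasonably fresh without hashing every skipped
		// position.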
		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
		e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
		e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1

		cv = load6432(src, s)

		if !canRepeat {
			continue
		}

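		// After a match, speculatively test whether the bytes at s continue the
		// second-most-recent offset. Each hit becomes a zero-literal repeat
		// sequence and offsets 1 and 2 are swapped.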
		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)

			// We have at least a 4 byte match.
			// No need to check backwards. We come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
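// Since the block is encoded in isolation, match lengths are computed with
// plain slice comparisons (matchLen) instead of the history-aware e.matchlen.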
func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	if e.cur >= e.bufferReset {
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		for i := range e.longTable[:] {
			e.longTable[i] = tableEntry{}
		}
		e.cur = e.maxMatchOff
	}

	s := int32(0)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		for {

			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry

			if len(blk.sequences) > 2 {
				if load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					//length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
					length := 4 + int32(matchLen(src[s+4+repOff:], src[repIndex+4:]))

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)

						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes, we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d). cur: %d", s, t, e.cur))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes, we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		// Extend the 4-byte match as long as possible.
		//l := e.matchlen(s+4, t+4, src) + 4
		l := int32(matchLen(src[s+4:], src[t+4:])) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
		e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
		e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1

		cv = load6432(src, s)

		if len(blk.sequences) <= 2 {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hashLen(cv1>>8, dFastShortTableBits, dFastShortLen)
			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)

			// We have at least a 4 byte match.
			// No need to check backwards. We come straight from a match.
			//l := 4 + e.matchlen(s+4, o2+4, src)
			l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}

	// We do not store history, so we must offset e.cur to avoid false matches for the next user.
	if e.cur < e.bufferReset {
		e.cur += int32(len(src))
	}
}

// Encode will encode the content, with a dictionary if initialized for it.
func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = tableEntry{}
			}
			e.markAllShardsDirty()
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.longTable[i].offset = v
		}
		e.markAllShardsDirty()
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = entry
			e.markShardDirty(nextHashS)

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)

						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes, we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				e.markLongShardDirty(nextHashL)
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes, we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
		longHash2 := hashLen(cv1, dFastLongTableBits, dFastLongLen)
		e.longTable[longHash1] = te0
		e.longTable[longHash2] = te1
		e.markLongShardDirty(longHash1)
		e.markLongShardDirty(longHash2)
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		hashVal1 := hashLen(cv0, dFastShortTableBits, dFastShortLen)
		hashVal2 := hashLen(cv1, dFastShortTableBits, dFastShortLen)
		e.table[hashVal1] = te0
		e.markShardDirty(hashVal1)
		e.table[hashVal2] = te1
		e.markShardDirty(hashVal2)

		cv = load6432(src, s)

		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)

			// We have at least a 4 byte match.
			// No need to check backwards. We come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = entry
			e.markShardDirty(nextHashS)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
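	// Per-shard dirty tracking only pays off for small inputs; with a large
	// block most shards will have been written to anyway.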
	// If we encoded more than 64K mark all dirty.
	if len(src) > 64<<10 {
		e.markAllShardsDirty()
	}
}

// Reset will reset and set a dictionary if not nil
func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) {
	e.fastEncoder.Reset(d, singleBlock)
	if d != nil {
		panic("doubleFastEncoder: Reset with dict not supported")
	}
}

// Reset will reset and set a dictionary if not nil
func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
	allDirty := e.allDirty
	e.fastEncoderDict.Reset(d, singleBlock)
	if d == nil {
		return
	}

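	// The dictionary content is hashed into dictLongTable once per dictionary,
	// so later Resets can restore the working long table by copying instead of
	// re-hashing the dictionary.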
	// Init or copy dict table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]tableEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
				val:    uint32(cv),
				offset: e.maxMatchOff,
			}
			end := int32(len(d.content)) - 8 + e.maxMatchOff
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56)
				e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
					val:    uint32(cv),
					offset: i,
				}
			}
		}
		e.lastDictID = d.id
		allDirty = true
	}
	// Reset table to initial state
	e.cur = e.maxMatchOff

	dirtyShardCnt := 0
	if !allDirty {
		for i := range e.longTableShardDirty {
			if e.longTableShardDirty[i] {
				dirtyShardCnt++
			}
		}
	}

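	// If most shards are dirty, one bulk copy of the whole table is cheaper
	// than restoring shard by shard.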
	if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
		//copy(e.longTable[:], e.dictLongTable)
		e.longTable = *(*[dFastLongTableSize]tableEntry)(e.dictLongTable)
		for i := range e.longTableShardDirty {
			e.longTableShardDirty[i] = false
		}
		return
	}
	for i := range e.longTableShardDirty {
		if !e.longTableShardDirty[i] {
			continue
		}

		// copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
		*(*[dLongTableShardSize]tableEntry)(e.longTable[i*dLongTableShardSize:]) = *(*[dLongTableShardSize]tableEntry)(e.dictLongTable[i*dLongTableShardSize:])

		e.longTableShardDirty[i] = false
	}
}

func (e *doubleFastEncoderDict) markLongShardDirty(entryNum uint32) {
	e.longTableShardDirty[entryNum/dLongTableShardSize] = true
}