| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 1 | package zstd |
| 2 | |
| 3 | import ( |
| 4 | "fmt" |
| 5 | "math/bits" |
| 6 | |
| 7 | "github.com/klauspost/compress/zstd/internal/xxhash" |
| 8 | ) |
| 9 | |
| 10 | const ( |
| 11 | dictShardBits = 6 |
| 12 | ) |
| 13 | |
| 14 | type fastBase struct { |
| 15 | // cur is the offset at the start of hist |
| 16 | cur int32 |
| 17 | // maximum offset. Should be at least 2x block size. |
| 18 | maxMatchOff int32 |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 19 | bufferReset int32 |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 20 | hist []byte |
| 21 | crc *xxhash.Digest |
| 22 | tmp [8]byte |
| 23 | blk *blockEnc |
| 24 | lastDictID uint32 |
| 25 | lowMem bool |
| 26 | } |
| 27 | |
| 28 | // CRC returns the underlying CRC writer. |
| 29 | func (e *fastBase) CRC() *xxhash.Digest { |
| 30 | return e.crc |
| 31 | } |
| 32 | |
| 33 | // AppendCRC will append the CRC to the destination slice and return it. |
| 34 | func (e *fastBase) AppendCRC(dst []byte) []byte { |
| 35 | crc := e.crc.Sum(e.tmp[:0]) |
| 36 | dst = append(dst, crc[7], crc[6], crc[5], crc[4]) |
| 37 | return dst |
| 38 | } |
| 39 | |
| 40 | // WindowSize returns the window size of the encoder, |
| 41 | // or a window size small enough to contain the input size, if > 0. |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 42 | func (e *fastBase) WindowSize(size int64) int32 { |
| 43 | if size > 0 && size < int64(e.maxMatchOff) { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 44 | b := int32(1) << uint(bits.Len(uint(size))) |
| 45 | // Keep minimum window. |
| 46 | if b < 1024 { |
| 47 | b = 1024 |
| 48 | } |
| 49 | return b |
| 50 | } |
| 51 | return e.maxMatchOff |
| 52 | } |
| 53 | |
| 54 | // Block returns the current block. |
| 55 | func (e *fastBase) Block() *blockEnc { |
| 56 | return e.blk |
| 57 | } |
| 58 | |
| 59 | func (e *fastBase) addBlock(src []byte) int32 { |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 60 | if debugAsserts && e.cur > e.bufferReset { |
| 61 | panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset)) |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 62 | } |
| 63 | // check if we have space already |
| 64 | if len(e.hist)+len(src) > cap(e.hist) { |
| 65 | if cap(e.hist) == 0 { |
| 66 | e.ensureHist(len(src)) |
| 67 | } else { |
| 68 | if cap(e.hist) < int(e.maxMatchOff+maxCompressedBlockSize) { |
| 69 | panic(fmt.Errorf("unexpected buffer cap %d, want at least %d with window %d", cap(e.hist), e.maxMatchOff+maxCompressedBlockSize, e.maxMatchOff)) |
| 70 | } |
| 71 | // Move down |
| 72 | offset := int32(len(e.hist)) - e.maxMatchOff |
| 73 | copy(e.hist[0:e.maxMatchOff], e.hist[offset:]) |
| 74 | e.cur += offset |
| 75 | e.hist = e.hist[:e.maxMatchOff] |
| 76 | } |
| 77 | } |
| 78 | s := int32(len(e.hist)) |
| 79 | e.hist = append(e.hist, src...) |
| 80 | return s |
| 81 | } |
| 82 | |
| 83 | // ensureHist will ensure that history can keep at least this many bytes. |
| 84 | func (e *fastBase) ensureHist(n int) { |
| 85 | if cap(e.hist) >= n { |
| 86 | return |
| 87 | } |
| 88 | l := e.maxMatchOff |
| 89 | if (e.lowMem && e.maxMatchOff > maxCompressedBlockSize) || e.maxMatchOff <= maxCompressedBlockSize { |
| 90 | l += maxCompressedBlockSize |
| 91 | } else { |
| 92 | l += e.maxMatchOff |
| 93 | } |
| 94 | // Make it at least 1MB. |
| 95 | if l < 1<<20 && !e.lowMem { |
| 96 | l = 1 << 20 |
| 97 | } |
| 98 | // Make it at least the requested size. |
| 99 | if l < int32(n) { |
| 100 | l = int32(n) |
| 101 | } |
| 102 | e.hist = make([]byte, 0, l) |
| 103 | } |
| 104 | |
| 105 | // useBlock will replace the block with the provided one, |
| 106 | // but transfer recent offsets from the previous. |
| 107 | func (e *fastBase) UseBlock(enc *blockEnc) { |
| 108 | enc.reset(e.blk) |
| 109 | e.blk = enc |
| 110 | } |
| 111 | |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 112 | func (e *fastBase) matchlen(s, t int32, src []byte) int32 { |
| 113 | if debugAsserts { |
| 114 | if s < 0 { |
| 115 | err := fmt.Sprintf("s (%d) < 0", s) |
| 116 | panic(err) |
| 117 | } |
| 118 | if t < 0 { |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 119 | err := fmt.Sprintf("t (%d) < 0", t) |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 120 | panic(err) |
| 121 | } |
| 122 | if s-t > e.maxMatchOff { |
| 123 | err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff) |
| 124 | panic(err) |
| 125 | } |
| 126 | if len(src)-int(s) > maxCompressedBlockSize { |
| 127 | panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize)) |
| 128 | } |
| 129 | } |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 130 | return int32(matchLen(src[s:], src[t:])) |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 131 | } |
| 132 | |
| 133 | // Reset the encoding table. |
| 134 | func (e *fastBase) resetBase(d *dict, singleBlock bool) { |
| 135 | if e.blk == nil { |
| 136 | e.blk = &blockEnc{lowMem: e.lowMem} |
| 137 | e.blk.init() |
| 138 | } else { |
| 139 | e.blk.reset(nil) |
| 140 | } |
| 141 | e.blk.initNewEncode() |
| 142 | if e.crc == nil { |
| 143 | e.crc = xxhash.New() |
| 144 | } else { |
| 145 | e.crc.Reset() |
| 146 | } |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 147 | e.blk.dictLitEnc = nil |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 148 | if d != nil { |
| 149 | low := e.lowMem |
| 150 | if singleBlock { |
| 151 | e.lowMem = true |
| 152 | } |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 153 | e.ensureHist(d.ContentSize() + maxCompressedBlockSize) |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 154 | e.lowMem = low |
| 155 | } |
| 156 | |
| 157 | // We offset current position so everything will be out of reach. |
| 158 | // If above reset line, history will be purged. |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 159 | if e.cur < e.bufferReset { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 160 | e.cur += e.maxMatchOff + int32(len(e.hist)) |
| 161 | } |
| 162 | e.hist = e.hist[:0] |
| 163 | if d != nil { |
| 164 | // Set offsets (currently not used) |
| 165 | for i, off := range d.offsets { |
| 166 | e.blk.recentOffsets[i] = uint32(off) |
| 167 | e.blk.prevRecentOffsets[i] = e.blk.recentOffsets[i] |
| 168 | } |
| 169 | // Transfer litenc. |
| 170 | e.blk.dictLitEnc = d.litEnc |
| 171 | e.hist = append(e.hist, d.content...) |
| 172 | } |
| 173 | } |