blob: 0dd742fd2a6d98156bd4dd32fb261f46e0cc3248 [file] [log] [blame]
khenaidoo106c61a2021-08-11 18:05:46 -04001// Copyright 2019+ Klaus Post. All rights reserved.
2// License information can be found in the LICENSE file.
3// Based on work by Yann Collet, released under BSD License.
4
5package zstd
6
7import (
8 "errors"
9 "fmt"
Abhay Kumara61c5222025-11-10 07:32:50 +000010 "hash/crc32"
khenaidoo106c61a2021-08-11 18:05:46 -040011 "io"
12 "sync"
13
14 "github.com/klauspost/compress/huff0"
15 "github.com/klauspost/compress/zstd/internal/xxhash"
16)
17
18type blockType uint8
19
20//go:generate stringer -type=blockType,literalsBlockType,seqCompMode,tableIndex
21
22const (
23 blockTypeRaw blockType = iota
24 blockTypeRLE
25 blockTypeCompressed
26 blockTypeReserved
27)
28
29type literalsBlockType uint8
30
31const (
32 literalsBlockRaw literalsBlockType = iota
33 literalsBlockRLE
34 literalsBlockCompressed
35 literalsBlockTreeless
36)
37
38const (
39 // maxCompressedBlockSize is the biggest allowed compressed block size (128KB)
40 maxCompressedBlockSize = 128 << 10
41
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +053042 compressedBlockOverAlloc = 16
43 maxCompressedBlockSizeAlloc = 128<<10 + compressedBlockOverAlloc
44
khenaidoo106c61a2021-08-11 18:05:46 -040045 // Maximum possible block size (all Raw+Uncompressed).
46 maxBlockSize = (1 << 21) - 1
47
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +053048 maxMatchLen = 131074
49 maxSequences = 0x7f00 + 0xffff
khenaidoo106c61a2021-08-11 18:05:46 -040050
51 // We support slightly less than the reference decoder to be able to
52 // use ints on 32 bit archs.
53 maxOffsetBits = 30
54)
55
56var (
57 huffDecoderPool = sync.Pool{New: func() interface{} {
58 return &huff0.Scratch{}
59 }}
60
61 fseDecoderPool = sync.Pool{New: func() interface{} {
62 return &fseDecoder{}
63 }}
64)
65
66type blockDec struct {
67 // Raw source data of the block.
68 data []byte
69 dataStorage []byte
70
71 // Destination of the decoded data.
72 dst []byte
73
74 // Buffer for literals data.
75 literalBuf []byte
76
77 // Window size of the block.
78 WindowSize uint64
79
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +053080 err error
81
Abhay Kumara61c5222025-11-10 07:32:50 +000082 // Check against this crc, if hasCRC is true.
83 checkCRC uint32
84 hasCRC bool
khenaidoo106c61a2021-08-11 18:05:46 -040085
86 // Frame to use for singlethreaded decoding.
87 // Should not be used by the decoder itself since parent may be another frame.
88 localFrame *frameDec
89
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +053090 sequence []seqVals
91
92 async struct {
93 newHist *history
94 literals []byte
95 seqData []byte
96 seqSize int // Size of uncompressed sequences
97 fcs uint64
98 }
99
khenaidoo106c61a2021-08-11 18:05:46 -0400100 // Block is RLE, this is the size.
101 RLESize uint32
khenaidoo106c61a2021-08-11 18:05:46 -0400102
103 Type blockType
104
105 // Is this the last block of a frame?
106 Last bool
107
108 // Use less memory
109 lowMem bool
110}
111
112func (b *blockDec) String() string {
113 if b == nil {
114 return "<nil>"
115 }
116 return fmt.Sprintf("Steam Size: %d, Type: %v, Last: %t, Window: %d", len(b.data), b.Type, b.Last, b.WindowSize)
117}
118
119func newBlockDec(lowMem bool) *blockDec {
120 b := blockDec{
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530121 lowMem: lowMem,
khenaidoo106c61a2021-08-11 18:05:46 -0400122 }
khenaidoo106c61a2021-08-11 18:05:46 -0400123 return &b
124}
125
126// reset will reset the block.
127// Input must be a start of a block and will be at the end of the block when returned.
128func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
129 b.WindowSize = windowSize
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530130 tmp, err := br.readSmall(3)
131 if err != nil {
132 println("Reading block header:", err)
133 return err
khenaidoo106c61a2021-08-11 18:05:46 -0400134 }
135 bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
136 b.Last = bh&1 != 0
137 b.Type = blockType((bh >> 1) & 3)
138 // find size.
139 cSize := int(bh >> 3)
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530140 maxSize := maxCompressedBlockSizeAlloc
khenaidoo106c61a2021-08-11 18:05:46 -0400141 switch b.Type {
142 case blockTypeReserved:
143 return ErrReservedBlockType
144 case blockTypeRLE:
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530145 if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
146 if debugDecoder {
147 printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b)
148 }
149 return ErrWindowSizeExceeded
150 }
khenaidoo106c61a2021-08-11 18:05:46 -0400151 b.RLESize = uint32(cSize)
152 if b.lowMem {
153 maxSize = cSize
154 }
155 cSize = 1
156 case blockTypeCompressed:
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530157 if debugDecoder {
khenaidoo106c61a2021-08-11 18:05:46 -0400158 println("Data size on stream:", cSize)
159 }
160 b.RLESize = 0
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530161 maxSize = maxCompressedBlockSizeAlloc
khenaidoo106c61a2021-08-11 18:05:46 -0400162 if windowSize < maxCompressedBlockSize && b.lowMem {
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530163 maxSize = int(windowSize) + compressedBlockOverAlloc
khenaidoo106c61a2021-08-11 18:05:46 -0400164 }
165 if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530166 if debugDecoder {
khenaidoo106c61a2021-08-11 18:05:46 -0400167 printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b)
168 }
169 return ErrCompressedSizeTooBig
170 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530171 // Empty compressed blocks must at least be 2 bytes
172 // for Literals_Block_Type and one for Sequences_Section_Header.
173 if cSize < 2 {
174 return ErrBlockTooSmall
175 }
khenaidoo106c61a2021-08-11 18:05:46 -0400176 case blockTypeRaw:
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530177 if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
178 if debugDecoder {
179 printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b)
180 }
181 return ErrWindowSizeExceeded
182 }
183
khenaidoo106c61a2021-08-11 18:05:46 -0400184 b.RLESize = 0
185 // We do not need a destination for raw blocks.
186 maxSize = -1
187 default:
188 panic("Invalid block type")
189 }
190
191 // Read block data.
Abhay Kumara61c5222025-11-10 07:32:50 +0000192 if _, ok := br.(*byteBuf); !ok && cap(b.dataStorage) < cSize {
193 // byteBuf doesn't need a destination buffer.
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530194 if b.lowMem || cSize > maxCompressedBlockSize {
195 b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc)
khenaidoo106c61a2021-08-11 18:05:46 -0400196 } else {
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530197 b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc)
khenaidoo106c61a2021-08-11 18:05:46 -0400198 }
199 }
khenaidoo106c61a2021-08-11 18:05:46 -0400200 b.data, err = br.readBig(cSize, b.dataStorage)
201 if err != nil {
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530202 if debugDecoder {
khenaidoo106c61a2021-08-11 18:05:46 -0400203 println("Reading block:", err, "(", cSize, ")", len(b.data))
204 printf("%T", br)
205 }
206 return err
207 }
Abhay Kumara61c5222025-11-10 07:32:50 +0000208 if cap(b.dst) <= maxSize {
209 b.dst = make([]byte, 0, maxSize+1)
210 }
khenaidoo106c61a2021-08-11 18:05:46 -0400211 return nil
212}
213
214// sendEOF will make the decoder send EOF on this frame.
215func (b *blockDec) sendErr(err error) {
216 b.Last = true
217 b.Type = blockTypeReserved
218 b.err = err
khenaidoo106c61a2021-08-11 18:05:46 -0400219}
220
221// Close will release resources.
222// Closed blockDec cannot be reset.
223func (b *blockDec) Close() {
khenaidoo106c61a2021-08-11 18:05:46 -0400224}
225
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530226// decodeBuf
khenaidoo106c61a2021-08-11 18:05:46 -0400227func (b *blockDec) decodeBuf(hist *history) error {
228 switch b.Type {
229 case blockTypeRLE:
230 if cap(b.dst) < int(b.RLESize) {
231 if b.lowMem {
232 b.dst = make([]byte, b.RLESize)
233 } else {
Abhay Kumara61c5222025-11-10 07:32:50 +0000234 b.dst = make([]byte, maxCompressedBlockSize)
khenaidoo106c61a2021-08-11 18:05:46 -0400235 }
236 }
237 b.dst = b.dst[:b.RLESize]
238 v := b.data[0]
239 for i := range b.dst {
240 b.dst[i] = v
241 }
242 hist.appendKeep(b.dst)
243 return nil
244 case blockTypeRaw:
245 hist.appendKeep(b.data)
246 return nil
247 case blockTypeCompressed:
248 saved := b.dst
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530249 // Append directly to history
250 if hist.ignoreBuffer == 0 {
251 b.dst = hist.b
252 hist.b = nil
253 } else {
254 b.dst = b.dst[:0]
255 }
khenaidoo106c61a2021-08-11 18:05:46 -0400256 err := b.decodeCompressed(hist)
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530257 if debugDecoder {
khenaidoo106c61a2021-08-11 18:05:46 -0400258 println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err)
259 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530260 if hist.ignoreBuffer == 0 {
261 hist.b = b.dst
262 b.dst = saved
263 } else {
264 hist.appendKeep(b.dst)
265 }
khenaidoo106c61a2021-08-11 18:05:46 -0400266 return err
267 case blockTypeReserved:
268 // Used for returning errors.
269 return b.err
270 default:
271 panic("Invalid block type")
272 }
273}
274
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530275func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err error) {
khenaidoo106c61a2021-08-11 18:05:46 -0400276 // There must be at least one byte for Literals_Block_Type and one for Sequences_Section_Header
277 if len(in) < 2 {
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530278 return in, ErrBlockTooSmall
khenaidoo106c61a2021-08-11 18:05:46 -0400279 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530280
khenaidoo106c61a2021-08-11 18:05:46 -0400281 litType := literalsBlockType(in[0] & 3)
282 var litRegenSize int
283 var litCompSize int
284 sizeFormat := (in[0] >> 2) & 3
285 var fourStreams bool
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530286 var literals []byte
khenaidoo106c61a2021-08-11 18:05:46 -0400287 switch litType {
288 case literalsBlockRaw, literalsBlockRLE:
289 switch sizeFormat {
290 case 0, 2:
291 // Regenerated_Size uses 5 bits (0-31). Literals_Section_Header uses 1 byte.
292 litRegenSize = int(in[0] >> 3)
293 in = in[1:]
294 case 1:
295 // Regenerated_Size uses 12 bits (0-4095). Literals_Section_Header uses 2 bytes.
296 litRegenSize = int(in[0]>>4) + (int(in[1]) << 4)
297 in = in[2:]
298 case 3:
299 // Regenerated_Size uses 20 bits (0-1048575). Literals_Section_Header uses 3 bytes.
300 if len(in) < 3 {
301 println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530302 return in, ErrBlockTooSmall
khenaidoo106c61a2021-08-11 18:05:46 -0400303 }
304 litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) + (int(in[2]) << 12)
305 in = in[3:]
306 }
307 case literalsBlockCompressed, literalsBlockTreeless:
308 switch sizeFormat {
309 case 0, 1:
310 // Both Regenerated_Size and Compressed_Size use 10 bits (0-1023).
311 if len(in) < 3 {
312 println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530313 return in, ErrBlockTooSmall
khenaidoo106c61a2021-08-11 18:05:46 -0400314 }
315 n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12)
316 litRegenSize = int(n & 1023)
317 litCompSize = int(n >> 10)
318 fourStreams = sizeFormat == 1
319 in = in[3:]
320 case 2:
321 fourStreams = true
322 if len(in) < 4 {
323 println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530324 return in, ErrBlockTooSmall
khenaidoo106c61a2021-08-11 18:05:46 -0400325 }
326 n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20)
327 litRegenSize = int(n & 16383)
328 litCompSize = int(n >> 14)
329 in = in[4:]
330 case 3:
331 fourStreams = true
332 if len(in) < 5 {
333 println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530334 return in, ErrBlockTooSmall
khenaidoo106c61a2021-08-11 18:05:46 -0400335 }
336 n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) + (uint64(in[4]) << 28)
337 litRegenSize = int(n & 262143)
338 litCompSize = int(n >> 18)
339 in = in[5:]
340 }
341 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530342 if debugDecoder {
khenaidoo106c61a2021-08-11 18:05:46 -0400343 println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams)
344 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530345 if litRegenSize > int(b.WindowSize) || litRegenSize > maxCompressedBlockSize {
346 return in, ErrWindowSizeExceeded
347 }
348
khenaidoo106c61a2021-08-11 18:05:46 -0400349 switch litType {
350 case literalsBlockRaw:
351 if len(in) < litRegenSize {
352 println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litRegenSize)
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530353 return in, ErrBlockTooSmall
khenaidoo106c61a2021-08-11 18:05:46 -0400354 }
355 literals = in[:litRegenSize]
356 in = in[litRegenSize:]
357 //printf("Found %d uncompressed literals\n", litRegenSize)
358 case literalsBlockRLE:
359 if len(in) < 1 {
360 println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", 1)
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530361 return in, ErrBlockTooSmall
khenaidoo106c61a2021-08-11 18:05:46 -0400362 }
363 if cap(b.literalBuf) < litRegenSize {
364 if b.lowMem {
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530365 b.literalBuf = make([]byte, litRegenSize, litRegenSize+compressedBlockOverAlloc)
khenaidoo106c61a2021-08-11 18:05:46 -0400366 } else {
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530367 b.literalBuf = make([]byte, litRegenSize, maxCompressedBlockSize+compressedBlockOverAlloc)
khenaidoo106c61a2021-08-11 18:05:46 -0400368 }
369 }
370 literals = b.literalBuf[:litRegenSize]
371 v := in[0]
372 for i := range literals {
373 literals[i] = v
374 }
375 in = in[1:]
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530376 if debugDecoder {
khenaidoo106c61a2021-08-11 18:05:46 -0400377 printf("Found %d RLE compressed literals\n", litRegenSize)
378 }
379 case literalsBlockTreeless:
380 if len(in) < litCompSize {
381 println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530382 return in, ErrBlockTooSmall
khenaidoo106c61a2021-08-11 18:05:46 -0400383 }
384 // Store compressed literals, so we defer decoding until we get history.
385 literals = in[:litCompSize]
386 in = in[litCompSize:]
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530387 if debugDecoder {
khenaidoo106c61a2021-08-11 18:05:46 -0400388 printf("Found %d compressed literals\n", litCompSize)
389 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530390 huff := hist.huffTree
391 if huff == nil {
392 return in, errors.New("literal block was treeless, but no history was defined")
khenaidoo106c61a2021-08-11 18:05:46 -0400393 }
khenaidoo106c61a2021-08-11 18:05:46 -0400394 // Ensure we have space to store it.
395 if cap(b.literalBuf) < litRegenSize {
396 if b.lowMem {
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530397 b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
khenaidoo106c61a2021-08-11 18:05:46 -0400398 } else {
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530399 b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
khenaidoo106c61a2021-08-11 18:05:46 -0400400 }
401 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530402 var err error
403 // Use our out buffer.
404 huff.MaxDecodedSize = litRegenSize
405 if fourStreams {
406 literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
407 } else {
408 literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
khenaidoo106c61a2021-08-11 18:05:46 -0400409 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530410 // Make sure we don't leak our literals buffer
411 if err != nil {
412 println("decompressing literals:", err)
413 return in, err
414 }
415 if len(literals) != litRegenSize {
416 return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
417 }
418
419 case literalsBlockCompressed:
420 if len(in) < litCompSize {
421 println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
422 return in, ErrBlockTooSmall
423 }
424 literals = in[:litCompSize]
425 in = in[litCompSize:]
426 // Ensure we have space to store it.
427 if cap(b.literalBuf) < litRegenSize {
428 if b.lowMem {
429 b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
430 } else {
431 b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
432 }
433 }
434 huff := hist.huffTree
435 if huff == nil || (hist.dict != nil && huff == hist.dict.litEnc) {
436 huff = huffDecoderPool.Get().(*huff0.Scratch)
437 if huff == nil {
438 huff = &huff0.Scratch{}
439 }
440 }
441 var err error
Abhay Kumara61c5222025-11-10 07:32:50 +0000442 if debugDecoder {
443 println("huff table input:", len(literals), "CRC:", crc32.ChecksumIEEE(literals))
444 }
khenaidoo106c61a2021-08-11 18:05:46 -0400445 huff, literals, err = huff0.ReadTable(literals, huff)
446 if err != nil {
447 println("reading huffman table:", err)
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530448 return in, err
khenaidoo106c61a2021-08-11 18:05:46 -0400449 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530450 hist.huffTree = huff
451 huff.MaxDecodedSize = litRegenSize
khenaidoo106c61a2021-08-11 18:05:46 -0400452 // Use our out buffer.
453 if fourStreams {
454 literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
455 } else {
456 literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
457 }
458 if err != nil {
459 println("decoding compressed literals:", err)
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530460 return in, err
khenaidoo106c61a2021-08-11 18:05:46 -0400461 }
462 // Make sure we don't leak our literals buffer
463 if len(literals) != litRegenSize {
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530464 return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
khenaidoo106c61a2021-08-11 18:05:46 -0400465 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530466 // Re-cap to get extra size.
467 literals = b.literalBuf[:len(literals)]
468 if debugDecoder {
khenaidoo106c61a2021-08-11 18:05:46 -0400469 printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
470 }
471 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530472 hist.decoders.literals = literals
473 return in, nil
474}
khenaidoo106c61a2021-08-11 18:05:46 -0400475
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530476// decodeCompressed will start decompressing a block.
477func (b *blockDec) decodeCompressed(hist *history) error {
478 in := b.data
479 in, err := b.decodeLiterals(in, hist)
480 if err != nil {
481 return err
482 }
483 err = b.prepareSequences(in, hist)
484 if err != nil {
485 return err
486 }
487 if hist.decoders.nSeqs == 0 {
488 b.dst = append(b.dst, hist.decoders.literals...)
489 return nil
490 }
491 before := len(hist.decoders.out)
492 err = hist.decoders.decodeSync(hist.b[hist.ignoreBuffer:])
493 if err != nil {
494 return err
495 }
496 if hist.decoders.maxSyncLen > 0 {
497 hist.decoders.maxSyncLen += uint64(before)
498 hist.decoders.maxSyncLen -= uint64(len(hist.decoders.out))
499 }
500 b.dst = hist.decoders.out
501 hist.recentOffsets = hist.decoders.prevOffset
502 return nil
503}
504
505func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
506 if debugDecoder {
507 printf("prepareSequences: %d byte(s) input\n", len(in))
508 }
khenaidoo106c61a2021-08-11 18:05:46 -0400509 // Decode Sequences
510 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section
511 if len(in) < 1 {
512 return ErrBlockTooSmall
513 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530514 var nSeqs int
khenaidoo106c61a2021-08-11 18:05:46 -0400515 seqHeader := in[0]
khenaidoo106c61a2021-08-11 18:05:46 -0400516 switch {
khenaidoo106c61a2021-08-11 18:05:46 -0400517 case seqHeader < 128:
518 nSeqs = int(seqHeader)
519 in = in[1:]
520 case seqHeader < 255:
521 if len(in) < 2 {
522 return ErrBlockTooSmall
523 }
524 nSeqs = int(seqHeader-128)<<8 | int(in[1])
525 in = in[2:]
526 case seqHeader == 255:
527 if len(in) < 3 {
528 return ErrBlockTooSmall
529 }
530 nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8)
531 in = in[3:]
532 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530533 if nSeqs == 0 && len(in) != 0 {
534 // When no sequences, there should not be any more data...
535 if debugDecoder {
536 printf("prepareSequences: 0 sequences, but %d byte(s) left on stream\n", len(in))
khenaidoo106c61a2021-08-11 18:05:46 -0400537 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530538 return ErrUnexpectedBlockSize
khenaidoo106c61a2021-08-11 18:05:46 -0400539 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530540
541 var seqs = &hist.decoders
542 seqs.nSeqs = nSeqs
khenaidoo106c61a2021-08-11 18:05:46 -0400543 if nSeqs > 0 {
544 if len(in) < 1 {
545 return ErrBlockTooSmall
546 }
547 br := byteReader{b: in, off: 0}
548 compMode := br.Uint8()
549 br.advance(1)
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530550 if debugDecoder {
khenaidoo106c61a2021-08-11 18:05:46 -0400551 printf("Compression modes: 0b%b", compMode)
552 }
Abhay Kumara61c5222025-11-10 07:32:50 +0000553 if compMode&3 != 0 {
554 return errors.New("corrupt block: reserved bits not zero")
555 }
khenaidoo106c61a2021-08-11 18:05:46 -0400556 for i := uint(0); i < 3; i++ {
557 mode := seqCompMode((compMode >> (6 - i*2)) & 3)
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530558 if debugDecoder {
khenaidoo106c61a2021-08-11 18:05:46 -0400559 println("Table", tableIndex(i), "is", mode)
560 }
561 var seq *sequenceDec
562 switch tableIndex(i) {
563 case tableLiteralLengths:
564 seq = &seqs.litLengths
565 case tableOffsets:
566 seq = &seqs.offsets
567 case tableMatchLengths:
568 seq = &seqs.matchLengths
569 default:
570 panic("unknown table")
571 }
572 switch mode {
573 case compModePredefined:
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530574 if seq.fse != nil && !seq.fse.preDefined {
575 fseDecoderPool.Put(seq.fse)
576 }
khenaidoo106c61a2021-08-11 18:05:46 -0400577 seq.fse = &fsePredef[i]
578 case compModeRLE:
579 if br.remain() < 1 {
580 return ErrBlockTooSmall
581 }
582 v := br.Uint8()
583 br.advance(1)
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530584 if seq.fse == nil || seq.fse.preDefined {
585 seq.fse = fseDecoderPool.Get().(*fseDecoder)
586 }
khenaidoo106c61a2021-08-11 18:05:46 -0400587 symb, err := decSymbolValue(v, symbolTableX[i])
588 if err != nil {
589 printf("RLE Transform table (%v) error: %v", tableIndex(i), err)
590 return err
591 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530592 seq.fse.setRLE(symb)
593 if debugDecoder {
Abhay Kumara61c5222025-11-10 07:32:50 +0000594 printf("RLE set to 0x%x, code: %v", symb, v)
khenaidoo106c61a2021-08-11 18:05:46 -0400595 }
596 case compModeFSE:
Abhay Kumara61c5222025-11-10 07:32:50 +0000597 if debugDecoder {
598 println("Reading table for", tableIndex(i))
599 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530600 if seq.fse == nil || seq.fse.preDefined {
601 seq.fse = fseDecoderPool.Get().(*fseDecoder)
602 }
603 err := seq.fse.readNCount(&br, uint16(maxTableSymbol[i]))
khenaidoo106c61a2021-08-11 18:05:46 -0400604 if err != nil {
605 println("Read table error:", err)
606 return err
607 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530608 err = seq.fse.transform(symbolTableX[i])
khenaidoo106c61a2021-08-11 18:05:46 -0400609 if err != nil {
610 println("Transform table error:", err)
611 return err
612 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530613 if debugDecoder {
614 println("Read table ok", "symbolLen:", seq.fse.symbolLen)
khenaidoo106c61a2021-08-11 18:05:46 -0400615 }
khenaidoo106c61a2021-08-11 18:05:46 -0400616 case compModeRepeat:
617 seq.repeat = true
618 }
619 if br.overread() {
620 return io.ErrUnexpectedEOF
621 }
622 }
623 in = br.unread()
624 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530625 if debugDecoder {
626 println("Literals:", len(seqs.literals), "hash:", xxhash.Sum64(seqs.literals), "and", seqs.nSeqs, "sequences.")
khenaidoo106c61a2021-08-11 18:05:46 -0400627 }
628
629 if nSeqs == 0 {
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530630 if len(b.sequence) > 0 {
631 b.sequence = b.sequence[:0]
khenaidoo106c61a2021-08-11 18:05:46 -0400632 }
633 return nil
634 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530635 br := seqs.br
636 if br == nil {
637 br = &bitReader{}
khenaidoo106c61a2021-08-11 18:05:46 -0400638 }
khenaidoo106c61a2021-08-11 18:05:46 -0400639 if err := br.init(in); err != nil {
640 return err
641 }
642
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530643 if err := seqs.initialize(br, hist, b.dst); err != nil {
644 println("initializing sequences:", err)
645 return err
646 }
khenaidoo106c61a2021-08-11 18:05:46 -0400647
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530648 return nil
649}
650
651func (b *blockDec) decodeSequences(hist *history) error {
652 if cap(b.sequence) < hist.decoders.nSeqs {
653 if b.lowMem {
654 b.sequence = make([]seqVals, 0, hist.decoders.nSeqs)
655 } else {
656 b.sequence = make([]seqVals, 0, 0x7F00+0xffff)
657 }
658 }
659 b.sequence = b.sequence[:hist.decoders.nSeqs]
660 if hist.decoders.nSeqs == 0 {
661 hist.decoders.seqSize = len(hist.decoders.literals)
662 return nil
663 }
664 hist.decoders.windowSize = hist.windowSize
665 hist.decoders.prevOffset = hist.recentOffsets
666
667 err := hist.decoders.decode(b.sequence)
668 hist.recentOffsets = hist.decoders.prevOffset
669 return err
670}
671
672func (b *blockDec) executeSequences(hist *history) error {
khenaidoo106c61a2021-08-11 18:05:46 -0400673 hbytes := hist.b
674 if len(hbytes) > hist.windowSize {
675 hbytes = hbytes[len(hbytes)-hist.windowSize:]
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530676 // We do not need history anymore.
khenaidoo106c61a2021-08-11 18:05:46 -0400677 if hist.dict != nil {
678 hist.dict.content = nil
679 }
680 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530681 hist.decoders.windowSize = hist.windowSize
682 hist.decoders.out = b.dst[:0]
683 err := hist.decoders.execute(b.sequence, hbytes)
khenaidoo106c61a2021-08-11 18:05:46 -0400684 if err != nil {
685 return err
686 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530687 return b.updateHistory(hist)
688}
khenaidoo106c61a2021-08-11 18:05:46 -0400689
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530690func (b *blockDec) updateHistory(hist *history) error {
khenaidoo106c61a2021-08-11 18:05:46 -0400691 if len(b.data) > maxCompressedBlockSize {
692 return fmt.Errorf("compressed block size too large (%d)", len(b.data))
693 }
694 // Set output and release references.
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530695 b.dst = hist.decoders.out
696 hist.recentOffsets = hist.decoders.prevOffset
khenaidoo106c61a2021-08-11 18:05:46 -0400697
khenaidoo106c61a2021-08-11 18:05:46 -0400698 if b.Last {
699 // if last block we don't care about history.
700 println("Last block, no history returned")
701 hist.b = hist.b[:0]
702 return nil
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530703 } else {
704 hist.append(b.dst)
705 if debugDecoder {
706 println("Finished block with ", len(b.sequence), "sequences. Added", len(b.dst), "to history, now length", len(hist.b))
707 }
khenaidoo106c61a2021-08-11 18:05:46 -0400708 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530709 hist.decoders.out, hist.decoders.literals = nil, nil
khenaidoo106c61a2021-08-11 18:05:46 -0400710
711 return nil
712}