blob: e47af66e7c903a8f771495fd465beef021f45b61 [file] [log] [blame]
khenaidoo26721882021-08-11 17:42:52 -04001// Copyright 2019+ Klaus Post. All rights reserved.
2// License information can be found in the LICENSE file.
3// Based on work by Yann Collet, released under BSD License.
4
5package zstd
6
7import (
Abhay Kumar40252eb2025-10-13 13:25:53 +00008 "encoding/binary"
khenaidoo26721882021-08-11 17:42:52 -04009 "encoding/hex"
10 "errors"
khenaidoo26721882021-08-11 17:42:52 -040011 "io"
khenaidoo26721882021-08-11 17:42:52 -040012
13 "github.com/klauspost/compress/zstd/internal/xxhash"
14)
15
16type frameDec struct {
Abhay Kumar40252eb2025-10-13 13:25:53 +000017 o decoderOptions
18 crc *xxhash.Digest
khenaidoo26721882021-08-11 17:42:52 -040019
20 WindowSize uint64
21
khenaidoo26721882021-08-11 17:42:52 -040022 // Frame history passed between blocks
23 history history
24
25 rawInput byteBuffer
26
27 // Byte buffer that can be reused for small input blocks.
28 bBuf byteBuf
29
30 FrameContentSize uint64
khenaidoo26721882021-08-11 17:42:52 -040031
Abhay Kumar40252eb2025-10-13 13:25:53 +000032 DictionaryID uint32
khenaidoo26721882021-08-11 17:42:52 -040033 HasCheckSum bool
34 SingleSegment bool
khenaidoo26721882021-08-11 17:42:52 -040035}
36
37const (
Abhay Kumar40252eb2025-10-13 13:25:53 +000038 // MinWindowSize is the minimum Window Size, which is 1 KB.
khenaidoo26721882021-08-11 17:42:52 -040039 MinWindowSize = 1 << 10
Abhay Kumar40252eb2025-10-13 13:25:53 +000040
41 // MaxWindowSize is the maximum encoder window size
42 // and the default decoder maximum window size.
khenaidoo26721882021-08-11 17:42:52 -040043 MaxWindowSize = 1 << 29
44)
45
Abhay Kumar40252eb2025-10-13 13:25:53 +000046const (
47 frameMagic = "\x28\xb5\x2f\xfd"
48 skippableFrameMagic = "\x2a\x4d\x18"
khenaidoo26721882021-08-11 17:42:52 -040049)
50
51func newFrameDec(o decoderOptions) *frameDec {
Abhay Kumar40252eb2025-10-13 13:25:53 +000052 if o.maxWindowSize > o.maxDecodedSize {
53 o.maxWindowSize = o.maxDecodedSize
khenaidoo26721882021-08-11 17:42:52 -040054 }
Abhay Kumar40252eb2025-10-13 13:25:53 +000055 d := frameDec{
56 o: o,
khenaidoo26721882021-08-11 17:42:52 -040057 }
58 return &d
59}
60
61// reset will read the frame header and prepare for block decoding.
62// If nothing can be read from the input, io.EOF will be returned.
63// Any other error indicated that the stream contained data, but
64// there was a problem.
65func (d *frameDec) reset(br byteBuffer) error {
66 d.HasCheckSum = false
67 d.WindowSize = 0
Abhay Kumar40252eb2025-10-13 13:25:53 +000068 var signature [4]byte
khenaidoo26721882021-08-11 17:42:52 -040069 for {
Abhay Kumar40252eb2025-10-13 13:25:53 +000070 var err error
71 // Check if we can read more...
72 b, err := br.readSmall(1)
73 switch err {
74 case io.EOF, io.ErrUnexpectedEOF:
khenaidoo26721882021-08-11 17:42:52 -040075 return io.EOF
Abhay Kumar40252eb2025-10-13 13:25:53 +000076 case nil:
77 signature[0] = b[0]
78 default:
79 return err
khenaidoo26721882021-08-11 17:42:52 -040080 }
Abhay Kumar40252eb2025-10-13 13:25:53 +000081 // Read the rest, don't allow io.ErrUnexpectedEOF
82 b, err = br.readSmall(3)
83 switch err {
84 case io.EOF:
85 return io.EOF
86 case nil:
87 copy(signature[1:], b)
88 default:
89 return err
90 }
91
92 if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 {
93 if debugDecoder {
94 println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString([]byte(skippableFrameMagic)))
khenaidoo26721882021-08-11 17:42:52 -040095 }
96 // Break if not skippable frame.
97 break
98 }
99 // Read size to skip
Abhay Kumar40252eb2025-10-13 13:25:53 +0000100 b, err = br.readSmall(4)
101 if err != nil {
102 if debugDecoder {
103 println("Reading Frame Size", err)
104 }
105 return err
khenaidoo26721882021-08-11 17:42:52 -0400106 }
107 n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
108 println("Skipping frame with", n, "bytes.")
Abhay Kumar40252eb2025-10-13 13:25:53 +0000109 err = br.skipN(int64(n))
khenaidoo26721882021-08-11 17:42:52 -0400110 if err != nil {
Abhay Kumar40252eb2025-10-13 13:25:53 +0000111 if debugDecoder {
khenaidoo26721882021-08-11 17:42:52 -0400112 println("Reading discarded frame", err)
113 }
114 return err
115 }
116 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000117 if string(signature[:]) != frameMagic {
118 if debugDecoder {
119 println("Got magic numbers: ", signature, "want:", []byte(frameMagic))
120 }
khenaidoo26721882021-08-11 17:42:52 -0400121 return ErrMagicMismatch
122 }
123
124 // Read Frame_Header_Descriptor
125 fhd, err := br.readByte()
126 if err != nil {
Abhay Kumar40252eb2025-10-13 13:25:53 +0000127 if debugDecoder {
128 println("Reading Frame_Header_Descriptor", err)
129 }
khenaidoo26721882021-08-11 17:42:52 -0400130 return err
131 }
132 d.SingleSegment = fhd&(1<<5) != 0
133
134 if fhd&(1<<3) != 0 {
135 return errors.New("reserved bit set on frame header")
136 }
137
138 // Read Window_Descriptor
139 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
140 d.WindowSize = 0
141 if !d.SingleSegment {
142 wd, err := br.readByte()
143 if err != nil {
Abhay Kumar40252eb2025-10-13 13:25:53 +0000144 if debugDecoder {
145 println("Reading Window_Descriptor", err)
146 }
khenaidoo26721882021-08-11 17:42:52 -0400147 return err
148 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000149 if debugDecoder {
150 printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3)
151 }
khenaidoo26721882021-08-11 17:42:52 -0400152 windowLog := 10 + (wd >> 3)
153 windowBase := uint64(1) << windowLog
154 windowAdd := (windowBase / 8) * uint64(wd&0x7)
155 d.WindowSize = windowBase + windowAdd
156 }
157
158 // Read Dictionary_ID
159 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
Abhay Kumar40252eb2025-10-13 13:25:53 +0000160 d.DictionaryID = 0
khenaidoo26721882021-08-11 17:42:52 -0400161 if size := fhd & 3; size != 0 {
162 if size == 3 {
163 size = 4
164 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000165
166 b, err := br.readSmall(int(size))
167 if err != nil {
168 println("Reading Dictionary_ID", err)
169 return err
khenaidoo26721882021-08-11 17:42:52 -0400170 }
171 var id uint32
Abhay Kumar40252eb2025-10-13 13:25:53 +0000172 switch len(b) {
khenaidoo26721882021-08-11 17:42:52 -0400173 case 1:
174 id = uint32(b[0])
175 case 2:
176 id = uint32(b[0]) | (uint32(b[1]) << 8)
177 case 4:
178 id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
179 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000180 if debugDecoder {
khenaidoo26721882021-08-11 17:42:52 -0400181 println("Dict size", size, "ID:", id)
182 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000183 d.DictionaryID = id
khenaidoo26721882021-08-11 17:42:52 -0400184 }
185
186 // Read Frame_Content_Size
187 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size
188 var fcsSize int
189 v := fhd >> 6
190 switch v {
191 case 0:
192 if d.SingleSegment {
193 fcsSize = 1
194 }
195 default:
196 fcsSize = 1 << v
197 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000198 d.FrameContentSize = fcsUnknown
khenaidoo26721882021-08-11 17:42:52 -0400199 if fcsSize > 0 {
Abhay Kumar40252eb2025-10-13 13:25:53 +0000200 b, err := br.readSmall(fcsSize)
201 if err != nil {
202 println("Reading Frame content", err)
203 return err
khenaidoo26721882021-08-11 17:42:52 -0400204 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000205 switch len(b) {
khenaidoo26721882021-08-11 17:42:52 -0400206 case 1:
207 d.FrameContentSize = uint64(b[0])
208 case 2:
209 // When FCS_Field_Size is 2, the offset of 256 is added.
210 d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256
211 case 4:
212 d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24)
213 case 8:
214 d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
215 d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
216 d.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
217 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000218 if debugDecoder {
219 println("Read FCS:", d.FrameContentSize)
khenaidoo26721882021-08-11 17:42:52 -0400220 }
221 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000222
khenaidoo26721882021-08-11 17:42:52 -0400223 // Move this to shared.
224 d.HasCheckSum = fhd&(1<<2) != 0
225 if d.HasCheckSum {
226 if d.crc == nil {
227 d.crc = xxhash.New()
228 }
229 d.crc.Reset()
230 }
231
Abhay Kumar40252eb2025-10-13 13:25:53 +0000232 if d.WindowSize > d.o.maxWindowSize {
233 if debugDecoder {
234 printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
235 }
236 return ErrWindowSizeExceeded
237 }
238
khenaidoo26721882021-08-11 17:42:52 -0400239 if d.WindowSize == 0 && d.SingleSegment {
240 // We may not need window in this case.
241 d.WindowSize = d.FrameContentSize
242 if d.WindowSize < MinWindowSize {
243 d.WindowSize = MinWindowSize
244 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000245 if d.WindowSize > d.o.maxDecodedSize {
246 if debugDecoder {
247 printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
248 }
249 return ErrDecoderSizeExceeded
250 }
khenaidoo26721882021-08-11 17:42:52 -0400251 }
252
khenaidoo26721882021-08-11 17:42:52 -0400253 // The minimum Window_Size is 1 KB.
254 if d.WindowSize < MinWindowSize {
Abhay Kumar40252eb2025-10-13 13:25:53 +0000255 if debugDecoder {
256 println("got window size: ", d.WindowSize)
257 }
khenaidoo26721882021-08-11 17:42:52 -0400258 return ErrWindowSizeTooSmall
259 }
260 d.history.windowSize = int(d.WindowSize)
Abhay Kumar40252eb2025-10-13 13:25:53 +0000261 if !d.o.lowMem || d.history.windowSize < maxBlockSize {
262 // Alloc 2x window size if not low-mem, or window size below 2MB.
263 d.history.allocFrameBuffer = d.history.windowSize * 2
khenaidoo26721882021-08-11 17:42:52 -0400264 } else {
Abhay Kumar40252eb2025-10-13 13:25:53 +0000265 if d.o.lowMem {
266 // Alloc with 1MB extra.
267 d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize/2
268 } else {
269 // Alloc with 2MB extra.
270 d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
271 }
khenaidoo26721882021-08-11 17:42:52 -0400272 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000273
274 if debugDecoder {
275 println("Frame: Dict:", d.DictionaryID, "FrameContentSize:", d.FrameContentSize, "singleseg:", d.SingleSegment, "window:", d.WindowSize, "crc:", d.HasCheckSum)
276 }
277
khenaidoo26721882021-08-11 17:42:52 -0400278 // history contains input - maybe we do something
279 d.rawInput = br
280 return nil
281}
282
283// next will start decoding the next block from stream.
284func (d *frameDec) next(block *blockDec) error {
Abhay Kumar40252eb2025-10-13 13:25:53 +0000285 if debugDecoder {
286 println("decoding new block")
khenaidoo26721882021-08-11 17:42:52 -0400287 }
288 err := block.reset(d.rawInput, d.WindowSize)
289 if err != nil {
290 println("block error:", err)
291 // Signal the frame decoder we have a problem.
Abhay Kumar40252eb2025-10-13 13:25:53 +0000292 block.sendErr(err)
khenaidoo26721882021-08-11 17:42:52 -0400293 return err
294 }
khenaidoo26721882021-08-11 17:42:52 -0400295 return nil
296}
297
Abhay Kumar40252eb2025-10-13 13:25:53 +0000298// checkCRC will check the checksum, assuming the frame has one.
khenaidoo26721882021-08-11 17:42:52 -0400299// Will return ErrCRCMismatch if crc check failed, otherwise nil.
300func (d *frameDec) checkCRC() error {
khenaidoo26721882021-08-11 17:42:52 -0400301 // We can overwrite upper tmp now
Abhay Kumar40252eb2025-10-13 13:25:53 +0000302 buf, err := d.rawInput.readSmall(4)
303 if err != nil {
304 println("CRC missing?", err)
305 return err
khenaidoo26721882021-08-11 17:42:52 -0400306 }
307
Abhay Kumar40252eb2025-10-13 13:25:53 +0000308 want := binary.LittleEndian.Uint32(buf[:4])
309 got := uint32(d.crc.Sum64())
310
311 if got != want {
312 if debugDecoder {
313 printf("CRC check failed: got %08x, want %08x\n", got, want)
khenaidoo26721882021-08-11 17:42:52 -0400314 }
315 return ErrCRCMismatch
316 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000317 if debugDecoder {
318 printf("CRC ok %08x\n", got)
khenaidoo26721882021-08-11 17:42:52 -0400319 }
320 return nil
321}
322
Abhay Kumar40252eb2025-10-13 13:25:53 +0000323// consumeCRC skips over the checksum, assuming the frame has one.
324func (d *frameDec) consumeCRC() error {
325 _, err := d.rawInput.readSmall(4)
326 if err != nil {
327 println("CRC missing?", err)
khenaidoo26721882021-08-11 17:42:52 -0400328 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000329 return err
khenaidoo26721882021-08-11 17:42:52 -0400330}
331
Abhay Kumar40252eb2025-10-13 13:25:53 +0000332// runDecoder will run the decoder for the remainder of the frame.
khenaidoo26721882021-08-11 17:42:52 -0400333func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
334 saved := d.history.b
335
336 // We use the history for output to avoid copying it.
337 d.history.b = dst
Abhay Kumar40252eb2025-10-13 13:25:53 +0000338 d.history.ignoreBuffer = len(dst)
khenaidoo26721882021-08-11 17:42:52 -0400339 // Store input length, so we only check new data.
340 crcStart := len(dst)
Abhay Kumar40252eb2025-10-13 13:25:53 +0000341 d.history.decoders.maxSyncLen = 0
342 if d.o.limitToCap {
343 d.history.decoders.maxSyncLen = uint64(cap(dst) - len(dst))
344 }
345 if d.FrameContentSize != fcsUnknown {
346 if !d.o.limitToCap || d.FrameContentSize+uint64(len(dst)) < d.history.decoders.maxSyncLen {
347 d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
348 }
349 if d.history.decoders.maxSyncLen > d.o.maxDecodedSize {
350 if debugDecoder {
351 println("maxSyncLen:", d.history.decoders.maxSyncLen, "> maxDecodedSize:", d.o.maxDecodedSize)
352 }
353 return dst, ErrDecoderSizeExceeded
354 }
355 if debugDecoder {
356 println("maxSyncLen:", d.history.decoders.maxSyncLen)
357 }
358 if !d.o.limitToCap && uint64(cap(dst)) < d.history.decoders.maxSyncLen {
359 // Alloc for output
360 dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc)
361 copy(dst2, dst)
362 dst = dst2
363 }
364 }
khenaidoo26721882021-08-11 17:42:52 -0400365 var err error
366 for {
367 err = dec.reset(d.rawInput, d.WindowSize)
368 if err != nil {
369 break
370 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000371 if debugDecoder {
khenaidoo26721882021-08-11 17:42:52 -0400372 println("next block:", dec)
373 }
374 err = dec.decodeBuf(&d.history)
Abhay Kumar40252eb2025-10-13 13:25:53 +0000375 if err != nil {
khenaidoo26721882021-08-11 17:42:52 -0400376 break
377 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000378 if uint64(len(d.history.b)-crcStart) > d.o.maxDecodedSize {
379 println("runDecoder: maxDecodedSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.o.maxDecodedSize)
khenaidoo26721882021-08-11 17:42:52 -0400380 err = ErrDecoderSizeExceeded
381 break
382 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000383 if d.o.limitToCap && len(d.history.b) > cap(dst) {
384 println("runDecoder: cap exceeded", uint64(len(d.history.b)), ">", cap(dst))
385 err = ErrDecoderSizeExceeded
386 break
387 }
388 if uint64(len(d.history.b)-crcStart) > d.FrameContentSize {
389 println("runDecoder: FrameContentSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.FrameContentSize)
khenaidoo26721882021-08-11 17:42:52 -0400390 err = ErrFrameSizeExceeded
391 break
392 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000393 if dec.Last {
394 break
395 }
396 if debugDecoder {
397 println("runDecoder: FrameContentSize", uint64(len(d.history.b)-crcStart), "<=", d.FrameContentSize)
398 }
khenaidoo26721882021-08-11 17:42:52 -0400399 }
400 dst = d.history.b
401 if err == nil {
Abhay Kumar40252eb2025-10-13 13:25:53 +0000402 if d.FrameContentSize != fcsUnknown && uint64(len(d.history.b)-crcStart) != d.FrameContentSize {
403 err = ErrFrameSizeMismatch
404 } else if d.HasCheckSum {
405 if d.o.ignoreChecksum {
406 err = d.consumeCRC()
407 } else {
408 d.crc.Write(dst[crcStart:])
409 err = d.checkCRC()
khenaidoo26721882021-08-11 17:42:52 -0400410 }
411 }
412 }
413 d.history.b = saved
414 return dst, err
415}