blob: e47af66e7c903a8f771495fd465beef021f45b61 [file] [log] [blame]
khenaidood948f772021-08-11 17:49:24 -04001// Copyright 2019+ Klaus Post. All rights reserved.
2// License information can be found in the LICENSE file.
3// Based on work by Yann Collet, released under BSD License.
4
5package zstd
6
7import (
Abhay Kumara2ae5992025-11-10 14:02:24 +00008 "encoding/binary"
khenaidood948f772021-08-11 17:49:24 -04009 "encoding/hex"
10 "errors"
khenaidood948f772021-08-11 17:49:24 -040011 "io"
khenaidood948f772021-08-11 17:49:24 -040012
13 "github.com/klauspost/compress/zstd/internal/xxhash"
14)
15
16type frameDec struct {
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053017 o decoderOptions
18 crc *xxhash.Digest
khenaidood948f772021-08-11 17:49:24 -040019
20 WindowSize uint64
21
khenaidood948f772021-08-11 17:49:24 -040022 // Frame history passed between blocks
23 history history
24
25 rawInput byteBuffer
26
27 // Byte buffer that can be reused for small input blocks.
28 bBuf byteBuf
29
30 FrameContentSize uint64
khenaidood948f772021-08-11 17:49:24 -040031
Abhay Kumara2ae5992025-11-10 14:02:24 +000032 DictionaryID uint32
khenaidood948f772021-08-11 17:49:24 -040033 HasCheckSum bool
34 SingleSegment bool
khenaidood948f772021-08-11 17:49:24 -040035}
36
37const (
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053038 // MinWindowSize is the minimum Window Size, which is 1 KB.
khenaidood948f772021-08-11 17:49:24 -040039 MinWindowSize = 1 << 10
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053040
41 // MaxWindowSize is the maximum encoder window size
42 // and the default decoder maximum window size.
khenaidood948f772021-08-11 17:49:24 -040043 MaxWindowSize = 1 << 29
44)
45
Abhay Kumara2ae5992025-11-10 14:02:24 +000046const (
47 frameMagic = "\x28\xb5\x2f\xfd"
48 skippableFrameMagic = "\x2a\x4d\x18"
khenaidood948f772021-08-11 17:49:24 -040049)
50
51func newFrameDec(o decoderOptions) *frameDec {
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053052 if o.maxWindowSize > o.maxDecodedSize {
53 o.maxWindowSize = o.maxDecodedSize
khenaidood948f772021-08-11 17:49:24 -040054 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053055 d := frameDec{
56 o: o,
khenaidood948f772021-08-11 17:49:24 -040057 }
58 return &d
59}
60
61// reset will read the frame header and prepare for block decoding.
62// If nothing can be read from the input, io.EOF will be returned.
63// Any other error indicated that the stream contained data, but
64// there was a problem.
65func (d *frameDec) reset(br byteBuffer) error {
66 d.HasCheckSum = false
67 d.WindowSize = 0
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053068 var signature [4]byte
khenaidood948f772021-08-11 17:49:24 -040069 for {
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053070 var err error
71 // Check if we can read more...
72 b, err := br.readSmall(1)
73 switch err {
74 case io.EOF, io.ErrUnexpectedEOF:
khenaidood948f772021-08-11 17:49:24 -040075 return io.EOF
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053076 case nil:
77 signature[0] = b[0]
Abhay Kumara2ae5992025-11-10 14:02:24 +000078 default:
79 return err
khenaidood948f772021-08-11 17:49:24 -040080 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053081 // Read the rest, don't allow io.ErrUnexpectedEOF
82 b, err = br.readSmall(3)
83 switch err {
84 case io.EOF:
85 return io.EOF
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053086 case nil:
87 copy(signature[1:], b)
Abhay Kumara2ae5992025-11-10 14:02:24 +000088 default:
89 return err
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053090 }
91
Abhay Kumara2ae5992025-11-10 14:02:24 +000092 if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 {
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053093 if debugDecoder {
Abhay Kumara2ae5992025-11-10 14:02:24 +000094 println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString([]byte(skippableFrameMagic)))
khenaidood948f772021-08-11 17:49:24 -040095 }
96 // Break if not skippable frame.
97 break
98 }
99 // Read size to skip
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530100 b, err = br.readSmall(4)
101 if err != nil {
102 if debugDecoder {
103 println("Reading Frame Size", err)
104 }
105 return err
khenaidood948f772021-08-11 17:49:24 -0400106 }
107 n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
108 println("Skipping frame with", n, "bytes.")
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530109 err = br.skipN(int64(n))
khenaidood948f772021-08-11 17:49:24 -0400110 if err != nil {
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530111 if debugDecoder {
khenaidood948f772021-08-11 17:49:24 -0400112 println("Reading discarded frame", err)
113 }
114 return err
115 }
116 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000117 if string(signature[:]) != frameMagic {
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530118 if debugDecoder {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000119 println("Got magic numbers: ", signature, "want:", []byte(frameMagic))
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530120 }
khenaidood948f772021-08-11 17:49:24 -0400121 return ErrMagicMismatch
122 }
123
124 // Read Frame_Header_Descriptor
125 fhd, err := br.readByte()
126 if err != nil {
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530127 if debugDecoder {
128 println("Reading Frame_Header_Descriptor", err)
129 }
khenaidood948f772021-08-11 17:49:24 -0400130 return err
131 }
132 d.SingleSegment = fhd&(1<<5) != 0
133
134 if fhd&(1<<3) != 0 {
135 return errors.New("reserved bit set on frame header")
136 }
137
138 // Read Window_Descriptor
139 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
140 d.WindowSize = 0
141 if !d.SingleSegment {
142 wd, err := br.readByte()
143 if err != nil {
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530144 if debugDecoder {
145 println("Reading Window_Descriptor", err)
146 }
khenaidood948f772021-08-11 17:49:24 -0400147 return err
148 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000149 if debugDecoder {
150 printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3)
151 }
khenaidood948f772021-08-11 17:49:24 -0400152 windowLog := 10 + (wd >> 3)
153 windowBase := uint64(1) << windowLog
154 windowAdd := (windowBase / 8) * uint64(wd&0x7)
155 d.WindowSize = windowBase + windowAdd
156 }
157
158 // Read Dictionary_ID
159 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
Abhay Kumara2ae5992025-11-10 14:02:24 +0000160 d.DictionaryID = 0
khenaidood948f772021-08-11 17:49:24 -0400161 if size := fhd & 3; size != 0 {
162 if size == 3 {
163 size = 4
164 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530165
166 b, err := br.readSmall(int(size))
167 if err != nil {
168 println("Reading Dictionary_ID", err)
169 return err
khenaidood948f772021-08-11 17:49:24 -0400170 }
171 var id uint32
Abhay Kumara2ae5992025-11-10 14:02:24 +0000172 switch len(b) {
khenaidood948f772021-08-11 17:49:24 -0400173 case 1:
174 id = uint32(b[0])
175 case 2:
176 id = uint32(b[0]) | (uint32(b[1]) << 8)
177 case 4:
178 id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
179 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530180 if debugDecoder {
khenaidood948f772021-08-11 17:49:24 -0400181 println("Dict size", size, "ID:", id)
182 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000183 d.DictionaryID = id
khenaidood948f772021-08-11 17:49:24 -0400184 }
185
186 // Read Frame_Content_Size
187 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size
188 var fcsSize int
189 v := fhd >> 6
190 switch v {
191 case 0:
192 if d.SingleSegment {
193 fcsSize = 1
194 }
195 default:
196 fcsSize = 1 << v
197 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530198 d.FrameContentSize = fcsUnknown
khenaidood948f772021-08-11 17:49:24 -0400199 if fcsSize > 0 {
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530200 b, err := br.readSmall(fcsSize)
201 if err != nil {
202 println("Reading Frame content", err)
203 return err
khenaidood948f772021-08-11 17:49:24 -0400204 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000205 switch len(b) {
khenaidood948f772021-08-11 17:49:24 -0400206 case 1:
207 d.FrameContentSize = uint64(b[0])
208 case 2:
209 // When FCS_Field_Size is 2, the offset of 256 is added.
210 d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256
211 case 4:
212 d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24)
213 case 8:
214 d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
215 d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
216 d.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
217 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530218 if debugDecoder {
219 println("Read FCS:", d.FrameContentSize)
khenaidood948f772021-08-11 17:49:24 -0400220 }
221 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530222
khenaidood948f772021-08-11 17:49:24 -0400223 // Move this to shared.
224 d.HasCheckSum = fhd&(1<<2) != 0
225 if d.HasCheckSum {
226 if d.crc == nil {
227 d.crc = xxhash.New()
228 }
229 d.crc.Reset()
230 }
231
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530232 if d.WindowSize > d.o.maxWindowSize {
233 if debugDecoder {
234 printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
235 }
236 return ErrWindowSizeExceeded
237 }
238
khenaidood948f772021-08-11 17:49:24 -0400239 if d.WindowSize == 0 && d.SingleSegment {
240 // We may not need window in this case.
241 d.WindowSize = d.FrameContentSize
242 if d.WindowSize < MinWindowSize {
243 d.WindowSize = MinWindowSize
244 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530245 if d.WindowSize > d.o.maxDecodedSize {
246 if debugDecoder {
247 printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
248 }
249 return ErrDecoderSizeExceeded
250 }
khenaidood948f772021-08-11 17:49:24 -0400251 }
252
khenaidood948f772021-08-11 17:49:24 -0400253 // The minimum Window_Size is 1 KB.
254 if d.WindowSize < MinWindowSize {
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530255 if debugDecoder {
256 println("got window size: ", d.WindowSize)
257 }
khenaidood948f772021-08-11 17:49:24 -0400258 return ErrWindowSizeTooSmall
259 }
260 d.history.windowSize = int(d.WindowSize)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530261 if !d.o.lowMem || d.history.windowSize < maxBlockSize {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000262 // Alloc 2x window size if not low-mem, or window size below 2MB.
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530263 d.history.allocFrameBuffer = d.history.windowSize * 2
khenaidood948f772021-08-11 17:49:24 -0400264 } else {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000265 if d.o.lowMem {
266 // Alloc with 1MB extra.
267 d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize/2
268 } else {
269 // Alloc with 2MB extra.
270 d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
271 }
khenaidood948f772021-08-11 17:49:24 -0400272 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530273
274 if debugDecoder {
275 println("Frame: Dict:", d.DictionaryID, "FrameContentSize:", d.FrameContentSize, "singleseg:", d.SingleSegment, "window:", d.WindowSize, "crc:", d.HasCheckSum)
276 }
277
khenaidood948f772021-08-11 17:49:24 -0400278 // history contains input - maybe we do something
279 d.rawInput = br
280 return nil
281}
282
283// next will start decoding the next block from stream.
284func (d *frameDec) next(block *blockDec) error {
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530285 if debugDecoder {
286 println("decoding new block")
khenaidood948f772021-08-11 17:49:24 -0400287 }
288 err := block.reset(d.rawInput, d.WindowSize)
289 if err != nil {
290 println("block error:", err)
291 // Signal the frame decoder we have a problem.
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530292 block.sendErr(err)
khenaidood948f772021-08-11 17:49:24 -0400293 return err
294 }
khenaidood948f772021-08-11 17:49:24 -0400295 return nil
296}
297
Abhay Kumara2ae5992025-11-10 14:02:24 +0000298// checkCRC will check the checksum, assuming the frame has one.
khenaidood948f772021-08-11 17:49:24 -0400299// Will return ErrCRCMismatch if crc check failed, otherwise nil.
300func (d *frameDec) checkCRC() error {
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530301 // We can overwrite upper tmp now
Abhay Kumara2ae5992025-11-10 14:02:24 +0000302 buf, err := d.rawInput.readSmall(4)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530303 if err != nil {
304 println("CRC missing?", err)
305 return err
306 }
307
Abhay Kumara2ae5992025-11-10 14:02:24 +0000308 want := binary.LittleEndian.Uint32(buf[:4])
309 got := uint32(d.crc.Sum64())
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530310
Abhay Kumara2ae5992025-11-10 14:02:24 +0000311 if got != want {
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530312 if debugDecoder {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000313 printf("CRC check failed: got %08x, want %08x\n", got, want)
khenaidood948f772021-08-11 17:49:24 -0400314 }
315 return ErrCRCMismatch
316 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530317 if debugDecoder {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000318 printf("CRC ok %08x\n", got)
khenaidood948f772021-08-11 17:49:24 -0400319 }
320 return nil
321}
322
Abhay Kumara2ae5992025-11-10 14:02:24 +0000323// consumeCRC skips over the checksum, assuming the frame has one.
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530324func (d *frameDec) consumeCRC() error {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000325 _, err := d.rawInput.readSmall(4)
326 if err != nil {
327 println("CRC missing?", err)
khenaidood948f772021-08-11 17:49:24 -0400328 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000329 return err
khenaidood948f772021-08-11 17:49:24 -0400330}
331
Abhay Kumara2ae5992025-11-10 14:02:24 +0000332// runDecoder will run the decoder for the remainder of the frame.
khenaidood948f772021-08-11 17:49:24 -0400333func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
334 saved := d.history.b
335
336 // We use the history for output to avoid copying it.
337 d.history.b = dst
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530338 d.history.ignoreBuffer = len(dst)
khenaidood948f772021-08-11 17:49:24 -0400339 // Store input length, so we only check new data.
340 crcStart := len(dst)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530341 d.history.decoders.maxSyncLen = 0
Abhay Kumara2ae5992025-11-10 14:02:24 +0000342 if d.o.limitToCap {
343 d.history.decoders.maxSyncLen = uint64(cap(dst) - len(dst))
344 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530345 if d.FrameContentSize != fcsUnknown {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000346 if !d.o.limitToCap || d.FrameContentSize+uint64(len(dst)) < d.history.decoders.maxSyncLen {
347 d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
348 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530349 if d.history.decoders.maxSyncLen > d.o.maxDecodedSize {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000350 if debugDecoder {
351 println("maxSyncLen:", d.history.decoders.maxSyncLen, "> maxDecodedSize:", d.o.maxDecodedSize)
352 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530353 return dst, ErrDecoderSizeExceeded
354 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000355 if debugDecoder {
356 println("maxSyncLen:", d.history.decoders.maxSyncLen)
357 }
358 if !d.o.limitToCap && uint64(cap(dst)) < d.history.decoders.maxSyncLen {
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530359 // Alloc for output
360 dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc)
361 copy(dst2, dst)
362 dst = dst2
363 }
364 }
khenaidood948f772021-08-11 17:49:24 -0400365 var err error
366 for {
367 err = dec.reset(d.rawInput, d.WindowSize)
368 if err != nil {
369 break
370 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530371 if debugDecoder {
khenaidood948f772021-08-11 17:49:24 -0400372 println("next block:", dec)
373 }
374 err = dec.decodeBuf(&d.history)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530375 if err != nil {
khenaidood948f772021-08-11 17:49:24 -0400376 break
377 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000378 if uint64(len(d.history.b)-crcStart) > d.o.maxDecodedSize {
379 println("runDecoder: maxDecodedSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.o.maxDecodedSize)
380 err = ErrDecoderSizeExceeded
381 break
382 }
383 if d.o.limitToCap && len(d.history.b) > cap(dst) {
384 println("runDecoder: cap exceeded", uint64(len(d.history.b)), ">", cap(dst))
khenaidood948f772021-08-11 17:49:24 -0400385 err = ErrDecoderSizeExceeded
386 break
387 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530388 if uint64(len(d.history.b)-crcStart) > d.FrameContentSize {
389 println("runDecoder: FrameContentSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.FrameContentSize)
khenaidood948f772021-08-11 17:49:24 -0400390 err = ErrFrameSizeExceeded
391 break
392 }
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530393 if dec.Last {
394 break
395 }
396 if debugDecoder {
397 println("runDecoder: FrameContentSize", uint64(len(d.history.b)-crcStart), "<=", d.FrameContentSize)
398 }
khenaidood948f772021-08-11 17:49:24 -0400399 }
400 dst = d.history.b
401 if err == nil {
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530402 if d.FrameContentSize != fcsUnknown && uint64(len(d.history.b)-crcStart) != d.FrameContentSize {
403 err = ErrFrameSizeMismatch
404 } else if d.HasCheckSum {
405 if d.o.ignoreChecksum {
406 err = d.consumeCRC()
407 } else {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000408 d.crc.Write(dst[crcStart:])
409 err = d.checkCRC()
khenaidood948f772021-08-11 17:49:24 -0400410 }
411 }
412 }
413 d.history.b = saved
414 return dst, err
415}