blob: 6a5a2988b6f365ca40ff85485f8114be1f34dce8 [file] [log] [blame]
// Copyright 2020+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
3
4package zstd
5
6import (
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +05307 "encoding/binary"
khenaidoo106c61a2021-08-11 18:05:46 -04008 "errors"
9 "io"
10)
11
// HeaderMaxSize is the maximum number of bytes Header.Decode needs to read
// a frame header and the first block header:
//
//   - 4 bytes of magic number,
//   - up to 14 bytes of frame header (1 byte descriptor, 1 byte window
//     descriptor, 4 bytes dictionary ID, 8 bytes frame content size),
//   - 3 bytes of block header.
//
// If less is sent to Header.Decode it *may* still contain enough information.
const HeaderMaxSize = 4 + 14 + 3
15
// Header contains information about the first frame and block within that.
type Header struct {
	// SingleSegment specifies whether the data is to be decompressed into a
	// single contiguous memory segment.
	// It implies that WindowSize is invalid and that FrameContentSize is valid.
	SingleSegment bool

	// WindowSize is the window of data to keep while decoding.
	// Will only be set if SingleSegment is false.
	WindowSize uint64

	// DictionaryID is the ID of the dictionary the frame refers to.
	// If 0, no dictionary.
	DictionaryID uint32

	// HasFCS specifies whether FrameContentSize has a valid value.
	HasFCS bool

	// FrameContentSize is the expected uncompressed size of the entire frame.
	// Only meaningful when HasFCS is true.
	FrameContentSize uint64

	// Skippable will be true if the frame is meant to be skipped.
	// This implies that FirstBlock.OK is false.
	Skippable bool

	// SkippableID is the user-specific ID for the skippable frame.
	// Valid values are 0 through 15, inclusive.
	SkippableID int

	// SkippableSize is the length of the user data to skip following
	// the header.
	SkippableSize uint32

	// HeaderSize is the raw size of the frame header.
	//
	// For normal frames, it includes the size of the magic number and
	// the size of the header (per section 3.1.1.1).
	// It does not include the size for any data blocks (section 3.1.1.2) nor
	// the size for the trailing content checksum.
	//
	// For skippable frames, this counts the size of the magic number
	// along with the size of the size field of the payload.
	// It does not include the size of the skippable payload itself.
	// The total frame size is the HeaderSize plus the SkippableSize.
	HeaderSize int

	// FirstBlock holds information about the first block of the frame.
	FirstBlock struct {
		// OK will be set if first block could be decoded, i.e. the block
		// header was available and the block type is not reserved.
		OK bool

		// Last reports whether this is the last block of the frame.
		Last bool

		// Compressed reports whether the block payload differs from the
		// decoded data. It is set for compressed blocks and for RLE blocks.
		Compressed bool

		// DecompressedSize is the expected decompressed size of the block.
		// It is populated for raw and RLE blocks.
		// Will be 0 for compressed blocks, since their size cannot be
		// determined without decoding the block.
		DecompressedSize int

		// CompressedSize of the data in the block.
		// Does not include the block header.
		// Will be equal to DecompressedSize for raw blocks and 1 for RLE blocks.
		CompressedSize int
	}

	// HasCheckSum is set from the content checksum flag (bit 2) of the
	// frame header descriptor: a checksum of the frame content follows the
	// last block.
	// The checksum field at the end is always 4 bytes long.
	HasCheckSum bool
}
90
91// Decode the header from the beginning of the stream.
92// This will decode the frame header and the first block header if enough bytes are provided.
93// It is recommended to provide at least HeaderMaxSize bytes.
94// If the frame header cannot be read an error will be returned.
95// If there isn't enough input, io.ErrUnexpectedEOF is returned.
96// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
97func (h *Header) Decode(in []byte) error {
Abhay Kumara61c5222025-11-10 07:32:50 +000098 _, err := h.DecodeAndStrip(in)
99 return err
100}
101
102// DecodeAndStrip will decode the header from the beginning of the stream
103// and on success return the remaining bytes.
104// This will decode the frame header and the first block header if enough bytes are provided.
105// It is recommended to provide at least HeaderMaxSize bytes.
106// If the frame header cannot be read an error will be returned.
107// If there isn't enough input, io.ErrUnexpectedEOF is returned.
108// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
109func (h *Header) DecodeAndStrip(in []byte) (remain []byte, err error) {
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530110 *h = Header{}
khenaidoo106c61a2021-08-11 18:05:46 -0400111 if len(in) < 4 {
Abhay Kumara61c5222025-11-10 07:32:50 +0000112 return nil, io.ErrUnexpectedEOF
khenaidoo106c61a2021-08-11 18:05:46 -0400113 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530114 h.HeaderSize += 4
khenaidoo106c61a2021-08-11 18:05:46 -0400115 b, in := in[:4], in[4:]
Abhay Kumara61c5222025-11-10 07:32:50 +0000116 if string(b) != frameMagic {
117 if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
118 return nil, ErrMagicMismatch
khenaidoo106c61a2021-08-11 18:05:46 -0400119 }
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530120 if len(in) < 4 {
Abhay Kumara61c5222025-11-10 07:32:50 +0000121 return nil, io.ErrUnexpectedEOF
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530122 }
123 h.HeaderSize += 4
124 h.Skippable = true
125 h.SkippableID = int(b[0] & 0xf)
126 h.SkippableSize = binary.LittleEndian.Uint32(in)
Abhay Kumara61c5222025-11-10 07:32:50 +0000127 return in[4:], nil
khenaidoo106c61a2021-08-11 18:05:46 -0400128 }
khenaidoo106c61a2021-08-11 18:05:46 -0400129
130 // Read Window_Descriptor
131 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530132 if len(in) < 1 {
Abhay Kumara61c5222025-11-10 07:32:50 +0000133 return nil, io.ErrUnexpectedEOF
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530134 }
135 fhd, in := in[0], in[1:]
136 h.HeaderSize++
137 h.SingleSegment = fhd&(1<<5) != 0
138 h.HasCheckSum = fhd&(1<<2) != 0
139 if fhd&(1<<3) != 0 {
Abhay Kumara61c5222025-11-10 07:32:50 +0000140 return nil, errors.New("reserved bit set on frame header")
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530141 }
142
khenaidoo106c61a2021-08-11 18:05:46 -0400143 if !h.SingleSegment {
144 if len(in) < 1 {
Abhay Kumara61c5222025-11-10 07:32:50 +0000145 return nil, io.ErrUnexpectedEOF
khenaidoo106c61a2021-08-11 18:05:46 -0400146 }
147 var wd byte
148 wd, in = in[0], in[1:]
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530149 h.HeaderSize++
khenaidoo106c61a2021-08-11 18:05:46 -0400150 windowLog := 10 + (wd >> 3)
151 windowBase := uint64(1) << windowLog
152 windowAdd := (windowBase / 8) * uint64(wd&0x7)
153 h.WindowSize = windowBase + windowAdd
154 }
155
156 // Read Dictionary_ID
157 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
158 if size := fhd & 3; size != 0 {
159 if size == 3 {
160 size = 4
161 }
162 if len(in) < int(size) {
Abhay Kumara61c5222025-11-10 07:32:50 +0000163 return nil, io.ErrUnexpectedEOF
khenaidoo106c61a2021-08-11 18:05:46 -0400164 }
165 b, in = in[:size], in[size:]
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530166 h.HeaderSize += int(size)
Abhay Kumara61c5222025-11-10 07:32:50 +0000167 switch len(b) {
khenaidoo106c61a2021-08-11 18:05:46 -0400168 case 1:
169 h.DictionaryID = uint32(b[0])
170 case 2:
171 h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8)
172 case 4:
173 h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
174 }
175 }
176
177 // Read Frame_Content_Size
178 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size
179 var fcsSize int
180 v := fhd >> 6
181 switch v {
182 case 0:
183 if h.SingleSegment {
184 fcsSize = 1
185 }
186 default:
187 fcsSize = 1 << v
188 }
189
190 if fcsSize > 0 {
191 h.HasFCS = true
192 if len(in) < fcsSize {
Abhay Kumara61c5222025-11-10 07:32:50 +0000193 return nil, io.ErrUnexpectedEOF
khenaidoo106c61a2021-08-11 18:05:46 -0400194 }
195 b, in = in[:fcsSize], in[fcsSize:]
Akash Reddy Kankanalac6b6ca12025-06-12 14:26:57 +0530196 h.HeaderSize += int(fcsSize)
Abhay Kumara61c5222025-11-10 07:32:50 +0000197 switch len(b) {
khenaidoo106c61a2021-08-11 18:05:46 -0400198 case 1:
199 h.FrameContentSize = uint64(b[0])
200 case 2:
201 // When FCS_Field_Size is 2, the offset of 256 is added.
202 h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256
203 case 4:
204 h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24)
205 case 8:
206 d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
207 d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
208 h.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
209 }
210 }
211
212 // Frame Header done, we will not fail from now on.
213 if len(in) < 3 {
Abhay Kumara61c5222025-11-10 07:32:50 +0000214 return in, nil
khenaidoo106c61a2021-08-11 18:05:46 -0400215 }
216 tmp := in[:3]
217 bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
218 h.FirstBlock.Last = bh&1 != 0
219 blockType := blockType((bh >> 1) & 3)
220 // find size.
221 cSize := int(bh >> 3)
222 switch blockType {
223 case blockTypeReserved:
Abhay Kumara61c5222025-11-10 07:32:50 +0000224 return in, nil
khenaidoo106c61a2021-08-11 18:05:46 -0400225 case blockTypeRLE:
226 h.FirstBlock.Compressed = true
227 h.FirstBlock.DecompressedSize = cSize
228 h.FirstBlock.CompressedSize = 1
229 case blockTypeCompressed:
230 h.FirstBlock.Compressed = true
231 h.FirstBlock.CompressedSize = cSize
232 case blockTypeRaw:
233 h.FirstBlock.DecompressedSize = cSize
234 h.FirstBlock.CompressedSize = cSize
235 default:
236 panic("Invalid block type")
237 }
238
239 h.FirstBlock.OK = true
Abhay Kumara61c5222025-11-10 07:32:50 +0000240 return in, nil
241}
242
243// AppendTo will append the encoded header to the dst slice.
244// There is no error checking performed on the header values.
245func (h *Header) AppendTo(dst []byte) ([]byte, error) {
246 if h.Skippable {
247 magic := [4]byte{0x50, 0x2a, 0x4d, 0x18}
248 magic[0] |= byte(h.SkippableID & 0xf)
249 dst = append(dst, magic[:]...)
250 f := h.SkippableSize
251 return append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)), nil
252 }
253 f := frameHeader{
254 ContentSize: h.FrameContentSize,
255 WindowSize: uint32(h.WindowSize),
256 SingleSegment: h.SingleSegment,
257 Checksum: h.HasCheckSum,
258 DictID: h.DictionaryID,
259 }
260 return f.appendTo(dst), nil
khenaidoo106c61a2021-08-11 18:05:46 -0400261}