| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 1 | // Copyright 2019+ Klaus Post. All rights reserved. |
| 2 | // License information can be found in the LICENSE file. |
| 3 | // Based on work by Yann Collet, released under BSD License. |
| 4 | |
| 5 | package zstd |
| 6 | |
| 7 | import ( |
| 8 | "errors" |
| 9 | "fmt" |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 10 | "hash/crc32" |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 11 | "io" |
| 12 | "sync" |
| 13 | |
| 14 | "github.com/klauspost/compress/huff0" |
| 15 | "github.com/klauspost/compress/zstd/internal/xxhash" |
| 16 | ) |
| 17 | |
| 18 | type blockType uint8 |
| 19 | |
| 20 | //go:generate stringer -type=blockType,literalsBlockType,seqCompMode,tableIndex |
| 21 | |
| 22 | const ( |
| 23 | blockTypeRaw blockType = iota |
| 24 | blockTypeRLE |
| 25 | blockTypeCompressed |
| 26 | blockTypeReserved |
| 27 | ) |
| 28 | |
| 29 | type literalsBlockType uint8 |
| 30 | |
| 31 | const ( |
| 32 | literalsBlockRaw literalsBlockType = iota |
| 33 | literalsBlockRLE |
| 34 | literalsBlockCompressed |
| 35 | literalsBlockTreeless |
| 36 | ) |
| 37 | |
| 38 | const ( |
| 39 | // maxCompressedBlockSize is the biggest allowed compressed block size (128KB) |
| 40 | maxCompressedBlockSize = 128 << 10 |
| 41 | |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 42 | compressedBlockOverAlloc = 16 |
| 43 | maxCompressedBlockSizeAlloc = 128<<10 + compressedBlockOverAlloc |
| 44 | |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 45 | // Maximum possible block size (all Raw+Uncompressed). |
| 46 | maxBlockSize = (1 << 21) - 1 |
| 47 | |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 48 | maxMatchLen = 131074 |
| 49 | maxSequences = 0x7f00 + 0xffff |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 50 | |
| 51 | // We support slightly less than the reference decoder to be able to |
| 52 | // use ints on 32 bit archs. |
| 53 | maxOffsetBits = 30 |
| 54 | ) |
| 55 | |
| 56 | var ( |
| 57 | huffDecoderPool = sync.Pool{New: func() interface{} { |
| 58 | return &huff0.Scratch{} |
| 59 | }} |
| 60 | |
| 61 | fseDecoderPool = sync.Pool{New: func() interface{} { |
| 62 | return &fseDecoder{} |
| 63 | }} |
| 64 | ) |
| 65 | |
| 66 | type blockDec struct { |
| 67 | // Raw source data of the block. |
| 68 | data []byte |
| 69 | dataStorage []byte |
| 70 | |
| 71 | // Destination of the decoded data. |
| 72 | dst []byte |
| 73 | |
| 74 | // Buffer for literals data. |
| 75 | literalBuf []byte |
| 76 | |
| 77 | // Window size of the block. |
| 78 | WindowSize uint64 |
| 79 | |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 80 | err error |
| 81 | |
| 82 | // Check against this crc, if hasCRC is true. |
| 83 | checkCRC uint32 |
| 84 | hasCRC bool |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 85 | |
| 86 | // Frame to use for singlethreaded decoding. |
| 87 | // Should not be used by the decoder itself since parent may be another frame. |
| 88 | localFrame *frameDec |
| 89 | |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 90 | sequence []seqVals |
| 91 | |
| 92 | async struct { |
| 93 | newHist *history |
| 94 | literals []byte |
| 95 | seqData []byte |
| 96 | seqSize int // Size of uncompressed sequences |
| 97 | fcs uint64 |
| 98 | } |
| 99 | |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 100 | // Block is RLE, this is the size. |
| 101 | RLESize uint32 |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 102 | |
| 103 | Type blockType |
| 104 | |
| 105 | // Is this the last block of a frame? |
| 106 | Last bool |
| 107 | |
| 108 | // Use less memory |
| 109 | lowMem bool |
| 110 | } |
| 111 | |
| 112 | func (b *blockDec) String() string { |
| 113 | if b == nil { |
| 114 | return "<nil>" |
| 115 | } |
| 116 | return fmt.Sprintf("Steam Size: %d, Type: %v, Last: %t, Window: %d", len(b.data), b.Type, b.Last, b.WindowSize) |
| 117 | } |
| 118 | |
| 119 | func newBlockDec(lowMem bool) *blockDec { |
| 120 | b := blockDec{ |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 121 | lowMem: lowMem, |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 122 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 123 | return &b |
| 124 | } |
| 125 | |
| 126 | // reset will reset the block. |
| 127 | // Input must be a start of a block and will be at the end of the block when returned. |
| 128 | func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { |
| 129 | b.WindowSize = windowSize |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 130 | tmp, err := br.readSmall(3) |
| 131 | if err != nil { |
| 132 | println("Reading block header:", err) |
| 133 | return err |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 134 | } |
| 135 | bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16) |
| 136 | b.Last = bh&1 != 0 |
| 137 | b.Type = blockType((bh >> 1) & 3) |
| 138 | // find size. |
| 139 | cSize := int(bh >> 3) |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 140 | maxSize := maxCompressedBlockSizeAlloc |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 141 | switch b.Type { |
| 142 | case blockTypeReserved: |
| 143 | return ErrReservedBlockType |
| 144 | case blockTypeRLE: |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 145 | if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) { |
| 146 | if debugDecoder { |
| 147 | printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b) |
| 148 | } |
| 149 | return ErrWindowSizeExceeded |
| 150 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 151 | b.RLESize = uint32(cSize) |
| 152 | if b.lowMem { |
| 153 | maxSize = cSize |
| 154 | } |
| 155 | cSize = 1 |
| 156 | case blockTypeCompressed: |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 157 | if debugDecoder { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 158 | println("Data size on stream:", cSize) |
| 159 | } |
| 160 | b.RLESize = 0 |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 161 | maxSize = maxCompressedBlockSizeAlloc |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 162 | if windowSize < maxCompressedBlockSize && b.lowMem { |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 163 | maxSize = int(windowSize) + compressedBlockOverAlloc |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 164 | } |
| 165 | if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize { |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 166 | if debugDecoder { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 167 | printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b) |
| 168 | } |
| 169 | return ErrCompressedSizeTooBig |
| 170 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 171 | // Empty compressed blocks must at least be 2 bytes |
| 172 | // for Literals_Block_Type and one for Sequences_Section_Header. |
| 173 | if cSize < 2 { |
| 174 | return ErrBlockTooSmall |
| 175 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 176 | case blockTypeRaw: |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 177 | if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) { |
| 178 | if debugDecoder { |
| 179 | printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b) |
| 180 | } |
| 181 | return ErrWindowSizeExceeded |
| 182 | } |
| 183 | |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 184 | b.RLESize = 0 |
| 185 | // We do not need a destination for raw blocks. |
| 186 | maxSize = -1 |
| 187 | default: |
| 188 | panic("Invalid block type") |
| 189 | } |
| 190 | |
| 191 | // Read block data. |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 192 | if _, ok := br.(*byteBuf); !ok && cap(b.dataStorage) < cSize { |
| 193 | // byteBuf doesn't need a destination buffer. |
| 194 | if b.lowMem || cSize > maxCompressedBlockSize { |
| 195 | b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc) |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 196 | } else { |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 197 | b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc) |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 198 | } |
| 199 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 200 | b.data, err = br.readBig(cSize, b.dataStorage) |
| 201 | if err != nil { |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 202 | if debugDecoder { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 203 | println("Reading block:", err, "(", cSize, ")", len(b.data)) |
| 204 | printf("%T", br) |
| 205 | } |
| 206 | return err |
| 207 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 208 | if cap(b.dst) <= maxSize { |
| 209 | b.dst = make([]byte, 0, maxSize+1) |
| 210 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 211 | return nil |
| 212 | } |
| 213 | |
| 214 | // sendEOF will make the decoder send EOF on this frame. |
| 215 | func (b *blockDec) sendErr(err error) { |
| 216 | b.Last = true |
| 217 | b.Type = blockTypeReserved |
| 218 | b.err = err |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 219 | } |
| 220 | |
| 221 | // Close will release resources. |
| 222 | // Closed blockDec cannot be reset. |
| 223 | func (b *blockDec) Close() { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 224 | } |
| 225 | |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 226 | // decodeBuf |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 227 | func (b *blockDec) decodeBuf(hist *history) error { |
| 228 | switch b.Type { |
| 229 | case blockTypeRLE: |
| 230 | if cap(b.dst) < int(b.RLESize) { |
| 231 | if b.lowMem { |
| 232 | b.dst = make([]byte, b.RLESize) |
| 233 | } else { |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 234 | b.dst = make([]byte, maxCompressedBlockSize) |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 235 | } |
| 236 | } |
| 237 | b.dst = b.dst[:b.RLESize] |
| 238 | v := b.data[0] |
| 239 | for i := range b.dst { |
| 240 | b.dst[i] = v |
| 241 | } |
| 242 | hist.appendKeep(b.dst) |
| 243 | return nil |
| 244 | case blockTypeRaw: |
| 245 | hist.appendKeep(b.data) |
| 246 | return nil |
| 247 | case blockTypeCompressed: |
| 248 | saved := b.dst |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 249 | // Append directly to history |
| 250 | if hist.ignoreBuffer == 0 { |
| 251 | b.dst = hist.b |
| 252 | hist.b = nil |
| 253 | } else { |
| 254 | b.dst = b.dst[:0] |
| 255 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 256 | err := b.decodeCompressed(hist) |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 257 | if debugDecoder { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 258 | println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err) |
| 259 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 260 | if hist.ignoreBuffer == 0 { |
| 261 | hist.b = b.dst |
| 262 | b.dst = saved |
| 263 | } else { |
| 264 | hist.appendKeep(b.dst) |
| 265 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 266 | return err |
| 267 | case blockTypeReserved: |
| 268 | // Used for returning errors. |
| 269 | return b.err |
| 270 | default: |
| 271 | panic("Invalid block type") |
| 272 | } |
| 273 | } |
| 274 | |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 275 | func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err error) { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 276 | // There must be at least one byte for Literals_Block_Type and one for Sequences_Section_Header |
| 277 | if len(in) < 2 { |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 278 | return in, ErrBlockTooSmall |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 279 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 280 | |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 281 | litType := literalsBlockType(in[0] & 3) |
| 282 | var litRegenSize int |
| 283 | var litCompSize int |
| 284 | sizeFormat := (in[0] >> 2) & 3 |
| 285 | var fourStreams bool |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 286 | var literals []byte |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 287 | switch litType { |
| 288 | case literalsBlockRaw, literalsBlockRLE: |
| 289 | switch sizeFormat { |
| 290 | case 0, 2: |
| 291 | // Regenerated_Size uses 5 bits (0-31). Literals_Section_Header uses 1 byte. |
| 292 | litRegenSize = int(in[0] >> 3) |
| 293 | in = in[1:] |
| 294 | case 1: |
| 295 | // Regenerated_Size uses 12 bits (0-4095). Literals_Section_Header uses 2 bytes. |
| 296 | litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) |
| 297 | in = in[2:] |
| 298 | case 3: |
| 299 | // Regenerated_Size uses 20 bits (0-1048575). Literals_Section_Header uses 3 bytes. |
| 300 | if len(in) < 3 { |
| 301 | println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 302 | return in, ErrBlockTooSmall |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 303 | } |
| 304 | litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) + (int(in[2]) << 12) |
| 305 | in = in[3:] |
| 306 | } |
| 307 | case literalsBlockCompressed, literalsBlockTreeless: |
| 308 | switch sizeFormat { |
| 309 | case 0, 1: |
| 310 | // Both Regenerated_Size and Compressed_Size use 10 bits (0-1023). |
| 311 | if len(in) < 3 { |
| 312 | println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 313 | return in, ErrBlockTooSmall |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 314 | } |
| 315 | n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) |
| 316 | litRegenSize = int(n & 1023) |
| 317 | litCompSize = int(n >> 10) |
| 318 | fourStreams = sizeFormat == 1 |
| 319 | in = in[3:] |
| 320 | case 2: |
| 321 | fourStreams = true |
| 322 | if len(in) < 4 { |
| 323 | println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 324 | return in, ErrBlockTooSmall |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 325 | } |
| 326 | n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) |
| 327 | litRegenSize = int(n & 16383) |
| 328 | litCompSize = int(n >> 14) |
| 329 | in = in[4:] |
| 330 | case 3: |
| 331 | fourStreams = true |
| 332 | if len(in) < 5 { |
| 333 | println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 334 | return in, ErrBlockTooSmall |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 335 | } |
| 336 | n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) + (uint64(in[4]) << 28) |
| 337 | litRegenSize = int(n & 262143) |
| 338 | litCompSize = int(n >> 18) |
| 339 | in = in[5:] |
| 340 | } |
| 341 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 342 | if debugDecoder { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 343 | println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams) |
| 344 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 345 | if litRegenSize > int(b.WindowSize) || litRegenSize > maxCompressedBlockSize { |
| 346 | return in, ErrWindowSizeExceeded |
| 347 | } |
| 348 | |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 349 | switch litType { |
| 350 | case literalsBlockRaw: |
| 351 | if len(in) < litRegenSize { |
| 352 | println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litRegenSize) |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 353 | return in, ErrBlockTooSmall |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 354 | } |
| 355 | literals = in[:litRegenSize] |
| 356 | in = in[litRegenSize:] |
| 357 | //printf("Found %d uncompressed literals\n", litRegenSize) |
| 358 | case literalsBlockRLE: |
| 359 | if len(in) < 1 { |
| 360 | println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", 1) |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 361 | return in, ErrBlockTooSmall |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 362 | } |
| 363 | if cap(b.literalBuf) < litRegenSize { |
| 364 | if b.lowMem { |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 365 | b.literalBuf = make([]byte, litRegenSize, litRegenSize+compressedBlockOverAlloc) |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 366 | } else { |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 367 | b.literalBuf = make([]byte, litRegenSize, maxCompressedBlockSize+compressedBlockOverAlloc) |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 368 | } |
| 369 | } |
| 370 | literals = b.literalBuf[:litRegenSize] |
| 371 | v := in[0] |
| 372 | for i := range literals { |
| 373 | literals[i] = v |
| 374 | } |
| 375 | in = in[1:] |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 376 | if debugDecoder { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 377 | printf("Found %d RLE compressed literals\n", litRegenSize) |
| 378 | } |
| 379 | case literalsBlockTreeless: |
| 380 | if len(in) < litCompSize { |
| 381 | println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize) |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 382 | return in, ErrBlockTooSmall |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 383 | } |
| 384 | // Store compressed literals, so we defer decoding until we get history. |
| 385 | literals = in[:litCompSize] |
| 386 | in = in[litCompSize:] |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 387 | if debugDecoder { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 388 | printf("Found %d compressed literals\n", litCompSize) |
| 389 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 390 | huff := hist.huffTree |
| 391 | if huff == nil { |
| 392 | return in, errors.New("literal block was treeless, but no history was defined") |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 393 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 394 | // Ensure we have space to store it. |
| 395 | if cap(b.literalBuf) < litRegenSize { |
| 396 | if b.lowMem { |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 397 | b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc) |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 398 | } else { |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 399 | b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc) |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 400 | } |
| 401 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 402 | var err error |
| 403 | // Use our out buffer. |
| 404 | huff.MaxDecodedSize = litRegenSize |
| 405 | if fourStreams { |
| 406 | literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) |
| 407 | } else { |
| 408 | literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals) |
| 409 | } |
| 410 | // Make sure we don't leak our literals buffer |
| 411 | if err != nil { |
| 412 | println("decompressing literals:", err) |
| 413 | return in, err |
| 414 | } |
| 415 | if len(literals) != litRegenSize { |
| 416 | return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) |
| 417 | } |
| 418 | |
| 419 | case literalsBlockCompressed: |
| 420 | if len(in) < litCompSize { |
| 421 | println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize) |
| 422 | return in, ErrBlockTooSmall |
| 423 | } |
| 424 | literals = in[:litCompSize] |
| 425 | in = in[litCompSize:] |
| 426 | // Ensure we have space to store it. |
| 427 | if cap(b.literalBuf) < litRegenSize { |
| 428 | if b.lowMem { |
| 429 | b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc) |
| 430 | } else { |
| 431 | b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc) |
| 432 | } |
| 433 | } |
| 434 | huff := hist.huffTree |
| 435 | if huff == nil || (hist.dict != nil && huff == hist.dict.litEnc) { |
| 436 | huff = huffDecoderPool.Get().(*huff0.Scratch) |
| 437 | if huff == nil { |
| 438 | huff = &huff0.Scratch{} |
| 439 | } |
| 440 | } |
| 441 | var err error |
| 442 | if debugDecoder { |
| 443 | println("huff table input:", len(literals), "CRC:", crc32.ChecksumIEEE(literals)) |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 444 | } |
| 445 | huff, literals, err = huff0.ReadTable(literals, huff) |
| 446 | if err != nil { |
| 447 | println("reading huffman table:", err) |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 448 | return in, err |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 449 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 450 | hist.huffTree = huff |
| 451 | huff.MaxDecodedSize = litRegenSize |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 452 | // Use our out buffer. |
| 453 | if fourStreams { |
| 454 | literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) |
| 455 | } else { |
| 456 | literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals) |
| 457 | } |
| 458 | if err != nil { |
| 459 | println("decoding compressed literals:", err) |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 460 | return in, err |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 461 | } |
| 462 | // Make sure we don't leak our literals buffer |
| 463 | if len(literals) != litRegenSize { |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 464 | return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 465 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 466 | // Re-cap to get extra size. |
| 467 | literals = b.literalBuf[:len(literals)] |
| 468 | if debugDecoder { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 469 | printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize) |
| 470 | } |
| 471 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 472 | hist.decoders.literals = literals |
| 473 | return in, nil |
| 474 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 475 | |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 476 | // decodeCompressed will start decompressing a block. |
| 477 | func (b *blockDec) decodeCompressed(hist *history) error { |
| 478 | in := b.data |
| 479 | in, err := b.decodeLiterals(in, hist) |
| 480 | if err != nil { |
| 481 | return err |
| 482 | } |
| 483 | err = b.prepareSequences(in, hist) |
| 484 | if err != nil { |
| 485 | return err |
| 486 | } |
| 487 | if hist.decoders.nSeqs == 0 { |
| 488 | b.dst = append(b.dst, hist.decoders.literals...) |
| 489 | return nil |
| 490 | } |
| 491 | before := len(hist.decoders.out) |
| 492 | err = hist.decoders.decodeSync(hist.b[hist.ignoreBuffer:]) |
| 493 | if err != nil { |
| 494 | return err |
| 495 | } |
| 496 | if hist.decoders.maxSyncLen > 0 { |
| 497 | hist.decoders.maxSyncLen += uint64(before) |
| 498 | hist.decoders.maxSyncLen -= uint64(len(hist.decoders.out)) |
| 499 | } |
| 500 | b.dst = hist.decoders.out |
| 501 | hist.recentOffsets = hist.decoders.prevOffset |
| 502 | return nil |
| 503 | } |
| 504 | |
| 505 | func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) { |
| 506 | if debugDecoder { |
| 507 | printf("prepareSequences: %d byte(s) input\n", len(in)) |
| 508 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 509 | // Decode Sequences |
| 510 | // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section |
| 511 | if len(in) < 1 { |
| 512 | return ErrBlockTooSmall |
| 513 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 514 | var nSeqs int |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 515 | seqHeader := in[0] |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 516 | switch { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 517 | case seqHeader < 128: |
| 518 | nSeqs = int(seqHeader) |
| 519 | in = in[1:] |
| 520 | case seqHeader < 255: |
| 521 | if len(in) < 2 { |
| 522 | return ErrBlockTooSmall |
| 523 | } |
| 524 | nSeqs = int(seqHeader-128)<<8 | int(in[1]) |
| 525 | in = in[2:] |
| 526 | case seqHeader == 255: |
| 527 | if len(in) < 3 { |
| 528 | return ErrBlockTooSmall |
| 529 | } |
| 530 | nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8) |
| 531 | in = in[3:] |
| 532 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 533 | if nSeqs == 0 && len(in) != 0 { |
| 534 | // When no sequences, there should not be any more data... |
| 535 | if debugDecoder { |
| 536 | printf("prepareSequences: 0 sequences, but %d byte(s) left on stream\n", len(in)) |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 537 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 538 | return ErrUnexpectedBlockSize |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 539 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 540 | |
| 541 | var seqs = &hist.decoders |
| 542 | seqs.nSeqs = nSeqs |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 543 | if nSeqs > 0 { |
| 544 | if len(in) < 1 { |
| 545 | return ErrBlockTooSmall |
| 546 | } |
| 547 | br := byteReader{b: in, off: 0} |
| 548 | compMode := br.Uint8() |
| 549 | br.advance(1) |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 550 | if debugDecoder { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 551 | printf("Compression modes: 0b%b", compMode) |
| 552 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 553 | if compMode&3 != 0 { |
| 554 | return errors.New("corrupt block: reserved bits not zero") |
| 555 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 556 | for i := uint(0); i < 3; i++ { |
| 557 | mode := seqCompMode((compMode >> (6 - i*2)) & 3) |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 558 | if debugDecoder { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 559 | println("Table", tableIndex(i), "is", mode) |
| 560 | } |
| 561 | var seq *sequenceDec |
| 562 | switch tableIndex(i) { |
| 563 | case tableLiteralLengths: |
| 564 | seq = &seqs.litLengths |
| 565 | case tableOffsets: |
| 566 | seq = &seqs.offsets |
| 567 | case tableMatchLengths: |
| 568 | seq = &seqs.matchLengths |
| 569 | default: |
| 570 | panic("unknown table") |
| 571 | } |
| 572 | switch mode { |
| 573 | case compModePredefined: |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 574 | if seq.fse != nil && !seq.fse.preDefined { |
| 575 | fseDecoderPool.Put(seq.fse) |
| 576 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 577 | seq.fse = &fsePredef[i] |
| 578 | case compModeRLE: |
| 579 | if br.remain() < 1 { |
| 580 | return ErrBlockTooSmall |
| 581 | } |
| 582 | v := br.Uint8() |
| 583 | br.advance(1) |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 584 | if seq.fse == nil || seq.fse.preDefined { |
| 585 | seq.fse = fseDecoderPool.Get().(*fseDecoder) |
| 586 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 587 | symb, err := decSymbolValue(v, symbolTableX[i]) |
| 588 | if err != nil { |
| 589 | printf("RLE Transform table (%v) error: %v", tableIndex(i), err) |
| 590 | return err |
| 591 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 592 | seq.fse.setRLE(symb) |
| 593 | if debugDecoder { |
| 594 | printf("RLE set to 0x%x, code: %v", symb, v) |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 595 | } |
| 596 | case compModeFSE: |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 597 | if debugDecoder { |
| 598 | println("Reading table for", tableIndex(i)) |
| 599 | } |
| 600 | if seq.fse == nil || seq.fse.preDefined { |
| 601 | seq.fse = fseDecoderPool.Get().(*fseDecoder) |
| 602 | } |
| 603 | err := seq.fse.readNCount(&br, uint16(maxTableSymbol[i])) |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 604 | if err != nil { |
| 605 | println("Read table error:", err) |
| 606 | return err |
| 607 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 608 | err = seq.fse.transform(symbolTableX[i]) |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 609 | if err != nil { |
| 610 | println("Transform table error:", err) |
| 611 | return err |
| 612 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 613 | if debugDecoder { |
| 614 | println("Read table ok", "symbolLen:", seq.fse.symbolLen) |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 615 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 616 | case compModeRepeat: |
| 617 | seq.repeat = true |
| 618 | } |
| 619 | if br.overread() { |
| 620 | return io.ErrUnexpectedEOF |
| 621 | } |
| 622 | } |
| 623 | in = br.unread() |
| 624 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 625 | if debugDecoder { |
| 626 | println("Literals:", len(seqs.literals), "hash:", xxhash.Sum64(seqs.literals), "and", seqs.nSeqs, "sequences.") |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 627 | } |
| 628 | |
| 629 | if nSeqs == 0 { |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 630 | if len(b.sequence) > 0 { |
| 631 | b.sequence = b.sequence[:0] |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 632 | } |
| 633 | return nil |
| 634 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 635 | br := seqs.br |
| 636 | if br == nil { |
| 637 | br = &bitReader{} |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 638 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 639 | if err := br.init(in); err != nil { |
| 640 | return err |
| 641 | } |
| 642 | |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 643 | if err := seqs.initialize(br, hist, b.dst); err != nil { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 644 | println("initializing sequences:", err) |
| 645 | return err |
| 646 | } |
| 647 | |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 648 | return nil |
| 649 | } |
| 650 | |
| 651 | func (b *blockDec) decodeSequences(hist *history) error { |
| 652 | if cap(b.sequence) < hist.decoders.nSeqs { |
| 653 | if b.lowMem { |
| 654 | b.sequence = make([]seqVals, 0, hist.decoders.nSeqs) |
| 655 | } else { |
| 656 | b.sequence = make([]seqVals, 0, 0x7F00+0xffff) |
| 657 | } |
| 658 | } |
| 659 | b.sequence = b.sequence[:hist.decoders.nSeqs] |
| 660 | if hist.decoders.nSeqs == 0 { |
| 661 | hist.decoders.seqSize = len(hist.decoders.literals) |
| 662 | return nil |
| 663 | } |
| 664 | hist.decoders.windowSize = hist.windowSize |
| 665 | hist.decoders.prevOffset = hist.recentOffsets |
| 666 | |
| 667 | err := hist.decoders.decode(b.sequence) |
| 668 | hist.recentOffsets = hist.decoders.prevOffset |
| 669 | return err |
| 670 | } |
| 671 | |
| 672 | func (b *blockDec) executeSequences(hist *history) error { |
| 673 | hbytes := hist.b |
| 674 | if len(hbytes) > hist.windowSize { |
| 675 | hbytes = hbytes[len(hbytes)-hist.windowSize:] |
| 676 | // We do not need history anymore. |
| 677 | if hist.dict != nil { |
| 678 | hist.dict.content = nil |
| 679 | } |
| 680 | } |
| 681 | hist.decoders.windowSize = hist.windowSize |
| 682 | hist.decoders.out = b.dst[:0] |
| 683 | err := hist.decoders.execute(b.sequence, hbytes) |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 684 | if err != nil { |
| 685 | return err |
| 686 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 687 | return b.updateHistory(hist) |
| 688 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 689 | |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 690 | func (b *blockDec) updateHistory(hist *history) error { |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 691 | if len(b.data) > maxCompressedBlockSize { |
| 692 | return fmt.Errorf("compressed block size too large (%d)", len(b.data)) |
| 693 | } |
| 694 | // Set output and release references. |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 695 | b.dst = hist.decoders.out |
| 696 | hist.recentOffsets = hist.decoders.prevOffset |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 697 | |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 698 | if b.Last { |
| 699 | // if last block we don't care about history. |
| 700 | println("Last block, no history returned") |
| 701 | hist.b = hist.b[:0] |
| 702 | return nil |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 703 | } else { |
| 704 | hist.append(b.dst) |
| 705 | if debugDecoder { |
| 706 | println("Finished block with ", len(b.sequence), "sequences. Added", len(b.dst), "to history, now length", len(hist.b)) |
| 707 | } |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 708 | } |
| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 709 | hist.decoders.out, hist.decoders.literals = nil, nil |
| khenaidoo | 2672188 | 2021-08-11 17:42:52 -0400 | [diff] [blame] | 710 | |
| 711 | return nil |
| 712 | } |