// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package flate implements the DEFLATE compressed data format, described in
// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
// formats.
package flate

import (
	"bufio"
	"compress/flate"
	"fmt"
	"io"
	"math/bits"
	"sync"
)

const (
	maxCodeLen     = 16 // max length of Huffman code
	maxCodeLenMask = 15 // mask for max length of Huffman code
	// The next three numbers come from the RFC section 3.2.7, with the
	// additional proviso in section 3.2.5 which implies that distance codes
	// 30 and 31 should never occur in compressed data.
	maxNumLit  = 286
	maxNumDist = 30
	numCodes   = 19 // number of codes in Huffman meta-code

	debugDecode = false
)

// lengthExtra holds a length code's base match length minus 3 and the number
// of extra bits for that code.
type lengthExtra struct {
	length, extra uint8
}

var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}}
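
// lengthSketch is an illustrative, unused helper added here only for clarity;
// it is not the decoder's actual decode step. For a literal/length code in
// 257..285, the decoded match length is the table's base value plus 3, plus
// the value of the code's extra bits, mirroring RFC 1951 section 3.2.5.
func lengthSketch(code uint32, extraBits uint32) int {
	le := decCodeToLen[(code-257)&31]
	return int(le.length) + 3 + int(extraBits&bitMask32[le.extra])
}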

var bitMask32 = [32]uint32{
	0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
	0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
	0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
	0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
} // up to 32 bits

// Initialize the fixedHuffmanDecoder only once upon first use.
var fixedOnce sync.Once
var fixedHuffmanDecoder huffmanDecoder

// A CorruptInputError reports the presence of corrupt input at a given offset.
type CorruptInputError = flate.CorruptInputError

// An InternalError reports an error in the flate code itself.
type InternalError string

func (e InternalError) Error() string { return "flate: internal error: " + string(e) }

// A ReadError reports an error encountered while reading input.
//
// Deprecated: No longer returned.
type ReadError = flate.ReadError

// A WriteError reports an error encountered while writing output.
//
// Deprecated: No longer returned.
type WriteError = flate.WriteError

// Resetter resets a ReadCloser returned by NewReader or NewReaderDict
// to switch to a new underlying Reader. This permits reusing a ReadCloser
// instead of allocating a new one.
type Resetter interface {
	// Reset discards any buffered data and resets the Resetter as if it was
	// newly initialized with the given reader.
	Reset(r io.Reader, dict []byte) error
}
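
// resetSketch is an illustrative, unused helper sketching one assumed usage
// pattern; it is not part of the original API. It shows how Resetter lets a
// single ReadCloser be reused for a new compressed stream instead of
// allocating a fresh reader.
func resetSketch(zr io.ReadCloser, next io.Reader) error {
	rs, ok := zr.(Resetter)
	if !ok {
		return fmt.Errorf("flate: reader does not implement Resetter")
	}
	// A nil dict starts the new stream with an empty history window.
	return rs.Reset(next, nil)
}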

// The data structure for decoding Huffman tables is based on that of
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits).
// For codes smaller than the table width, there are multiple entries
// (each combination of trailing bits has the same value). For codes
// larger than the table width, the table contains a link to an overflow
// table. The width of each entry in the link table is the maximum code
// size minus the chunk width.
//
// Note that you can do a lookup in the table even without all bits
// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
// have the property that shorter codes come before longer ones, the
// bit length estimate in the result is a lower bound on the actual
// number of bits.
//
// See the following:
//	http://www.gzip.org/algorithm.txt

// chunk & 15 is number of bits
// chunk >> 4 is value, including table link

const (
	huffmanChunkBits  = 9
	huffmanNumChunks  = 1 << huffmanChunkBits
	huffmanCountMask  = 15
	huffmanValueShift = 4
)
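
// chunkSketch is an illustrative, unused helper spelling out the packing
// described above (added for clarity, not used by the decoder): the low 4
// bits of a chunk hold the code length (or huffmanChunkBits+1 when the entry
// links to an overflow table), and the remaining bits hold the decoded value
// or the link-table index.
func chunkSketch(chunk uint16) (value, nbits uint16) {
	return chunk >> huffmanValueShift, chunk & huffmanCountMask
}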

type huffmanDecoder struct {
	maxRead  int                       // the maximum number of bits we can read and not overread
	chunks   *[huffmanNumChunks]uint16 // chunks as described above
	links    [][]uint16                // overflow links
	linkMask uint32                    // mask the width of the link table
}

// Initialize Huffman decoding tables from array of code lengths.
// Following this function, h is guaranteed to be initialized into a complete
// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
// degenerate case where the tree has only a single symbol with length 1. Empty
// trees are permitted.
func (h *huffmanDecoder) init(lengths []int) bool {
	// Sanity enables additional runtime tests during Huffman
	// table construction. It's intended to be used during
	// development to supplement the currently ad-hoc unit tests.
	const sanity = false

	if h.chunks == nil {
		h.chunks = new([huffmanNumChunks]uint16)
	}

	if h.maxRead != 0 {
		*h = huffmanDecoder{chunks: h.chunks, links: h.links}
	}

	// Count number of codes of each length,
	// compute maxRead and max length.
	var count [maxCodeLen]int
	var min, max int
	for _, n := range lengths {
		if n == 0 {
			continue
		}
		if min == 0 || n < min {
			min = n
		}
		if n > max {
			max = n
		}
		count[n&maxCodeLenMask]++
	}

	// Empty tree. The decompressor.huffSym function will fail later if the tree
	// is used. Technically, an empty tree is only valid for the HDIST tree and
	// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
	// is guaranteed to fail since it will attempt to use the tree to decode the
	// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
	// guaranteed to fail later since the compressed data section must be
	// composed of at least one symbol (the end-of-block marker).
	if max == 0 {
		return true
	}

	code := 0
	var nextcode [maxCodeLen]int
	for i := min; i <= max; i++ {
		code <<= 1
		nextcode[i&maxCodeLenMask] = code
		code += count[i&maxCodeLenMask]
	}

	// Check that the coding is complete (i.e., that we've
	// assigned all 2-to-the-max possible bit sequences).
	// Exception: To be compatible with zlib, we also need to
	// accept degenerate single-code codings. See also
	// TestDegenerateHuffmanCoding.
	if code != 1<<uint(max) && !(code == 1 && max == 1) {
		if debugDecode {
			fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
		}
		return false
	}

	h.maxRead = min

	chunks := h.chunks[:]
	for i := range chunks {
		chunks[i] = 0
	}

	if max > huffmanChunkBits {
		numLinks := 1 << (uint(max) - huffmanChunkBits)
		h.linkMask = uint32(numLinks - 1)

		// create link tables
		link := nextcode[huffmanChunkBits+1] >> 1
		if cap(h.links) < huffmanNumChunks-link {
			h.links = make([][]uint16, huffmanNumChunks-link)
		} else {
			h.links = h.links[:huffmanNumChunks-link]
		}
		for j := uint(link); j < huffmanNumChunks; j++ {
			reverse := int(bits.Reverse16(uint16(j)))
			reverse >>= uint(16 - huffmanChunkBits)
			off := j - uint(link)
			if sanity && h.chunks[reverse] != 0 {
				panic("impossible: overwriting existing chunk")
			}
			h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
			if cap(h.links[off]) < numLinks {
				h.links[off] = make([]uint16, numLinks)
			} else {
				h.links[off] = h.links[off][:numLinks]
			}
		}
	} else {
		h.links = h.links[:0]
	}

	for i, n := range lengths {
		if n == 0 {
			continue
		}
		code := nextcode[n]
		nextcode[n]++
		chunk := uint16(i<<huffmanValueShift | n)
		reverse := int(bits.Reverse16(uint16(code)))
		reverse >>= uint(16 - n)
		if n <= huffmanChunkBits {
			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
				// We should never need to overwrite
				// an existing chunk. Also, 0 is
				// never a valid chunk, because the
				// lower 4 "count" bits should be
				// between 1 and 15.
				if sanity && h.chunks[off] != 0 {
					panic("impossible: overwriting existing chunk")
				}
				h.chunks[off] = chunk
			}
		} else {
			j := reverse & (huffmanNumChunks - 1)
			if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
				// Longer codes should have been
				// associated with a link table above.
				panic("impossible: not an indirect chunk")
			}
			value := h.chunks[j] >> huffmanValueShift
			linktab := h.links[value]
			reverse >>= huffmanChunkBits
			for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
				if sanity && linktab[off] != 0 {
					panic("impossible: overwriting existing chunk")
				}
				linktab[off] = chunk
			}
		}
	}

	if sanity {
		// Above we've sanity checked that we never overwrote
		// an existing entry. Here we additionally check that
		// we filled the tables completely.
		for i, chunk := range h.chunks {
			if chunk == 0 {
				// As an exception, in the degenerate
				// single-code case, we allow odd
				// chunks to be missing.
				if code == 1 && i%2 == 1 {
					continue
				}
				panic("impossible: missing chunk")
			}
		}
		for _, linktab := range h.links {
			for _, chunk := range linktab {
				if chunk == 0 {
					panic("impossible: missing chunk")
				}
			}
		}
	}

	return true
}

// Reader is the actual read interface needed by NewReader.
// If the passed-in io.Reader does not also have ReadByte,
// NewReader will introduce its own buffering.
type Reader interface {
	io.Reader
	io.ByteReader
}

type step uint8

const (
	copyData step = iota + 1
	nextBlock
	huffmanBytesBuffer
	huffmanBytesReader
	huffmanBufioReader
	huffmanStringsReader
	huffmanGenericReader
)

// flushMode tells the decompressor when to return data.
type flushMode uint8

const (
	syncFlush    flushMode = iota // return data after sync flush block
	partialFlush                  // return data after each block
)

// Decompress state.
type decompressor struct {
	// Input source.
	r       Reader
	roffset int64

	// Huffman decoders for literal/length, distance.
	h1, h2 huffmanDecoder

	// Length arrays used to define Huffman codes.
	bits     *[maxNumLit + maxNumDist]int
	codebits *[numCodes]int

	// Output history, buffer.
	dict dictDecoder

	// Next step in the decompression,
	// and decompression state.
	step      step
	stepState int
	err       error
	toRead    []byte
	hl, hd    *huffmanDecoder
	copyLen   int
	copyDist  int

	// Temporary buffer (avoids repeated allocation).
	buf [4]byte

	// Input bits, in top of b.
	b uint32

	nb    uint
	final bool

	flushMode flushMode
}

func (f *decompressor) nextBlock() {
	for f.nb < 1+2 {
		if f.err = f.moreBits(); f.err != nil {
			return
		}
	}
	f.final = f.b&1 == 1
	f.b >>= 1
	typ := f.b & 3
	f.b >>= 2
	f.nb -= 1 + 2
	switch typ {
	case 0:
		f.dataBlock()
		if debugDecode {
			fmt.Println("stored block")
		}
	case 1:
		// compressed, fixed Huffman tables
		f.hl = &fixedHuffmanDecoder
		f.hd = nil
		f.huffmanBlockDecoder()
		if debugDecode {
			fmt.Println("predefined huffman block")
		}
	case 2:
		// compressed, dynamic Huffman tables
		if f.err = f.readHuffman(); f.err != nil {
			break
		}
		f.hl = &f.h1
		f.hd = &f.h2
		f.huffmanBlockDecoder()
		if debugDecode {
			fmt.Println("dynamic huffman block")
		}
	default:
		// 3 is reserved.
		if debugDecode {
			fmt.Println("reserved data block encountered")
		}
		f.err = CorruptInputError(f.roffset)
	}
}

func (f *decompressor) Read(b []byte) (int, error) {
	for {
		if len(f.toRead) > 0 {
			n := copy(b, f.toRead)
			f.toRead = f.toRead[n:]
			if len(f.toRead) == 0 {
				return n, f.err
			}
			return n, nil
		}
		if f.err != nil {
			return 0, f.err
		}

		f.doStep()

		if f.err != nil && len(f.toRead) == 0 {
			f.toRead = f.dict.readFlush() // Flush what's left in case of error
		}
	}
}

// WriteTo implements the io.WriterTo interface for io.Copy and friends.
func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
	total := int64(0)
	flushed := false
	for {
		if len(f.toRead) > 0 {
			n, err := w.Write(f.toRead)
			total += int64(n)
			if err != nil {
				f.err = err
				return total, err
			}
			if n != len(f.toRead) {
				return total, io.ErrShortWrite
			}
			f.toRead = f.toRead[:0]
		}
		if f.err != nil && flushed {
			if f.err == io.EOF {
				return total, nil
			}
			return total, f.err
		}
		if f.err == nil {
			f.doStep()
		}
		if len(f.toRead) == 0 && f.err != nil && !flushed {
			f.toRead = f.dict.readFlush() // Flush what's left in case of error
			flushed = true
		}
	}
}

func (f *decompressor) Close() error {
	if f.err == io.EOF {
		return nil
	}
	return f.err
}

// RFC 1951 section 3.2.7.
// Compression with dynamic Huffman codes

var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}

func (f *decompressor) readHuffman() error {
	// HLIT[5], HDIST[5], HCLEN[4].
	for f.nb < 5+5+4 {
		if err := f.moreBits(); err != nil {
			return err
		}
	}
	nlit := int(f.b&0x1F) + 257
	if nlit > maxNumLit {
		if debugDecode {
			fmt.Println("nlit > maxNumLit", nlit)
		}
		return CorruptInputError(f.roffset)
	}
	f.b >>= 5
	ndist := int(f.b&0x1F) + 1
	if ndist > maxNumDist {
		if debugDecode {
			fmt.Println("ndist > maxNumDist", ndist)
		}
		return CorruptInputError(f.roffset)
	}
	f.b >>= 5
	nclen := int(f.b&0xF) + 4
	// numCodes is 19, so nclen is always valid.
	f.b >>= 4
	f.nb -= 5 + 5 + 4

	// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
	for i := 0; i < nclen; i++ {
		for f.nb < 3 {
			if err := f.moreBits(); err != nil {
				return err
			}
		}
		f.codebits[codeOrder[i]] = int(f.b & 0x7)
		f.b >>= 3
		f.nb -= 3
	}
	for i := nclen; i < len(codeOrder); i++ {
		f.codebits[codeOrder[i]] = 0
	}
	if !f.h1.init(f.codebits[0:]) {
		if debugDecode {
			fmt.Println("init codebits failed")
		}
		return CorruptInputError(f.roffset)
	}

	// HLIT + 257 code lengths, HDIST + 1 code lengths,
	// using the code length Huffman code.
	for i, n := 0, nlit+ndist; i < n; {
		x, err := f.huffSym(&f.h1)
		if err != nil {
			return err
		}
		if x < 16 {
			// Actual length.
			f.bits[i] = x
			i++
			continue
		}
		// Repeat previous length or zero.
		var rep int
		var nb uint
		var b int
		switch x {
		default:
			return InternalError("unexpected length code")
		case 16:
			rep = 3
			nb = 2
			if i == 0 {
				if debugDecode {
					fmt.Println("i==0")
				}
				return CorruptInputError(f.roffset)
			}
			b = f.bits[i-1]
		case 17:
			rep = 3
			nb = 3
			b = 0
		case 18:
			rep = 11
			nb = 7
			b = 0
		}
		for f.nb < nb {
			if err := f.moreBits(); err != nil {
				if debugDecode {
					fmt.Println("morebits:", err)
				}
				return err
			}
		}
		rep += int(f.b & uint32(1<<(nb&regSizeMaskUint32)-1))
		f.b >>= nb & regSizeMaskUint32
		f.nb -= nb
		if i+rep > n {
			if debugDecode {
				fmt.Println("i+rep > n", i, rep, n)
			}
			return CorruptInputError(f.roffset)
		}
		for j := 0; j < rep; j++ {
			f.bits[i] = b
			i++
		}
	}

	if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
		if debugDecode {
			fmt.Println("init2 failed")
		}
		return CorruptInputError(f.roffset)
	}

	// As an optimization, we can initialize the maxRead bits to read at a time
	// for the HLIT tree to the length of the EOB marker since we know that
	// every block must terminate with one. This preserves the property that
	// we never read any extra bytes after the end of the DEFLATE stream.
	if f.h1.maxRead < f.bits[endBlockMarker] {
		f.h1.maxRead = f.bits[endBlockMarker]
	}
	if !f.final {
		// If not the final block, the smallest block possible is
		// a predefined table, BTYPE=01, with a single EOB marker.
		// This will take up 3 + 7 bits.
		f.h1.maxRead += 10
	}

	return nil
}

// Copy a single uncompressed data block from input to output.
func (f *decompressor) dataBlock() {
	// Uncompressed.
	// Discard current half-byte.
	left := (f.nb) & 7
	f.nb -= left
	f.b >>= left

	offBytes := f.nb >> 3
	// Unfilled values will be overwritten.
	f.buf[0] = uint8(f.b)
	f.buf[1] = uint8(f.b >> 8)
	f.buf[2] = uint8(f.b >> 16)
	f.buf[3] = uint8(f.b >> 24)

	f.roffset += int64(offBytes)
	f.nb, f.b = 0, 0

	// Length then ones-complement of length.
	nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
	f.roffset += int64(nr)
	if err != nil {
		f.err = noEOF(err)
		return
	}
	n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
	nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
	if nn != ^n {
		if debugDecode {
			ncomp := ^n
			fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
		}
		f.err = CorruptInputError(f.roffset)
		return
	}

	if n == 0 {
		if f.flushMode == syncFlush {
			f.toRead = f.dict.readFlush()
		}

		f.finishBlock()
		return
	}

	f.copyLen = int(n)
	f.copyData()
}

// copyData copies f.copyLen bytes from the underlying reader into the output
// history window (f.dict). It pauses for reads when the window is full.
func (f *decompressor) copyData() {
	buf := f.dict.writeSlice()
	if len(buf) > f.copyLen {
		buf = buf[:f.copyLen]
	}

	cnt, err := io.ReadFull(f.r, buf)
	f.roffset += int64(cnt)
	f.copyLen -= cnt
	f.dict.writeMark(cnt)
	if err != nil {
		f.err = noEOF(err)
		return
	}

	if f.dict.availWrite() == 0 || f.copyLen > 0 {
		f.toRead = f.dict.readFlush()
		f.step = copyData
		return
	}
	f.finishBlock()
}

func (f *decompressor) finishBlock() {
	if f.final {
		if f.dict.availRead() > 0 {
			f.toRead = f.dict.readFlush()
		}

		f.err = io.EOF
	} else if f.flushMode == partialFlush && f.dict.availRead() > 0 {
		f.toRead = f.dict.readFlush()
	}

	f.step = nextBlock
}

func (f *decompressor) doStep() {
	switch f.step {
	case copyData:
		f.copyData()
	case nextBlock:
		f.nextBlock()
	case huffmanBytesBuffer:
		f.huffmanBytesBuffer()
	case huffmanBytesReader:
		f.huffmanBytesReader()
	case huffmanBufioReader:
		f.huffmanBufioReader()
	case huffmanStringsReader:
		f.huffmanStringsReader()
	case huffmanGenericReader:
		f.huffmanGenericReader()
	default:
		panic("BUG: unexpected step state")
	}
}

// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
func noEOF(e error) error {
	if e == io.EOF {
		return io.ErrUnexpectedEOF
	}
	return e
}

func (f *decompressor) moreBits() error {
	c, err := f.r.ReadByte()
	if err != nil {
		return noEOF(err)
	}
	f.roffset++
	f.b |= uint32(c) << (f.nb & regSizeMaskUint32)
	f.nb += 8
	return nil
}

// Read the next Huffman-encoded symbol from f according to h.
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
	// Since a huffmanDecoder can be empty or be composed of a degenerate tree
	// with a single element, huffSym must error on these two edge cases. In
	// both cases, the chunks entry will be 0 for the invalid sequence, leading
	// it to satisfy the n == 0 check below.
	n := uint(h.maxRead)
	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
	// but is smart enough to keep local variables in registers, so use nb and b,
	// inline call to moreBits and reassign b,nb back to f on return.
	nb, b := f.nb, f.b
	for {
		for nb < n {
			c, err := f.r.ReadByte()
			if err != nil {
				f.b = b
				f.nb = nb
				return 0, noEOF(err)
			}
			f.roffset++
			b |= uint32(c) << (nb & regSizeMaskUint32)
			nb += 8
		}
		chunk := h.chunks[b&(huffmanNumChunks-1)]
		n = uint(chunk & huffmanCountMask)
		if n > huffmanChunkBits {
			chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
			n = uint(chunk & huffmanCountMask)
		}
		if n <= nb {
			if n == 0 {
				f.b = b
				f.nb = nb
				if debugDecode {
					fmt.Println("huffsym: n==0")
				}
				f.err = CorruptInputError(f.roffset)
				return 0, f.err
			}
			f.b = b >> (n & regSizeMaskUint32)
			f.nb = nb - n
			return int(chunk >> huffmanValueShift), nil
		}
	}
}

func makeReader(r io.Reader) Reader {
	if rr, ok := r.(Reader); ok {
		return rr
	}
	return bufio.NewReader(r)
}

func fixedHuffmanDecoderInit() {
	fixedOnce.Do(func() {
		// These come from the RFC section 3.2.6.
		var bits [288]int
		for i := 0; i < 144; i++ {
			bits[i] = 8
		}
		for i := 144; i < 256; i++ {
			bits[i] = 9
		}
		for i := 256; i < 280; i++ {
			bits[i] = 7
		}
		for i := 280; i < 288; i++ {
			bits[i] = 8
		}
		fixedHuffmanDecoder.init(bits[:])
	})
}

func (f *decompressor) Reset(r io.Reader, dict []byte) error {
	*f = decompressor{
		r:        makeReader(r),
		bits:     f.bits,
		codebits: f.codebits,
		h1:       f.h1,
		h2:       f.h2,
		dict:     f.dict,
		step:     nextBlock,
	}
	f.dict.init(maxMatchOffset, dict)
	return nil
}

type ReaderOpt func(*decompressor)

// WithPartialBlock tells the decompressor to return after each block,
// so it can read data written with partial flush.
func WithPartialBlock() ReaderOpt {
	return func(f *decompressor) {
		f.flushMode = partialFlush
	}
}

// WithDict initializes the reader with a preset dictionary.
func WithDict(dict []byte) ReaderOpt {
	return func(f *decompressor) {
		f.dict.init(maxMatchOffset, dict)
	}
}

// NewReaderOpts returns a new reader with the provided options applied.
func NewReaderOpts(r io.Reader, opts ...ReaderOpt) io.ReadCloser {
	fixedHuffmanDecoderInit()

	var f decompressor
	f.r = makeReader(r)
	f.bits = new([maxNumLit + maxNumDist]int)
	f.codebits = new([numCodes]int)
	f.step = nextBlock
	f.dict.init(maxMatchOffset, nil)

	for _, opt := range opts {
		opt(&f)
	}

	return &f
}
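
// optsSketch is an illustrative, unused helper (an assumed combination, not
// part of the original API surface): it shows how the functional options
// compose, here wiring in both a preset dictionary and partial-flush framing.
func optsSketch(compressed io.Reader, dict []byte) io.ReadCloser {
	return NewReaderOpts(compressed, WithDict(dict), WithPartialBlock())
}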

// NewReader returns a new ReadCloser that can be used
// to read the uncompressed version of r.
// If r does not also implement io.ByteReader,
// the decompressor may read more data than necessary from r.
// It is the caller's responsibility to call Close on the ReadCloser
// when finished reading.
//
// The ReadCloser returned by NewReader also implements Resetter.
func NewReader(r io.Reader) io.ReadCloser {
	return NewReaderOpts(r)
}
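
// decompressSketch is an illustrative, unused helper showing the intended
// NewReader lifecycle (create, drain, close); it is a usage sketch, not part
// of the original file. io.Copy picks up the decompressor's WriteTo fast path
// automatically.
func decompressSketch(compressed io.Reader) error {
	r := NewReader(compressed)
	if _, err := io.Copy(io.Discard, r); err != nil {
		r.Close()
		return err
	}
	return r.Close()
}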

// NewReaderDict is like NewReader but initializes the reader
// with a preset dictionary. The returned Reader behaves as if
// the uncompressed data stream started with the given dictionary,
// which has already been read. NewReaderDict is typically used
// to read data compressed by NewWriterDict.
//
// The ReadCloser returned by NewReaderDict also implements Resetter.
func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
	return NewReaderOpts(r, WithDict(dict))
}