| Abhay Kumar | a61c522 | 2025-11-10 07:32:50 +0000 | [diff] [blame^] | 1 | // Copyright 2016 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package flate |
| 6 | |
| 7 | // dictDecoder implements the LZ77 sliding dictionary as used in decompression. |
| 8 | // LZ77 decompresses data through sequences of two forms of commands: |
| 9 | // |
| 10 | // - Literal insertions: Runs of one or more symbols are inserted into the data |
| 11 | // stream as is. This is accomplished through the writeByte method for a |
| 12 | // single symbol, or combinations of writeSlice/writeMark for multiple symbols. |
| 13 | // Any valid stream must start with a literal insertion if no preset dictionary |
| 14 | // is used. |
| 15 | // |
| 16 | // - Backward copies: Runs of one or more symbols are copied from previously |
| 17 | // emitted data. Backward copies come as the tuple (dist, length) where dist |
| 18 | // determines how far back in the stream to copy from and length determines how |
| 19 | // many bytes to copy. Note that it is valid for the length to be greater than |
| 20 | // the distance. Since LZ77 uses forward copies, that situation is used to |
| 21 | // perform a form of run-length encoding on repeated runs of symbols. |
| 22 | // The writeCopy and tryWriteCopy are used to implement this command. |
| 23 | // |
| 24 | // For performance reasons, this implementation performs little to no sanity |
| 25 | // checks about the arguments. As such, the invariants documented for each |
| 26 | // method call must be respected. |
| 27 | type dictDecoder struct { |
| 28 | hist []byte // Sliding window history |
| 29 | |
| 30 | // Invariant: 0 <= rdPos <= wrPos <= len(hist) |
| 31 | wrPos int // Current output position in buffer |
| 32 | rdPos int // Have emitted hist[:rdPos] already |
| 33 | full bool // Has a full window length been written yet? |
| 34 | } |
| 35 | |
| 36 | // init initializes dictDecoder to have a sliding window dictionary of the given |
| 37 | // size. If a preset dict is provided, it will initialize the dictionary with |
| 38 | // the contents of dict. |
| 39 | func (dd *dictDecoder) init(size int, dict []byte) { |
| 40 | *dd = dictDecoder{hist: dd.hist} |
| 41 | |
| 42 | if cap(dd.hist) < size { |
| 43 | dd.hist = make([]byte, size) |
| 44 | } |
| 45 | dd.hist = dd.hist[:size] |
| 46 | |
| 47 | if len(dict) > len(dd.hist) { |
| 48 | dict = dict[len(dict)-len(dd.hist):] |
| 49 | } |
| 50 | dd.wrPos = copy(dd.hist, dict) |
| 51 | if dd.wrPos == len(dd.hist) { |
| 52 | dd.wrPos = 0 |
| 53 | dd.full = true |
| 54 | } |
| 55 | dd.rdPos = dd.wrPos |
| 56 | } |
| 57 | |
| 58 | // histSize reports the total amount of historical data in the dictionary. |
| 59 | func (dd *dictDecoder) histSize() int { |
| 60 | if dd.full { |
| 61 | return len(dd.hist) |
| 62 | } |
| 63 | return dd.wrPos |
| 64 | } |
| 65 | |
| 66 | // availRead reports the number of bytes that can be flushed by readFlush. |
| 67 | func (dd *dictDecoder) availRead() int { |
| 68 | return dd.wrPos - dd.rdPos |
| 69 | } |
| 70 | |
| 71 | // availWrite reports the available amount of output buffer space. |
| 72 | func (dd *dictDecoder) availWrite() int { |
| 73 | return len(dd.hist) - dd.wrPos |
| 74 | } |
| 75 | |
| 76 | // writeSlice returns a slice of the available buffer to write data to. |
| 77 | // |
| 78 | // This invariant will be kept: len(s) <= availWrite() |
| 79 | func (dd *dictDecoder) writeSlice() []byte { |
| 80 | return dd.hist[dd.wrPos:] |
| 81 | } |
| 82 | |
| 83 | // writeMark advances the writer pointer by cnt. |
| 84 | // |
| 85 | // This invariant must be kept: 0 <= cnt <= availWrite() |
| 86 | func (dd *dictDecoder) writeMark(cnt int) { |
| 87 | dd.wrPos += cnt |
| 88 | } |
| 89 | |
| 90 | // writeByte writes a single byte to the dictionary. |
| 91 | // |
| 92 | // This invariant must be kept: 0 < availWrite() |
| 93 | func (dd *dictDecoder) writeByte(c byte) { |
| 94 | dd.hist[dd.wrPos] = c |
| 95 | dd.wrPos++ |
| 96 | } |
| 97 | |
| 98 | // writeCopy copies a string at a given (dist, length) to the output. |
| 99 | // This returns the number of bytes copied and may be less than the requested |
| 100 | // length if the available space in the output buffer is too small. |
| 101 | // |
| 102 | // This invariant must be kept: 0 < dist <= histSize() |
| 103 | func (dd *dictDecoder) writeCopy(dist, length int) int { |
| 104 | dstBase := dd.wrPos |
| 105 | dstPos := dstBase |
| 106 | srcPos := dstPos - dist |
| 107 | endPos := dstPos + length |
| 108 | if endPos > len(dd.hist) { |
| 109 | endPos = len(dd.hist) |
| 110 | } |
| 111 | |
| 112 | // Copy non-overlapping section after destination position. |
| 113 | // |
| 114 | // This section is non-overlapping in that the copy length for this section |
| 115 | // is always less than or equal to the backwards distance. This can occur |
| 116 | // if a distance refers to data that wraps-around in the buffer. |
| 117 | // Thus, a backwards copy is performed here; that is, the exact bytes in |
| 118 | // the source prior to the copy is placed in the destination. |
| 119 | if srcPos < 0 { |
| 120 | srcPos += len(dd.hist) |
| 121 | dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) |
| 122 | srcPos = 0 |
| 123 | } |
| 124 | |
| 125 | // Copy possibly overlapping section before destination position. |
| 126 | // |
| 127 | // This section can overlap if the copy length for this section is larger |
| 128 | // than the backwards distance. This is allowed by LZ77 so that repeated |
| 129 | // strings can be succinctly represented using (dist, length) pairs. |
| 130 | // Thus, a forwards copy is performed here; that is, the bytes copied is |
| 131 | // possibly dependent on the resulting bytes in the destination as the copy |
| 132 | // progresses along. This is functionally equivalent to the following: |
| 133 | // |
| 134 | // for i := 0; i < endPos-dstPos; i++ { |
| 135 | // dd.hist[dstPos+i] = dd.hist[srcPos+i] |
| 136 | // } |
| 137 | // dstPos = endPos |
| 138 | // |
| 139 | for dstPos < endPos { |
| 140 | dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) |
| 141 | } |
| 142 | |
| 143 | dd.wrPos = dstPos |
| 144 | return dstPos - dstBase |
| 145 | } |
| 146 | |
| 147 | // tryWriteCopy tries to copy a string at a given (distance, length) to the |
| 148 | // output. This specialized version is optimized for short distances. |
| 149 | // |
| 150 | // This method is designed to be inlined for performance reasons. |
| 151 | // |
| 152 | // This invariant must be kept: 0 < dist <= histSize() |
| 153 | func (dd *dictDecoder) tryWriteCopy(dist, length int) int { |
| 154 | dstPos := dd.wrPos |
| 155 | endPos := dstPos + length |
| 156 | if dstPos < dist || endPos > len(dd.hist) { |
| 157 | return 0 |
| 158 | } |
| 159 | dstBase := dstPos |
| 160 | srcPos := dstPos - dist |
| 161 | |
| 162 | // Copy possibly overlapping section before destination position. |
| 163 | loop: |
| 164 | dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) |
| 165 | if dstPos < endPos { |
| 166 | goto loop // Avoid for-loop so that this function can be inlined |
| 167 | } |
| 168 | |
| 169 | dd.wrPos = dstPos |
| 170 | return dstPos - dstBase |
| 171 | } |
| 172 | |
| 173 | // readFlush returns a slice of the historical buffer that is ready to be |
| 174 | // emitted to the user. The data returned by readFlush must be fully consumed |
| 175 | // before calling any other dictDecoder methods. |
| 176 | func (dd *dictDecoder) readFlush() []byte { |
| 177 | toRead := dd.hist[dd.rdPos:dd.wrPos] |
| 178 | dd.rdPos = dd.wrPos |
| 179 | if dd.wrPos == len(dd.hist) { |
| 180 | dd.wrPos, dd.rdPos = 0, 0 |
| 181 | dd.full = true |
| 182 | } |
| 183 | return toRead |
| 184 | } |