package flate

import (
	"fmt"

	"github.com/klauspost/compress/internal/le"
)

// fastEncL1 is the level 1 encoder. It embeds fastGen, which maintains
// the previous byte block (the history), and adds the hash table used
// to find matches. This is the generic implementation.
type fastEncL1 struct {
	fastGen
	table [tableSize]tableEntry
}

// Encode encodes a block of src into dst, using a Snappy-derived level 1 algorithm.
func (e *fastEncL1) Encode(dst *tokens, src []byte) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
		hashBytes              = 5
	)
	if debugDeflate && e.cur < 0 {
		panic(fmt.Sprint("e.cur < 0: ", e.cur))
	}

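	// Hash-table entries store absolute positions (s + e.cur). Rebasing the
	// table here keeps those offsets well inside the int32 range and drops
	// entries that are already out of reach of maxMatchOffset.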
	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = maxMatchOffset
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
		for i := range e.table[:] {
			v := e.table[i].offset
			if v <= minOff {
				v = 0
			} else {
				v = v - e.cur + maxMatchOffset
			}
			e.table[i].offset = v
		}
		e.cur = maxMatchOffset
	}

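	// e.addBlock is assumed to append src to the history buffer e.hist and
	// return the offset of src within it; matching below therefore indexes
	// into the combined history rather than into src alone.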
	s := e.addBlock(src)

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = uint16(len(src))
		return
	}

	// Override src: from here on we match over the accumulated history.
	src = e.hist
	nextEmit := s

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	cv := load6432(src, s)

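	// Main loop: hash the current bytes to look up a candidate match, emit the
	// bytes before the match as literals, emit the match itself, then repeat
	// from the end of the match.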
	for {
		const skipLog = 5
		const doEvery = 2

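		// Candidate search with skipping: each failed probe advances s by
		// doEvery plus one extra byte for every 2^skipLog bytes scanned since
		// the last emit, so incompressible input is stepped over quickly.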
		nextS := s
		var candidate tableEntry
		var t int32
		for {
			nextHash := hashLen(cv, tableBits, hashBytes)
			candidate = e.table[nextHash]
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit {
				goto emitRemainder
			}

			now := load6432(src, nextS)
			e.table[nextHash] = tableEntry{offset: s + e.cur}
			nextHash = hashLen(now, tableBits, hashBytes)
			t = candidate.offset - e.cur
			if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) {
				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
				break
			}

			// No match: probe the position we skipped to right away, reusing
			// the 64-bit load, before taking another skip step.
			cv = now
			s = nextS
			nextS++
			candidate = e.table[nextHash]
			now >>= 8
			e.table[nextHash] = tableEntry{offset: s + e.cur}

			t = candidate.offset - e.cur
			if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) {
				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
				break
			}
			cv = now
			s = nextS
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			l := e.matchlenLong(int(s+4), int(t+4), src) + 4

			// Extend backwards
			for t > 0 && s > nextEmit && le.Load8(src, t-1) == le.Load8(src, s-1) {
				s--
				t--
				l++
			}
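			// The dead `if false` branch below documents the equivalent
			// emitLiteral call; the live branch is a manual inline of it that
			// also updates the literal histogram directly.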
			if nextEmit < s {
				if false {
					emitLiteral(dst, src[nextEmit:s])
				} else {
					for _, v := range src[nextEmit:s] {
						dst.tokens[dst.n] = token(v)
						dst.litHist[v]++
						dst.n++
					}
				}
			}

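			// Each match is stored as a single 32-bit token combining the
			// match type, the adjusted length, the offset code and the offset.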
			// Save the match found
			if false {
				dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
			} else {
				// Inlined...
				xoffset := uint32(s - t - baseMatchOffset)
				xlength := l
				oc := offsetCode(xoffset)
				xoffset |= oc << 16
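				// A single deflate match is at most 258 bytes long, so longer
				// matches are emitted as a series of match tokens.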
				for xlength > 0 {
					xl := xlength
					if xl > 258 {
						// We need to have at least baseMatchLength left over for next loop.
						if xl > 258+baseMatchLength {
							xl = 258
						} else {
							xl = 258 - baseMatchLength
						}
					}
					xlength -= xl
					xl -= baseMatchLength
					dst.extraHist[lengthCodes1[uint8(xl)]]++
					dst.offHist[oc]++
					dst.tokens[dst.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
					dst.n++
				}
			}
			s += l
			nextEmit = s
			// If the scan had already skipped past the end of this match,
			// resume from just past the skip position.
			if nextS >= s {
				s = nextS + 1
			}
			if s >= sLimit {
				// Index first pair after match end.
				if int(s+l+8) < len(src) {
					cv := load6432(src, s)
					e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur}
				}
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6432(src, s-2)
			o := e.cur + s - 2
			prevHash := hashLen(x, tableBits, hashBytes)
			e.table[prevHash] = tableEntry{offset: o}
			x >>= 16
			currHash := hashLen(x, tableBits, hashBytes)
			candidate = e.table[currHash]
			e.table[currHash] = tableEntry{offset: o + 2}

			t = candidate.offset - e.cur
			if s-t > maxMatchOffset || uint32(x) != load3232(src, t) {
				cv = x >> 8
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}
		emitLiteral(dst, src[nextEmit:])
	}
}