blob: be7b58b473f9038d5efab27267774b09b7306033 [file] [log] [blame]
Abhay Kumara2ae5992025-11-10 14:02:24 +00001// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package flate
6
7import (
8 "math"
9 "math/bits"
10)
11
12const (
13 maxBitsLimit = 16
14 // number of valid literals
15 literalCount = 286
16)
17
18// hcode is a huffman code with a bit code and bit length.
19type hcode uint32
20
21func (h hcode) len() uint8 {
22 return uint8(h)
23}
24
25func (h hcode) code64() uint64 {
26 return uint64(h >> 8)
27}
28
29func (h hcode) zero() bool {
30 return h == 0
31}
32
33type huffmanEncoder struct {
34 codes []hcode
35 bitCount [17]int32
36
37 // Allocate a reusable buffer with the longest possible frequency table.
38 // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
39 // The largest of these is literalCount, so we allocate for that case.
40 freqcache [literalCount + 1]literalNode
41}
42
43type literalNode struct {
44 literal uint16
45 freq uint16
46}
47
48// A levelInfo describes the state of the constructed tree for a given depth.
49type levelInfo struct {
50 // Our level. for better printing
51 level int32
52
53 // The frequency of the last node at this level
54 lastFreq int32
55
56 // The frequency of the next character to add to this level
57 nextCharFreq int32
58
59 // The frequency of the next pair (from level below) to add to this level.
60 // Only valid if the "needed" value of the next lower level is 0.
61 nextPairFreq int32
62
63 // The number of chains remaining to generate for this level before moving
64 // up to the next level
65 needed int32
66}
67
68// set sets the code and length of an hcode.
69func (h *hcode) set(code uint16, length uint8) {
70 *h = hcode(length) | (hcode(code) << 8)
71}
72
73func newhcode(code uint16, length uint8) hcode {
74 return hcode(length) | (hcode(code) << 8)
75}
76
77func reverseBits(number uint16, bitLength byte) uint16 {
78 return bits.Reverse16(number << ((16 - bitLength) & 15))
79}
80
81func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} }
82
83func newHuffmanEncoder(size int) *huffmanEncoder {
84 // Make capacity to next power of two.
85 c := uint(bits.Len32(uint32(size - 1)))
86 return &huffmanEncoder{codes: make([]hcode, size, 1<<c)}
87}
88
89// Generates a HuffmanCode corresponding to the fixed literal table
90func generateFixedLiteralEncoding() *huffmanEncoder {
91 h := newHuffmanEncoder(literalCount)
92 codes := h.codes
93 var ch uint16
94 for ch = 0; ch < literalCount; ch++ {
95 var bits uint16
96 var size uint8
97 switch {
98 case ch < 144:
99 // size 8, 000110000 .. 10111111
100 bits = ch + 48
101 size = 8
102 case ch < 256:
103 // size 9, 110010000 .. 111111111
104 bits = ch + 400 - 144
105 size = 9
106 case ch < 280:
107 // size 7, 0000000 .. 0010111
108 bits = ch - 256
109 size = 7
110 default:
111 // size 8, 11000000 .. 11000111
112 bits = ch + 192 - 280
113 size = 8
114 }
115 codes[ch] = newhcode(reverseBits(bits, size), size)
116 }
117 return h
118}
119
120func generateFixedOffsetEncoding() *huffmanEncoder {
121 h := newHuffmanEncoder(30)
122 codes := h.codes
123 for ch := range codes {
124 codes[ch] = newhcode(reverseBits(uint16(ch), 5), 5)
125 }
126 return h
127}
128
129var fixedLiteralEncoding = generateFixedLiteralEncoding()
130var fixedOffsetEncoding = generateFixedOffsetEncoding()
131
132func (h *huffmanEncoder) bitLength(freq []uint16) int {
133 var total int
134 for i, f := range freq {
135 if f != 0 {
136 total += int(f) * int(h.codes[i].len())
137 }
138 }
139 return total
140}
141
142func (h *huffmanEncoder) bitLengthRaw(b []byte) int {
143 var total int
144 for _, f := range b {
145 total += int(h.codes[f].len())
146 }
147 return total
148}
149
150// canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused.
151func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
152 var total int
153 for i, f := range freq {
154 if f != 0 {
155 code := h.codes[i]
156 if code.zero() {
157 return math.MaxInt32
158 }
159 total += int(f) * int(code.len())
160 }
161 }
162 return total
163}
164
165// Return the number of literals assigned to each bit size in the Huffman encoding
166//
167// This method is only called when list.length >= 3
168// The cases of 0, 1, and 2 literals are handled by special case code.
169//
170// list An array of the literals with non-zero frequencies
171//
172// and their associated frequencies. The array is in order of increasing
173// frequency, and has as its last element a special element with frequency
174// MaxInt32
175//
176// maxBits The maximum number of bits that should be used to encode any literal.
177//
178// Must be less than 16.
179//
180// return An integer array in which array[i] indicates the number of literals
181//
182// that should be encoded in i bits.
183func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
184 if maxBits >= maxBitsLimit {
185 panic("flate: maxBits too large")
186 }
187 n := int32(len(list))
188 list = list[0 : n+1]
189 list[n] = maxNode()
190
191 // The tree can't have greater depth than n - 1, no matter what. This
192 // saves a little bit of work in some small cases
193 if maxBits > n-1 {
194 maxBits = n - 1
195 }
196
197 // Create information about each of the levels.
198 // A bogus "Level 0" whose sole purpose is so that
199 // level1.prev.needed==0. This makes level1.nextPairFreq
200 // be a legitimate value that never gets chosen.
201 var levels [maxBitsLimit]levelInfo
202 // leafCounts[i] counts the number of literals at the left
203 // of ancestors of the rightmost node at level i.
204 // leafCounts[i][j] is the number of literals at the left
205 // of the level j ancestor.
206 var leafCounts [maxBitsLimit][maxBitsLimit]int32
207
208 // Descending to only have 1 bounds check.
209 l2f := int32(list[2].freq)
210 l1f := int32(list[1].freq)
211 l0f := int32(list[0].freq) + int32(list[1].freq)
212
213 for level := int32(1); level <= maxBits; level++ {
214 // For every level, the first two items are the first two characters.
215 // We initialize the levels as if we had already figured this out.
216 levels[level] = levelInfo{
217 level: level,
218 lastFreq: l1f,
219 nextCharFreq: l2f,
220 nextPairFreq: l0f,
221 }
222 leafCounts[level][level] = 2
223 if level == 1 {
224 levels[level].nextPairFreq = math.MaxInt32
225 }
226 }
227
228 // We need a total of 2*n - 2 items at top level and have already generated 2.
229 levels[maxBits].needed = 2*n - 4
230
231 level := uint32(maxBits)
232 for level < 16 {
233 l := &levels[level]
234 if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
235 // We've run out of both leafs and pairs.
236 // End all calculations for this level.
237 // To make sure we never come back to this level or any lower level,
238 // set nextPairFreq impossibly large.
239 l.needed = 0
240 levels[level+1].nextPairFreq = math.MaxInt32
241 level++
242 continue
243 }
244
245 prevFreq := l.lastFreq
246 if l.nextCharFreq < l.nextPairFreq {
247 // The next item on this row is a leaf node.
248 n := leafCounts[level][level] + 1
249 l.lastFreq = l.nextCharFreq
250 // Lower leafCounts are the same of the previous node.
251 leafCounts[level][level] = n
252 e := list[n]
253 if e.literal < math.MaxUint16 {
254 l.nextCharFreq = int32(e.freq)
255 } else {
256 l.nextCharFreq = math.MaxInt32
257 }
258 } else {
259 // The next item on this row is a pair from the previous row.
260 // nextPairFreq isn't valid until we generate two
261 // more values in the level below
262 l.lastFreq = l.nextPairFreq
263 // Take leaf counts from the lower level, except counts[level] remains the same.
264 if true {
265 save := leafCounts[level][level]
266 leafCounts[level] = leafCounts[level-1]
267 leafCounts[level][level] = save
268 } else {
269 copy(leafCounts[level][:level], leafCounts[level-1][:level])
270 }
271 levels[l.level-1].needed = 2
272 }
273
274 if l.needed--; l.needed == 0 {
275 // We've done everything we need to do for this level.
276 // Continue calculating one level up. Fill in nextPairFreq
277 // of that level with the sum of the two nodes we've just calculated on
278 // this level.
279 if l.level == maxBits {
280 // All done!
281 break
282 }
283 levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
284 level++
285 } else {
286 // If we stole from below, move down temporarily to replenish it.
287 for levels[level-1].needed > 0 {
288 level--
289 }
290 }
291 }
292
293 // Somethings is wrong if at the end, the top level is null or hasn't used
294 // all of the leaves.
295 if leafCounts[maxBits][maxBits] != n {
296 panic("leafCounts[maxBits][maxBits] != n")
297 }
298
299 bitCount := h.bitCount[:maxBits+1]
300 bits := 1
301 counts := &leafCounts[maxBits]
302 for level := maxBits; level > 0; level-- {
303 // chain.leafCount gives the number of literals requiring at least "bits"
304 // bits to encode.
305 bitCount[bits] = counts[level] - counts[level-1]
306 bits++
307 }
308 return bitCount
309}
310
311// Look at the leaves and assign them a bit count and an encoding as specified
312// in RFC 1951 3.2.2
313func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
314 code := uint16(0)
315 for n, bits := range bitCount {
316 code <<= 1
317 if n == 0 || bits == 0 {
318 continue
319 }
320 // The literals list[len(list)-bits] .. list[len(list)-bits]
321 // are encoded using "bits" bits, and get the values
322 // code, code + 1, .... The code values are
323 // assigned in literal order (not frequency order).
324 chunk := list[len(list)-int(bits):]
325
326 sortByLiteral(chunk)
327 for _, node := range chunk {
328 h.codes[node.literal] = newhcode(reverseBits(code, uint8(n)), uint8(n))
329 code++
330 }
331 list = list[0 : len(list)-int(bits)]
332 }
333}
334
335// Update this Huffman Code object to be the minimum code for the specified frequency count.
336//
337// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
338// maxBits The maximum number of bits to use for any literal.
339func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
340 list := h.freqcache[:len(freq)+1]
341 codes := h.codes[:len(freq)]
342 // Number of non-zero literals
343 count := 0
344 // Set list to be the set of all non-zero literals and their frequencies
345 for i, f := range freq {
346 if f != 0 {
347 list[count] = literalNode{uint16(i), f}
348 count++
349 } else {
350 codes[i] = 0
351 }
352 }
353 list[count] = literalNode{}
354
355 list = list[:count]
356 if count <= 2 {
357 // Handle the small cases here, because they are awkward for the general case code. With
358 // two or fewer literals, everything has bit length 1.
359 for i, node := range list {
360 // "list" is in order of increasing literal value.
361 h.codes[node.literal].set(uint16(i), 1)
362 }
363 return
364 }
365 sortByFreq(list)
366
367 // Get the number of literals for each bit count
368 bitCount := h.bitCounts(list, maxBits)
369 // And do the assignment
370 h.assignEncodingAndSize(bitCount, list)
371}
372
373// atLeastOne clamps the result between 1 and 15.
374func atLeastOne(v float32) float32 {
375 if v < 1 {
376 return 1
377 }
378 if v > 15 {
379 return 15
380 }
381 return v
382}
383
384func histogram(b []byte, h []uint16) {
385 if true && len(b) >= 8<<10 {
386 // Split for bigger inputs
387 histogramSplit(b, h)
388 } else {
389 h = h[:256]
390 for _, t := range b {
391 h[t]++
392 }
393 }
394}
395
396func histogramSplit(b []byte, h []uint16) {
397 // Tested, and slightly faster than 2-way.
398 // Writing to separate arrays and combining is also slightly slower.
399 h = h[:256]
400 for len(b)&3 != 0 {
401 h[b[0]]++
402 b = b[1:]
403 }
404 n := len(b) / 4
405 x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:]
406 y, z, w = y[:len(x)], z[:len(x)], w[:len(x)]
407 for i, t := range x {
408 v0 := &h[t]
409 v1 := &h[y[i]]
410 v3 := &h[w[i]]
411 v2 := &h[z[i]]
412 *v0++
413 *v1++
414 *v2++
415 *v3++
416 }
417}