| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 1 | // Copyright 2019+ Klaus Post. All rights reserved. |
| 2 | // License information can be found in the LICENSE file. |
| 3 | // Based on work by Yann Collet, released under BSD License. |
| 4 | |
| 5 | package zstd |
| 6 | |
| 7 | import ( |
| 8 | "errors" |
| 9 | "fmt" |
| 10 | "math" |
| 11 | "math/bits" |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 12 | "slices" |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 13 | |
| 14 | "github.com/klauspost/compress/huff0" |
| 15 | ) |
| 16 | |
| 17 | type blockEnc struct { |
| 18 | size int |
| 19 | literals []byte |
| 20 | sequences []seq |
| 21 | coders seqCoders |
| 22 | litEnc *huff0.Scratch |
| 23 | dictLitEnc *huff0.Scratch |
| 24 | wr bitWriter |
| 25 | |
| 26 | extraLits int |
| 27 | output []byte |
| 28 | recentOffsets [3]uint32 |
| 29 | prevRecentOffsets [3]uint32 |
| 30 | |
| 31 | last bool |
| 32 | lowMem bool |
| 33 | } |
| 34 | |
| 35 | // init should be used once the block has been created. |
| 36 | // If called more than once, the effect is the same as calling reset. |
| 37 | func (b *blockEnc) init() { |
| 38 | if b.lowMem { |
| 39 | // 1K literals |
| 40 | if cap(b.literals) < 1<<10 { |
| 41 | b.literals = make([]byte, 0, 1<<10) |
| 42 | } |
| 43 | const defSeqs = 20 |
| 44 | if cap(b.sequences) < defSeqs { |
| 45 | b.sequences = make([]seq, 0, defSeqs) |
| 46 | } |
| 47 | // 1K |
| 48 | if cap(b.output) < 1<<10 { |
| 49 | b.output = make([]byte, 0, 1<<10) |
| 50 | } |
| 51 | } else { |
| 52 | if cap(b.literals) < maxCompressedBlockSize { |
| 53 | b.literals = make([]byte, 0, maxCompressedBlockSize) |
| 54 | } |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 55 | const defSeqs = 2000 |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 56 | if cap(b.sequences) < defSeqs { |
| 57 | b.sequences = make([]seq, 0, defSeqs) |
| 58 | } |
| 59 | if cap(b.output) < maxCompressedBlockSize { |
| 60 | b.output = make([]byte, 0, maxCompressedBlockSize) |
| 61 | } |
| 62 | } |
| 63 | |
| 64 | if b.coders.mlEnc == nil { |
| 65 | b.coders.mlEnc = &fseEncoder{} |
| 66 | b.coders.mlPrev = &fseEncoder{} |
| 67 | b.coders.ofEnc = &fseEncoder{} |
| 68 | b.coders.ofPrev = &fseEncoder{} |
| 69 | b.coders.llEnc = &fseEncoder{} |
| 70 | b.coders.llPrev = &fseEncoder{} |
| 71 | } |
| 72 | b.litEnc = &huff0.Scratch{WantLogLess: 4} |
| 73 | b.reset(nil) |
| 74 | } |
| 75 | |
| 76 | // initNewEncode can be used to reset offsets and encoders to the initial state. |
| 77 | func (b *blockEnc) initNewEncode() { |
| 78 | b.recentOffsets = [3]uint32{1, 4, 8} |
| 79 | b.litEnc.Reuse = huff0.ReusePolicyNone |
| 80 | b.coders.setPrev(nil, nil, nil) |
| 81 | } |
| 82 | |
| 83 | // reset will reset the block for a new encode, but in the same stream, |
| 84 | // meaning that state will be carried over, but the block content is reset. |
| 85 | // If a previous block is provided, the recent offsets are carried over. |
| 86 | func (b *blockEnc) reset(prev *blockEnc) { |
| 87 | b.extraLits = 0 |
| 88 | b.literals = b.literals[:0] |
| 89 | b.size = 0 |
| 90 | b.sequences = b.sequences[:0] |
| 91 | b.output = b.output[:0] |
| 92 | b.last = false |
| 93 | if prev != nil { |
| 94 | b.recentOffsets = prev.prevRecentOffsets |
| 95 | } |
| 96 | b.dictLitEnc = nil |
| 97 | } |
| 98 | |
| 99 | // reset will reset the block for a new encode, but in the same stream, |
| 100 | // meaning that state will be carried over, but the block content is reset. |
| 101 | // If a previous block is provided, the recent offsets are carried over. |
| 102 | func (b *blockEnc) swapEncoders(prev *blockEnc) { |
| 103 | b.coders.swap(&prev.coders) |
| 104 | b.litEnc, prev.litEnc = prev.litEnc, b.litEnc |
| 105 | } |
| 106 | |
| 107 | // blockHeader contains the information for a block header. |
| 108 | type blockHeader uint32 |
| 109 | |
| 110 | // setLast sets the 'last' indicator on a block. |
| 111 | func (h *blockHeader) setLast(b bool) { |
| 112 | if b { |
| 113 | *h = *h | 1 |
| 114 | } else { |
| 115 | const mask = (1 << 24) - 2 |
| 116 | *h = *h & mask |
| 117 | } |
| 118 | } |
| 119 | |
| 120 | // setSize will store the compressed size of a block. |
| 121 | func (h *blockHeader) setSize(v uint32) { |
| 122 | const mask = 7 |
| 123 | *h = (*h)&mask | blockHeader(v<<3) |
| 124 | } |
| 125 | |
| 126 | // setType sets the block type. |
| 127 | func (h *blockHeader) setType(t blockType) { |
| 128 | const mask = 1 | (((1 << 24) - 1) ^ 7) |
| 129 | *h = (*h & mask) | blockHeader(t<<1) |
| 130 | } |
| 131 | |
| 132 | // appendTo will append the block header to a slice. |
| 133 | func (h blockHeader) appendTo(b []byte) []byte { |
| 134 | return append(b, uint8(h), uint8(h>>8), uint8(h>>16)) |
| 135 | } |
| 136 | |
| 137 | // String returns a string representation of the block. |
| 138 | func (h blockHeader) String() string { |
| 139 | return fmt.Sprintf("Type: %d, Size: %d, Last:%t", (h>>1)&3, h>>3, h&1 == 1) |
| 140 | } |
| 141 | |
| 142 | // literalsHeader contains literals header information. |
| 143 | type literalsHeader uint64 |
| 144 | |
| 145 | // setType can be used to set the type of literal block. |
| 146 | func (h *literalsHeader) setType(t literalsBlockType) { |
| 147 | const mask = math.MaxUint64 - 3 |
| 148 | *h = (*h & mask) | literalsHeader(t) |
| 149 | } |
| 150 | |
| 151 | // setSize can be used to set a single size, for uncompressed and RLE content. |
| 152 | func (h *literalsHeader) setSize(regenLen int) { |
| 153 | inBits := bits.Len32(uint32(regenLen)) |
| 154 | // Only retain 2 bits |
| 155 | const mask = 3 |
| 156 | lh := uint64(*h & mask) |
| 157 | switch { |
| 158 | case inBits < 5: |
| 159 | lh |= (uint64(regenLen) << 3) | (1 << 60) |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 160 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 161 | got := int(lh>>3) & 0xff |
| 162 | if got != regenLen { |
| 163 | panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)")) |
| 164 | } |
| 165 | } |
| 166 | case inBits < 12: |
| 167 | lh |= (1 << 2) | (uint64(regenLen) << 4) | (2 << 60) |
| 168 | case inBits < 20: |
| 169 | lh |= (3 << 2) | (uint64(regenLen) << 4) | (3 << 60) |
| 170 | default: |
| 171 | panic(fmt.Errorf("internal error: block too big (%d)", regenLen)) |
| 172 | } |
| 173 | *h = literalsHeader(lh) |
| 174 | } |
| 175 | |
| 176 | // setSizes will set the size of a compressed literals section and the input length. |
| 177 | func (h *literalsHeader) setSizes(compLen, inLen int, single bool) { |
| 178 | compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen)) |
| 179 | // Only retain 2 bits |
| 180 | const mask = 3 |
| 181 | lh := uint64(*h & mask) |
| 182 | switch { |
| 183 | case compBits <= 10 && inBits <= 10: |
| 184 | if !single { |
| 185 | lh |= 1 << 2 |
| 186 | } |
| 187 | lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60) |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 188 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 189 | const mmask = (1 << 24) - 1 |
| 190 | n := (lh >> 4) & mmask |
| 191 | if int(n&1023) != inLen { |
| 192 | panic(fmt.Sprint("regensize:", int(n&1023), "!=", inLen, inBits)) |
| 193 | } |
| 194 | if int(n>>10) != compLen { |
| 195 | panic(fmt.Sprint("compsize:", int(n>>10), "!=", compLen, compBits)) |
| 196 | } |
| 197 | } |
| 198 | case compBits <= 14 && inBits <= 14: |
| 199 | lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60) |
| 200 | if single { |
| 201 | panic("single stream used with more than 10 bits length.") |
| 202 | } |
| 203 | case compBits <= 18 && inBits <= 18: |
| 204 | lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60) |
| 205 | if single { |
| 206 | panic("single stream used with more than 10 bits length.") |
| 207 | } |
| 208 | default: |
| 209 | panic("internal error: block too big") |
| 210 | } |
| 211 | *h = literalsHeader(lh) |
| 212 | } |
| 213 | |
| 214 | // appendTo will append the literals header to a byte slice. |
| 215 | func (h literalsHeader) appendTo(b []byte) []byte { |
| 216 | size := uint8(h >> 60) |
| 217 | switch size { |
| 218 | case 1: |
| 219 | b = append(b, uint8(h)) |
| 220 | case 2: |
| 221 | b = append(b, uint8(h), uint8(h>>8)) |
| 222 | case 3: |
| 223 | b = append(b, uint8(h), uint8(h>>8), uint8(h>>16)) |
| 224 | case 4: |
| 225 | b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24)) |
| 226 | case 5: |
| 227 | b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24), uint8(h>>32)) |
| 228 | default: |
| 229 | panic(fmt.Errorf("internal error: literalsHeader has invalid size (%d)", size)) |
| 230 | } |
| 231 | return b |
| 232 | } |
| 233 | |
| 234 | // size returns the output size with currently set values. |
| 235 | func (h literalsHeader) size() int { |
| 236 | return int(h >> 60) |
| 237 | } |
| 238 | |
| 239 | func (h literalsHeader) String() string { |
| 240 | return fmt.Sprintf("Type: %d, SizeFormat: %d, Size: 0x%d, Bytes:%d", literalsBlockType(h&3), (h>>2)&3, h&((1<<60)-1)>>4, h>>60) |
| 241 | } |
| 242 | |
| 243 | // pushOffsets will push the recent offsets to the backup store. |
| 244 | func (b *blockEnc) pushOffsets() { |
| 245 | b.prevRecentOffsets = b.recentOffsets |
| 246 | } |
| 247 | |
| 248 | // pushOffsets will push the recent offsets to the backup store. |
| 249 | func (b *blockEnc) popOffsets() { |
| 250 | b.recentOffsets = b.prevRecentOffsets |
| 251 | } |
| 252 | |
| 253 | // matchOffset will adjust recent offsets and return the adjusted one, |
| 254 | // if it matches a previous offset. |
| 255 | func (b *blockEnc) matchOffset(offset, lits uint32) uint32 { |
| 256 | // Check if offset is one of the recent offsets. |
| 257 | // Adjusts the output offset accordingly. |
| 258 | // Gives a tiny bit of compression, typically around 1%. |
| 259 | if true { |
| 260 | if lits > 0 { |
| 261 | switch offset { |
| 262 | case b.recentOffsets[0]: |
| 263 | offset = 1 |
| 264 | case b.recentOffsets[1]: |
| 265 | b.recentOffsets[1] = b.recentOffsets[0] |
| 266 | b.recentOffsets[0] = offset |
| 267 | offset = 2 |
| 268 | case b.recentOffsets[2]: |
| 269 | b.recentOffsets[2] = b.recentOffsets[1] |
| 270 | b.recentOffsets[1] = b.recentOffsets[0] |
| 271 | b.recentOffsets[0] = offset |
| 272 | offset = 3 |
| 273 | default: |
| 274 | b.recentOffsets[2] = b.recentOffsets[1] |
| 275 | b.recentOffsets[1] = b.recentOffsets[0] |
| 276 | b.recentOffsets[0] = offset |
| 277 | offset += 3 |
| 278 | } |
| 279 | } else { |
| 280 | switch offset { |
| 281 | case b.recentOffsets[1]: |
| 282 | b.recentOffsets[1] = b.recentOffsets[0] |
| 283 | b.recentOffsets[0] = offset |
| 284 | offset = 1 |
| 285 | case b.recentOffsets[2]: |
| 286 | b.recentOffsets[2] = b.recentOffsets[1] |
| 287 | b.recentOffsets[1] = b.recentOffsets[0] |
| 288 | b.recentOffsets[0] = offset |
| 289 | offset = 2 |
| 290 | case b.recentOffsets[0] - 1: |
| 291 | b.recentOffsets[2] = b.recentOffsets[1] |
| 292 | b.recentOffsets[1] = b.recentOffsets[0] |
| 293 | b.recentOffsets[0] = offset |
| 294 | offset = 3 |
| 295 | default: |
| 296 | b.recentOffsets[2] = b.recentOffsets[1] |
| 297 | b.recentOffsets[1] = b.recentOffsets[0] |
| 298 | b.recentOffsets[0] = offset |
| 299 | offset += 3 |
| 300 | } |
| 301 | } |
| 302 | } else { |
| 303 | offset += 3 |
| 304 | } |
| 305 | return offset |
| 306 | } |
| 307 | |
| 308 | // encodeRaw can be used to set the output to a raw representation of supplied bytes. |
| 309 | func (b *blockEnc) encodeRaw(a []byte) { |
| 310 | var bh blockHeader |
| 311 | bh.setLast(b.last) |
| 312 | bh.setSize(uint32(len(a))) |
| 313 | bh.setType(blockTypeRaw) |
| 314 | b.output = bh.appendTo(b.output[:0]) |
| 315 | b.output = append(b.output, a...) |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 316 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 317 | println("Adding RAW block, length", len(a), "last:", b.last) |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | // encodeRaw can be used to set the output to a raw representation of supplied bytes. |
| 322 | func (b *blockEnc) encodeRawTo(dst, src []byte) []byte { |
| 323 | var bh blockHeader |
| 324 | bh.setLast(b.last) |
| 325 | bh.setSize(uint32(len(src))) |
| 326 | bh.setType(blockTypeRaw) |
| 327 | dst = bh.appendTo(dst) |
| 328 | dst = append(dst, src...) |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 329 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 330 | println("Adding RAW block, length", len(src), "last:", b.last) |
| 331 | } |
| 332 | return dst |
| 333 | } |
| 334 | |
| 335 | // encodeLits can be used if the block is only litLen. |
| 336 | func (b *blockEnc) encodeLits(lits []byte, raw bool) error { |
| 337 | var bh blockHeader |
| 338 | bh.setLast(b.last) |
| 339 | bh.setSize(uint32(len(lits))) |
| 340 | |
| 341 | // Don't compress extremely small blocks |
| 342 | if len(lits) < 8 || (len(lits) < 32 && b.dictLitEnc == nil) || raw { |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 343 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 344 | println("Adding RAW block, length", len(lits), "last:", b.last) |
| 345 | } |
| 346 | bh.setType(blockTypeRaw) |
| 347 | b.output = bh.appendTo(b.output) |
| 348 | b.output = append(b.output, lits...) |
| 349 | return nil |
| 350 | } |
| 351 | |
| 352 | var ( |
| 353 | out []byte |
| 354 | reUsed, single bool |
| 355 | err error |
| 356 | ) |
| 357 | if b.dictLitEnc != nil { |
| 358 | b.litEnc.TransferCTable(b.dictLitEnc) |
| 359 | b.litEnc.Reuse = huff0.ReusePolicyAllow |
| 360 | b.dictLitEnc = nil |
| 361 | } |
| 362 | if len(lits) >= 1024 { |
| 363 | // Use 4 Streams. |
| 364 | out, reUsed, err = huff0.Compress4X(lits, b.litEnc) |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 365 | } else if len(lits) > 16 { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 366 | // Use 1 stream |
| 367 | single = true |
| 368 | out, reUsed, err = huff0.Compress1X(lits, b.litEnc) |
| 369 | } else { |
| 370 | err = huff0.ErrIncompressible |
| 371 | } |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 372 | if err == nil && len(out)+5 > len(lits) { |
| 373 | // If we are close, we may still be worse or equal to raw. |
| 374 | var lh literalsHeader |
| 375 | lh.setSizes(len(out), len(lits), single) |
| 376 | if len(out)+lh.size() >= len(lits) { |
| 377 | err = huff0.ErrIncompressible |
| 378 | } |
| 379 | } |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 380 | switch err { |
| 381 | case huff0.ErrIncompressible: |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 382 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 383 | println("Adding RAW block, length", len(lits), "last:", b.last) |
| 384 | } |
| 385 | bh.setType(blockTypeRaw) |
| 386 | b.output = bh.appendTo(b.output) |
| 387 | b.output = append(b.output, lits...) |
| 388 | return nil |
| 389 | case huff0.ErrUseRLE: |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 390 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 391 | println("Adding RLE block, length", len(lits)) |
| 392 | } |
| 393 | bh.setType(blockTypeRLE) |
| 394 | b.output = bh.appendTo(b.output) |
| 395 | b.output = append(b.output, lits[0]) |
| 396 | return nil |
| 397 | case nil: |
| 398 | default: |
| 399 | return err |
| 400 | } |
| 401 | // Compressed... |
| 402 | // Now, allow reuse |
| 403 | b.litEnc.Reuse = huff0.ReusePolicyAllow |
| 404 | bh.setType(blockTypeCompressed) |
| 405 | var lh literalsHeader |
| 406 | if reUsed { |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 407 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 408 | println("Reused tree, compressed to", len(out)) |
| 409 | } |
| 410 | lh.setType(literalsBlockTreeless) |
| 411 | } else { |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 412 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 413 | println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable)) |
| 414 | } |
| 415 | lh.setType(literalsBlockCompressed) |
| 416 | } |
| 417 | // Set sizes |
| 418 | lh.setSizes(len(out), len(lits), single) |
| 419 | bh.setSize(uint32(len(out) + lh.size() + 1)) |
| 420 | |
| 421 | // Write block headers. |
| 422 | b.output = bh.appendTo(b.output) |
| 423 | b.output = lh.appendTo(b.output) |
| 424 | // Add compressed data. |
| 425 | b.output = append(b.output, out...) |
| 426 | // No sequences. |
| 427 | b.output = append(b.output, 0) |
| 428 | return nil |
| 429 | } |
| 430 | |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 431 | // encodeRLE will encode an RLE block. |
| 432 | func (b *blockEnc) encodeRLE(val byte, length uint32) { |
| 433 | var bh blockHeader |
| 434 | bh.setLast(b.last) |
| 435 | bh.setSize(length) |
| 436 | bh.setType(blockTypeRLE) |
| 437 | b.output = bh.appendTo(b.output) |
| 438 | b.output = append(b.output, val) |
| 439 | } |
| 440 | |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 441 | // fuzzFseEncoder can be used to fuzz the FSE encoder. |
| 442 | func fuzzFseEncoder(data []byte) int { |
| 443 | if len(data) > maxSequences || len(data) < 2 { |
| 444 | return 0 |
| 445 | } |
| 446 | enc := fseEncoder{} |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 447 | hist := enc.Histogram() |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 448 | maxSym := uint8(0) |
| 449 | for i, v := range data { |
| 450 | v = v & 63 |
| 451 | data[i] = v |
| 452 | hist[v]++ |
| 453 | if v > maxSym { |
| 454 | maxSym = v |
| 455 | } |
| 456 | } |
| 457 | if maxSym == 0 { |
| 458 | // All 0 |
| 459 | return 0 |
| 460 | } |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 461 | cnt := int(slices.Max(hist[:maxSym])) |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 462 | if cnt == len(data) { |
| 463 | // RLE |
| 464 | return 0 |
| 465 | } |
| 466 | enc.HistogramFinished(maxSym, cnt) |
| 467 | err := enc.normalizeCount(len(data)) |
| 468 | if err != nil { |
| 469 | return 0 |
| 470 | } |
| 471 | _, err = enc.writeCount(nil) |
| 472 | if err != nil { |
| 473 | panic(err) |
| 474 | } |
| 475 | return 1 |
| 476 | } |
| 477 | |
| 478 | // encode will encode the block and append the output in b.output. |
| 479 | // Previous offset codes must be pushed if more blocks are expected. |
| 480 | func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { |
| 481 | if len(b.sequences) == 0 { |
| 482 | return b.encodeLits(b.literals, rawAllLits) |
| 483 | } |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 484 | if len(b.sequences) == 1 && len(org) > 0 && len(b.literals) <= 1 { |
| 485 | // Check common RLE cases. |
| 486 | seq := b.sequences[0] |
| 487 | if seq.litLen == uint32(len(b.literals)) && seq.offset-3 == 1 { |
| 488 | // Offset == 1 and 0 or 1 literals. |
| 489 | b.encodeRLE(org[0], b.sequences[0].matchLen+zstdMinMatch+seq.litLen) |
| 490 | return nil |
| 491 | } |
| 492 | } |
| 493 | |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 494 | // We want some difference to at least account for the headers. |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 495 | saved := b.size - len(b.literals) - (b.size >> 6) |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 496 | if saved < 16 { |
| 497 | if org == nil { |
| 498 | return errIncompressible |
| 499 | } |
| 500 | b.popOffsets() |
| 501 | return b.encodeLits(org, rawAllLits) |
| 502 | } |
| 503 | |
| 504 | var bh blockHeader |
| 505 | var lh literalsHeader |
| 506 | bh.setLast(b.last) |
| 507 | bh.setType(blockTypeCompressed) |
| 508 | // Store offset of the block header. Needed when we know the size. |
| 509 | bhOffset := len(b.output) |
| 510 | b.output = bh.appendTo(b.output) |
| 511 | |
| 512 | var ( |
| 513 | out []byte |
| 514 | reUsed, single bool |
| 515 | err error |
| 516 | ) |
| 517 | if b.dictLitEnc != nil { |
| 518 | b.litEnc.TransferCTable(b.dictLitEnc) |
| 519 | b.litEnc.Reuse = huff0.ReusePolicyAllow |
| 520 | b.dictLitEnc = nil |
| 521 | } |
| 522 | if len(b.literals) >= 1024 && !raw { |
| 523 | // Use 4 Streams. |
| 524 | out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc) |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 525 | } else if len(b.literals) > 16 && !raw { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 526 | // Use 1 stream |
| 527 | single = true |
| 528 | out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc) |
| 529 | } else { |
| 530 | err = huff0.ErrIncompressible |
| 531 | } |
| 532 | |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 533 | if err == nil && len(out)+5 > len(b.literals) { |
| 534 | // If we are close, we may still be worse or equal to raw. |
| 535 | var lh literalsHeader |
| 536 | lh.setSize(len(b.literals)) |
| 537 | szRaw := lh.size() |
| 538 | lh.setSizes(len(out), len(b.literals), single) |
| 539 | szComp := lh.size() |
| 540 | if len(out)+szComp >= len(b.literals)+szRaw { |
| 541 | err = huff0.ErrIncompressible |
| 542 | } |
| 543 | } |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 544 | switch err { |
| 545 | case huff0.ErrIncompressible: |
| 546 | lh.setType(literalsBlockRaw) |
| 547 | lh.setSize(len(b.literals)) |
| 548 | b.output = lh.appendTo(b.output) |
| 549 | b.output = append(b.output, b.literals...) |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 550 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 551 | println("Adding literals RAW, length", len(b.literals)) |
| 552 | } |
| 553 | case huff0.ErrUseRLE: |
| 554 | lh.setType(literalsBlockRLE) |
| 555 | lh.setSize(len(b.literals)) |
| 556 | b.output = lh.appendTo(b.output) |
| 557 | b.output = append(b.output, b.literals[0]) |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 558 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 559 | println("Adding literals RLE") |
| 560 | } |
| 561 | case nil: |
| 562 | // Compressed litLen... |
| 563 | if reUsed { |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 564 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 565 | println("reused tree") |
| 566 | } |
| 567 | lh.setType(literalsBlockTreeless) |
| 568 | } else { |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 569 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 570 | println("new tree, size:", len(b.litEnc.OutTable)) |
| 571 | } |
| 572 | lh.setType(literalsBlockCompressed) |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 573 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 574 | _, _, err := huff0.ReadTable(out, nil) |
| 575 | if err != nil { |
| 576 | panic(err) |
| 577 | } |
| 578 | } |
| 579 | } |
| 580 | lh.setSizes(len(out), len(b.literals), single) |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 581 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 582 | printf("Compressed %d literals to %d bytes", len(b.literals), len(out)) |
| 583 | println("Adding literal header:", lh) |
| 584 | } |
| 585 | b.output = lh.appendTo(b.output) |
| 586 | b.output = append(b.output, out...) |
| 587 | b.litEnc.Reuse = huff0.ReusePolicyAllow |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 588 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 589 | println("Adding literals compressed") |
| 590 | } |
| 591 | default: |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 592 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 593 | println("Adding literals ERROR:", err) |
| 594 | } |
| 595 | return err |
| 596 | } |
| 597 | // Sequence compression |
| 598 | |
| 599 | // Write the number of sequences |
| 600 | switch { |
| 601 | case len(b.sequences) < 128: |
| 602 | b.output = append(b.output, uint8(len(b.sequences))) |
| 603 | case len(b.sequences) < 0x7f00: // TODO: this could be wrong |
| 604 | n := len(b.sequences) |
| 605 | b.output = append(b.output, 128+uint8(n>>8), uint8(n)) |
| 606 | default: |
| 607 | n := len(b.sequences) - 0x7f00 |
| 608 | b.output = append(b.output, 255, uint8(n), uint8(n>>8)) |
| 609 | } |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 610 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 611 | println("Encoding", len(b.sequences), "sequences") |
| 612 | } |
| 613 | b.genCodes() |
| 614 | llEnc := b.coders.llEnc |
| 615 | ofEnc := b.coders.ofEnc |
| 616 | mlEnc := b.coders.mlEnc |
| 617 | err = llEnc.normalizeCount(len(b.sequences)) |
| 618 | if err != nil { |
| 619 | return err |
| 620 | } |
| 621 | err = ofEnc.normalizeCount(len(b.sequences)) |
| 622 | if err != nil { |
| 623 | return err |
| 624 | } |
| 625 | err = mlEnc.normalizeCount(len(b.sequences)) |
| 626 | if err != nil { |
| 627 | return err |
| 628 | } |
| 629 | |
| 630 | // Choose the best compression mode for each type. |
| 631 | // Will evaluate the new vs predefined and previous. |
| 632 | chooseComp := func(cur, prev, preDef *fseEncoder) (*fseEncoder, seqCompMode) { |
| 633 | // See if predefined/previous is better |
| 634 | hist := cur.count[:cur.symbolLen] |
| 635 | nSize := cur.approxSize(hist) + cur.maxHeaderSize() |
| 636 | predefSize := preDef.approxSize(hist) |
| 637 | prevSize := prev.approxSize(hist) |
| 638 | |
| 639 | // Add a small penalty for new encoders. |
| 640 | // Don't bother with extremely small (<2 byte gains). |
| 641 | nSize = nSize + (nSize+2*8*16)>>4 |
| 642 | switch { |
| 643 | case predefSize <= prevSize && predefSize <= nSize || forcePreDef: |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 644 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 645 | println("Using predefined", predefSize>>3, "<=", nSize>>3) |
| 646 | } |
| 647 | return preDef, compModePredefined |
| 648 | case prevSize <= nSize: |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 649 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 650 | println("Using previous", prevSize>>3, "<=", nSize>>3) |
| 651 | } |
| 652 | return prev, compModeRepeat |
| 653 | default: |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 654 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 655 | println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes") |
| 656 | println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen]) |
| 657 | } |
| 658 | return cur, compModeFSE |
| 659 | } |
| 660 | } |
| 661 | |
| 662 | // Write compression mode |
| 663 | var mode uint8 |
| 664 | if llEnc.useRLE { |
| 665 | mode |= uint8(compModeRLE) << 6 |
| 666 | llEnc.setRLE(b.sequences[0].llCode) |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 667 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 668 | println("llEnc.useRLE") |
| 669 | } |
| 670 | } else { |
| 671 | var m seqCompMode |
| 672 | llEnc, m = chooseComp(llEnc, b.coders.llPrev, &fsePredefEnc[tableLiteralLengths]) |
| 673 | mode |= uint8(m) << 6 |
| 674 | } |
| 675 | if ofEnc.useRLE { |
| 676 | mode |= uint8(compModeRLE) << 4 |
| 677 | ofEnc.setRLE(b.sequences[0].ofCode) |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 678 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 679 | println("ofEnc.useRLE") |
| 680 | } |
| 681 | } else { |
| 682 | var m seqCompMode |
| 683 | ofEnc, m = chooseComp(ofEnc, b.coders.ofPrev, &fsePredefEnc[tableOffsets]) |
| 684 | mode |= uint8(m) << 4 |
| 685 | } |
| 686 | |
| 687 | if mlEnc.useRLE { |
| 688 | mode |= uint8(compModeRLE) << 2 |
| 689 | mlEnc.setRLE(b.sequences[0].mlCode) |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 690 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 691 | println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen) |
| 692 | } |
| 693 | } else { |
| 694 | var m seqCompMode |
| 695 | mlEnc, m = chooseComp(mlEnc, b.coders.mlPrev, &fsePredefEnc[tableMatchLengths]) |
| 696 | mode |= uint8(m) << 2 |
| 697 | } |
| 698 | b.output = append(b.output, mode) |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 699 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 700 | printf("Compression modes: 0b%b", mode) |
| 701 | } |
| 702 | b.output, err = llEnc.writeCount(b.output) |
| 703 | if err != nil { |
| 704 | return err |
| 705 | } |
| 706 | start := len(b.output) |
| 707 | b.output, err = ofEnc.writeCount(b.output) |
| 708 | if err != nil { |
| 709 | return err |
| 710 | } |
| 711 | if false { |
| 712 | println("block:", b.output[start:], "tablelog", ofEnc.actualTableLog, "maxcount:", ofEnc.maxCount) |
| 713 | fmt.Printf("selected TableLog: %d, Symbol length: %d\n", ofEnc.actualTableLog, ofEnc.symbolLen) |
| 714 | for i, v := range ofEnc.norm[:ofEnc.symbolLen] { |
| 715 | fmt.Printf("%3d: %5d -> %4d \n", i, ofEnc.count[i], v) |
| 716 | } |
| 717 | } |
| 718 | b.output, err = mlEnc.writeCount(b.output) |
| 719 | if err != nil { |
| 720 | return err |
| 721 | } |
| 722 | |
| 723 | // Maybe in block? |
| 724 | wr := &b.wr |
| 725 | wr.reset(b.output) |
| 726 | |
| 727 | var ll, of, ml cState |
| 728 | |
| 729 | // Current sequence |
| 730 | seq := len(b.sequences) - 1 |
| 731 | s := b.sequences[seq] |
| 732 | llEnc.setBits(llBitsTable[:]) |
| 733 | mlEnc.setBits(mlBitsTable[:]) |
| 734 | ofEnc.setBits(nil) |
| 735 | |
| 736 | llTT, ofTT, mlTT := llEnc.ct.symbolTT[:256], ofEnc.ct.symbolTT[:256], mlEnc.ct.symbolTT[:256] |
| 737 | |
| 738 | // We have 3 bounds checks here (and in the loop). |
| 739 | // Since we are iterating backwards it is kinda hard to avoid. |
| 740 | llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode] |
| 741 | ll.init(wr, &llEnc.ct, llB) |
| 742 | of.init(wr, &ofEnc.ct, ofB) |
| 743 | wr.flush32() |
| 744 | ml.init(wr, &mlEnc.ct, mlB) |
| 745 | |
| 746 | // Each of these lookups also generates a bounds check. |
| 747 | wr.addBits32NC(s.litLen, llB.outBits) |
| 748 | wr.addBits32NC(s.matchLen, mlB.outBits) |
| 749 | wr.flush32() |
| 750 | wr.addBits32NC(s.offset, ofB.outBits) |
| 751 | if debugSequences { |
| 752 | println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB) |
| 753 | } |
| 754 | seq-- |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 755 | // Store sequences in reverse... |
| 756 | for seq >= 0 { |
| 757 | s = b.sequences[seq] |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 758 | |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 759 | ofB := ofTT[s.ofCode] |
| 760 | wr.flush32() // tablelog max is below 8 for each, so it will fill max 24 bits. |
| 761 | //of.encode(ofB) |
| 762 | nbBitsOut := (uint32(of.state) + ofB.deltaNbBits) >> 16 |
| 763 | dstState := int32(of.state>>(nbBitsOut&15)) + int32(ofB.deltaFindState) |
| 764 | wr.addBits16NC(of.state, uint8(nbBitsOut)) |
| 765 | of.state = of.stateTable[dstState] |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 766 | |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 767 | // Accumulate extra bits. |
| 768 | outBits := ofB.outBits & 31 |
| 769 | extraBits := uint64(s.offset & bitMask32[outBits]) |
| 770 | extraBitsN := outBits |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 771 | |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 772 | mlB := mlTT[s.mlCode] |
| 773 | //ml.encode(mlB) |
| 774 | nbBitsOut = (uint32(ml.state) + mlB.deltaNbBits) >> 16 |
| 775 | dstState = int32(ml.state>>(nbBitsOut&15)) + int32(mlB.deltaFindState) |
| 776 | wr.addBits16NC(ml.state, uint8(nbBitsOut)) |
| 777 | ml.state = ml.stateTable[dstState] |
| 778 | |
| 779 | outBits = mlB.outBits & 31 |
| 780 | extraBits = extraBits<<outBits | uint64(s.matchLen&bitMask32[outBits]) |
| 781 | extraBitsN += outBits |
| 782 | |
| 783 | llB := llTT[s.llCode] |
| 784 | //ll.encode(llB) |
| 785 | nbBitsOut = (uint32(ll.state) + llB.deltaNbBits) >> 16 |
| 786 | dstState = int32(ll.state>>(nbBitsOut&15)) + int32(llB.deltaFindState) |
| 787 | wr.addBits16NC(ll.state, uint8(nbBitsOut)) |
| 788 | ll.state = ll.stateTable[dstState] |
| 789 | |
| 790 | outBits = llB.outBits & 31 |
| 791 | extraBits = extraBits<<outBits | uint64(s.litLen&bitMask32[outBits]) |
| 792 | extraBitsN += outBits |
| 793 | |
| 794 | wr.flush32() |
| 795 | wr.addBits64NC(extraBits, extraBitsN) |
| 796 | |
| 797 | if debugSequences { |
| 798 | println("Encoded seq", seq, s) |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 799 | } |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 800 | |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 801 | seq-- |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 802 | } |
| 803 | ml.flush(mlEnc.actualTableLog) |
| 804 | of.flush(ofEnc.actualTableLog) |
| 805 | ll.flush(llEnc.actualTableLog) |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 806 | wr.close() |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 807 | b.output = wr.out |
| 808 | |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 809 | // Maybe even add a bigger margin. |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 810 | if len(b.output)-3-bhOffset >= b.size { |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 811 | // Discard and encode as raw block. |
| 812 | b.output = b.encodeRawTo(b.output[:bhOffset], org) |
| 813 | b.popOffsets() |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 814 | b.litEnc.Reuse = huff0.ReusePolicyNone |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 815 | return nil |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 816 | } |
| 817 | |
| 818 | // Size is output minus block header. |
| 819 | bh.setSize(uint32(len(b.output)-bhOffset) - 3) |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 820 | if debugEncoder { |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 821 | println("Rewriting block header", bh) |
| 822 | } |
| 823 | _ = bh.appendTo(b.output[bhOffset:bhOffset]) |
| 824 | b.coders.setPrev(llEnc, mlEnc, ofEnc) |
| 825 | return nil |
| 826 | } |
| 827 | |
| 828 | var errIncompressible = errors.New("incompressible") |
| 829 | |
| 830 | func (b *blockEnc) genCodes() { |
| 831 | if len(b.sequences) == 0 { |
| 832 | // nothing to do |
| 833 | return |
| 834 | } |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 835 | if len(b.sequences) > math.MaxUint16 { |
| 836 | panic("can only encode up to 64K sequences") |
| 837 | } |
| 838 | // No bounds checks after here: |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 839 | llH := b.coders.llEnc.Histogram() |
| 840 | ofH := b.coders.ofEnc.Histogram() |
| 841 | mlH := b.coders.mlEnc.Histogram() |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 842 | for i := range llH { |
| 843 | llH[i] = 0 |
| 844 | } |
| 845 | for i := range ofH { |
| 846 | ofH[i] = 0 |
| 847 | } |
| 848 | for i := range mlH { |
| 849 | mlH[i] = 0 |
| 850 | } |
| 851 | |
| 852 | var llMax, ofMax, mlMax uint8 |
| Akash Reddy Kankanala | cf04537 | 2025-06-10 14:11:24 +0530 | [diff] [blame] | 853 | for i := range b.sequences { |
| 854 | seq := &b.sequences[i] |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 855 | v := llCode(seq.litLen) |
| 856 | seq.llCode = v |
| 857 | llH[v]++ |
| 858 | if v > llMax { |
| 859 | llMax = v |
| 860 | } |
| 861 | |
| 862 | v = ofCode(seq.offset) |
| 863 | seq.ofCode = v |
| 864 | ofH[v]++ |
| 865 | if v > ofMax { |
| 866 | ofMax = v |
| 867 | } |
| 868 | |
| 869 | v = mlCode(seq.matchLen) |
| 870 | seq.mlCode = v |
| 871 | mlH[v]++ |
| 872 | if v > mlMax { |
| 873 | mlMax = v |
| 874 | if debugAsserts && mlMax > maxMatchLengthSymbol { |
| 875 | panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen)) |
| 876 | } |
| 877 | } |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 878 | } |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 879 | if debugAsserts && mlMax > maxMatchLengthSymbol { |
| 880 | panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax)) |
| 881 | } |
| 882 | if debugAsserts && ofMax > maxOffsetBits { |
| 883 | panic(fmt.Errorf("ofMax > maxOffsetBits (%d)", ofMax)) |
| 884 | } |
| 885 | if debugAsserts && llMax > maxLiteralLengthSymbol { |
| 886 | panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax)) |
| 887 | } |
| 888 | |
| Abhay Kumar | a2ae599 | 2025-11-10 14:02:24 +0000 | [diff] [blame^] | 889 | b.coders.mlEnc.HistogramFinished(mlMax, int(slices.Max(mlH[:mlMax+1]))) |
| 890 | b.coders.ofEnc.HistogramFinished(ofMax, int(slices.Max(ofH[:ofMax+1]))) |
| 891 | b.coders.llEnc.HistogramFinished(llMax, int(slices.Max(llH[:llMax+1]))) |
| khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 892 | } |