package flate

import (
	"io"
	"math"
	"sync"

	"github.com/klauspost/compress/internal/le"
)

const (
	maxStatelessBlock = math.MaxInt16
	// dictionary will be taken from maxStatelessBlock, so limit it.
	maxStatelessDict = 8 << 10

	slTableBits  = 13
	slTableSize  = 1 << slTableBits
	slTableShift = 32 - slTableBits
)

type statelessWriter struct {
	dst    io.Writer
	closed bool
}

func (s *statelessWriter) Close() error {
	if s.closed {
		return nil
	}
	s.closed = true
	// Emit EOF block
	return StatelessDeflate(s.dst, nil, true, nil)
}

func (s *statelessWriter) Write(p []byte) (n int, err error) {
	err = StatelessDeflate(s.dst, p, false, nil)
	if err != nil {
		return 0, err
	}
	return len(p), nil
}

func (s *statelessWriter) Reset(w io.Writer) {
	s.dst = w
	s.closed = false
}

// NewStatelessWriter will do compression without maintaining any state
// between Write calls.
// No memory is kept between Write calls,
// so compression ratio and speed will be suboptimal.
// Because of this, the size of actual Write calls will affect output size.
func NewStatelessWriter(dst io.Writer) io.WriteCloser {
	return &statelessWriter{dst: dst}
}
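
// The sketch below is not part of the original file; it is a hypothetical
// usage example showing how data might be streamed through a stateless
// writer. The function name and the caller-provided chunking are assumptions;
// only NewStatelessWriter, Write and Close come from this package.
func exampleStatelessWriter(dst io.Writer, chunks [][]byte) error {
	w := NewStatelessWriter(dst)
	for _, c := range chunks {
		// Each Write is compressed as an independent block, so larger
		// chunks generally compress better than many small ones.
		if _, err := w.Write(c); err != nil {
			return err
		}
	}
	// Close emits the final EOF block and must be called to terminate
	// the stream.
	return w.Close()
}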

// bitWriterPool contains bit writers that can be reused.
var bitWriterPool = sync.Pool{
	New: func() interface{} {
		return newHuffmanBitWriter(nil)
	},
}

// StatelessDeflate allows compressing directly to a Writer without retaining state.
// When returning, everything will be flushed.
// Up to 8KB of an optional dictionary can be given, which is presumed to precede the block.
// Longer dictionaries will be truncated and will still produce valid output.
// Sending a nil dictionary is perfectly fine.
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
	var dst tokens
	bw := bitWriterPool.Get().(*huffmanBitWriter)
	bw.reset(out)
	defer func() {
		// don't keep a reference to our output
		bw.reset(nil)
		bitWriterPool.Put(bw)
	}()
	if eof && len(in) == 0 {
		// Just write an EOF block.
		// Could be faster...
		bw.writeStoredHeader(0, true)
		bw.flush()
		return bw.err
	}

	// Truncate dict
	if len(dict) > maxStatelessDict {
		dict = dict[len(dict)-maxStatelessDict:]
	}

	// For subsequent loops, keep shallow dict reference to avoid alloc+copy.
	var inDict []byte

	for len(in) > 0 {
		todo := in
		if len(inDict) > 0 {
			if len(todo) > maxStatelessBlock-maxStatelessDict {
				todo = todo[:maxStatelessBlock-maxStatelessDict]
			}
		} else if len(todo) > maxStatelessBlock-len(dict) {
			todo = todo[:maxStatelessBlock-len(dict)]
		}
		inOrg := in
		in = in[len(todo):]
		uncompressed := todo
		if len(dict) > 0 {
			// combine dict and source
			bufLen := len(todo) + len(dict)
			combined := make([]byte, bufLen)
			copy(combined, dict)
			copy(combined[len(dict):], todo)
			todo = combined
		}
		// Compress
		if len(inDict) == 0 {
			statelessEnc(&dst, todo, int16(len(dict)))
		} else {
			statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
		}
		isEof := eof && len(in) == 0

		if dst.n == 0 {
			bw.writeStoredHeader(len(uncompressed), isEof)
			if bw.err != nil {
				return bw.err
			}
			bw.writeBytes(uncompressed)
		} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
			// If we removed less than 1/16th, Huffman compress the block.
			bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
		} else {
			bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
		}
		if len(in) > 0 {
			// Retain a dict if we have more
			inDict = inOrg[len(uncompressed)-maxStatelessDict:]
			dict = nil
			dst.Reset()
		}
		if bw.err != nil {
			return bw.err
		}
	}
	if !eof {
		// Align, only a stored block can do that.
		bw.writeStoredHeader(0, false)
	}
	bw.flush()
	return bw.err
}
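
// The sketch below is not part of the original file; it is a hypothetical
// example of a one-shot call with a preceding dictionary. The function name
// and parameters are assumptions; StatelessDeflate itself is the package API.
func exampleStatelessDeflate(out io.Writer, payload, dict []byte) error {
	// eof=true flushes and terminates the stream in a single call.
	// dict may be nil; only its last 8 KiB (maxStatelessDict) is used.
	return StatelessDeflate(out, payload, true, dict)
}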

// hashSL returns a table index for the low 4 bytes of u,
// using a multiplicative hash.
func hashSL(u uint32) uint32 {
	return (u * 0x1e35a7bd) >> slTableShift
}

// load3216 loads a little-endian uint32 from b at offset i.
func load3216(b []byte, i int16) uint32 {
	return le.Load32(b, i)
}

// load6416 loads a little-endian uint64 from b at offset i.
func load6416(b []byte, i int16) uint64 {
	return le.Load64(b, i)
}

// statelessEnc encodes src[startAt:] into dst, using src[:startAt] as a
// preset dictionary that is indexed but never emitted.
// If the block is too small or no matches are found, dst.n is left at 0 and
// the caller falls back to a stored block (see StatelessDeflate).
func statelessEnc(dst *tokens, src []byte, startAt int16) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	type tableEntry struct {
		offset int16
	}

	var table [slTableSize]tableEntry

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src)-int(startAt) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = 0
		return
	}
	// Index until startAt
	if startAt > 0 {
		cv := load3232(src, 0)
		for i := int16(0); i < startAt; i++ {
			table[hashSL(cv)] = tableEntry{offset: i}
			cv = (cv >> 8) | (uint32(src[i+4]) << 24)
		}
	}

	s := startAt + 1
	nextEmit := startAt
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int16(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	cv := load3216(src, s)

	for {
		const skipLog = 5
		const doEvery = 2

		nextS := s
		var candidate tableEntry
		for {
			nextHash := hashSL(cv)
			candidate = table[nextHash]
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit || nextS <= 0 {
				goto emitRemainder
			}

			now := load6416(src, nextS)
			table[nextHash] = tableEntry{offset: s}
			nextHash = hashSL(uint32(now))

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}

			// Do one right away...
			cv = uint32(now)
			s = nextS
			nextS++
			candidate = table[nextHash]
			now >>= 8
			table[nextHash] = tableEntry{offset: s}

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}
			cv = uint32(now)
			s = nextS
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset
			l := int16(matchLen(src[s+4:], src[t+4:]) + 4)

			// Extend backwards
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
			if nextEmit < s {
				if false {
					emitLiteral(dst, src[nextEmit:s])
				} else {
					for _, v := range src[nextEmit:s] {
						dst.tokens[dst.n] = token(v)
						dst.litHist[v]++
						dst.n++
					}
				}
			}

			// Save the match found
			dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			if nextS >= s {
				s = nextS + 1
			}
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6416(src, s-2)
			o := s - 2
			prevHash := hashSL(uint32(x))
			table[prevHash] = tableEntry{offset: o}
			x >>= 16
			currHash := hashSL(uint32(x))
			candidate = table[currHash]
			table[currHash] = tableEntry{offset: o + 2}

			if uint32(x) != load3216(src, candidate.offset) {
				cv = uint32(x >> 8)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}
		emitLiteral(dst, src[nextEmit:])
	}
}
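
// The sketch below is not part of the original file; it is a hypothetical
// illustration of how statelessEnc is driven with a preset dictionary,
// mirroring what StatelessDeflate does internally. The function name and
// parameters are assumptions. It assumes len(dict)+len(payload) does not
// exceed maxStatelessBlock, since table offsets are int16.
func exampleStatelessEnc(dst *tokens, payload, dict []byte) {
	// Clear any tokens left from a previous use.
	dst.Reset()
	// The source buffer is the dictionary followed by the payload;
	// startAt marks where token emission begins, so the dictionary is
	// only indexed, never emitted.
	combined := make([]byte, 0, len(dict)+len(payload))
	combined = append(combined, dict...)
	combined = append(combined, payload...)
	statelessEnc(dst, combined, int16(len(dict)))
	// dst.n == 0 means no tokens were produced (block too small or no
	// matches); the caller should then emit a stored block instead.
}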