Blame - vendor/github.com/klauspost/compress/zstd/enc_fast.go - voltha-go

blob: f45a3da7dae6ab88bbec3761f6308250c6bbf521 [file] [log] [blame]

khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	1	// Copyright 2019+ Klaus Post. All rights reserved.
				2	// License information can be found in the LICENSE file.
				3	// Based on work by Yann Collet, released under BSD License.
				4
				5	package zstd
				6
				7	import (
				8	"fmt"
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	9	)
				10
				11	const (
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	12	tableBits = 15 // Bits used in the table
				13	tableSize = 1 << tableBits // Size of the table
				14	tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table
				15	tableShardSize = tableSize / tableShardCnt // Size of an individual shard
				16	tableFastHashLen = 6
				17	tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
				18	maxMatchLength = 131074
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	19	)
				20
				21	type tableEntry struct {
				22	val uint32
				23	offset int32
				24	}
				25
				26	type fastEncoder struct {
				27	fastBase
				28	table [tableSize]tableEntry
				29	}
				30
				31	type fastEncoderDict struct {
				32	fastEncoder
				33	dictTable []tableEntry
				34	tableShardDirty [tableShardCnt]bool
				35	allDirty bool
				36	}
				37
				38	// Encode mimmics functionality in zstd_fast.c
				39	func (e fastEncoder) Encode(blk blockEnc, src []byte) {
				40	const (
				41	inputMargin = 8
				42	minNonLiteralBlockSize = 1 + 1 + inputMargin
				43	)
				44
				45	// Protect against e.cur wraparound.
Abhay Kumar	a2ae599	2025-11-10 14:02:24 +0000	[diff] [blame^]	46	for e.cur >= e.bufferReset-int32(len(e.hist)) {
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	47	if len(e.hist) == 0 {
				48	for i := range e.table[:] {
				49	e.table[i] = tableEntry{}
				50	}
				51	e.cur = e.maxMatchOff
				52	break
				53	}
				54	// Shift down everything in the table that isn't already too far away.
				55	minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
				56	for i := range e.table[:] {
				57	v := e.table[i].offset
				58	if v < minOff {
				59	v = 0
				60	} else {
				61	v = v - e.cur + e.maxMatchOff
				62	}
				63	e.table[i].offset = v
				64	}
				65	e.cur = e.maxMatchOff
				66	break
				67	}
				68
				69	s := e.addBlock(src)
				70	blk.size = len(src)
				71	if len(src) < minNonLiteralBlockSize {
				72	blk.extraLits = len(src)
				73	blk.literals = blk.literals[:len(src)]
				74	copy(blk.literals, src)
				75	return
				76	}
				77
				78	// Override src
				79	src = e.hist
				80	sLimit := int32(len(src)) - inputMargin
				81	// stepSize is the number of bytes to skip on every main loop iteration.
				82	// It should be >= 2.
				83	const stepSize = 2
				84
				85	// TEMPLATE
				86	const hashLog = tableBits
				87	// seems global, but would be nice to tweak.
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	88	const kSearchStrength = 6
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	89
				90	// nextEmit is where in src the next emitLiteral should start from.
				91	nextEmit := s
				92	cv := load6432(src, s)
				93
				94	// Relative offsets
				95	offset1 := int32(blk.recentOffsets[0])
				96	offset2 := int32(blk.recentOffsets[1])
				97
				98	addLiterals := func(s *seq, until int32) {
				99	if until == nextEmit {
				100	return
				101	}
				102	blk.literals = append(blk.literals, src[nextEmit:until]...)
				103	s.litLen = uint32(until - nextEmit)
				104	}
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	105	if debugEncoder {
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	106	println("recent offsets:", blk.recentOffsets)
				107	}
				108
				109	encodeLoop:
				110	for {
				111	// t will contain the match offset when we find one.
				112	// When existing the search loop, we have already checked 4 bytes.
				113	var t int32
				114
				115	// We will not use repeat offsets across blocks.
				116	// By not using them for the first 3 matches
				117	canRepeat := len(blk.sequences) > 2
				118
				119	for {
				120	if debugAsserts && canRepeat && offset1 == 0 {
				121	panic("offset0 was 0")
				122	}
				123
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	124	nextHash := hashLen(cv, hashLog, tableFastHashLen)
				125	nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	126	candidate := e.table[nextHash]
				127	candidate2 := e.table[nextHash2]
				128	repIndex := s - offset1 + 2
				129
				130	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				131	e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
				132
				133	if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				134	// Consider history as well.
				135	var seq seq
Abhay Kumar	a2ae599	2025-11-10 14:02:24 +0000	[diff] [blame^]	136	length := 4 + e.matchlen(s+6, repIndex+4, src)
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	137	seq.matchLen = uint32(length - zstdMinMatch)
				138
				139	// We might be able to match backwards.
				140	// Extend as long as we can.
				141	start := s + 2
				142	// We end the search early, so we don't risk 0 literals
				143	// and have to do special offset treatment.
				144	startLimit := nextEmit + 1
				145
				146	sMin := s - e.maxMatchOff
				147	if sMin < 0 {
				148	sMin = 0
				149	}
				150	for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
				151	repIndex--
				152	start--
				153	seq.matchLen++
				154	}
				155	addLiterals(&seq, start)
				156
				157	// rep 0
				158	seq.offset = 1
				159	if debugSequences {
				160	println("repeat sequence", seq, "next s:", s)
				161	}
				162	blk.sequences = append(blk.sequences, seq)
				163	s += length + 2
				164	nextEmit = s
				165	if s >= sLimit {
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	166	if debugEncoder {
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	167	println("repeat ended", s, length)
				168
				169	}
				170	break encodeLoop
				171	}
				172	cv = load6432(src, s)
				173	continue
				174	}
				175	coffset0 := s - (candidate.offset - e.cur)
				176	coffset1 := s - (candidate2.offset - e.cur) + 1
				177	if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				178	// found a regular match
				179	t = candidate.offset - e.cur
				180	if debugAsserts && s <= t {
				181	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				182	}
				183	if debugAsserts && s-t > e.maxMatchOff {
				184	panic("s - t >e.maxMatchOff")
				185	}
				186	break
				187	}
				188
				189	if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				190	// found a regular match
				191	t = candidate2.offset - e.cur
				192	s++
				193	if debugAsserts && s <= t {
				194	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				195	}
				196	if debugAsserts && s-t > e.maxMatchOff {
				197	panic("s - t >e.maxMatchOff")
				198	}
				199	if debugAsserts && t < 0 {
				200	panic("t<0")
				201	}
				202	break
				203	}
				204	s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
				205	if s >= sLimit {
				206	break encodeLoop
				207	}
				208	cv = load6432(src, s)
				209	}
				210	// A 4-byte match has been found. We'll later see if more than 4 bytes.
				211	offset2 = offset1
				212	offset1 = s - t
				213
				214	if debugAsserts && s <= t {
				215	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				216	}
				217
				218	if debugAsserts && canRepeat && int(offset1) > len(src) {
				219	panic("invalid offset")
				220	}
				221
				222	// Extend the 4-byte match as long as possible.
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	223	l := e.matchlen(s+4, t+4, src) + 4
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	224
				225	// Extend backwards
				226	tMin := s - e.maxMatchOff
				227	if tMin < 0 {
				228	tMin = 0
				229	}
				230	for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
				231	s--
				232	t--
				233	l++
				234	}
				235
				236	// Write our sequence.
				237	var seq seq
				238	seq.litLen = uint32(s - nextEmit)
				239	seq.matchLen = uint32(l - zstdMinMatch)
				240	if seq.litLen > 0 {
				241	blk.literals = append(blk.literals, src[nextEmit:s]...)
				242	}
				243	// Don't use repeat offsets
				244	seq.offset = uint32(s-t) + 3
				245	s += l
				246	if debugSequences {
				247	println("sequence", seq, "next s:", s)
				248	}
				249	blk.sequences = append(blk.sequences, seq)
				250	nextEmit = s
				251	if s >= sLimit {
				252	break encodeLoop
				253	}
				254	cv = load6432(src, s)
				255
				256	// Check offset 2
				257	if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
				258	// We have at least 4 byte match.
				259	// No need to check backwards. We come straight from a match
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	260	l := 4 + e.matchlen(s+4, o2+4, src)
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	261
				262	// Store this, since we have it.
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	263	nextHash := hashLen(cv, hashLog, tableFastHashLen)
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	264	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				265	seq.matchLen = uint32(l) - zstdMinMatch
				266	seq.litLen = 0
				267	// Since litlen is always 0, this is offset 1.
				268	seq.offset = 1
				269	s += l
				270	nextEmit = s
				271	if debugSequences {
				272	println("sequence", seq, "next s:", s)
				273	}
				274	blk.sequences = append(blk.sequences, seq)
				275
				276	// Swap offset 1 and 2.
				277	offset1, offset2 = offset2, offset1
				278	if s >= sLimit {
				279	break encodeLoop
				280	}
				281	// Prepare next loop.
				282	cv = load6432(src, s)
				283	}
				284	}
				285
				286	if int(nextEmit) < len(src) {
				287	blk.literals = append(blk.literals, src[nextEmit:]...)
				288	blk.extraLits = len(src) - int(nextEmit)
				289	}
				290	blk.recentOffsets[0] = uint32(offset1)
				291	blk.recentOffsets[1] = uint32(offset2)
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	292	if debugEncoder {
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	293	println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
				294	}
				295	}
				296
				297	// EncodeNoHist will encode a block with no history and no following blocks.
				298	// Most notable difference is that src will not be copied for history and
				299	// we do not need to check for max match length.
				300	func (e fastEncoder) EncodeNoHist(blk blockEnc, src []byte) {
				301	const (
				302	inputMargin = 8
				303	minNonLiteralBlockSize = 1 + 1 + inputMargin
				304	)
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	305	if debugEncoder {
Abhay Kumar	a2ae599	2025-11-10 14:02:24 +0000	[diff] [blame^]	306	if len(src) > maxCompressedBlockSize {
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	307	panic("src too big")
				308	}
				309	}
				310
				311	// Protect against e.cur wraparound.
Abhay Kumar	a2ae599	2025-11-10 14:02:24 +0000	[diff] [blame^]	312	if e.cur >= e.bufferReset {
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	313	for i := range e.table[:] {
				314	e.table[i] = tableEntry{}
				315	}
				316	e.cur = e.maxMatchOff
				317	}
				318
				319	s := int32(0)
				320	blk.size = len(src)
				321	if len(src) < minNonLiteralBlockSize {
				322	blk.extraLits = len(src)
				323	blk.literals = blk.literals[:len(src)]
				324	copy(blk.literals, src)
				325	return
				326	}
				327
				328	sLimit := int32(len(src)) - inputMargin
				329	// stepSize is the number of bytes to skip on every main loop iteration.
				330	// It should be >= 2.
				331	const stepSize = 2
				332
				333	// TEMPLATE
				334	const hashLog = tableBits
				335	// seems global, but would be nice to tweak.
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	336	const kSearchStrength = 6
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	337
				338	// nextEmit is where in src the next emitLiteral should start from.
				339	nextEmit := s
				340	cv := load6432(src, s)
				341
				342	// Relative offsets
				343	offset1 := int32(blk.recentOffsets[0])
				344	offset2 := int32(blk.recentOffsets[1])
				345
				346	addLiterals := func(s *seq, until int32) {
				347	if until == nextEmit {
				348	return
				349	}
				350	blk.literals = append(blk.literals, src[nextEmit:until]...)
				351	s.litLen = uint32(until - nextEmit)
				352	}
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	353	if debugEncoder {
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	354	println("recent offsets:", blk.recentOffsets)
				355	}
				356
				357	encodeLoop:
				358	for {
				359	// t will contain the match offset when we find one.
				360	// When existing the search loop, we have already checked 4 bytes.
				361	var t int32
				362
				363	// We will not use repeat offsets across blocks.
				364	// By not using them for the first 3 matches
				365
				366	for {
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	367	nextHash := hashLen(cv, hashLog, tableFastHashLen)
				368	nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	369	candidate := e.table[nextHash]
				370	candidate2 := e.table[nextHash2]
				371	repIndex := s - offset1 + 2
				372
				373	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				374	e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
				375
				376	if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
				377	// Consider history as well.
				378	var seq seq
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	379	length := 4 + e.matchlen(s+6, repIndex+4, src)
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	380
				381	seq.matchLen = uint32(length - zstdMinMatch)
				382
				383	// We might be able to match backwards.
				384	// Extend as long as we can.
				385	start := s + 2
				386	// We end the search early, so we don't risk 0 literals
				387	// and have to do special offset treatment.
				388	startLimit := nextEmit + 1
				389
				390	sMin := s - e.maxMatchOff
				391	if sMin < 0 {
				392	sMin = 0
				393	}
				394	for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] {
				395	repIndex--
				396	start--
				397	seq.matchLen++
				398	}
				399	addLiterals(&seq, start)
				400
				401	// rep 0
				402	seq.offset = 1
				403	if debugSequences {
				404	println("repeat sequence", seq, "next s:", s)
				405	}
				406	blk.sequences = append(blk.sequences, seq)
				407	s += length + 2
				408	nextEmit = s
				409	if s >= sLimit {
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	410	if debugEncoder {
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	411	println("repeat ended", s, length)
				412
				413	}
				414	break encodeLoop
				415	}
				416	cv = load6432(src, s)
				417	continue
				418	}
				419	coffset0 := s - (candidate.offset - e.cur)
				420	coffset1 := s - (candidate2.offset - e.cur) + 1
				421	if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				422	// found a regular match
				423	t = candidate.offset - e.cur
				424	if debugAsserts && s <= t {
				425	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				426	}
				427	if debugAsserts && s-t > e.maxMatchOff {
				428	panic("s - t >e.maxMatchOff")
				429	}
				430	if debugAsserts && t < 0 {
				431	panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
				432	}
				433	break
				434	}
				435
				436	if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				437	// found a regular match
				438	t = candidate2.offset - e.cur
				439	s++
				440	if debugAsserts && s <= t {
				441	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				442	}
				443	if debugAsserts && s-t > e.maxMatchOff {
				444	panic("s - t >e.maxMatchOff")
				445	}
				446	if debugAsserts && t < 0 {
				447	panic("t<0")
				448	}
				449	break
				450	}
				451	s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
				452	if s >= sLimit {
				453	break encodeLoop
				454	}
				455	cv = load6432(src, s)
				456	}
				457	// A 4-byte match has been found. We'll later see if more than 4 bytes.
				458	offset2 = offset1
				459	offset1 = s - t
				460
				461	if debugAsserts && s <= t {
				462	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				463	}
				464
				465	if debugAsserts && t < 0 {
				466	panic(fmt.Sprintf("t (%d) < 0 ", t))
				467	}
				468	// Extend the 4-byte match as long as possible.
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	469	l := e.matchlen(s+4, t+4, src) + 4
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	470
				471	// Extend backwards
				472	tMin := s - e.maxMatchOff
				473	if tMin < 0 {
				474	tMin = 0
				475	}
				476	for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
				477	s--
				478	t--
				479	l++
				480	}
				481
				482	// Write our sequence.
				483	var seq seq
				484	seq.litLen = uint32(s - nextEmit)
				485	seq.matchLen = uint32(l - zstdMinMatch)
				486	if seq.litLen > 0 {
				487	blk.literals = append(blk.literals, src[nextEmit:s]...)
				488	}
				489	// Don't use repeat offsets
				490	seq.offset = uint32(s-t) + 3
				491	s += l
				492	if debugSequences {
				493	println("sequence", seq, "next s:", s)
				494	}
				495	blk.sequences = append(blk.sequences, seq)
				496	nextEmit = s
				497	if s >= sLimit {
				498	break encodeLoop
				499	}
				500	cv = load6432(src, s)
				501
				502	// Check offset 2
				503	if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
				504	// We have at least 4 byte match.
				505	// No need to check backwards. We come straight from a match
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	506	l := 4 + e.matchlen(s+4, o2+4, src)
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	507
				508	// Store this, since we have it.
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	509	nextHash := hashLen(cv, hashLog, tableFastHashLen)
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	510	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				511	seq.matchLen = uint32(l) - zstdMinMatch
				512	seq.litLen = 0
				513	// Since litlen is always 0, this is offset 1.
				514	seq.offset = 1
				515	s += l
				516	nextEmit = s
				517	if debugSequences {
				518	println("sequence", seq, "next s:", s)
				519	}
				520	blk.sequences = append(blk.sequences, seq)
				521
				522	// Swap offset 1 and 2.
				523	offset1, offset2 = offset2, offset1
				524	if s >= sLimit {
				525	break encodeLoop
				526	}
				527	// Prepare next loop.
				528	cv = load6432(src, s)
				529	}
				530	}
				531
				532	if int(nextEmit) < len(src) {
				533	blk.literals = append(blk.literals, src[nextEmit:]...)
				534	blk.extraLits = len(src) - int(nextEmit)
				535	}
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	536	if debugEncoder {
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	537	println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
				538	}
				539	// We do not store history, so we must offset e.cur to avoid false matches for next user.
Abhay Kumar	a2ae599	2025-11-10 14:02:24 +0000	[diff] [blame^]	540	if e.cur < e.bufferReset {
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	541	e.cur += int32(len(src))
				542	}
				543	}
				544
				545	// Encode will encode the content, with a dictionary if initialized for it.
				546	func (e fastEncoderDict) Encode(blk blockEnc, src []byte) {
				547	const (
				548	inputMargin = 8
				549	minNonLiteralBlockSize = 1 + 1 + inputMargin
				550	)
				551	if e.allDirty \|\| len(src) > 32<<10 {
				552	e.fastEncoder.Encode(blk, src)
				553	e.allDirty = true
				554	return
				555	}
				556	// Protect against e.cur wraparound.
Abhay Kumar	a2ae599	2025-11-10 14:02:24 +0000	[diff] [blame^]	557	for e.cur >= e.bufferReset-int32(len(e.hist)) {
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	558	if len(e.hist) == 0 {
Abhay Kumar	a2ae599	2025-11-10 14:02:24 +0000	[diff] [blame^]	559	e.table = [tableSize]tableEntry{}
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	560	e.cur = e.maxMatchOff
				561	break
				562	}
				563	// Shift down everything in the table that isn't already too far away.
				564	minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
				565	for i := range e.table[:] {
				566	v := e.table[i].offset
				567	if v < minOff {
				568	v = 0
				569	} else {
				570	v = v - e.cur + e.maxMatchOff
				571	}
				572	e.table[i].offset = v
				573	}
				574	e.cur = e.maxMatchOff
				575	break
				576	}
				577
				578	s := e.addBlock(src)
				579	blk.size = len(src)
				580	if len(src) < minNonLiteralBlockSize {
				581	blk.extraLits = len(src)
				582	blk.literals = blk.literals[:len(src)]
				583	copy(blk.literals, src)
				584	return
				585	}
				586
				587	// Override src
				588	src = e.hist
				589	sLimit := int32(len(src)) - inputMargin
				590	// stepSize is the number of bytes to skip on every main loop iteration.
				591	// It should be >= 2.
				592	const stepSize = 2
				593
				594	// TEMPLATE
				595	const hashLog = tableBits
				596	// seems global, but would be nice to tweak.
				597	const kSearchStrength = 7
				598
				599	// nextEmit is where in src the next emitLiteral should start from.
				600	nextEmit := s
				601	cv := load6432(src, s)
				602
				603	// Relative offsets
				604	offset1 := int32(blk.recentOffsets[0])
				605	offset2 := int32(blk.recentOffsets[1])
				606
				607	addLiterals := func(s *seq, until int32) {
				608	if until == nextEmit {
				609	return
				610	}
				611	blk.literals = append(blk.literals, src[nextEmit:until]...)
				612	s.litLen = uint32(until - nextEmit)
				613	}
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	614	if debugEncoder {
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	615	println("recent offsets:", blk.recentOffsets)
				616	}
				617
				618	encodeLoop:
				619	for {
				620	// t will contain the match offset when we find one.
				621	// When existing the search loop, we have already checked 4 bytes.
				622	var t int32
				623
				624	// We will not use repeat offsets across blocks.
				625	// By not using them for the first 3 matches
				626	canRepeat := len(blk.sequences) > 2
				627
				628	for {
				629	if debugAsserts && canRepeat && offset1 == 0 {
				630	panic("offset0 was 0")
				631	}
				632
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	633	nextHash := hashLen(cv, hashLog, tableFastHashLen)
				634	nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	635	candidate := e.table[nextHash]
				636	candidate2 := e.table[nextHash2]
				637	repIndex := s - offset1 + 2
				638
				639	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				640	e.markShardDirty(nextHash)
				641	e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
				642	e.markShardDirty(nextHash2)
				643
				644	if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				645	// Consider history as well.
				646	var seq seq
Abhay Kumar	a2ae599	2025-11-10 14:02:24 +0000	[diff] [blame^]	647	length := 4 + e.matchlen(s+6, repIndex+4, src)
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	648
				649	seq.matchLen = uint32(length - zstdMinMatch)
				650
				651	// We might be able to match backwards.
				652	// Extend as long as we can.
				653	start := s + 2
				654	// We end the search early, so we don't risk 0 literals
				655	// and have to do special offset treatment.
				656	startLimit := nextEmit + 1
				657
				658	sMin := s - e.maxMatchOff
				659	if sMin < 0 {
				660	sMin = 0
				661	}
				662	for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
				663	repIndex--
				664	start--
				665	seq.matchLen++
				666	}
				667	addLiterals(&seq, start)
				668
				669	// rep 0
				670	seq.offset = 1
				671	if debugSequences {
				672	println("repeat sequence", seq, "next s:", s)
				673	}
				674	blk.sequences = append(blk.sequences, seq)
				675	s += length + 2
				676	nextEmit = s
				677	if s >= sLimit {
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	678	if debugEncoder {
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	679	println("repeat ended", s, length)
				680
				681	}
				682	break encodeLoop
				683	}
				684	cv = load6432(src, s)
				685	continue
				686	}
				687	coffset0 := s - (candidate.offset - e.cur)
				688	coffset1 := s - (candidate2.offset - e.cur) + 1
				689	if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				690	// found a regular match
				691	t = candidate.offset - e.cur
				692	if debugAsserts && s <= t {
				693	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				694	}
				695	if debugAsserts && s-t > e.maxMatchOff {
				696	panic("s - t >e.maxMatchOff")
				697	}
				698	break
				699	}
				700
				701	if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				702	// found a regular match
				703	t = candidate2.offset - e.cur
				704	s++
				705	if debugAsserts && s <= t {
				706	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				707	}
				708	if debugAsserts && s-t > e.maxMatchOff {
				709	panic("s - t >e.maxMatchOff")
				710	}
				711	if debugAsserts && t < 0 {
				712	panic("t<0")
				713	}
				714	break
				715	}
				716	s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
				717	if s >= sLimit {
				718	break encodeLoop
				719	}
				720	cv = load6432(src, s)
				721	}
				722	// A 4-byte match has been found. We'll later see if more than 4 bytes.
				723	offset2 = offset1
				724	offset1 = s - t
				725
				726	if debugAsserts && s <= t {
				727	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				728	}
				729
				730	if debugAsserts && canRepeat && int(offset1) > len(src) {
				731	panic("invalid offset")
				732	}
				733
				734	// Extend the 4-byte match as long as possible.
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	735	l := e.matchlen(s+4, t+4, src) + 4
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	736
				737	// Extend backwards
				738	tMin := s - e.maxMatchOff
				739	if tMin < 0 {
				740	tMin = 0
				741	}
				742	for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
				743	s--
				744	t--
				745	l++
				746	}
				747
				748	// Write our sequence.
				749	var seq seq
				750	seq.litLen = uint32(s - nextEmit)
				751	seq.matchLen = uint32(l - zstdMinMatch)
				752	if seq.litLen > 0 {
				753	blk.literals = append(blk.literals, src[nextEmit:s]...)
				754	}
				755	// Don't use repeat offsets
				756	seq.offset = uint32(s-t) + 3
				757	s += l
				758	if debugSequences {
				759	println("sequence", seq, "next s:", s)
				760	}
				761	blk.sequences = append(blk.sequences, seq)
				762	nextEmit = s
				763	if s >= sLimit {
				764	break encodeLoop
				765	}
				766	cv = load6432(src, s)
				767
				768	// Check offset 2
				769	if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
				770	// We have at least 4 byte match.
				771	// No need to check backwards. We come straight from a match
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	772	l := 4 + e.matchlen(s+4, o2+4, src)
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	773
				774	// Store this, since we have it.
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	775	nextHash := hashLen(cv, hashLog, tableFastHashLen)
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	776	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				777	e.markShardDirty(nextHash)
				778	seq.matchLen = uint32(l) - zstdMinMatch
				779	seq.litLen = 0
				780	// Since litlen is always 0, this is offset 1.
				781	seq.offset = 1
				782	s += l
				783	nextEmit = s
				784	if debugSequences {
				785	println("sequence", seq, "next s:", s)
				786	}
				787	blk.sequences = append(blk.sequences, seq)
				788
				789	// Swap offset 1 and 2.
				790	offset1, offset2 = offset2, offset1
				791	if s >= sLimit {
				792	break encodeLoop
				793	}
				794	// Prepare next loop.
				795	cv = load6432(src, s)
				796	}
				797	}
				798
				799	if int(nextEmit) < len(src) {
				800	blk.literals = append(blk.literals, src[nextEmit:]...)
				801	blk.extraLits = len(src) - int(nextEmit)
				802	}
				803	blk.recentOffsets[0] = uint32(offset1)
				804	blk.recentOffsets[1] = uint32(offset2)
Akash Reddy Kankanala	cf04537	2025-06-10 14:11:24 +0530	[diff] [blame]	805	if debugEncoder {
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	806	println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
				807	}
				808	}
				809
				810	// ResetDict will reset and set a dictionary if not nil
				811	func (e fastEncoder) Reset(d dict, singleBlock bool) {
				812	e.resetBase(d, singleBlock)
				813	if d != nil {
				814	panic("fastEncoder: Reset with dict")
				815	}
				816	}
				817
				818	// ResetDict will reset and set a dictionary if not nil
				819	func (e fastEncoderDict) Reset(d dict, singleBlock bool) {
				820	e.resetBase(d, singleBlock)
				821	if d == nil {
				822	return
				823	}
				824
				825	// Init or copy dict table
				826	if len(e.dictTable) != len(e.table) \|\| d.id != e.lastDictID {
				827	if len(e.dictTable) != len(e.table) {
				828	e.dictTable = make([]tableEntry, len(e.table))
				829	}
				830	if true {
				831	end := e.maxMatchOff + int32(len(d.content)) - 8
Abhay Kumar	a2ae599	2025-11-10 14:02:24 +0000	[diff] [blame^]	832	for i := e.maxMatchOff; i < end; i += 2 {
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	833	const hashLog = tableBits
				834
				835	cv := load6432(d.content, i-e.maxMatchOff)
Abhay Kumar	a2ae599	2025-11-10 14:02:24 +0000	[diff] [blame^]	836	nextHash := hashLen(cv, hashLog, tableFastHashLen) // 0 -> 6
				837	nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 7
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	838	e.dictTable[nextHash] = tableEntry{
				839	val: uint32(cv),
				840	offset: i,
				841	}
				842	e.dictTable[nextHash1] = tableEntry{
				843	val: uint32(cv >> 8),
				844	offset: i + 1,
				845	}
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	846	}
				847	}
				848	e.lastDictID = d.id
				849	e.allDirty = true
				850	}
				851
				852	e.cur = e.maxMatchOff
				853	dirtyShardCnt := 0
				854	if !e.allDirty {
				855	for i := range e.tableShardDirty {
				856	if e.tableShardDirty[i] {
				857	dirtyShardCnt++
				858	}
				859	}
				860	}
				861
				862	const shardCnt = tableShardCnt
				863	const shardSize = tableShardSize
				864	if e.allDirty \|\| dirtyShardCnt > shardCnt*4/6 {
Abhay Kumar	a2ae599	2025-11-10 14:02:24 +0000	[diff] [blame^]	865	//copy(e.table[:], e.dictTable)
				866	e.table = ([tableSize]tableEntry)(e.dictTable)
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	867	for i := range e.tableShardDirty {
				868	e.tableShardDirty[i] = false
				869	}
				870	e.allDirty = false
				871	return
				872	}
				873	for i := range e.tableShardDirty {
				874	if !e.tableShardDirty[i] {
				875	continue
				876	}
				877
Abhay Kumar	a2ae599	2025-11-10 14:02:24 +0000	[diff] [blame^]	878	//copy(e.table[ishardSize:(i+1)shardSize], e.dictTable[ishardSize:(i+1)shardSize])
				879	([shardSize]tableEntry)(e.table[ishardSize:]) = ([shardSize]tableEntry)(e.dictTable[ishardSize:])
khenaidoo	d948f77	2021-08-11 17:49:24 -0400	[diff] [blame]	880	e.tableShardDirty[i] = false
				881	}
				882	e.allDirty = false
				883	}
				884
				885	func (e *fastEncoderDict) markAllShardsDirty() {
				886	e.allDirty = true
				887	}
				888
				889	func (e *fastEncoderDict) markShardDirty(entryNum uint32) {
				890	e.tableShardDirty[entryNum/tableShardSize] = true
				891	}