blob: 20671dcb91d9329216d7e36932b3374ee9d7c95b [file] [log] [blame]
khenaidoo26721882021-08-11 17:42:52 -04001package zstd
2
3import (
4 "errors"
5 "fmt"
Abhay Kumar40252eb2025-10-13 13:25:53 +00006 "math"
7 "math/bits"
khenaidoo26721882021-08-11 17:42:52 -04008 "runtime"
9 "strings"
10)
11
12// EOption is an option for creating a encoder.
13type EOption func(*encoderOptions) error
14
15// options retains accumulated state of multiple options.
16type encoderOptions struct {
17 concurrent int
18 level EncoderLevel
19 single *bool
20 pad int
21 blockSize int
22 windowSize int
23 crc bool
24 fullZero bool
25 noEntropy bool
26 allLitEntropy bool
27 customWindow bool
28 customALEntropy bool
Abhay Kumar40252eb2025-10-13 13:25:53 +000029 customBlockSize bool
khenaidoo26721882021-08-11 17:42:52 -040030 lowMem bool
31 dict *dict
32}
33
34func (o *encoderOptions) setDefault() {
35 *o = encoderOptions{
36 concurrent: runtime.GOMAXPROCS(0),
37 crc: true,
38 single: nil,
Abhay Kumar40252eb2025-10-13 13:25:53 +000039 blockSize: maxCompressedBlockSize,
khenaidoo26721882021-08-11 17:42:52 -040040 windowSize: 8 << 20,
41 level: SpeedDefault,
Abhay Kumar40252eb2025-10-13 13:25:53 +000042 allLitEntropy: false,
khenaidoo26721882021-08-11 17:42:52 -040043 lowMem: false,
44 }
45}
46
47// encoder returns an encoder with the selected options.
48func (o encoderOptions) encoder() encoder {
49 switch o.level {
50 case SpeedFastest:
51 if o.dict != nil {
Abhay Kumar40252eb2025-10-13 13:25:53 +000052 return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
khenaidoo26721882021-08-11 17:42:52 -040053 }
Abhay Kumar40252eb2025-10-13 13:25:53 +000054 return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
khenaidoo26721882021-08-11 17:42:52 -040055
56 case SpeedDefault:
57 if o.dict != nil {
Abhay Kumar40252eb2025-10-13 13:25:53 +000058 return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}}
khenaidoo26721882021-08-11 17:42:52 -040059 }
Abhay Kumar40252eb2025-10-13 13:25:53 +000060 return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
khenaidoo26721882021-08-11 17:42:52 -040061 case SpeedBetterCompression:
62 if o.dict != nil {
Abhay Kumar40252eb2025-10-13 13:25:53 +000063 return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
khenaidoo26721882021-08-11 17:42:52 -040064 }
Abhay Kumar40252eb2025-10-13 13:25:53 +000065 return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
khenaidoo26721882021-08-11 17:42:52 -040066 case SpeedBestCompression:
Abhay Kumar40252eb2025-10-13 13:25:53 +000067 return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
khenaidoo26721882021-08-11 17:42:52 -040068 }
69 panic("unknown compression level")
70}
71
72// WithEncoderCRC will add CRC value to output.
73// Output will be 4 bytes larger.
74func WithEncoderCRC(b bool) EOption {
75 return func(o *encoderOptions) error { o.crc = b; return nil }
76}
77
78// WithEncoderConcurrency will set the concurrency,
79// meaning the maximum number of encoders to run concurrently.
80// The value supplied must be at least 1.
Abhay Kumar40252eb2025-10-13 13:25:53 +000081// For streams, setting a value of 1 will disable async compression.
khenaidoo26721882021-08-11 17:42:52 -040082// By default this will be set to GOMAXPROCS.
83func WithEncoderConcurrency(n int) EOption {
84 return func(o *encoderOptions) error {
85 if n <= 0 {
86 return fmt.Errorf("concurrency must be at least 1")
87 }
88 o.concurrent = n
89 return nil
90 }
91}
92
93// WithWindowSize will set the maximum allowed back-reference distance.
94// The value must be a power of two between MinWindowSize and MaxWindowSize.
95// A larger value will enable better compression but allocate more memory and,
96// for above-default values, take considerably longer.
Abhay Kumar40252eb2025-10-13 13:25:53 +000097// The default value is determined by the compression level and max 8MB.
khenaidoo26721882021-08-11 17:42:52 -040098func WithWindowSize(n int) EOption {
99 return func(o *encoderOptions) error {
100 switch {
101 case n < MinWindowSize:
102 return fmt.Errorf("window size must be at least %d", MinWindowSize)
103 case n > MaxWindowSize:
104 return fmt.Errorf("window size must be at most %d", MaxWindowSize)
105 case (n & (n - 1)) != 0:
106 return errors.New("window size must be a power of 2")
107 }
108
109 o.windowSize = n
110 o.customWindow = true
111 if o.blockSize > o.windowSize {
112 o.blockSize = o.windowSize
Abhay Kumar40252eb2025-10-13 13:25:53 +0000113 o.customBlockSize = true
khenaidoo26721882021-08-11 17:42:52 -0400114 }
115 return nil
116 }
117}
118
119// WithEncoderPadding will add padding to all output so the size will be a multiple of n.
120// This can be used to obfuscate the exact output size or make blocks of a certain size.
121// The contents will be a skippable frame, so it will be invisible by the decoder.
122// n must be > 0 and <= 1GB, 1<<30 bytes.
123// The padded area will be filled with data from crypto/rand.Reader.
124// If `EncodeAll` is used with data already in the destination, the total size will be multiple of this.
125func WithEncoderPadding(n int) EOption {
126 return func(o *encoderOptions) error {
127 if n <= 0 {
128 return fmt.Errorf("padding must be at least 1")
129 }
130 // No need to waste our time.
131 if n == 1 {
Abhay Kumar40252eb2025-10-13 13:25:53 +0000132 n = 0
khenaidoo26721882021-08-11 17:42:52 -0400133 }
134 if n > 1<<30 {
135 return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ")
136 }
137 o.pad = n
138 return nil
139 }
140}
141
// EncoderLevel predefines encoder compression levels.
// Only use the constants made available, since the actual mapping
// of these values is very likely to change and your compression could change
// unpredictably when upgrading the library.
type EncoderLevel int

const (
	// speedNotSet is the zero value and is not a valid level.
	speedNotSet EncoderLevel = iota

	// SpeedFastest will choose the fastest reasonable compression.
	// This is roughly equivalent to the fastest Zstandard mode.
	SpeedFastest

	// SpeedDefault is the default "pretty fast" compression option.
	// This is roughly equivalent to the default Zstandard mode (level 3).
	SpeedDefault

	// SpeedBetterCompression will yield better compression than the default.
	// Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage.
	// By using this, notice that CPU usage may go up in the future.
	SpeedBetterCompression

	// SpeedBestCompression will choose the best available compression option.
	// This will offer the best compression no matter the CPU cost.
	SpeedBestCompression

	// speedLast should be kept as the last actual compression option.
	// This is not for external usage, but is used to keep track of the valid options.
	speedLast
)
172
173// EncoderLevelFromString will convert a string representation of an encoding level back
174// to a compression level. The compare is not case sensitive.
175// If the string wasn't recognized, (false, SpeedDefault) will be returned.
176func EncoderLevelFromString(s string) (bool, EncoderLevel) {
177 for l := speedNotSet + 1; l < speedLast; l++ {
178 if strings.EqualFold(s, l.String()) {
179 return true, l
180 }
181 }
182 return false, SpeedDefault
183}
184
185// EncoderLevelFromZstd will return an encoder level that closest matches the compression
186// ratio of a specific zstd compression level.
187// Many input values will provide the same compression level.
188func EncoderLevelFromZstd(level int) EncoderLevel {
189 switch {
190 case level < 3:
191 return SpeedFastest
192 case level >= 3 && level < 6:
193 return SpeedDefault
194 case level >= 6 && level < 10:
195 return SpeedBetterCompression
Abhay Kumar40252eb2025-10-13 13:25:53 +0000196 default:
197 return SpeedBestCompression
khenaidoo26721882021-08-11 17:42:52 -0400198 }
khenaidoo26721882021-08-11 17:42:52 -0400199}
200
201// String provides a string representation of the compression level.
202func (e EncoderLevel) String() string {
203 switch e {
204 case SpeedFastest:
205 return "fastest"
206 case SpeedDefault:
207 return "default"
208 case SpeedBetterCompression:
209 return "better"
210 case SpeedBestCompression:
211 return "best"
212 default:
213 return "invalid"
214 }
215}
216
217// WithEncoderLevel specifies a predefined compression level.
218func WithEncoderLevel(l EncoderLevel) EOption {
219 return func(o *encoderOptions) error {
220 switch {
221 case l <= speedNotSet || l >= speedLast:
222 return fmt.Errorf("unknown encoder level")
223 }
224 o.level = l
225 if !o.customWindow {
226 switch o.level {
227 case SpeedFastest:
228 o.windowSize = 4 << 20
Abhay Kumar40252eb2025-10-13 13:25:53 +0000229 if !o.customBlockSize {
230 o.blockSize = 1 << 16
231 }
khenaidoo26721882021-08-11 17:42:52 -0400232 case SpeedDefault:
233 o.windowSize = 8 << 20
234 case SpeedBetterCompression:
Abhay Kumar40252eb2025-10-13 13:25:53 +0000235 o.windowSize = 8 << 20
khenaidoo26721882021-08-11 17:42:52 -0400236 case SpeedBestCompression:
Abhay Kumar40252eb2025-10-13 13:25:53 +0000237 o.windowSize = 8 << 20
khenaidoo26721882021-08-11 17:42:52 -0400238 }
239 }
240 if !o.customALEntropy {
Abhay Kumar40252eb2025-10-13 13:25:53 +0000241 o.allLitEntropy = l > SpeedDefault
khenaidoo26721882021-08-11 17:42:52 -0400242 }
243
244 return nil
245 }
246}
247
248// WithZeroFrames will encode 0 length input as full frames.
249// This can be needed for compatibility with zstandard usage,
250// but is not needed for this package.
251func WithZeroFrames(b bool) EOption {
252 return func(o *encoderOptions) error {
253 o.fullZero = b
254 return nil
255 }
256}
257
258// WithAllLitEntropyCompression will apply entropy compression if no matches are found.
259// Disabling this will skip incompressible data faster, but in cases with no matches but
260// skewed character distribution compression is lost.
261// Default value depends on the compression level selected.
262func WithAllLitEntropyCompression(b bool) EOption {
263 return func(o *encoderOptions) error {
264 o.customALEntropy = true
265 o.allLitEntropy = b
266 return nil
267 }
268}
269
270// WithNoEntropyCompression will always skip entropy compression of literals.
271// This can be useful if content has matches, but unlikely to benefit from entropy
272// compression. Usually the slight speed improvement is not worth enabling this.
273func WithNoEntropyCompression(b bool) EOption {
274 return func(o *encoderOptions) error {
275 o.noEntropy = b
276 return nil
277 }
278}
279
280// WithSingleSegment will set the "single segment" flag when EncodeAll is used.
281// If this flag is set, data must be regenerated within a single continuous memory segment.
282// In this case, Window_Descriptor byte is skipped, but Frame_Content_Size is necessarily present.
283// As a consequence, the decoder must allocate a memory segment of size equal or larger than size of your content.
284// In order to preserve the decoder from unreasonable memory requirements,
285// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
286// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
287// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
Abhay Kumar40252eb2025-10-13 13:25:53 +0000288// If this is not specified, block encodes will automatically choose this based on the input size and the window size.
khenaidoo26721882021-08-11 17:42:52 -0400289// This setting has no effect on streamed encodes.
290func WithSingleSegment(b bool) EOption {
291 return func(o *encoderOptions) error {
292 o.single = &b
293 return nil
294 }
295}
296
297// WithLowerEncoderMem will trade in some memory cases trade less memory usage for
298// slower encoding speed.
299// This will not change the window size which is the primary function for reducing
300// memory usage. See WithWindowSize.
301func WithLowerEncoderMem(b bool) EOption {
302 return func(o *encoderOptions) error {
303 o.lowMem = b
304 return nil
305 }
306}
307
308// WithEncoderDict allows to register a dictionary that will be used for the encode.
Abhay Kumar40252eb2025-10-13 13:25:53 +0000309//
310// The slice dict must be in the [dictionary format] produced by
311// "zstd --train" from the Zstandard reference implementation.
312//
khenaidoo26721882021-08-11 17:42:52 -0400313// The encoder *may* choose to use no dictionary instead for certain payloads.
Abhay Kumar40252eb2025-10-13 13:25:53 +0000314//
315// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
khenaidoo26721882021-08-11 17:42:52 -0400316func WithEncoderDict(dict []byte) EOption {
317 return func(o *encoderOptions) error {
318 d, err := loadDict(dict)
319 if err != nil {
320 return err
321 }
322 o.dict = d
323 return nil
324 }
325}
Abhay Kumar40252eb2025-10-13 13:25:53 +0000326
327// WithEncoderDictRaw registers a dictionary that may be used by the encoder.
328//
329// The slice content may contain arbitrary data. It will be used as an initial
330// history.
331func WithEncoderDictRaw(id uint32, content []byte) EOption {
332 return func(o *encoderOptions) error {
333 if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
334 return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
335 }
336 o.dict = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}
337 return nil
338 }
339}