blob: 20671dcb91d9329216d7e36932b3374ee9d7c95b [file] [log] [blame]
khenaidood948f772021-08-11 17:49:24 -04001package zstd
2
3import (
4 "errors"
5 "fmt"
Abhay Kumara2ae5992025-11-10 14:02:24 +00006 "math"
7 "math/bits"
khenaidood948f772021-08-11 17:49:24 -04008 "runtime"
9 "strings"
10)
11
// EOption is an option for creating an encoder.
// An option receives the accumulated encoderOptions, updates them, and
// returns a non-nil error when the requested setting is rejected.
type EOption func(*encoderOptions) error
14
// encoderOptions retains the accumulated state of multiple options.
type encoderOptions struct {
	// concurrent is the maximum number of encoders to run concurrently.
	concurrent int
	// level is the selected compression level.
	level EncoderLevel
	// single is the "single segment" choice made through WithSingleSegment;
	// it stays nil while that option has not been applied.
	single *bool
	// pad is the padding multiple from WithEncoderPadding; a request for a
	// multiple of 1 is stored as 0.
	pad int
	// blockSize defaults to maxCompressedBlockSize and is never left larger
	// than windowSize by WithWindowSize.
	blockSize int
	// windowSize is the maximum allowed back-reference distance.
	windowSize int
	// crc reports whether a CRC value is added to the output.
	crc bool
	// fullZero reports whether 0 length input is encoded as a full frame.
	fullZero bool
	// noEntropy reports whether entropy compression of literals is always skipped.
	noEntropy bool
	// allLitEntropy reports whether entropy compression is applied when no
	// matches are found.
	allLitEntropy bool
	// customWindow is set once WithWindowSize has been applied; it stops
	// WithEncoderLevel from replacing windowSize with a per-level default.
	customWindow bool
	// customALEntropy is set once WithAllLitEntropyCompression has been
	// applied; it stops WithEncoderLevel from replacing allLitEntropy.
	customALEntropy bool
	// customBlockSize is set when WithWindowSize had to shrink blockSize; it
	// stops WithEncoderLevel from replacing blockSize.
	customBlockSize bool
	// lowMem is the WithLowerEncoderMem setting.
	lowMem bool
	// dict is the dictionary registered through WithEncoderDict or
	// WithEncoderDictRaw, or nil when there is none.
	dict *dict
}
33
34func (o *encoderOptions) setDefault() {
35 *o = encoderOptions{
36 concurrent: runtime.GOMAXPROCS(0),
37 crc: true,
38 single: nil,
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053039 blockSize: maxCompressedBlockSize,
khenaidood948f772021-08-11 17:49:24 -040040 windowSize: 8 << 20,
41 level: SpeedDefault,
Abhay Kumara2ae5992025-11-10 14:02:24 +000042 allLitEntropy: false,
khenaidood948f772021-08-11 17:49:24 -040043 lowMem: false,
44 }
45}
46
47// encoder returns an encoder with the selected options.
48func (o encoderOptions) encoder() encoder {
49 switch o.level {
50 case SpeedFastest:
51 if o.dict != nil {
Abhay Kumara2ae5992025-11-10 14:02:24 +000052 return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
khenaidood948f772021-08-11 17:49:24 -040053 }
Abhay Kumara2ae5992025-11-10 14:02:24 +000054 return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
khenaidood948f772021-08-11 17:49:24 -040055
56 case SpeedDefault:
57 if o.dict != nil {
Abhay Kumara2ae5992025-11-10 14:02:24 +000058 return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}}
khenaidood948f772021-08-11 17:49:24 -040059 }
Abhay Kumara2ae5992025-11-10 14:02:24 +000060 return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
khenaidood948f772021-08-11 17:49:24 -040061 case SpeedBetterCompression:
62 if o.dict != nil {
Abhay Kumara2ae5992025-11-10 14:02:24 +000063 return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
khenaidood948f772021-08-11 17:49:24 -040064 }
Abhay Kumara2ae5992025-11-10 14:02:24 +000065 return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
khenaidood948f772021-08-11 17:49:24 -040066 case SpeedBestCompression:
Abhay Kumara2ae5992025-11-10 14:02:24 +000067 return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
khenaidood948f772021-08-11 17:49:24 -040068 }
69 panic("unknown compression level")
70}
71
72// WithEncoderCRC will add CRC value to output.
73// Output will be 4 bytes larger.
74func WithEncoderCRC(b bool) EOption {
75 return func(o *encoderOptions) error { o.crc = b; return nil }
76}
77
78// WithEncoderConcurrency will set the concurrency,
79// meaning the maximum number of encoders to run concurrently.
80// The value supplied must be at least 1.
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053081// For streams, setting a value of 1 will disable async compression.
khenaidood948f772021-08-11 17:49:24 -040082// By default this will be set to GOMAXPROCS.
83func WithEncoderConcurrency(n int) EOption {
84 return func(o *encoderOptions) error {
85 if n <= 0 {
86 return fmt.Errorf("concurrency must be at least 1")
87 }
88 o.concurrent = n
89 return nil
90 }
91}
92
93// WithWindowSize will set the maximum allowed back-reference distance.
94// The value must be a power of two between MinWindowSize and MaxWindowSize.
95// A larger value will enable better compression but allocate more memory and,
96// for above-default values, take considerably longer.
Abhay Kumara2ae5992025-11-10 14:02:24 +000097// The default value is determined by the compression level and max 8MB.
khenaidood948f772021-08-11 17:49:24 -040098func WithWindowSize(n int) EOption {
99 return func(o *encoderOptions) error {
100 switch {
101 case n < MinWindowSize:
102 return fmt.Errorf("window size must be at least %d", MinWindowSize)
103 case n > MaxWindowSize:
104 return fmt.Errorf("window size must be at most %d", MaxWindowSize)
105 case (n & (n - 1)) != 0:
106 return errors.New("window size must be a power of 2")
107 }
108
109 o.windowSize = n
110 o.customWindow = true
111 if o.blockSize > o.windowSize {
112 o.blockSize = o.windowSize
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530113 o.customBlockSize = true
khenaidood948f772021-08-11 17:49:24 -0400114 }
115 return nil
116 }
117}
118
119// WithEncoderPadding will add padding to all output so the size will be a multiple of n.
120// This can be used to obfuscate the exact output size or make blocks of a certain size.
121// The contents will be a skippable frame, so it will be invisible by the decoder.
122// n must be > 0 and <= 1GB, 1<<30 bytes.
123// The padded area will be filled with data from crypto/rand.Reader.
124// If `EncodeAll` is used with data already in the destination, the total size will be multiple of this.
125func WithEncoderPadding(n int) EOption {
126 return func(o *encoderOptions) error {
127 if n <= 0 {
128 return fmt.Errorf("padding must be at least 1")
129 }
130 // No need to waste our time.
131 if n == 1 {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000132 n = 0
khenaidood948f772021-08-11 17:49:24 -0400133 }
134 if n > 1<<30 {
135 return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ")
136 }
137 o.pad = n
138 return nil
139 }
140}
141
// EncoderLevel identifies one of the predefined encoder compression levels.
// Use only the constants below: the mapping behind these values is very
// likely to change, so relying on the numbers could change your compression
// unpredictably when upgrading the library.
type EncoderLevel int

const (
	// speedNotSet is the zero value and does not name a compression level.
	speedNotSet EncoderLevel = iota

	// SpeedFastest selects the fastest reasonable compression.
	// This is roughly equivalent to the fastest Zstandard mode.
	SpeedFastest

	// SpeedDefault is the default "pretty fast" compression option.
	// This is roughly equivalent to the default Zstandard mode (level 3).
	SpeedDefault

	// SpeedBetterCompression yields better compression than the default.
	// Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage.
	// Be aware that its CPU usage may go up in the future.
	SpeedBetterCompression

	// SpeedBestCompression selects the best available compression option.
	// It offers the best compression no matter the CPU cost.
	SpeedBestCompression

	// speedLast must remain the last entry of this list.
	// It is not for external usage; it marks the end of the valid levels.
	speedLast
)

// EncoderLevelFromString converts the string representation of a compression
// level back to the level. The comparison is not case sensitive.
// If the string is not recognized, (false, SpeedDefault) is returned.
func EncoderLevelFromString(s string) (bool, EncoderLevel) {
	for candidate := speedNotSet + 1; candidate < speedLast; candidate++ {
		if !strings.EqualFold(s, candidate.String()) {
			continue
		}
		return true, candidate
	}
	return false, SpeedDefault
}

// EncoderLevelFromZstd returns the encoder level that most closely matches
// the compression ratio of the given zstd compression level.
// Many input values map to the same encoder level.
func EncoderLevelFromZstd(level int) EncoderLevel {
	// Each test only needs the upper bound; smaller values have already
	// returned by the time it is reached.
	if level < 3 {
		return SpeedFastest
	}
	if level < 6 {
		return SpeedDefault
	}
	if level < 10 {
		return SpeedBetterCompression
	}
	return SpeedBestCompression
}

// String returns the name of the compression level, or "invalid" when e is
// not one of the defined levels.
func (e EncoderLevel) String() string {
	names := [...]string{
		SpeedFastest:           "fastest",
		SpeedDefault:           "default",
		SpeedBetterCompression: "better",
		SpeedBestCompression:   "best",
	}
	// Index 0 belongs to speedNotSet and has no name.
	if e < SpeedFastest || int(e) >= len(names) {
		return "invalid"
	}
	return names[e]
}
216
217// WithEncoderLevel specifies a predefined compression level.
218func WithEncoderLevel(l EncoderLevel) EOption {
219 return func(o *encoderOptions) error {
220 switch {
221 case l <= speedNotSet || l >= speedLast:
222 return fmt.Errorf("unknown encoder level")
223 }
224 o.level = l
225 if !o.customWindow {
226 switch o.level {
227 case SpeedFastest:
228 o.windowSize = 4 << 20
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530229 if !o.customBlockSize {
230 o.blockSize = 1 << 16
231 }
khenaidood948f772021-08-11 17:49:24 -0400232 case SpeedDefault:
233 o.windowSize = 8 << 20
234 case SpeedBetterCompression:
Abhay Kumara2ae5992025-11-10 14:02:24 +0000235 o.windowSize = 8 << 20
khenaidood948f772021-08-11 17:49:24 -0400236 case SpeedBestCompression:
Abhay Kumara2ae5992025-11-10 14:02:24 +0000237 o.windowSize = 8 << 20
khenaidood948f772021-08-11 17:49:24 -0400238 }
239 }
240 if !o.customALEntropy {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000241 o.allLitEntropy = l > SpeedDefault
khenaidood948f772021-08-11 17:49:24 -0400242 }
243
244 return nil
245 }
246}
247
248// WithZeroFrames will encode 0 length input as full frames.
249// This can be needed for compatibility with zstandard usage,
250// but is not needed for this package.
251func WithZeroFrames(b bool) EOption {
252 return func(o *encoderOptions) error {
253 o.fullZero = b
254 return nil
255 }
256}
257
258// WithAllLitEntropyCompression will apply entropy compression if no matches are found.
259// Disabling this will skip incompressible data faster, but in cases with no matches but
260// skewed character distribution compression is lost.
261// Default value depends on the compression level selected.
262func WithAllLitEntropyCompression(b bool) EOption {
263 return func(o *encoderOptions) error {
264 o.customALEntropy = true
265 o.allLitEntropy = b
266 return nil
267 }
268}
269
270// WithNoEntropyCompression will always skip entropy compression of literals.
271// This can be useful if content has matches, but unlikely to benefit from entropy
272// compression. Usually the slight speed improvement is not worth enabling this.
273func WithNoEntropyCompression(b bool) EOption {
274 return func(o *encoderOptions) error {
275 o.noEntropy = b
276 return nil
277 }
278}
279
280// WithSingleSegment will set the "single segment" flag when EncodeAll is used.
281// If this flag is set, data must be regenerated within a single continuous memory segment.
282// In this case, Window_Descriptor byte is skipped, but Frame_Content_Size is necessarily present.
283// As a consequence, the decoder must allocate a memory segment of size equal or larger than size of your content.
284// In order to preserve the decoder from unreasonable memory requirements,
285// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
286// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
287// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530288// If this is not specified, block encodes will automatically choose this based on the input size and the window size.
khenaidood948f772021-08-11 17:49:24 -0400289// This setting has no effect on streamed encodes.
290func WithSingleSegment(b bool) EOption {
291 return func(o *encoderOptions) error {
292 o.single = &b
293 return nil
294 }
295}
296
297// WithLowerEncoderMem will trade in some memory cases trade less memory usage for
298// slower encoding speed.
299// This will not change the window size which is the primary function for reducing
300// memory usage. See WithWindowSize.
301func WithLowerEncoderMem(b bool) EOption {
302 return func(o *encoderOptions) error {
303 o.lowMem = b
304 return nil
305 }
306}
307
308// WithEncoderDict allows to register a dictionary that will be used for the encode.
Abhay Kumara2ae5992025-11-10 14:02:24 +0000309//
310// The slice dict must be in the [dictionary format] produced by
311// "zstd --train" from the Zstandard reference implementation.
312//
khenaidood948f772021-08-11 17:49:24 -0400313// The encoder *may* choose to use no dictionary instead for certain payloads.
Abhay Kumara2ae5992025-11-10 14:02:24 +0000314//
315// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
khenaidood948f772021-08-11 17:49:24 -0400316func WithEncoderDict(dict []byte) EOption {
317 return func(o *encoderOptions) error {
318 d, err := loadDict(dict)
319 if err != nil {
320 return err
321 }
322 o.dict = d
323 return nil
324 }
325}
Abhay Kumara2ae5992025-11-10 14:02:24 +0000326
327// WithEncoderDictRaw registers a dictionary that may be used by the encoder.
328//
329// The slice content may contain arbitrary data. It will be used as an initial
330// history.
331func WithEncoderDictRaw(id uint32, content []byte) EOption {
332 return func(o *encoderOptions) error {
333 if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
334 return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
335 }
336 o.dict = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}
337 return nil
338 }
339}