blob: c59f17e07ad1dd66834f449e943c48295167154e [file] [log] [blame]
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +05301//go:build amd64 && !appengine && !noasm && gc
2// +build amd64,!appengine,!noasm,gc
3
4package zstd
5
6import (
7 "fmt"
Abhay Kumara2ae5992025-11-10 14:02:24 +00008 "io"
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +05309
10 "github.com/klauspost/compress/internal/cpuinfo"
11)
12
13type decodeSyncAsmContext struct {
14 llTable []decSymbol
15 mlTable []decSymbol
16 ofTable []decSymbol
17 llState uint64
18 mlState uint64
19 ofState uint64
20 iteration int
21 litRemain int
22 out []byte
23 outPosition int
24 literals []byte
25 litPosition int
26 history []byte
27 windowSize int
28 ll int // set on error (not for all errors, please refer to _generate/gen.go)
29 ml int // set on error (not for all errors, please refer to _generate/gen.go)
30 mo int // set on error (not for all errors, please refer to _generate/gen.go)
31}
32
33// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
34//
35// Please refer to seqdec_generic.go for the reference implementation.
Abhay Kumara2ae5992025-11-10 14:02:24 +000036//
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053037//go:noescape
38func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
39
40// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
Abhay Kumara2ae5992025-11-10 14:02:24 +000041//
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053042//go:noescape
43func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
44
45// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer.
Abhay Kumara2ae5992025-11-10 14:02:24 +000046//
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053047//go:noescape
48func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
49
50// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer.
Abhay Kumara2ae5992025-11-10 14:02:24 +000051//
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +053052//go:noescape
53func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
54
55// decode sequences from the stream with the provided history but without a dictionary.
56func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
57 if len(s.dict) > 0 {
58 return false, nil
59 }
60 if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize {
61 return false, nil
62 }
63
64 // FIXME: Using unsafe memory copies leads to rare, random crashes
65 // with fuzz testing. It is therefore disabled for now.
66 const useSafe = true
67 /*
68 useSafe := false
69 if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
70 useSafe = true
71 }
72 if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
73 useSafe = true
74 }
75 if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
76 useSafe = true
77 }
78 */
79
80 br := s.br
81
82 maxBlockSize := maxCompressedBlockSize
83 if s.windowSize < maxBlockSize {
84 maxBlockSize = s.windowSize
85 }
86
87 ctx := decodeSyncAsmContext{
88 llTable: s.litLengths.fse.dt[:maxTablesize],
89 mlTable: s.matchLengths.fse.dt[:maxTablesize],
90 ofTable: s.offsets.fse.dt[:maxTablesize],
91 llState: uint64(s.litLengths.state.state),
92 mlState: uint64(s.matchLengths.state.state),
93 ofState: uint64(s.offsets.state.state),
94 iteration: s.nSeqs - 1,
95 litRemain: len(s.literals),
96 out: s.out,
97 outPosition: len(s.out),
98 literals: s.literals,
99 windowSize: s.windowSize,
100 history: hist,
101 }
102
103 s.seqSize = 0
104 startSize := len(s.out)
105
106 var errCode int
107 if cpuinfo.HasBMI2() {
108 if useSafe {
109 errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx)
110 } else {
111 errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx)
112 }
113 } else {
114 if useSafe {
115 errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx)
116 } else {
117 errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx)
118 }
119 }
120 switch errCode {
121 case noError:
122 break
123
124 case errorMatchLenOfsMismatch:
125 return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml)
126
127 case errorMatchLenTooBig:
128 return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml)
129
130 case errorMatchOffTooBig:
131 return true, fmt.Errorf("match offset (%d) bigger than current history (%d)",
132 ctx.mo, ctx.outPosition+len(hist)-startSize)
133
134 case errorNotEnoughLiterals:
135 return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
136 ctx.ll, ctx.litRemain+ctx.ll)
137
Abhay Kumara2ae5992025-11-10 14:02:24 +0000138 case errorOverread:
139 return true, io.ErrUnexpectedEOF
140
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530141 case errorNotEnoughSpace:
142 size := ctx.outPosition + ctx.ll + ctx.ml
143 if debugDecoder {
144 println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
145 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000146 return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530147
148 default:
Abhay Kumara2ae5992025-11-10 14:02:24 +0000149 return true, fmt.Errorf("sequenceDecs_decode returned erroneous code %d", errCode)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530150 }
151
152 s.seqSize += ctx.litRemain
153 if s.seqSize > maxBlockSize {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000154 return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530155 }
156 err := br.close()
157 if err != nil {
158 printf("Closing sequences: %v, %+v\n", err, *br)
159 return true, err
160 }
161
162 s.literals = s.literals[ctx.litPosition:]
163 t := ctx.outPosition
164 s.out = s.out[:t]
165
166 // Add final literals
167 s.out = append(s.out, s.literals...)
168 if debugDecoder {
169 t += len(s.literals)
170 if t != len(s.out) {
171 panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t))
172 }
173 }
174
175 return true, nil
176}
177
178// --------------------------------------------------------------------------------
179
180type decodeAsmContext struct {
181 llTable []decSymbol
182 mlTable []decSymbol
183 ofTable []decSymbol
184 llState uint64
185 mlState uint64
186 ofState uint64
187 iteration int
188 seqs []seqVals
189 litRemain int
190}
191
192const noError = 0
193
194// error reported when mo == 0 && ml > 0
195const errorMatchLenOfsMismatch = 1
196
197// error reported when ml > maxMatchLen
198const errorMatchLenTooBig = 2
199
200// error reported when mo > available history or mo > s.windowSize
201const errorMatchOffTooBig = 3
202
203// error reported when the sum of literal lengths exeeceds the literal buffer size
204const errorNotEnoughLiterals = 4
205
206// error reported when capacity of `out` is too small
207const errorNotEnoughSpace = 5
208
Abhay Kumara2ae5992025-11-10 14:02:24 +0000209// error reported when bits are overread.
210const errorOverread = 6
211
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530212// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
213//
214// Please refer to seqdec_generic.go for the reference implementation.
Abhay Kumara2ae5992025-11-10 14:02:24 +0000215//
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530216//go:noescape
217func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
218
219// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
220//
221// Please refer to seqdec_generic.go for the reference implementation.
Abhay Kumara2ae5992025-11-10 14:02:24 +0000222//
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530223//go:noescape
224func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
225
226// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
Abhay Kumara2ae5992025-11-10 14:02:24 +0000227//
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530228//go:noescape
229func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
230
231// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
Abhay Kumara2ae5992025-11-10 14:02:24 +0000232//
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530233//go:noescape
234func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
235
236// decode sequences from the stream without the provided history.
237func (s *sequenceDecs) decode(seqs []seqVals) error {
238 br := s.br
239
240 maxBlockSize := maxCompressedBlockSize
241 if s.windowSize < maxBlockSize {
242 maxBlockSize = s.windowSize
243 }
244
245 ctx := decodeAsmContext{
246 llTable: s.litLengths.fse.dt[:maxTablesize],
247 mlTable: s.matchLengths.fse.dt[:maxTablesize],
248 ofTable: s.offsets.fse.dt[:maxTablesize],
249 llState: uint64(s.litLengths.state.state),
250 mlState: uint64(s.matchLengths.state.state),
251 ofState: uint64(s.offsets.state.state),
252 seqs: seqs,
253 iteration: len(seqs) - 1,
254 litRemain: len(s.literals),
255 }
256
Abhay Kumara2ae5992025-11-10 14:02:24 +0000257 if debugDecoder {
258 println("decode: decoding", len(seqs), "sequences", br.remain(), "bits remain on stream")
259 }
260
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530261 s.seqSize = 0
262 lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
263 var errCode int
264 if cpuinfo.HasBMI2() {
265 if lte56bits {
266 errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx)
267 } else {
268 errCode = sequenceDecs_decode_bmi2(s, br, &ctx)
269 }
270 } else {
271 if lte56bits {
272 errCode = sequenceDecs_decode_56_amd64(s, br, &ctx)
273 } else {
274 errCode = sequenceDecs_decode_amd64(s, br, &ctx)
275 }
276 }
277 if errCode != 0 {
278 i := len(seqs) - ctx.iteration - 1
279 switch errCode {
280 case errorMatchLenOfsMismatch:
281 ml := ctx.seqs[i].ml
282 return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
283
284 case errorMatchLenTooBig:
285 ml := ctx.seqs[i].ml
286 return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
287
288 case errorNotEnoughLiterals:
289 ll := ctx.seqs[i].ll
290 return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
Abhay Kumara2ae5992025-11-10 14:02:24 +0000291 case errorOverread:
292 return io.ErrUnexpectedEOF
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530293 }
294
Abhay Kumara2ae5992025-11-10 14:02:24 +0000295 return fmt.Errorf("sequenceDecs_decode_amd64 returned erroneous code %d", errCode)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530296 }
297
298 if ctx.litRemain < 0 {
299 return fmt.Errorf("literal count is too big: total available %d, total requested %d",
300 len(s.literals), len(s.literals)-ctx.litRemain)
301 }
302
303 s.seqSize += ctx.litRemain
304 if s.seqSize > maxBlockSize {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000305 return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
306 }
307 if debugDecoder {
308 println("decode: ", br.remain(), "bits remain on stream. code:", errCode)
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530309 }
310 err := br.close()
311 if err != nil {
312 printf("Closing sequences: %v, %+v\n", err, *br)
313 }
314 return err
315}
316
317// --------------------------------------------------------------------------------
318
319type executeAsmContext struct {
320 seqs []seqVals
321 seqIndex int
322 out []byte
323 history []byte
324 literals []byte
325 outPosition int
326 litPosition int
327 windowSize int
328}
329
330// sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm.
331//
332// Returns false if a match offset is too big.
333//
334// Please refer to seqdec_generic.go for the reference implementation.
Abhay Kumara2ae5992025-11-10 14:02:24 +0000335//
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530336//go:noescape
337func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
338
339// Same as above, but with safe memcopies
Abhay Kumara2ae5992025-11-10 14:02:24 +0000340//
Akash Reddy Kankanalacf045372025-06-10 14:11:24 +0530341//go:noescape
342func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
343
344// executeSimple handles cases when dictionary is not used.
345func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
346 // Ensure we have enough output size...
347 if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) {
348 addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc
349 s.out = append(s.out, make([]byte, addBytes)...)
350 s.out = s.out[:len(s.out)-addBytes]
351 }
352
353 if debugDecoder {
354 printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
355 }
356
357 var t = len(s.out)
358 out := s.out[:t+s.seqSize]
359
360 ctx := executeAsmContext{
361 seqs: seqs,
362 seqIndex: 0,
363 out: out,
364 history: hist,
365 outPosition: t,
366 litPosition: 0,
367 literals: s.literals,
368 windowSize: s.windowSize,
369 }
370 var ok bool
371 if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
372 ok = sequenceDecs_executeSimple_safe_amd64(&ctx)
373 } else {
374 ok = sequenceDecs_executeSimple_amd64(&ctx)
375 }
376 if !ok {
377 return fmt.Errorf("match offset (%d) bigger than current history (%d)",
378 seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist))
379 }
380 s.literals = s.literals[ctx.litPosition:]
381 t = ctx.outPosition
382
383 // Add final literals
384 copy(out[t:], s.literals)
385 if debugDecoder {
386 t += len(s.literals)
387 if t != len(out) {
388 panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
389 }
390 }
391 s.out = out
392
393 return nil
394}