[VOL-5486] Fix deprecated versions
Change-Id: I3e03ea246020547ae75fa92ce8cf5cbba7e8f3bb
Signed-off-by: Abhay Kumar <abhay.kumar@radisys.com>
diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md
index beb7fa8..c11d7fa 100644
--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@@ -6,12 +6,14 @@
This package provides [compression](#Compressor) to and [decompression](#Decompressor) of Zstandard content.
-This package is pure Go and without use of "unsafe".
+This package is pure Go. Use `noasm` and `nounsafe` to disable relevant features.
The `zstd` package is provided as open source software using a Go standard license.
Currently the package is heavily optimized for 64 bit processors and will be significantly slower on 32 bit processors.
+For seekable zstd streams, see [this excellent package](https://github.com/SaveTheRbtz/zstd-seekable-format-go).
+
## Installation
Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`.
@@ -257,7 +259,7 @@
## Decompressor
-Staus: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
+Status: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz),
kindly supplied by [fuzzit.dev](https://fuzzit.dev/).
@@ -302,7 +304,7 @@
// Create a reader that caches decompressors.
// For this operation type we supply a nil Reader.
-var decoder, _ = zstd.NewReader(nil, WithDecoderConcurrency(0))
+var decoder, _ = zstd.NewReader(nil, zstd.WithDecoderConcurrency(0))
// Decompress a buffer. We don't supply a destination buffer,
// so it will be allocated by the decoder.
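Note: the corrected README snippet drops into a complete program like the following sketch (a minimal example; only the zstd calls are the package's actual API, the payload is illustrative):

package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

// Create a reader that caches decompressors; safe for concurrent use.
var decoder, _ = zstd.NewReader(nil, zstd.WithDecoderConcurrency(0))

func main() {
	enc, _ := zstd.NewWriter(nil)
	compressed := enc.EncodeAll([]byte("hello zstd"), nil)

	// Decompress a buffer; passing nil lets the decoder allocate the destination.
	out, err := decoder.DecodeAll(compressed, nil)
	fmt.Println(string(out), err)
}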
diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go
index 97299d4..d41e3e1 100644
--- a/vendor/github.com/klauspost/compress/zstd/bitreader.go
+++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go
@@ -5,11 +5,12 @@
package zstd
import (
- "encoding/binary"
"errors"
"fmt"
"io"
"math/bits"
+
+ "github.com/klauspost/compress/internal/le"
)
// bitReader reads a bitstream in reverse.
@@ -17,8 +18,8 @@
// for aligning the input.
type bitReader struct {
in []byte
- off uint // next byte to read is at in[off - 1]
value uint64 // Maybe use [16]byte, but shifting is awkward.
+ cursor int // offset where next read should end
bitsRead uint8
}
@@ -28,12 +29,12 @@
return errors.New("corrupt stream: too short")
}
b.in = in
- b.off = uint(len(in))
// The highest bit of the last byte indicates where to start
v := in[len(in)-1]
if v == 0 {
return errors.New("corrupt stream, did not find end of stream")
}
+ b.cursor = len(in)
b.bitsRead = 64
b.value = 0
if len(in) >= 8 {
@@ -69,21 +70,16 @@
if b.bitsRead < 32 {
return
}
- // 2 bounds checks.
- v := b.in[b.off-4:]
- v = v[:4]
- low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
- b.value = (b.value << 32) | uint64(low)
+ b.cursor -= 4
+ b.value = (b.value << 32) | uint64(le.Load32(b.in, b.cursor))
b.bitsRead -= 32
- b.off -= 4
}
// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read.
func (b *bitReader) fillFastStart() {
- // Do single re-slice to avoid bounds checks.
- b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.cursor -= 8
+ b.value = le.Load64(b.in, b.cursor)
b.bitsRead = 0
- b.off -= 8
}
// fill() will make sure at least 32 bits are available.
@@ -91,25 +87,23 @@
if b.bitsRead < 32 {
return
}
- if b.off >= 4 {
- v := b.in[b.off-4:]
- v = v[:4]
- low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
- b.value = (b.value << 32) | uint64(low)
+ if b.cursor >= 4 {
+ b.cursor -= 4
+ b.value = (b.value << 32) | uint64(le.Load32(b.in, b.cursor))
b.bitsRead -= 32
- b.off -= 4
return
}
- for b.off > 0 {
- b.value = (b.value << 8) | uint64(b.in[b.off-1])
- b.bitsRead -= 8
- b.off--
+
+ b.bitsRead -= uint8(8 * b.cursor)
+ for b.cursor > 0 {
+ b.cursor -= 1
+ b.value = (b.value << 8) | uint64(b.in[b.cursor])
}
}
// finished returns true if all bits have been read from the bit stream.
func (b *bitReader) finished() bool {
- return b.off == 0 && b.bitsRead >= 64
+ return b.cursor == 0 && b.bitsRead >= 64
}
// overread returns true if more bits have been requested than is on the stream.
@@ -119,13 +113,14 @@
// remain returns the number of bits remaining.
func (b *bitReader) remain() uint {
- return b.off*8 + 64 - uint(b.bitsRead)
+ return 8*uint(b.cursor) + 64 - uint(b.bitsRead)
}
// close the bitstream and returns an error if out-of-buffer reads occurred.
func (b *bitReader) close() error {
// Release reference.
b.in = nil
+ b.cursor = 0
if !b.finished() {
return fmt.Errorf("%d extra bits on block, should be 0", b.remain())
}
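Note: the le.Load32/le.Load64 helpers replace the hand-assembled byte loads removed above. A sketch of their behavior in terms of the standard library (an assumption about shape; the real internal/le package may use an unsafe fast path behind build tags):

package le

import "encoding/binary"

// Load32 returns the little-endian uint32 at b[i:i+4].
func Load32(b []byte, i int) uint32 {
	return binary.LittleEndian.Uint32(b[i:])
}

// Load64 returns the little-endian uint64 at b[i:i+8].
func Load64(b []byte, i int) uint64 {
	return binary.LittleEndian.Uint64(b[i:])
}

With the cursor counting down from len(in), fill simply decrements it by 4 or 8 before loading, which avoids the re-slicing the old code used to dodge bounds checks.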
diff --git a/vendor/github.com/klauspost/compress/zstd/bitwriter.go b/vendor/github.com/klauspost/compress/zstd/bitwriter.go
index 78b3c61..1952f17 100644
--- a/vendor/github.com/klauspost/compress/zstd/bitwriter.go
+++ b/vendor/github.com/klauspost/compress/zstd/bitwriter.go
@@ -97,12 +97,11 @@
// close will write the alignment bit and write the final byte(s)
// to the output.
-func (b *bitWriter) close() error {
+func (b *bitWriter) close() {
// End mark
b.addBits16Clean(1, 1)
// flush until next byte.
b.flushAlign()
- return nil
}
// reset and continue writing by appending to out.
diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go
index 7eed729..0dd742f 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@@ -5,14 +5,10 @@
package zstd
import (
- "bytes"
- "encoding/binary"
"errors"
"fmt"
+ "hash/crc32"
"io"
- "io/ioutil"
- "os"
- "path/filepath"
"sync"
"github.com/klauspost/compress/huff0"
@@ -83,8 +79,9 @@
err error
- // Check against this crc
- checkCRC []byte
+ // Check against this crc, if hasCRC is true.
+ checkCRC uint32
+ hasCRC bool
// Frame to use for singlethreaded decoding.
// Should not be used by the decoder itself since parent may be another frame.
@@ -192,16 +189,14 @@
}
// Read block data.
- if cap(b.dataStorage) < cSize {
+ if _, ok := br.(*byteBuf); !ok && cap(b.dataStorage) < cSize {
+ // byteBuf doesn't need a destination buffer.
if b.lowMem || cSize > maxCompressedBlockSize {
b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc)
} else {
b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc)
}
}
- if cap(b.dst) <= maxSize {
- b.dst = make([]byte, 0, maxSize+1)
- }
b.data, err = br.readBig(cSize, b.dataStorage)
if err != nil {
if debugDecoder {
@@ -210,6 +205,9 @@
}
return err
}
+ if cap(b.dst) <= maxSize {
+ b.dst = make([]byte, 0, maxSize+1)
+ }
return nil
}
@@ -233,7 +231,7 @@
if b.lowMem {
b.dst = make([]byte, b.RLESize)
} else {
- b.dst = make([]byte, maxBlockSize)
+ b.dst = make([]byte, maxCompressedBlockSize)
}
}
b.dst = b.dst[:b.RLESize]
@@ -441,6 +439,9 @@
}
}
var err error
+ if debugDecoder {
+ println("huff table input:", len(literals), "CRC:", crc32.ChecksumIEEE(literals))
+ }
huff, literals, err = huff0.ReadTable(literals, huff)
if err != nil {
println("reading huffman table:", err)
@@ -549,6 +550,9 @@
if debugDecoder {
printf("Compression modes: 0b%b", compMode)
}
+ if compMode&3 != 0 {
+ return errors.New("corrupt block: reserved bits not zero")
+ }
for i := uint(0); i < 3; i++ {
mode := seqCompMode((compMode >> (6 - i*2)) & 3)
if debugDecoder {
@@ -587,10 +591,12 @@
}
seq.fse.setRLE(symb)
if debugDecoder {
- printf("RLE set to %+v, code: %v", symb, v)
+ printf("RLE set to 0x%x, code: %v", symb, v)
}
case compModeFSE:
- println("Reading table for", tableIndex(i))
+ if debugDecoder {
+ println("Reading table for", tableIndex(i))
+ }
if seq.fse == nil || seq.fse.preDefined {
seq.fse = fseDecoderPool.Get().(*fseDecoder)
}
@@ -638,21 +644,6 @@
println("initializing sequences:", err)
return err
}
- // Extract blocks...
- if false && hist.dict == nil {
- fatalErr := func(err error) {
- if err != nil {
- panic(err)
- }
- }
- fn := fmt.Sprintf("n-%d-lits-%d-prev-%d-%d-%d-win-%d.blk", hist.decoders.nSeqs, len(hist.decoders.literals), hist.recentOffsets[0], hist.recentOffsets[1], hist.recentOffsets[2], hist.windowSize)
- var buf bytes.Buffer
- fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.litLengths.fse))
- fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse))
- fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse))
- buf.Write(in)
- ioutil.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
- }
return nil
}
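Note: the new reserved-bits check rejects sequence sections where the low two bits of the Compression_Modes byte are set. A standalone sketch of how that byte is unpacked, mirroring the loop above (RFC 8878 reserves bits 1-0):

package main

import (
	"errors"
	"fmt"
)

// parseCompModes unpacks the three 2-bit table modes
// (literal lengths, offsets, match lengths) from the modes byte.
func parseCompModes(compMode byte) ([3]byte, error) {
	if compMode&3 != 0 {
		return [3]byte{}, errors.New("corrupt block: reserved bits not zero")
	}
	var modes [3]byte
	for i := uint(0); i < 3; i++ {
		modes[i] = (compMode >> (6 - i*2)) & 3
	}
	return modes, nil
}

func main() {
	fmt.Println(parseCompModes(0b01_10_00_00)) // [1 2 0] <nil>
	fmt.Println(parseCompModes(0b00_00_00_01)) // reserved bits set: error
}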
diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go
index 12e8f6f..fd35ea1 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go
@@ -9,6 +9,7 @@
"fmt"
"math"
"math/bits"
+ "slices"
"github.com/klauspost/compress/huff0"
)
@@ -361,14 +362,21 @@
if len(lits) >= 1024 {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
- } else if len(lits) > 32 {
+ } else if len(lits) > 16 {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
} else {
err = huff0.ErrIncompressible
}
-
+ if err == nil && len(out)+5 > len(lits) {
+ // If we are close, we may still be worse or equal to raw.
+ var lh literalsHeader
+ lh.setSizes(len(out), len(lits), single)
+ if len(out)+lh.size() >= len(lits) {
+ err = huff0.ErrIncompressible
+ }
+ }
switch err {
case huff0.ErrIncompressible:
if debugEncoder {
@@ -420,6 +428,16 @@
return nil
}
+// encodeRLE will encode an RLE block.
+func (b *blockEnc) encodeRLE(val byte, length uint32) {
+ var bh blockHeader
+ bh.setLast(b.last)
+ bh.setSize(length)
+ bh.setType(blockTypeRLE)
+ b.output = bh.appendTo(b.output)
+ b.output = append(b.output, val)
+}
+
// fuzzFseEncoder can be used to fuzz the FSE encoder.
func fuzzFseEncoder(data []byte) int {
if len(data) > maxSequences || len(data) < 2 {
@@ -440,16 +458,7 @@
// All 0
return 0
}
- maxCount := func(a []uint32) int {
- var max uint32
- for _, v := range a {
- if v > max {
- max = v
- }
- }
- return int(max)
- }
- cnt := maxCount(hist[:maxSym])
+ cnt := int(slices.Max(hist[:maxSym]))
if cnt == len(data) {
// RLE
return 0
@@ -472,8 +481,18 @@
if len(b.sequences) == 0 {
return b.encodeLits(b.literals, rawAllLits)
}
+ if len(b.sequences) == 1 && len(org) > 0 && len(b.literals) <= 1 {
+ // Check common RLE cases.
+ seq := b.sequences[0]
+ if seq.litLen == uint32(len(b.literals)) && seq.offset-3 == 1 {
+ // Offset == 1 and 0 or 1 literals.
+ b.encodeRLE(org[0], b.sequences[0].matchLen+zstdMinMatch+seq.litLen)
+ return nil
+ }
+ }
+
// We want some difference to at least account for the headers.
- saved := b.size - len(b.literals) - (b.size >> 5)
+ saved := b.size - len(b.literals) - (b.size >> 6)
if saved < 16 {
if org == nil {
return errIncompressible
@@ -503,7 +522,7 @@
if len(b.literals) >= 1024 && !raw {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
- } else if len(b.literals) > 32 && !raw {
+ } else if len(b.literals) > 16 && !raw {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
@@ -511,6 +530,17 @@
err = huff0.ErrIncompressible
}
+ if err == nil && len(out)+5 > len(b.literals) {
+ // If we are close, we may still be worse or equal to raw.
+ var lh literalsHeader
+ lh.setSize(len(b.literals))
+ szRaw := lh.size()
+ lh.setSizes(len(out), len(b.literals), single)
+ szComp := lh.size()
+ if len(out)+szComp >= len(b.literals)+szRaw {
+ err = huff0.ErrIncompressible
+ }
+ }
switch err {
case huff0.ErrIncompressible:
lh.setType(literalsBlockRaw)
@@ -773,16 +803,16 @@
ml.flush(mlEnc.actualTableLog)
of.flush(ofEnc.actualTableLog)
ll.flush(llEnc.actualTableLog)
- err = wr.close()
- if err != nil {
- return err
- }
+ wr.close()
b.output = wr.out
+ // Maybe even add a bigger margin.
if len(b.output)-3-bhOffset >= b.size {
- // Maybe even add a bigger margin.
+ // Discard and encode as raw block.
+ b.output = b.encodeRawTo(b.output[:bhOffset], org)
+ b.popOffsets()
b.litEnc.Reuse = huff0.ReusePolicyNone
- return errIncompressible
+ return nil
}
// Size is output minus block header.
@@ -846,15 +876,6 @@
}
}
}
- maxCount := func(a []uint32) int {
- var max uint32
- for _, v := range a {
- if v > max {
- max = v
- }
- }
- return int(max)
- }
if debugAsserts && mlMax > maxMatchLengthSymbol {
panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax))
}
@@ -865,7 +886,7 @@
panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax))
}
- b.coders.mlEnc.HistogramFinished(mlMax, maxCount(mlH[:mlMax+1]))
- b.coders.ofEnc.HistogramFinished(ofMax, maxCount(ofH[:ofMax+1]))
- b.coders.llEnc.HistogramFinished(llMax, maxCount(llH[:llMax+1]))
+ b.coders.mlEnc.HistogramFinished(mlMax, int(slices.Max(mlH[:mlMax+1])))
+ b.coders.ofEnc.HistogramFinished(ofMax, int(slices.Max(ofH[:ofMax+1])))
+ b.coders.llEnc.HistogramFinished(llMax, int(slices.Max(llH[:llMax+1])))
}
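Note: the added size guards make the raw-vs-Huffman decision explicit: compressed literals only win if the output plus its literals header is smaller than storing the literals raw. A simplified sketch of that comparison (headerSize stands in for literalsHeader.size(), which is not shown in this hunk):

package main

import "fmt"

// chooseLiterals sketches the decision added above: compressing is only
// worthwhile when output plus header beats the raw literals.
func chooseLiterals(litLen, compLen, headerSize int) string {
	if compLen+headerSize >= litLen {
		return "raw" // equivalent to falling back via huff0.ErrIncompressible
	}
	return "compressed"
}

func main() {
	fmt.Println(chooseLiterals(100, 97, 5)) // raw: the header eats the 3-byte win
	fmt.Println(chooseLiterals(100, 80, 5)) // compressed
}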
diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
index 2ad0207..55a3885 100644
--- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
@@ -7,7 +7,6 @@
import (
"fmt"
"io"
- "io/ioutil"
)
type byteBuffer interface {
@@ -55,7 +54,7 @@
func (b *byteBuf) readByte() (byte, error) {
bb := *b
if len(bb) < 1 {
- return 0, nil
+ return 0, io.ErrUnexpectedEOF
}
r := bb[0]
*b = bb[1:]
@@ -110,7 +109,7 @@
}
func (r *readerWrapper) readByte() (byte, error) {
- n2, err := r.r.Read(r.tmp[:1])
+ n2, err := io.ReadFull(r.r, r.tmp[:1])
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
@@ -124,7 +123,7 @@
}
func (r *readerWrapper) skipN(n int64) error {
- n2, err := io.CopyN(ioutil.Discard, r.r, n)
+ n2, err := io.CopyN(io.Discard, r.r, n)
if n2 != n {
err = io.ErrUnexpectedEOF
}
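Note: replacing r.r.Read with io.ReadFull matters because an io.Reader may legally return fewer bytes than requested (even 0 with a nil error); io.ReadFull retries until the byte arrives or an error occurs. A minimal sketch of the corrected pattern:

package main

import (
	"fmt"
	"io"
	"strings"
)

// readByte reads exactly one byte, mapping EOF to io.ErrUnexpectedEOF
// as readerWrapper.readByte does above.
func readByte(r io.Reader) (byte, error) {
	var tmp [1]byte
	_, err := io.ReadFull(r, tmp[:])
	if err == io.EOF {
		err = io.ErrUnexpectedEOF
	}
	return tmp[0], err
}

func main() {
	b, err := readByte(strings.NewReader("z"))
	fmt.Println(b, err) // 122 <nil>
}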
diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go
index 5022e71..6a5a298 100644
--- a/vendor/github.com/klauspost/compress/zstd/decodeheader.go
+++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go
@@ -4,7 +4,6 @@
package zstd
import (
- "bytes"
"encoding/binary"
"errors"
"io"
@@ -96,42 +95,54 @@
// If there isn't enough input, io.ErrUnexpectedEOF is returned.
// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
func (h *Header) Decode(in []byte) error {
+ _, err := h.DecodeAndStrip(in)
+ return err
+}
+
+// DecodeAndStrip will decode the header from the beginning of the stream
+// and on success return the remaining bytes.
+// This will decode the frame header and the first block header if enough bytes are provided.
+// It is recommended to provide at least HeaderMaxSize bytes.
+// If the frame header cannot be read an error will be returned.
+// If there isn't enough input, io.ErrUnexpectedEOF is returned.
+// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
+func (h *Header) DecodeAndStrip(in []byte) (remain []byte, err error) {
*h = Header{}
if len(in) < 4 {
- return io.ErrUnexpectedEOF
+ return nil, io.ErrUnexpectedEOF
}
h.HeaderSize += 4
b, in := in[:4], in[4:]
- if !bytes.Equal(b, frameMagic) {
- if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
- return ErrMagicMismatch
+ if string(b) != frameMagic {
+ if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
+ return nil, ErrMagicMismatch
}
if len(in) < 4 {
- return io.ErrUnexpectedEOF
+ return nil, io.ErrUnexpectedEOF
}
h.HeaderSize += 4
h.Skippable = true
h.SkippableID = int(b[0] & 0xf)
h.SkippableSize = binary.LittleEndian.Uint32(in)
- return nil
+ return in[4:], nil
}
// Read Window_Descriptor
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
if len(in) < 1 {
- return io.ErrUnexpectedEOF
+ return nil, io.ErrUnexpectedEOF
}
fhd, in := in[0], in[1:]
h.HeaderSize++
h.SingleSegment = fhd&(1<<5) != 0
h.HasCheckSum = fhd&(1<<2) != 0
if fhd&(1<<3) != 0 {
- return errors.New("reserved bit set on frame header")
+ return nil, errors.New("reserved bit set on frame header")
}
if !h.SingleSegment {
if len(in) < 1 {
- return io.ErrUnexpectedEOF
+ return nil, io.ErrUnexpectedEOF
}
var wd byte
wd, in = in[0], in[1:]
@@ -149,11 +160,11 @@
size = 4
}
if len(in) < int(size) {
- return io.ErrUnexpectedEOF
+ return nil, io.ErrUnexpectedEOF
}
b, in = in[:size], in[size:]
h.HeaderSize += int(size)
- switch size {
+ switch len(b) {
case 1:
h.DictionaryID = uint32(b[0])
case 2:
@@ -179,11 +190,11 @@
if fcsSize > 0 {
h.HasFCS = true
if len(in) < fcsSize {
- return io.ErrUnexpectedEOF
+ return nil, io.ErrUnexpectedEOF
}
b, in = in[:fcsSize], in[fcsSize:]
h.HeaderSize += int(fcsSize)
- switch fcsSize {
+ switch len(b) {
case 1:
h.FrameContentSize = uint64(b[0])
case 2:
@@ -200,7 +211,7 @@
// Frame Header done, we will not fail from now on.
if len(in) < 3 {
- return nil
+ return in, nil
}
tmp := in[:3]
bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
@@ -210,7 +221,7 @@
cSize := int(bh >> 3)
switch blockType {
case blockTypeReserved:
- return nil
+ return in, nil
case blockTypeRLE:
h.FirstBlock.Compressed = true
h.FirstBlock.DecompressedSize = cSize
@@ -226,5 +237,25 @@
}
h.FirstBlock.OK = true
- return nil
+ return in, nil
+}
+
+// AppendTo will append the encoded header to the dst slice.
+// There is no error checking performed on the header values.
+func (h *Header) AppendTo(dst []byte) ([]byte, error) {
+ if h.Skippable {
+ magic := [4]byte{0x50, 0x2a, 0x4d, 0x18}
+ magic[0] |= byte(h.SkippableID & 0xf)
+ dst = append(dst, magic[:]...)
+ f := h.SkippableSize
+ return append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)), nil
+ }
+ f := frameHeader{
+ ContentSize: h.FrameContentSize,
+ WindowSize: uint32(h.WindowSize),
+ SingleSegment: h.SingleSegment,
+ Checksum: h.HasCheckSum,
+ DictID: h.DictionaryID,
+ }
+ return f.appendTo(dst), nil
}
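Note: DecodeAndStrip makes header-plus-payload handling a one-liner, and Decode is now a thin wrapper over it. A usage sketch (the payload string is illustrative):

package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	enc, _ := zstd.NewWriter(nil)
	frame := enc.EncodeAll([]byte("example payload"), nil)

	var h zstd.Header
	remain, err := h.DecodeAndStrip(frame)
	if err != nil {
		fmt.Println("header error:", err)
		return
	}
	fmt.Println("header size:", h.HeaderSize, "bytes; remaining:", len(remain))
}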
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go
index d212f47..ea2a193 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@@ -5,7 +5,6 @@
package zstd
import (
- "bytes"
"context"
"encoding/binary"
"io"
@@ -35,13 +34,13 @@
br readerWrapper
enabled bool
inFrame bool
+ dstBuf []byte
}
frame *frameDec
// Custom dictionaries.
- // Always uses copies.
- dicts map[uint32]dict
+ dicts map[uint32]*dict
// streamWg is the waitgroup for all streams
streamWg sync.WaitGroup
@@ -83,7 +82,7 @@
// can run multiple concurrent stateless decodes. It is even possible to
// use stateless decodes while a stream is being decoded.
//
-// The Reset function can be used to initiate a new stream, which is will considerably
+// The Reset function can be used to initiate a new stream, which will considerably
// reduce the allocations normally caused by NewReader.
func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
initPredefined()
@@ -103,7 +102,7 @@
}
// Transfer option dicts.
- d.dicts = make(map[uint32]dict, len(d.o.dicts))
+ d.dicts = make(map[uint32]*dict, len(d.o.dicts))
for _, dc := range d.o.dicts {
d.dicts[dc.id] = dc
}
@@ -124,7 +123,7 @@
}
// Read bytes from the decompressed stream into p.
-// Returns the number of bytes written and any error that occurred.
+// Returns the number of bytes read and any error that occurred.
// When the stream is done, io.EOF will be returned.
func (d *Decoder) Read(p []byte) (int, error) {
var n int
@@ -187,21 +186,23 @@
}
// If bytes buffer and < 5MB, do sync decoding anyway.
- if bb, ok := r.(byter); ok && bb.Len() < 5<<20 {
+ if bb, ok := r.(byter); ok && bb.Len() < d.o.decodeBufsBelow && !d.o.limitToCap {
bb2 := bb
if debugDecoder {
println("*bytes.Buffer detected, doing sync decode, len:", bb.Len())
}
b := bb2.Bytes()
var dst []byte
- if cap(d.current.b) > 0 {
- dst = d.current.b
+ if cap(d.syncStream.dstBuf) > 0 {
+ dst = d.syncStream.dstBuf[:0]
}
- dst, err := d.DecodeAll(b, dst[:0])
+ dst, err := d.DecodeAll(b, dst)
if err == nil {
err = io.EOF
}
+ // Save output buffer
+ d.syncStream.dstBuf = dst
d.current.b = dst
d.current.err = err
d.current.flushed = true
@@ -216,6 +217,7 @@
d.current.err = nil
d.current.flushed = false
d.current.d = nil
+ d.syncStream.dstBuf = nil
// Ensure no-one else is still running...
d.streamWg.Wait()
@@ -312,6 +314,7 @@
// Grab a block decoder and frame decoder.
block := <-d.decoders
frame := block.localFrame
+ initialSize := len(dst)
defer func() {
if debugDecoder {
printf("re-adding decoder: %p", block)
@@ -320,6 +323,7 @@
frame.bBuf = nil
if frame.history.decoders.br != nil {
frame.history.decoders.br.in = nil
+ frame.history.decoders.br.cursor = 0
}
d.decoders <- block
}()
@@ -337,15 +341,8 @@
}
return dst, err
}
- if frame.DictionaryID != nil {
- dict, ok := d.dicts[*frame.DictionaryID]
- if !ok {
- return nil, ErrUnknownDictionary
- }
- if debugDecoder {
- println("setting dict", frame.DictionaryID)
- }
- frame.history.setDict(&dict)
+ if err = d.setDict(frame); err != nil {
+ return nil, err
}
if frame.WindowSize > d.o.maxWindowSize {
if debugDecoder {
@@ -354,7 +351,16 @@
return dst, ErrWindowSizeExceeded
}
if frame.FrameContentSize != fcsUnknown {
- if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
+ if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)-initialSize) {
+ if debugDecoder {
+ println("decoder size exceeded; fcs:", frame.FrameContentSize, "> mcs:", d.o.maxDecodedSize-uint64(len(dst)-initialSize), "len:", len(dst))
+ }
+ return dst, ErrDecoderSizeExceeded
+ }
+ if d.o.limitToCap && frame.FrameContentSize > uint64(cap(dst)-len(dst)) {
+ if debugDecoder {
+ println("decoder size exceeded; fcs:", frame.FrameContentSize, "> (cap-len)", cap(dst)-len(dst))
+ }
return dst, ErrDecoderSizeExceeded
}
if cap(dst)-len(dst) < int(frame.FrameContentSize) {
@@ -364,7 +370,7 @@
}
}
- if cap(dst) == 0 {
+ if cap(dst) == 0 && !d.o.limitToCap {
// Allocate len(input) * 2 by default if nothing is provided
// and we didn't get frame content size.
size := len(input) * 2
@@ -382,6 +388,9 @@
if err != nil {
return dst, err
}
+ if uint64(len(dst)-initialSize) > d.o.maxDecodedSize {
+ return dst, ErrDecoderSizeExceeded
+ }
if len(frame.bBuf) == 0 {
if debugDecoder {
println("frame dbuf empty")
@@ -442,26 +451,23 @@
println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp)
}
- if !d.o.ignoreChecksum && len(next.b) > 0 {
- n, err := d.current.crc.Write(next.b)
- if err == nil {
- if n != len(next.b) {
- d.current.err = io.ErrShortWrite
- }
- }
+ if d.o.ignoreChecksum {
+ return true
}
- if next.err == nil && next.d != nil && len(next.d.checkCRC) != 0 {
- got := d.current.crc.Sum64()
- var tmp [4]byte
- binary.LittleEndian.PutUint32(tmp[:], uint32(got))
- if !d.o.ignoreChecksum && !bytes.Equal(tmp[:], next.d.checkCRC) {
+
+ if len(next.b) > 0 {
+ d.current.crc.Write(next.b)
+ }
+ if next.err == nil && next.d != nil && next.d.hasCRC {
+ got := uint32(d.current.crc.Sum64())
+ if got != next.d.checkCRC {
if debugDecoder {
- println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)")
+ printf("CRC Check Failed: %08x (got) != %08x (on stream)\n", got, next.d.checkCRC)
}
d.current.err = ErrCRCMismatch
} else {
if debugDecoder {
- println("CRC ok", tmp[:])
+ printf("CRC ok %08x\n", got)
}
}
}
@@ -477,18 +483,12 @@
if !d.syncStream.inFrame {
d.frame.history.reset()
d.current.err = d.frame.reset(&d.syncStream.br)
+ if d.current.err == nil {
+ d.current.err = d.setDict(d.frame)
+ }
if d.current.err != nil {
return false
}
- if d.frame.DictionaryID != nil {
- dict, ok := d.dicts[*d.frame.DictionaryID]
- if !ok {
- d.current.err = ErrUnknownDictionary
- return false
- } else {
- d.frame.history.setDict(&dict)
- }
- }
if d.frame.WindowSize > d.o.maxDecodedSize || d.frame.WindowSize > d.o.maxWindowSize {
d.current.err = ErrDecoderSizeExceeded
return false
@@ -667,6 +667,7 @@
if debugDecoder {
println("Async 1: new history, recent:", block.async.newHist.recentOffsets)
}
+ hist.reset()
hist.decoders = block.async.newHist.decoders
hist.recentOffsets = block.async.newHist.recentOffsets
hist.windowSize = block.async.newHist.windowSize
@@ -698,6 +699,7 @@
seqExecute <- block
}
close(seqExecute)
+ hist.reset()
}()
var wg sync.WaitGroup
@@ -721,6 +723,7 @@
if debugDecoder {
println("Async 2: new history")
}
+ hist.reset()
hist.windowSize = block.async.newHist.windowSize
hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer
if block.async.newHist.dict != nil {
@@ -750,7 +753,7 @@
if block.lowMem {
block.dst = make([]byte, block.RLESize)
} else {
- block.dst = make([]byte, maxBlockSize)
+ block.dst = make([]byte, maxCompressedBlockSize)
}
}
block.dst = block.dst[:block.RLESize]
@@ -802,13 +805,14 @@
if debugDecoder {
println("decoder goroutines finished")
}
+ hist.reset()
}()
+ var hist history
decodeStream:
for {
- var hist history
var hasErr bool
-
+ hist.reset()
decodeBlock := func(block *blockDec) {
if hasErr {
if block != nil {
@@ -843,15 +847,14 @@
if debugDecoder && err != nil {
println("Frame decoder returned", err)
}
- if err == nil && frame.DictionaryID != nil {
- dict, ok := d.dicts[*frame.DictionaryID]
- if !ok {
- err = ErrUnknownDictionary
- } else {
- frame.history.setDict(&dict)
- }
+ if err == nil {
+ err = d.setDict(frame)
}
if err == nil && d.frame.WindowSize > d.o.maxWindowSize {
+ if debugDecoder {
+ println("decoder size exceeded, fws:", d.frame.WindowSize, "> mws:", d.o.maxWindowSize)
+ }
+
err = ErrDecoderSizeExceeded
}
if err != nil {
@@ -893,18 +896,22 @@
println("next block returned error:", err)
}
dec.err = err
- dec.checkCRC = nil
+ dec.hasCRC = false
if dec.Last && frame.HasCheckSum && err == nil {
crc, err := frame.rawInput.readSmall(4)
- if err != nil {
+ if len(crc) < 4 {
+ if err == nil {
+ err = io.ErrUnexpectedEOF
+
+ }
println("CRC missing?", err)
dec.err = err
- }
- var tmp [4]byte
- copy(tmp[:], crc)
- dec.checkCRC = tmp[:]
- if debugDecoder {
- println("found crc to check:", dec.checkCRC)
+ } else {
+ dec.checkCRC = binary.LittleEndian.Uint32(crc)
+ dec.hasCRC = true
+ if debugDecoder {
+ printf("found crc to check: %08x\n", dec.checkCRC)
+ }
}
}
err = dec.err
@@ -920,5 +927,23 @@
}
close(seqDecode)
wg.Wait()
+ hist.reset()
d.frame.history.b = frameHistCache
}
+
+func (d *Decoder) setDict(frame *frameDec) (err error) {
+ dict, ok := d.dicts[frame.DictionaryID]
+ if ok {
+ if debugDecoder {
+ println("setting dict", frame.DictionaryID)
+ }
+ frame.history.setDict(dict)
+ } else if frame.DictionaryID != 0 {
+ // A zero or missing dictionary id is ambiguous:
+ // either dictionary zero, or no dictionary. In particular,
+ // zstd --patch-from uses this id for the source file,
+ // so only return an error if the dictionary id is not zero.
+ err = ErrUnknownDictionary
+ }
+ return err
+}
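Note: alongside the setDict consolidation, this file tightens the size accounting (initialSize, limitToCap, the post-decode maxDecodedSize check). The memory cap can be exercised like this sketch (sizes are illustrative):

package main

import (
	"bytes"
	"errors"
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	enc, _ := zstd.NewWriter(nil)
	frame := enc.EncodeAll(bytes.Repeat([]byte("a"), 1<<20), nil) // 1 MiB payload

	// Cap decoded output at 64 KiB; DecodeAll must refuse the 1 MiB frame.
	dec, _ := zstd.NewReader(nil, zstd.WithDecoderMaxMemory(64<<10))
	defer dec.Close()
	_, err := dec.DecodeAll(frame, nil)
	fmt.Println(errors.Is(err, zstd.ErrDecoderSizeExceeded)) // true
}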
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go
index c70e6fa..774c5f0 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go
@@ -6,6 +6,8 @@
import (
"errors"
+ "fmt"
+ "math/bits"
"runtime"
)
@@ -14,20 +16,23 @@
// options retains accumulated state of multiple options.
type decoderOptions struct {
- lowMem bool
- concurrent int
- maxDecodedSize uint64
- maxWindowSize uint64
- dicts []dict
- ignoreChecksum bool
+ lowMem bool
+ concurrent int
+ maxDecodedSize uint64
+ maxWindowSize uint64
+ dicts []*dict
+ ignoreChecksum bool
+ limitToCap bool
+ decodeBufsBelow int
}
func (o *decoderOptions) setDefault() {
*o = decoderOptions{
// use less ram: true for now, but may change.
- lowMem: true,
- concurrent: runtime.GOMAXPROCS(0),
- maxWindowSize: MaxWindowSize,
+ lowMem: true,
+ concurrent: runtime.GOMAXPROCS(0),
+ maxWindowSize: MaxWindowSize,
+ decodeBufsBelow: 128 << 10,
}
if o.concurrent > 4 {
o.concurrent = 4
@@ -82,7 +87,13 @@
}
// WithDecoderDicts allows to register one or more dictionaries for the decoder.
-// If several dictionaries with the same ID is provided the last one will be used.
+//
+// Each slice in dict must be in the [dictionary format] produced by
+// "zstd --train" from the Zstandard reference implementation.
+//
+// If several dictionaries with the same ID are provided, the last one will be used.
+//
+// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
func WithDecoderDicts(dicts ...[]byte) DOption {
return func(o *decoderOptions) error {
for _, b := range dicts {
@@ -90,12 +101,24 @@
if err != nil {
return err
}
- o.dicts = append(o.dicts, *d)
+ o.dicts = append(o.dicts, d)
}
return nil
}
}
+// WithDecoderDictRaw registers a dictionary that may be used by the decoder.
+// The slice content can be arbitrary data.
+func WithDecoderDictRaw(id uint32, content []byte) DOption {
+ return func(o *decoderOptions) error {
+ if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
+ return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
+ }
+ o.dicts = append(o.dicts, &dict{id: id, content: content, offsets: [3]int{1, 4, 8}})
+ return nil
+ }
+}
+
// WithDecoderMaxWindow allows to set a maximum window size for decodes.
// This allows rejecting packets that will cause big memory usage.
// The Decoder will likely allocate more memory based on the WithDecoderLowmem setting.
@@ -114,6 +137,29 @@
}
}
+// WithDecodeAllCapLimit will limit DecodeAll to decoding cap(dst)-len(dst) bytes,
+// or any size set in WithDecoderMaxMemory.
+// This can be used to limit decoding to a specific maximum output size.
+// Disabled by default.
+func WithDecodeAllCapLimit(b bool) DOption {
+ return func(o *decoderOptions) error {
+ o.limitToCap = b
+ return nil
+ }
+}
+
+// WithDecodeBuffersBelow will fully decode readers that have a
+// `Bytes() []byte` and `Len() int` interface similar to bytes.Buffer.
+// This typically uses less allocations but will have the full decompressed object in memory.
+// Note that this is disabled when DecodeAllCapLimit is set, as well as when a size of 0 or less is given.
+// Default is 128KiB.
+func WithDecodeBuffersBelow(size int) DOption {
+ return func(o *decoderOptions) error {
+ o.decodeBufsBelow = size
+ return nil
+ }
+}
+
// IgnoreChecksum allows to forcibly ignore checksum checking.
func IgnoreChecksum(b bool) DOption {
return func(o *decoderOptions) error {
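Note: WithDecoderDictRaw is the decoder half of raw (content-only) dictionaries, e.g. for zstd --patch-from style usage. A minimal registration sketch (the ID and content are illustrative):

package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Register arbitrary content under a non-zero dictionary ID.
	// Frames referencing ID 1 will use it; ID 0 still means "no dictionary".
	dec, err := zstd.NewReader(nil, zstd.WithDecoderDictRaw(1, []byte("shared prefix data")))
	if err != nil {
		fmt.Println(err)
		return
	}
	defer dec.Close()
	fmt.Println("decoder ready")
}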
diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go
index a36ae83..b7b8316 100644
--- a/vendor/github.com/klauspost/compress/zstd/dict.go
+++ b/vendor/github.com/klauspost/compress/zstd/dict.go
@@ -6,6 +6,8 @@
"errors"
"fmt"
"io"
+ "math"
+ "sort"
"github.com/klauspost/compress/huff0"
)
@@ -15,12 +17,14 @@
litEnc *huff0.Scratch
llDec, ofDec, mlDec sequenceDec
- //llEnc, ofEnc, mlEnc []*fseEncoder
- offsets [3]int
- content []byte
+ offsets [3]int
+ content []byte
}
-var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec}
+const dictMagic = "\x37\xa4\x30\xec"
+
+// Maximum dictionary size for the reference implementation (1.5.3) is 2 GiB.
+const dictMaxLength = 1 << 31
// ID returns the dictionary id or 0 if d is nil.
func (d *dict) ID() uint32 {
@@ -30,14 +34,38 @@
return d.id
}
-// DictContentSize returns the dictionary content size or 0 if d is nil.
-func (d *dict) DictContentSize() int {
+// ContentSize returns the dictionary content size or 0 if d is nil.
+func (d *dict) ContentSize() int {
if d == nil {
return 0
}
return len(d.content)
}
+// Content returns the dictionary content.
+func (d *dict) Content() []byte {
+ if d == nil {
+ return nil
+ }
+ return d.content
+}
+
+// Offsets returns the initial offsets.
+func (d *dict) Offsets() [3]int {
+ if d == nil {
+ return [3]int{}
+ }
+ return d.offsets
+}
+
+// LitEncoder returns the literal encoder.
+func (d *dict) LitEncoder() *huff0.Scratch {
+ if d == nil {
+ return nil
+ }
+ return d.litEnc
+}
+
// Load a dictionary as described in
// https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
func loadDict(b []byte) (*dict, error) {
@@ -50,7 +78,7 @@
ofDec: sequenceDec{fse: &fseDecoder{}},
mlDec: sequenceDec{fse: &fseDecoder{}},
}
- if !bytes.Equal(b[:4], dictMagic[:]) {
+ if string(b[:4]) != dictMagic {
return nil, ErrMagicMismatch
}
d.id = binary.LittleEndian.Uint32(b[4:8])
@@ -62,7 +90,7 @@
var err error
d.litEnc, b, err = huff0.ReadTable(b[8:], nil)
if err != nil {
- return nil, err
+ return nil, fmt.Errorf("loading literal table: %w", err)
}
d.litEnc.Reuse = huff0.ReusePolicyMust
@@ -120,3 +148,418 @@
return &d, nil
}
+
+// InspectDictionary loads a zstd dictionary and provides functions to inspect the content.
+func InspectDictionary(b []byte) (interface {
+ ID() uint32
+ ContentSize() int
+ Content() []byte
+ Offsets() [3]int
+ LitEncoder() *huff0.Scratch
+}, error) {
+ initPredefined()
+ d, err := loadDict(b)
+ return d, err
+}
+
+type BuildDictOptions struct {
+ // Dictionary ID.
+ ID uint32
+
+ // Content to use to create dictionary tables.
+ Contents [][]byte
+
+ // History to use for all blocks.
+ History []byte
+
+ // Offsets to use.
+ Offsets [3]int
+
+ // CompatV155 will make the dictionary compatible with Zstd v1.5.5 and earlier.
+ // See https://github.com/facebook/zstd/issues/3724
+ CompatV155 bool
+
+ // Use the specified encoder level.
+ // The dictionary will be built using the specified encoder level,
+ // which will reflect speed and make the dictionary tailored for that level.
+ // If not set SpeedBestCompression will be used.
+ Level EncoderLevel
+
+ // DebugOut will write stats and other details here if set.
+ DebugOut io.Writer
+}
+
+func BuildDict(o BuildDictOptions) ([]byte, error) {
+ initPredefined()
+ hist := o.History
+ contents := o.Contents
+ debug := o.DebugOut != nil
+ println := func(args ...interface{}) {
+ if o.DebugOut != nil {
+ fmt.Fprintln(o.DebugOut, args...)
+ }
+ }
+ printf := func(s string, args ...interface{}) {
+ if o.DebugOut != nil {
+ fmt.Fprintf(o.DebugOut, s, args...)
+ }
+ }
+ print := func(args ...interface{}) {
+ if o.DebugOut != nil {
+ fmt.Fprint(o.DebugOut, args...)
+ }
+ }
+
+ if int64(len(hist)) > dictMaxLength {
+ return nil, fmt.Errorf("dictionary of size %d > %d", len(hist), int64(dictMaxLength))
+ }
+ if len(hist) < 8 {
+ return nil, fmt.Errorf("dictionary of size %d < %d", len(hist), 8)
+ }
+ if len(contents) == 0 {
+ return nil, errors.New("no content provided")
+ }
+ d := dict{
+ id: o.ID,
+ litEnc: nil,
+ llDec: sequenceDec{},
+ ofDec: sequenceDec{},
+ mlDec: sequenceDec{},
+ offsets: o.Offsets,
+ content: hist,
+ }
+ block := blockEnc{lowMem: false}
+ block.init()
+ enc := encoder(&bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(maxMatchLen), bufferReset: math.MaxInt32 - int32(maxMatchLen*2), lowMem: false}})
+ if o.Level != 0 {
+ eOpts := encoderOptions{
+ level: o.Level,
+ blockSize: maxMatchLen,
+ windowSize: maxMatchLen,
+ dict: &d,
+ lowMem: false,
+ }
+ enc = eOpts.encoder()
+ } else {
+ o.Level = SpeedBestCompression
+ }
+ var (
+ remain [256]int
+ ll [256]int
+ ml [256]int
+ of [256]int
+ )
+ addValues := func(dst *[256]int, src []byte) {
+ for _, v := range src {
+ dst[v]++
+ }
+ }
+ addHist := func(dst *[256]int, src *[256]uint32) {
+ for i, v := range src {
+ dst[i] += int(v)
+ }
+ }
+ seqs := 0
+ nUsed := 0
+ litTotal := 0
+ newOffsets := make(map[uint32]int, 1000)
+ for _, b := range contents {
+ block.reset(nil)
+ if len(b) < 8 {
+ continue
+ }
+ nUsed++
+ enc.Reset(&d, true)
+ enc.Encode(&block, b)
+ addValues(&remain, block.literals)
+ litTotal += len(block.literals)
+ if len(block.sequences) == 0 {
+ continue
+ }
+ seqs += len(block.sequences)
+ block.genCodes()
+ addHist(&ll, block.coders.llEnc.Histogram())
+ addHist(&ml, block.coders.mlEnc.Histogram())
+ addHist(&of, block.coders.ofEnc.Histogram())
+ for i, seq := range block.sequences {
+ if i > 3 {
+ break
+ }
+ offset := seq.offset
+ if offset == 0 {
+ continue
+ }
+ if int(offset) >= len(o.History) {
+ continue
+ }
+ if offset > 3 {
+ newOffsets[offset-3]++
+ } else {
+ newOffsets[uint32(o.Offsets[offset-1])]++
+ }
+ }
+ }
+ // Find most used offsets.
+ var sortedOffsets []uint32
+ for k := range newOffsets {
+ sortedOffsets = append(sortedOffsets, k)
+ }
+ sort.Slice(sortedOffsets, func(i, j int) bool {
+ a, b := sortedOffsets[i], sortedOffsets[j]
+ if a == b {
+ // Prefer the longer offset
+ return sortedOffsets[i] > sortedOffsets[j]
+ }
+ return newOffsets[sortedOffsets[i]] > newOffsets[sortedOffsets[j]]
+ })
+ if len(sortedOffsets) > 3 {
+ if debug {
+ print("Offsets:")
+ for i, v := range sortedOffsets {
+ if i > 20 {
+ break
+ }
+ printf("[%d: %d],", v, newOffsets[v])
+ }
+ println("")
+ }
+
+ sortedOffsets = sortedOffsets[:3]
+ }
+ for i, v := range sortedOffsets {
+ o.Offsets[i] = int(v)
+ }
+ if debug {
+ println("New repeat offsets", o.Offsets)
+ }
+
+ if nUsed == 0 || seqs == 0 {
+ return nil, fmt.Errorf("%d blocks, %d sequences found", nUsed, seqs)
+ }
+ if debug {
+ println("Sequences:", seqs, "Blocks:", nUsed, "Literals:", litTotal)
+ }
+ if seqs/nUsed < 512 {
+ // Use 512 as minimum.
+ nUsed = seqs / 512
+ if nUsed == 0 {
+ nUsed = 1
+ }
+ }
+ copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) {
+ hist := dst.Histogram()
+ var maxSym uint8
+ var maxCount int
+ var fakeLength int
+ for i, v := range src {
+ if v > 0 {
+ v = v / nUsed
+ if v == 0 {
+ v = 1
+ }
+ }
+ if v > maxCount {
+ maxCount = v
+ }
+ if v != 0 {
+ maxSym = uint8(i)
+ }
+ fakeLength += v
+ hist[i] = uint32(v)
+ }
+
+ // Ensure we aren't trying to represent RLE.
+ if maxCount == fakeLength {
+ for i := range hist {
+ if uint8(i) == maxSym {
+ fakeLength++
+ maxSym++
+ hist[i+1] = 1
+ if maxSym > 1 {
+ break
+ }
+ }
+ if hist[0] == 0 {
+ fakeLength++
+ hist[i] = 1
+ if maxSym > 1 {
+ break
+ }
+ }
+ }
+ }
+
+ dst.HistogramFinished(maxSym, maxCount)
+ dst.reUsed = false
+ dst.useRLE = false
+ err := dst.normalizeCount(fakeLength)
+ if err != nil {
+ return nil, err
+ }
+ if debug {
+ println("RAW:", dst.count[:maxSym+1], "NORM:", dst.norm[:maxSym+1], "LEN:", fakeLength)
+ }
+ return dst.writeCount(nil)
+ }
+ if debug {
+ print("Literal lengths: ")
+ }
+ llTable, err := copyHist(block.coders.llEnc, &ll)
+ if err != nil {
+ return nil, err
+ }
+ if debug {
+ print("Match lengths: ")
+ }
+ mlTable, err := copyHist(block.coders.mlEnc, &ml)
+ if err != nil {
+ return nil, err
+ }
+ if debug {
+ print("Offsets: ")
+ }
+ ofTable, err := copyHist(block.coders.ofEnc, &of)
+ if err != nil {
+ return nil, err
+ }
+
+ // Literal table
+ avgSize := litTotal
+ if avgSize > huff0.BlockSizeMax/2 {
+ avgSize = huff0.BlockSizeMax / 2
+ }
+ huffBuff := make([]byte, 0, avgSize)
+ // Target size
+ div := litTotal / avgSize
+ if div < 1 {
+ div = 1
+ }
+ if debug {
+ println("Huffman weights:")
+ }
+ for i, n := range remain[:] {
+ if n > 0 {
+ n = n / div
+ // Allow all entries to be represented.
+ if n == 0 {
+ n = 1
+ }
+ huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
+ if debug {
+ printf("[%d: %d], ", i, n)
+ }
+ }
+ }
+ if o.CompatV155 && remain[255]/div == 0 {
+ huffBuff = append(huffBuff, 255)
+ }
+ scratch := &huff0.Scratch{TableLog: 11}
+ for tries := 0; tries < 255; tries++ {
+ scratch = &huff0.Scratch{TableLog: 11}
+ _, _, err = huff0.Compress1X(huffBuff, scratch)
+ if err == nil {
+ break
+ }
+ if debug {
+ printf("Try %d: Huffman error: %v\n", tries+1, err)
+ }
+ huffBuff = huffBuff[:0]
+ if tries == 250 {
+ if debug {
+ println("Huffman: Bailing out with predefined table")
+ }
+
+ // Bail out.... Just generate something
+ huffBuff = append(huffBuff, bytes.Repeat([]byte{255}, 10000)...)
+ for i := 0; i < 128; i++ {
+ huffBuff = append(huffBuff, byte(i))
+ }
+ continue
+ }
+ if errors.Is(err, huff0.ErrIncompressible) {
+ // Try truncating least common.
+ for i, n := range remain[:] {
+ if n > 0 {
+ n = n / (div * (i + 1))
+ if n > 0 {
+ huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
+ }
+ }
+ }
+ if o.CompatV155 && len(huffBuff) > 0 && huffBuff[len(huffBuff)-1] != 255 {
+ huffBuff = append(huffBuff, 255)
+ }
+ if len(huffBuff) == 0 {
+ huffBuff = append(huffBuff, 0, 255)
+ }
+ }
+ if errors.Is(err, huff0.ErrUseRLE) {
+ for i, n := range remain[:] {
+ n = n / (div * (i + 1))
+ // Allow all entries to be represented.
+ if n == 0 {
+ n = 1
+ }
+ huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
+ }
+ }
+ }
+
+ var out bytes.Buffer
+ out.Write([]byte(dictMagic))
+ out.Write(binary.LittleEndian.AppendUint32(nil, o.ID))
+ out.Write(scratch.OutTable)
+ if debug {
+ println("huff table:", len(scratch.OutTable), "bytes")
+ println("of table:", len(ofTable), "bytes")
+ println("ml table:", len(mlTable), "bytes")
+ println("ll table:", len(llTable), "bytes")
+ }
+ out.Write(ofTable)
+ out.Write(mlTable)
+ out.Write(llTable)
+ out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[0])))
+ out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[1])))
+ out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[2])))
+ out.Write(hist)
+ if debug {
+ _, err := loadDict(out.Bytes())
+ if err != nil {
+ panic(err)
+ }
+ i, err := InspectDictionary(out.Bytes())
+ if err != nil {
+ panic(err)
+ }
+ println("ID:", i.ID())
+ println("Content size:", i.ContentSize())
+ println("Encoder:", i.LitEncoder() != nil)
+ println("Offsets:", i.Offsets())
+ var totalSize int
+ for _, b := range contents {
+ totalSize += len(b)
+ }
+
+ encWith := func(opts ...EOption) int {
+ enc, err := NewWriter(nil, opts...)
+ if err != nil {
+ panic(err)
+ }
+ defer enc.Close()
+ var dst []byte
+ var totalSize int
+ for _, b := range contents {
+ dst = enc.EncodeAll(b, dst[:0])
+ totalSize += len(dst)
+ }
+ return totalSize
+ }
+ plain := encWith(WithEncoderLevel(o.Level))
+ withDict := encWith(WithEncoderLevel(o.Level), WithEncoderDict(out.Bytes()))
+ println("Input size:", totalSize)
+ println("Plain Compressed:", plain)
+ println("Dict Compressed:", withDict)
+ println("Saved:", plain-withDict, (plain-withDict)/len(contents), "bytes per input (rounded down)")
+ }
+ return out.Bytes(), nil
+}
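Note: BuildDict is new public API. A hedged usage sketch (the sample corpus is made up, and Offsets uses the same {1, 4, 8} defaults the package applies to raw dictionaries):

package main

import (
	"bytes"
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	samples := [][]byte{
		[]byte(`{"user":"alice","op":"get"}`),
		[]byte(`{"user":"bob","op":"put"}`),
		[]byte(`{"user":"carol","op":"del"}`),
	}
	dict, err := zstd.BuildDict(zstd.BuildDictOptions{
		ID:       1234,
		Contents: samples,
		History:  bytes.Join(samples, nil),
		Offsets:  [3]int{1, 4, 8},
	})
	if err != nil {
		fmt.Println("build failed:", err)
		return
	}
	fmt.Println("dictionary size:", len(dict), "bytes")
	// The result can be passed to WithEncoderDict / WithDecoderDicts.
}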
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go
index 15ae8ee..7d250c6 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_base.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go
@@ -16,6 +16,7 @@
cur int32
// maximum offset. Should be at least 2x block size.
maxMatchOff int32
+ bufferReset int32
hist []byte
crc *xxhash.Digest
tmp [8]byte
@@ -56,8 +57,8 @@
}
func (e *fastBase) addBlock(src []byte) int32 {
- if debugAsserts && e.cur > bufferReset {
- panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
+ if debugAsserts && e.cur > e.bufferReset {
+ panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset))
}
// check if we have space already
if len(e.hist)+len(src) > cap(e.hist) {
@@ -115,7 +116,7 @@
panic(err)
}
if t < 0 {
- err := fmt.Sprintf("s (%d) < 0", s)
+ err := fmt.Sprintf("t (%d) < 0", t)
panic(err)
}
if s-t > e.maxMatchOff {
@@ -126,24 +127,7 @@
panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
}
}
- a := src[s:]
- b := src[t:]
- b = b[:len(a)]
- end := int32((len(a) >> 3) << 3)
- for i := int32(0); i < end; i += 8 {
- if diff := load6432(a, i) ^ load6432(b, i); diff != 0 {
- return i + int32(bits.TrailingZeros64(diff)>>3)
- }
- }
-
- a = a[end:]
- b = b[end:]
- for i := range a {
- if a[i] != b[i] {
- return int32(i) + end
- }
- }
- return int32(len(a)) + end
+ return int32(matchLen(src[s:], src[t:]))
}
// Reset the encoding table.
@@ -160,18 +144,19 @@
} else {
e.crc.Reset()
}
+ e.blk.dictLitEnc = nil
if d != nil {
low := e.lowMem
if singleBlock {
e.lowMem = true
}
- e.ensureHist(d.DictContentSize() + maxCompressedBlockSize)
+ e.ensureHist(d.ContentSize() + maxCompressedBlockSize)
e.lowMem = low
}
// We offset current position so everything will be out of reach.
// If above reset line, history will be purged.
- if e.cur < bufferReset {
+ if e.cur < e.bufferReset {
e.cur += e.maxMatchOff + int32(len(e.hist))
}
e.hist = e.hist[:0]
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go
index 96028ec..4613724 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_best.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go
@@ -34,7 +34,7 @@
est int32
}
-const highScore = 25000
+const highScore = maxMatchLen * 8
// estBits will estimate output bits from predefined tables.
func (m *match) estBits(bitsPerByte int32) {
@@ -43,7 +43,7 @@
if m.rep < 0 {
ofc = ofCode(uint32(m.s-m.offset) + 3)
} else {
- ofc = ofCode(uint32(m.rep))
+ ofc = ofCode(uint32(m.rep) & 3)
}
// Cost, excluding
ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
@@ -84,14 +84,10 @@
)
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
- for i := range e.table[:] {
- e.table[i] = prevEntry{}
- }
- for i := range e.longTable[:] {
- e.longTable[i] = prevEntry{}
- }
+ e.table = [bestShortTableSize]prevEntry{}
+ e.longTable = [bestLongTableSize]prevEntry{}
e.cur = e.maxMatchOff
break
}
@@ -139,8 +135,20 @@
break
}
+ // Add block to history
s := e.addBlock(src)
blk.size = len(src)
+
+ // Check RLE first
+ if len(src) > zstdMinMatch {
+ ml := matchLen(src[1:], src)
+ if ml == len(src)-1 {
+ blk.literals = append(blk.literals, src[0])
+ blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3})
+ return
+ }
+ }
+
if len(src) < minNonLiteralBlockSize {
blk.extraLits = len(src)
blk.literals = blk.literals[:len(src)]
@@ -163,7 +171,6 @@
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
- cv := load6432(src, s)
// Relative offsets
offset1 := int32(blk.recentOffsets[0])
@@ -177,7 +184,6 @@
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
- _ = addLiterals
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
@@ -192,54 +198,104 @@
panic("offset0 was 0")
}
- bestOf := func(a, b match) match {
- if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 {
- return a
- }
- return b
- }
- const goodEnough = 100
+ const goodEnough = 250
+
+ cv := load6432(src, s)
nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
- matchAt := func(offset int32, s int32, first uint32, rep int32) match {
- if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
- return match{s: s, est: highScore}
+ // Set m to a match at offset if it looks like that will improve compression.
+ improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
+ delta := s - offset
+ if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first {
+ return
}
- if debugAsserts {
- if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
- panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
+ // Try a quick reject if we already have a long match.
+ if m.length > 16 {
+ left := len(src) - int(m.s+m.length)
+ // If we are too close to the end, keep as is.
+ if left <= 0 {
+ return
+ }
+ checkLen := m.length - (s - m.s) - 8
+ if left > 2 && checkLen > 4 {
+ // Check 4 bytes, 4 bytes from the end of the current match.
+ a := load3232(src, offset+checkLen)
+ b := load3232(src, s+checkLen)
+ if a != b {
+ return
+ }
}
}
- m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
- m.estBits(bitsPerByte)
- return m
+ l := 4 + e.matchlen(s+4, offset+4, src)
+ if m.rep <= 0 {
+ // Extend candidate match backwards as far as possible.
+ // Do not extend repeats as we can assume they are optimal
+ // and offsets change if s == nextEmit.
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for offset > tMin && s > nextEmit && src[offset-1] == src[s-1] && l < maxMatchLength {
+ s--
+ offset--
+ l++
+ }
+ }
+ if debugAsserts {
+ if offset >= s {
+ panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff))
+ }
+ if !bytes.Equal(src[s:s+l], src[offset:offset+l]) {
+ panic(fmt.Sprintf("second match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
+ }
+ }
+ cand := match{offset: offset, s: s, length: l, rep: rep}
+ cand.estBits(bitsPerByte)
+ if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
+ *m = cand
+ }
}
- best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
- best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
- best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
+ best := match{s: s, est: highScore}
+ improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
+ improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
+ improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
+ improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1)
if canRepeat && best.length < goodEnough {
- cv32 := uint32(cv >> 8)
- spp := s + 1
- best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
- best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
- best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
- if best.length > 0 {
- cv32 = uint32(cv >> 24)
- spp += 2
- best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
- best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
- best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
+ if s == nextEmit {
+ // Check repeats straight after a match.
+ improve(&best, s-offset2, s, uint32(cv), 1|4)
+ improve(&best, s-offset3, s, uint32(cv), 2|4)
+ if offset1 > 1 {
+ improve(&best, s-(offset1-1), s, uint32(cv), 3|4)
+ }
+ }
+
+ // If either no match or a non-repeat match, check at + 1
+ if best.rep <= 0 {
+ cv32 := uint32(cv >> 8)
+ spp := s + 1
+ improve(&best, spp-offset1, spp, cv32, 1)
+ improve(&best, spp-offset2, spp, cv32, 2)
+ improve(&best, spp-offset3, spp, cv32, 3)
+ if best.rep < 0 {
+ cv32 = uint32(cv >> 24)
+ spp += 2
+ improve(&best, spp-offset1, spp, cv32, 1)
+ improve(&best, spp-offset2, spp, cv32, 2)
+ improve(&best, spp-offset3, spp, cv32, 3)
+ }
}
}
// Load next and check...
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
+ index0 := s + 1
// Look far ahead, unless we have a really long match already...
if best.length < goodEnough {
@@ -249,97 +305,89 @@
if s >= sLimit {
break encodeLoop
}
- cv = load6432(src, s)
continue
}
- s++
candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
- cv = load6432(src, s)
- cv2 := load6432(src, s+1)
+ cv = load6432(src, s+1)
+ cv2 := load6432(src, s+2)
candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
// Short at s+1
- best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
+ improve(&best, candidateS.offset-e.cur, s+1, uint32(cv), -1)
// Long at s+1, s+2
- best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
- best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
- best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
- best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
+ improve(&best, candidateL.offset-e.cur, s+1, uint32(cv), -1)
+ improve(&best, candidateL.prev-e.cur, s+1, uint32(cv), -1)
+ improve(&best, candidateL2.offset-e.cur, s+2, uint32(cv2), -1)
+ improve(&best, candidateL2.prev-e.cur, s+2, uint32(cv2), -1)
if false {
// Short at s+3.
// Too often worse...
- best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1))
+ improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+3, uint32(cv2>>8), -1)
}
- // See if we can find a better match by checking where the current best ends.
- // Use that offset to see if we can find a better full match.
- if sAt := best.s + best.length; sAt < sLimit {
- nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
- candidateEnd := e.longTable[nextHashL]
- if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
- bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
- if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
- bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
+
+ // Start check at a fixed offset to allow for a few mismatches.
+ // For this compression level 2 yields the best results.
+ // We cannot do this if we have already indexed this position.
+ const skipBeginning = 2
+ if best.s > s-skipBeginning {
+ // See if we can find a better match by checking where the current best ends.
+ // Use that offset to see if we can find a better full match.
+ if sAt := best.s + best.length; sAt < sLimit {
+ nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
+ candidateEnd := e.longTable[nextHashL]
+
+ if off := candidateEnd.offset - e.cur - best.length + skipBeginning; off >= 0 {
+ improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+ if off := candidateEnd.prev - e.cur - best.length + skipBeginning; off >= 0 {
+ improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+ }
}
- best = bestEnd
}
}
}
if debugAsserts {
+ if best.offset >= best.s {
+ panic(fmt.Sprintf("best.offset > s: %d >= %d", best.offset, best.s))
+ }
+ if best.s < nextEmit {
+ panic(fmt.Sprintf("s %d < nextEmit %d", best.s, nextEmit))
+ }
+ if best.offset < s-e.maxMatchOff {
+ panic(fmt.Sprintf("best.offset < s-e.maxMatchOff: %d < %d", best.offset, s-e.maxMatchOff))
+ }
if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
}
}
// We have a match, we can store the forward value
+ s = best.s
if best.rep > 0 {
- s = best.s
var seq seq
seq.matchLen = uint32(best.length - zstdMinMatch)
+ addLiterals(&seq, best.s)
- // We might be able to match backwards.
- // Extend as long as we can.
- start := best.s
- // We end the search early, so we don't risk 0 literals
- // and have to do special offset treatment.
- startLimit := nextEmit + 1
-
- tMin := s - e.maxMatchOff
- if tMin < 0 {
- tMin = 0
- }
- repIndex := best.offset
- for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
- repIndex--
- start--
- seq.matchLen++
- }
- addLiterals(&seq, start)
-
- // rep 0
- seq.offset = uint32(best.rep)
+ // Repeat. If bit 4 is set, this is a non-lit repeat.
+ seq.offset = uint32(best.rep & 3)
if debugSequences {
- println("repeat sequence", seq, "next s:", s)
+ println("repeat sequence", seq, "next s:", best.s, "off:", best.s-best.offset)
}
blk.sequences = append(blk.sequences, seq)
- // Index match start+1 (long) -> s - 1
- index0 := s
+ // Index old s + 1 -> s - 1
s = best.s + best.length
-
nextEmit = s
- if s >= sLimit {
- if debugEncoder {
- println("repeat ended", s, best.length)
- }
- break encodeLoop
- }
// Index skipped...
+ end := s
+ if s > sLimit+4 {
+ end = sLimit + 4
+ }
off := index0 + e.cur
- for index0 < s-1 {
+ for index0 < end {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@@ -348,19 +396,26 @@
off++
index0++
}
+
switch best.rep {
- case 2:
+ case 2, 4 | 1:
offset1, offset2 = offset2, offset1
- case 3:
+ case 3, 4 | 2:
offset1, offset2, offset3 = offset3, offset1, offset2
+ case 4 | 3:
+ offset1, offset2, offset3 = offset1-1, offset1, offset2
}
- cv = load6432(src, s)
+ if s >= sLimit {
+ if debugEncoder {
+ println("repeat ended", s, best.length)
+ }
+ break encodeLoop
+ }
continue
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
- s = best.s
t := best.offset
offset1, offset2, offset3 = s-t, offset1, offset2
@@ -372,22 +427,9 @@
panic("invalid offset")
}
- // Extend the n-byte match as long as possible.
- l := best.length
-
- // Extend backwards
- tMin := s - e.maxMatchOff
- if tMin < 0 {
- tMin = 0
- }
- for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
- s--
- t--
- l++
- }
-
// Write our sequence
var seq seq
+ l := best.length
seq.litLen = uint32(s - nextEmit)
seq.matchLen = uint32(l - zstdMinMatch)
if seq.litLen > 0 {
@@ -400,65 +442,25 @@
}
blk.sequences = append(blk.sequences, seq)
nextEmit = s
- if s >= sLimit {
- break encodeLoop
+
+ // Index old s + 1 -> s - 1 or sLimit
+ end := s
+ if s > sLimit-4 {
+ end = sLimit - 4
}
- // Index match start+1 (long) -> s - 1
- index0 := s - l + 1
- // every entry
- for index0 < s-1 {
+ off := index0 + e.cur
+ for index0 < end {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
- off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
index0++
+ off++
}
-
- cv = load6432(src, s)
- if !canRepeat {
- continue
- }
-
- // Check offset 2
- for {
- o2 := s - offset2
- if load3232(src, o2) != uint32(cv) {
- // Do regular search
- break
- }
-
- // Store this, since we have it.
- nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
- nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
-
- // We have at least 4 byte match.
- // No need to check backwards. We come straight from a match
- l := 4 + e.matchlen(s+4, o2+4, src)
-
- e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
- e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset}
- seq.matchLen = uint32(l) - zstdMinMatch
- seq.litLen = 0
-
- // Since litlen is always 0, this is offset 1.
- seq.offset = 1
- s += l
- nextEmit = s
- if debugSequences {
- println("sequence", seq, "next s:", s)
- }
- blk.sequences = append(blk.sequences, seq)
-
- // Swap offset 1 and 2.
- offset1, offset2 = offset2, offset1
- if s >= sLimit {
- // Finished
- break encodeLoop
- }
- cv = load6432(src, s)
+ if s >= sLimit {
+ break encodeLoop
}
}
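
The repeat bookkeeping above packs two facts into `best.rep`: the low two bits select which recent-offset slot matched, and bit 4 (value 4) marks a "non-lit" repeat found with no preceding literals, which is why the sequence emit uses `best.rep & 3` and the offset-rotation switch matches on `4 | 1`, `4 | 2`, and `4 | 3`. A minimal sketch of that convention (names are illustrative, not from the package):

```go
// repSlot decodes the packed repeat code used by the best encoder above.
// The low two bits pick the recent-offset slot; bit 4 (value 4) flags a
// repeat that follows zero literals, which shifts the slot numbering.
func repSlot(rep int) (slot int, noLits bool) {
	const noLitFlag = 4
	return rep & 3, rep&noLitFlag != 0
}
```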
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go
index c769f69..84a79fd 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_better.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go
@@ -62,14 +62,10 @@
)
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
- for i := range e.table[:] {
- e.table[i] = tableEntry{}
- }
- for i := range e.longTable[:] {
- e.longTable[i] = prevEntry{}
- }
+ e.table = [betterShortTableSize]tableEntry{}
+ e.longTable = [betterLongTableSize]prevEntry{}
e.cur = e.maxMatchOff
break
}
@@ -106,9 +102,20 @@
e.cur = e.maxMatchOff
break
}
-
+ // Add block to history
s := e.addBlock(src)
blk.size = len(src)
+
+ // Check RLE first
+ if len(src) > zstdMinMatch {
+ ml := matchLen(src[1:], src)
+ if ml == len(src)-1 {
+ blk.literals = append(blk.literals, src[0])
+ blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3})
+ return
+ }
+ }
+
if len(src) < minNonLiteralBlockSize {
blk.extraLits = len(src)
blk.literals = blk.literals[:len(src)]
@@ -149,7 +156,7 @@
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
- var matched int32
+ var matched, index0 int32
for {
if debugAsserts && canRepeat && offset1 == 0 {
@@ -166,14 +173,15 @@
off := s + e.cur
e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
+ index0 = s + 1
if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
// Consider history as well.
var seq seq
- lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+ length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
- seq.matchLen = uint32(lenght - zstdMinMatch)
+ seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@@ -202,12 +210,12 @@
// Index match start+1 (long) -> s - 1
index0 := s + repOff
- s += lenght + repOff
+ s += length + repOff
nextEmit = s
if s >= sLimit {
if debugEncoder {
- println("repeat ended", s, lenght)
+ println("repeat ended", s, length)
}
break encodeLoop
@@ -233,9 +241,9 @@
if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
// Consider history as well.
var seq seq
- lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
+ length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
- seq.matchLen = uint32(lenght - zstdMinMatch)
+ seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@@ -262,12 +270,11 @@
}
blk.sequences = append(blk.sequences, seq)
- index0 := s + repOff2
- s += lenght + repOff2
+ s += length + repOff2
nextEmit = s
if s >= sLimit {
if debugEncoder {
- println("repeat ended", s, lenght)
+ println("repeat ended", s, length)
}
break encodeLoop
@@ -416,15 +423,23 @@
// Try to find a better match by searching for a long match at the end of the current best match
if s+matched < sLimit {
+ // Allow some bytes at the beginning to mismatch.
+ // Sweet spot is around 3 bytes, but depends on input.
+ // The skipped bytes are tested in Extend backwards,
+ // and still picked up as part of the match if they match.
+ const skipBeginning = 3
+
nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
- cv := load3232(src, s)
+ s2 := s + skipBeginning
+ cv := load3232(src, s2)
candidateL := e.longTable[nextHashL]
- coffsetL := candidateL.offset - e.cur - matched
- if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+ coffsetL := candidateL.offset - e.cur - matched + skipBeginning
+ if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
- matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+ matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
+ s = s2
matched = matchedNext
if debugMatches {
println("long match at end-of-match")
@@ -434,12 +449,13 @@
// Check prev long...
if true {
- coffsetL = candidateL.prev - e.cur - matched
- if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+ coffsetL = candidateL.prev - e.cur - matched + skipBeginning
+ if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
- matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+ matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
+ s = s2
matched = matchedNext
if debugMatches {
println("prev long match at end-of-match")
@@ -493,15 +509,15 @@
}
// Index match start+1 (long) -> s - 1
- index0 := s - l + 1
+ off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
- off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
+ off += 2
}
cv = load6432(src, s)
@@ -578,7 +594,7 @@
)
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
@@ -667,7 +683,7 @@
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
- var matched int32
+ var matched, index0 int32
for {
if debugAsserts && canRepeat && offset1 == 0 {
@@ -686,14 +702,15 @@
e.markLongShardDirty(nextHashL)
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
e.markShortShardDirty(nextHashS)
+ index0 = s + 1
if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
// Consider history as well.
var seq seq
- lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+ length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
- seq.matchLen = uint32(lenght - zstdMinMatch)
+ seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@@ -721,13 +738,12 @@
blk.sequences = append(blk.sequences, seq)
// Index match start+1 (long) -> s - 1
- index0 := s + repOff
- s += lenght + repOff
+ s += length + repOff
nextEmit = s
if s >= sLimit {
if debugEncoder {
- println("repeat ended", s, lenght)
+ println("repeat ended", s, length)
}
break encodeLoop
@@ -756,9 +772,9 @@
if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
// Consider history as well.
var seq seq
- lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
+ length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
- seq.matchLen = uint32(lenght - zstdMinMatch)
+ seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@@ -785,12 +801,11 @@
}
blk.sequences = append(blk.sequences, seq)
- index0 := s + repOff2
- s += lenght + repOff2
+ s += length + repOff2
nextEmit = s
if s >= sLimit {
if debugEncoder {
- println("repeat ended", s, lenght)
+ println("repeat ended", s, length)
}
break encodeLoop
@@ -1019,18 +1034,18 @@
}
// Index match start+1 (long) -> s - 1
- index0 := s - l + 1
+ off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
- off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
+ off += 2
}
cv = load6432(src, s)
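
The new RLE fast path relies on a shift-by-one comparison: `matchLen(src[1:], src)` equals `len(src)-1` exactly when every byte equals its predecessor, i.e. the whole block repeats one byte, so it can be emitted as a single literal plus one repeat sequence. A standalone sketch of the same test (helper name is illustrative):

```go
// isRLE reports whether b is a run of a single byte, mirroring the
// matchLen(src[1:], src) == len(src)-1 check without the encoder helper.
func isRLE(b []byte) bool {
	for i := 1; i < len(b); i++ {
		if b[i] != b[0] {
			return false
		}
	}
	return len(b) > 1
}
```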
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
index 7ff0c64..d36be7b 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -44,14 +44,10 @@
)
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
- for i := range e.table[:] {
- e.table[i] = tableEntry{}
- }
- for i := range e.longTable[:] {
- e.longTable[i] = tableEntry{}
- }
+ e.table = [dFastShortTableSize]tableEntry{}
+ e.longTable = [dFastLongTableSize]tableEntry{}
e.cur = e.maxMatchOff
break
}
@@ -142,9 +138,9 @@
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
// Consider history as well.
var seq seq
- lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+ length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
- seq.matchLen = uint32(lenght - zstdMinMatch)
+ seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@@ -170,11 +166,11 @@
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
- s += lenght + repOff
+ s += length + repOff
nextEmit = s
if s >= sLimit {
if debugEncoder {
- println("repeat ended", s, lenght)
+ println("repeat ended", s, length)
}
break encodeLoop
@@ -388,7 +384,7 @@
)
// Protect against e.cur wraparound.
- if e.cur >= bufferReset {
+ if e.cur >= e.bufferReset {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
@@ -685,7 +681,7 @@
}
// We do not store history, so we must offset e.cur to avoid false matches for next user.
- if e.cur < bufferReset {
+ if e.cur < e.bufferReset {
e.cur += int32(len(src))
}
}
@@ -700,7 +696,7 @@
)
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
@@ -802,9 +798,9 @@
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
// Consider history as well.
var seq seq
- lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+ length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
- seq.matchLen = uint32(lenght - zstdMinMatch)
+ seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@@ -830,11 +826,11 @@
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
- s += lenght + repOff
+ s += length + repOff
nextEmit = s
if s >= sLimit {
if debugEncoder {
- println("repeat ended", s, lenght)
+ println("repeat ended", s, length)
}
break encodeLoop
@@ -1088,7 +1084,7 @@
}
}
e.lastDictID = d.id
- e.allDirty = true
+ allDirty = true
}
// Reset table to initial state
e.cur = e.maxMatchOff
@@ -1103,7 +1099,8 @@
}
if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
- copy(e.longTable[:], e.dictLongTable)
+ //copy(e.longTable[:], e.dictLongTable)
+ e.longTable = *(*[dFastLongTableSize]tableEntry)(e.dictLongTable)
for i := range e.longTableShardDirty {
e.longTableShardDirty[i] = false
}
@@ -1114,7 +1111,9 @@
continue
}
- copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
+ // copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
+ *(*[dLongTableShardSize]tableEntry)(e.longTable[i*dLongTableShardSize:]) = *(*[dLongTableShardSize]tableEntry)(e.dictLongTable[i*dLongTableShardSize:])
+
e.longTableShardDirty[i] = false
}
}
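
The commented-out `copy` calls above are replaced by slice-to-array-pointer conversions, available since Go 1.17: dereferencing `(*[N]T)(s)` copies all N elements in one assignment and panics if the slice is shorter than N. A minimal sketch with an illustrative size:

```go
// loadShard copies one fixed-size shard from src to dst using the
// conversion pattern above. Both slices must have at least shardSize
// elements, or the conversion panics.
func loadShard(dst, src []uint32) {
	const shardSize = 256 // illustrative; the real tables use their own sizes
	*(*[shardSize]uint32)(dst) = *(*[shardSize]uint32)(src)
}
```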
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
index f51ab52..f45a3da 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@@ -43,7 +43,7 @@
)
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
@@ -133,8 +133,7 @@
if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
- var length int32
- length = 4 + e.matchlen(s+6, repIndex+4, src)
+ length := 4 + e.matchlen(s+6, repIndex+4, src)
seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
@@ -304,13 +303,13 @@
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugEncoder {
- if len(src) > maxBlockSize {
+ if len(src) > maxCompressedBlockSize {
panic("src too big")
}
}
// Protect against e.cur wraparound.
- if e.cur >= bufferReset {
+ if e.cur >= e.bufferReset {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
@@ -538,7 +537,7 @@
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
// We do not store history, so we must offset e.cur to avoid false matches for next user.
- if e.cur < bufferReset {
+ if e.cur < e.bufferReset {
e.cur += int32(len(src))
}
}
@@ -555,11 +554,9 @@
return
}
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
- for i := range e.table[:] {
- e.table[i] = tableEntry{}
- }
+ e.table = [tableSize]tableEntry{}
e.cur = e.maxMatchOff
break
}
@@ -647,8 +644,7 @@
if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
- var length int32
- length = 4 + e.matchlen(s+6, repIndex+4, src)
+ length := 4 + e.matchlen(s+6, repIndex+4, src)
seq.matchLen = uint32(length - zstdMinMatch)
@@ -833,13 +829,12 @@
}
if true {
end := e.maxMatchOff + int32(len(d.content)) - 8
- for i := e.maxMatchOff; i < end; i += 3 {
+ for i := e.maxMatchOff; i < end; i += 2 {
const hashLog = tableBits
cv := load6432(d.content, i-e.maxMatchOff)
- nextHash := hashLen(cv, hashLog, tableFastHashLen) // 0 -> 5
- nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 6
- nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7
+ nextHash := hashLen(cv, hashLog, tableFastHashLen) // 0 -> 6
+ nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 7
e.dictTable[nextHash] = tableEntry{
val: uint32(cv),
offset: i,
@@ -848,10 +843,6 @@
val: uint32(cv >> 8),
offset: i + 1,
}
- e.dictTable[nextHash2] = tableEntry{
- val: uint32(cv >> 16),
- offset: i + 2,
- }
}
}
e.lastDictID = d.id
@@ -871,7 +862,8 @@
const shardCnt = tableShardCnt
const shardSize = tableShardSize
if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
- copy(e.table[:], e.dictTable)
+ //copy(e.table[:], e.dictTable)
+ e.table = *(*[tableSize]tableEntry)(e.dictTable)
for i := range e.tableShardDirty {
e.tableShardDirty[i] = false
}
@@ -883,7 +875,8 @@
continue
}
- copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
+ //copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
+ *(*[shardSize]tableEntry)(e.table[i*shardSize:]) = *(*[shardSize]tableEntry)(e.dictTable[i*shardSize:])
e.tableShardDirty[i] = false
}
e.allDirty = false
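
The table resets above, such as `e.table = [tableSize]tableEntry{}`, replace per-element clearing loops; assigning an array's zero value lets the compiler emit a single bulk clear. The pattern in isolation, with an illustrative element type and size:

```go
var table [1 << 10]uint64 // illustrative size

// resetTable zeroes the whole table in one assignment instead of a loop.
func resetTable() {
	table = [1 << 10]uint64{}
}
```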
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go
index 7aaaedb..8f8223c 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@@ -6,8 +6,10 @@
import (
"crypto/rand"
+ "errors"
"fmt"
"io"
+ "math"
rdebug "runtime/debug"
"sync"
@@ -148,6 +150,9 @@
// and write CRC if requested.
func (e *Encoder) Write(p []byte) (n int, err error) {
s := &e.state
+ if s.eofWritten {
+ return 0, ErrEncoderClosed
+ }
for len(p) > 0 {
if len(p)+len(s.filling) < e.o.blockSize {
if e.o.crc {
@@ -201,7 +206,7 @@
return nil
}
if final && len(s.filling) > 0 {
- s.current = e.EncodeAll(s.filling, s.current[:0])
+ s.current = e.encodeAll(s.encoder, s.filling, s.current[:0])
var n2 int
n2, s.err = s.w.Write(s.current)
if s.err != nil {
@@ -226,10 +231,7 @@
DictID: e.o.dict.ID(),
}
- dst, err := fh.appendTo(tmp[:0])
- if err != nil {
- return err
- }
+ dst := fh.appendTo(tmp[:0])
s.headerWritten = true
s.wWg.Wait()
var n2 int
@@ -276,23 +278,9 @@
s.eofWritten = true
}
- err := errIncompressible
- // If we got the exact same number of literals as input,
- // assume the literals cannot be compressed.
- if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
- err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
- }
- switch err {
- case errIncompressible:
- if debugEncoder {
- println("Storing incompressible block as raw")
- }
- blk.encodeRaw(src)
- // In fast mode, we do not transfer offsets, so we don't have to deal with changing the.
- case nil:
- default:
- s.err = err
- return err
+ s.err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
+ if s.err != nil {
+ return s.err
}
_, s.err = s.w.Write(blk.output)
s.nWritten += int64(len(blk.output))
@@ -304,6 +292,9 @@
s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current
s.nInput += int64(len(s.current))
s.wg.Add(1)
+ if final {
+ s.eofWritten = true
+ }
go func(src []byte) {
if debugEncoder {
println("Adding block,", len(src), "bytes, final:", final)
@@ -319,9 +310,6 @@
blk := enc.Block()
enc.Encode(blk, src)
blk.last = final
- if final {
- s.eofWritten = true
- }
// Wait for pending writes.
s.wWg.Wait()
if s.writeErr != nil {
@@ -342,22 +330,8 @@
}
s.wWg.Done()
}()
- err := errIncompressible
- // If we got the exact same number of literals as input,
- // assume the literals cannot be compressed.
- if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
- err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
- }
- switch err {
- case errIncompressible:
- if debugEncoder {
- println("Storing incompressible block as raw")
- }
- blk.encodeRaw(src)
- // In fast mode, we do not transfer offsets, so we don't have to deal with changing the.
- case nil:
- default:
- s.writeErr = err
+ s.writeErr = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
+ if s.writeErr != nil {
return
}
_, s.writeErr = s.w.Write(blk.output)
@@ -431,12 +405,20 @@
if len(s.filling) > 0 {
err := e.nextBlock(false)
if err != nil {
+ // Ignore Flush after Close.
+ if errors.Is(s.err, ErrEncoderClosed) {
+ return nil
+ }
return err
}
}
s.wg.Wait()
s.wWg.Wait()
if s.err != nil {
+ // Ignore Flush after Close.
+ if errors.Is(s.err, ErrEncoderClosed) {
+ return nil
+ }
return s.err
}
return s.writeErr
@@ -452,6 +434,9 @@
}
err := e.nextBlock(true)
if err != nil {
+ if errors.Is(s.err, ErrEncoderClosed) {
+ return nil
+ }
return err
}
if s.frameContentSize > 0 {
@@ -489,6 +474,11 @@
}
_, s.err = s.w.Write(frame)
}
+ if s.err == nil {
+ s.err = ErrEncoderClosed
+ return nil
+ }
+
return s.err
}
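
Taken together, the `ErrEncoderClosed` changes above give the stream encoder explicit lifecycle errors: the first `Close` finishes the frame and returns nil, later `Write` and `Close` calls return `ErrEncoderClosed`, and `Flush` after `Close` is treated as a no-op. A hedged usage sketch of that behavior as implemented here:

```go
package main

import (
	"bytes"
	"errors"

	"github.com/klauspost/compress/zstd"
)

func main() {
	var buf bytes.Buffer
	enc, _ := zstd.NewWriter(&buf)
	enc.Write([]byte("payload")) // ok
	enc.Close()                  // nil: frame finished, encoder now closed
	enc.Flush()                  // nil: ignored after Close

	_, err := enc.Write([]byte("more"))
	_ = errors.Is(err, zstd.ErrEncoderClosed) // true
	err = enc.Close()
	_ = errors.Is(err, zstd.ErrEncoderClosed) // true
}
```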
@@ -499,6 +489,15 @@
// Data compressed with EncodeAll can be decoded with the Decoder,
// using either a stream or DecodeAll.
func (e *Encoder) EncodeAll(src, dst []byte) []byte {
+ e.init.Do(e.initialize)
+ enc := <-e.encoders
+ defer func() {
+ e.encoders <- enc
+ }()
+ return e.encodeAll(enc, src, dst)
+}
+
+func (e *Encoder) encodeAll(enc encoder, src, dst []byte) []byte {
if len(src) == 0 {
if e.o.fullZero {
// Add frame header.
@@ -510,7 +509,7 @@
Checksum: false,
DictID: 0,
}
- dst, _ = fh.appendTo(dst)
+ dst = fh.appendTo(dst)
// Write raw block as last one only.
var blk blockHeader
@@ -521,13 +520,7 @@
}
return dst
}
- e.init.Do(e.initialize)
- enc := <-e.encoders
- defer func() {
- // Release encoder reference to last block.
- // If a non-single block is needed the encoder will reset again.
- e.encoders <- enc
- }()
+
// Use single segments when above minimum window and below window size.
single := len(src) <= e.o.windowSize && len(src) > MinWindowSize
if e.o.single != nil {
@@ -545,10 +538,7 @@
if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem {
dst = make([]byte, 0, len(src))
}
- dst, err := fh.appendTo(dst)
- if err != nil {
- panic(err)
- }
+ dst = fh.appendTo(dst)
// If we can do everything in one block, prefer that.
if len(src) <= e.o.blockSize {
@@ -567,25 +557,15 @@
// If we got the exact same number of literals as input,
// assume the literals cannot be compressed.
- err := errIncompressible
oldout := blk.output
- if len(blk.literals) != len(src) || len(src) != e.o.blockSize {
- // Output directly to dst
- blk.output = dst
- err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
- }
+ // Output directly to dst
+ blk.output = dst
- switch err {
- case errIncompressible:
- if debugEncoder {
- println("Storing incompressible block as raw")
- }
- dst = blk.encodeRawTo(dst, src)
- case nil:
- dst = blk.output
- default:
+ err := blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
+ if err != nil {
panic(err)
}
+ dst = blk.output
blk.output = oldout
} else {
enc.Reset(e.o.dict, false)
@@ -604,25 +584,11 @@
if len(src) == 0 {
blk.last = true
}
- err := errIncompressible
- // If we got the exact same number of literals as input,
- // assume the literals cannot be compressed.
- if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize {
- err = blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
- }
-
- switch err {
- case errIncompressible:
- if debugEncoder {
- println("Storing incompressible block as raw")
- }
- dst = blk.encodeRawTo(dst, todo)
- blk.popOffsets()
- case nil:
- dst = append(dst, blk.output...)
- default:
+ err := blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
+ if err != nil {
panic(err)
}
+ dst = append(dst, blk.output...)
blk.reset(nil)
}
}
@@ -632,6 +598,7 @@
// Add padding with content from crypto/rand.Reader
if e.o.pad > 0 {
add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad))
+ var err error
dst, err = skippableFrame(dst, add, rand.Reader)
if err != nil {
panic(err)
@@ -639,3 +606,37 @@
}
return dst
}
+
+// MaxEncodedSize returns the expected maximum
+// size of an encoded block or stream.
+func (e *Encoder) MaxEncodedSize(size int) int {
+ frameHeader := 4 + 2 // magic + frame header & window descriptor
+ if e.o.dict != nil {
+ frameHeader += 4
+ }
+ // Frame content size:
+ if size < 256 {
+ frameHeader++
+ } else if size < 65536+256 {
+ frameHeader += 2
+ } else if size < math.MaxInt32 {
+ frameHeader += 4
+ } else {
+ frameHeader += 8
+ }
+ // Final crc
+ if e.o.crc {
+ frameHeader += 4
+ }
+
+ // Max overhead is 3 bytes/block.
+ // There cannot be 0 blocks.
+ blocks := (size + e.o.blockSize) / e.o.blockSize
+
+ // Combine, add padding.
+ maxSz := frameHeader + 3*blocks + size
+ if e.o.pad > 1 {
+ maxSz += calcSkippableFrame(int64(maxSz), int64(e.o.pad))
+ }
+ return maxSz
+}
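
`MaxEncodedSize` gives callers a worst-case bound, so a destination buffer can be allocated once and reused with `EncodeAll`. A small usage sketch:

```go
package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	enc, err := zstd.NewWriter(nil)
	if err != nil {
		panic(err)
	}
	defer enc.Close()

	src := []byte("some payload")
	// Worst-case frame size: headers, per-block overhead, optional CRC, padding.
	dst := make([]byte, 0, enc.MaxEncodedSize(len(src)))
	dst = enc.EncodeAll(src, dst) // no reallocation needed
	fmt.Println(len(dst), "<=", cap(dst))
}
```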
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
index a7c5e1a..20671dc 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
@@ -3,6 +3,8 @@
import (
"errors"
"fmt"
+ "math"
+ "math/bits"
"runtime"
"strings"
)
@@ -37,7 +39,7 @@
blockSize: maxCompressedBlockSize,
windowSize: 8 << 20,
level: SpeedDefault,
- allLitEntropy: true,
+ allLitEntropy: false,
lowMem: false,
}
}
@@ -47,22 +49,22 @@
switch o.level {
case SpeedFastest:
if o.dict != nil {
- return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
}
- return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+ return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
case SpeedDefault:
if o.dict != nil {
- return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
+ return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}}
}
- return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
case SpeedBetterCompression:
if o.dict != nil {
- return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
}
- return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+ return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
case SpeedBestCompression:
- return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+ return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
}
panic("unknown compression level")
}
@@ -92,7 +94,7 @@
// The value must be a power of two between MinWindowSize and MaxWindowSize.
// A larger value will enable better compression but allocate more memory and,
// for above-default values, take considerably longer.
-// The default value is determined by the compression level.
+// The default value is determined by the compression level, capped at 8MB.
func WithWindowSize(n int) EOption {
return func(o *encoderOptions) error {
switch {
@@ -127,7 +129,7 @@
}
// No need to waste our time.
if n == 1 {
- o.pad = 0
+ n = 0
}
if n > 1<<30 {
return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ")
@@ -230,13 +232,13 @@
case SpeedDefault:
o.windowSize = 8 << 20
case SpeedBetterCompression:
- o.windowSize = 16 << 20
+ o.windowSize = 8 << 20
case SpeedBestCompression:
- o.windowSize = 32 << 20
+ o.windowSize = 8 << 20
}
}
if !o.customALEntropy {
- o.allLitEntropy = l > SpeedFastest
+ o.allLitEntropy = l > SpeedDefault
}
return nil
@@ -304,7 +306,13 @@
}
// WithEncoderDict allows registering a dictionary that will be used for the encoding.
+//
+// The slice dict must be in the [dictionary format] produced by
+// "zstd --train" from the Zstandard reference implementation.
+//
// The encoder *may* choose to use no dictionary instead for certain payloads.
+//
+// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
func WithEncoderDict(dict []byte) EOption {
return func(o *encoderOptions) error {
d, err := loadDict(dict)
@@ -315,3 +323,17 @@
return nil
}
}
+
+// WithEncoderDictRaw registers a dictionary that may be used by the encoder.
+//
+// The slice content may contain arbitrary data. It will be used as an initial
+// history.
+func WithEncoderDictRaw(id uint32, content []byte) EOption {
+ return func(o *encoderOptions) error {
+ if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
+ return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
+ }
+ o.dict = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}
+ return nil
+ }
+}
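
`WithEncoderDictRaw` accepts arbitrary bytes as initial history rather than a trained dictionary, so a shared prefix between payloads can seed compression. A hedged sketch (the id and history values are illustrative; the decoder must be configured with the same id and content, via the matching decoder option if this version provides one):

```go
package main

import "github.com/klauspost/compress/zstd"

// compressWithHistory seeds the encoder with raw history bytes.
func compressWithHistory(history, src []byte) ([]byte, error) {
	enc, err := zstd.NewWriter(nil, zstd.WithEncoderDictRaw(1, history))
	if err != nil {
		return nil, err
	}
	defer enc.Close()
	return enc.EncodeAll(src, nil), nil
}
```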
diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go
index 9568a4b..e47af66 100644
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@@ -5,7 +5,7 @@
package zstd
import (
- "bytes"
+ "encoding/binary"
"encoding/hex"
"errors"
"io"
@@ -29,7 +29,7 @@
FrameContentSize uint64
- DictionaryID *uint32
+ DictionaryID uint32
HasCheckSum bool
SingleSegment bool
}
@@ -43,9 +43,9 @@
MaxWindowSize = 1 << 29
)
-var (
- frameMagic = []byte{0x28, 0xb5, 0x2f, 0xfd}
- skippableFrameMagic = []byte{0x2a, 0x4d, 0x18}
+const (
+ frameMagic = "\x28\xb5\x2f\xfd"
+ skippableFrameMagic = "\x2a\x4d\x18"
)
func newFrameDec(o decoderOptions) *frameDec {
@@ -73,25 +73,25 @@
switch err {
case io.EOF, io.ErrUnexpectedEOF:
return io.EOF
- default:
- return err
case nil:
signature[0] = b[0]
+ default:
+ return err
}
// Read the rest, don't allow io.ErrUnexpectedEOF
b, err = br.readSmall(3)
switch err {
case io.EOF:
return io.EOF
- default:
- return err
case nil:
copy(signature[1:], b)
+ default:
+ return err
}
- if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 {
+ if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 {
if debugDecoder {
- println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic))
+ println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString([]byte(skippableFrameMagic)))
}
// Break if not skippable frame.
break
@@ -114,9 +114,9 @@
return err
}
}
- if !bytes.Equal(signature[:], frameMagic) {
+ if string(signature[:]) != frameMagic {
if debugDecoder {
- println("Got magic numbers: ", signature, "want:", frameMagic)
+ println("Got magic numbers: ", signature, "want:", []byte(frameMagic))
}
return ErrMagicMismatch
}
@@ -146,7 +146,9 @@
}
return err
}
- printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3)
+ if debugDecoder {
+ printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3)
+ }
windowLog := 10 + (wd >> 3)
windowBase := uint64(1) << windowLog
windowAdd := (windowBase / 8) * uint64(wd&0x7)
@@ -155,7 +157,7 @@
// Read Dictionary_ID
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
- d.DictionaryID = nil
+ d.DictionaryID = 0
if size := fhd & 3; size != 0 {
if size == 3 {
size = 4
@@ -167,7 +169,7 @@
return err
}
var id uint32
- switch size {
+ switch len(b) {
case 1:
id = uint32(b[0])
case 2:
@@ -178,11 +180,7 @@
if debugDecoder {
println("Dict size", size, "ID:", id)
}
- if id > 0 {
- // ID 0 means "sorry, no dictionary anyway".
- // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
- d.DictionaryID = &id
- }
+ d.DictionaryID = id
}
// Read Frame_Content_Size
@@ -204,7 +202,7 @@
println("Reading Frame content", err)
return err
}
- switch fcsSize {
+ switch len(b) {
case 1:
d.FrameContentSize = uint64(b[0])
case 2:
@@ -261,11 +259,16 @@
}
d.history.windowSize = int(d.WindowSize)
if !d.o.lowMem || d.history.windowSize < maxBlockSize {
- // Alloc 2x window size if not low-mem, or very small window size.
+ // Alloc 2x window size if not low-mem, or window size below 2MB.
d.history.allocFrameBuffer = d.history.windowSize * 2
} else {
- // Alloc with one additional block
- d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
+ if d.o.lowMem {
+ // Alloc with 1MB extra.
+ d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize/2
+ } else {
+ // Alloc with 2MB extra.
+ d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
+ }
}
if debugDecoder {
@@ -292,58 +295,41 @@
return nil
}
-// checkCRC will check the checksum if the frame has one.
+// checkCRC will check the checksum, assuming the frame has one.
// Will return ErrCRCMismatch if crc check failed, otherwise nil.
func (d *frameDec) checkCRC() error {
- if !d.HasCheckSum {
- return nil
- }
-
// We can overwrite upper tmp now
- want, err := d.rawInput.readSmall(4)
+ buf, err := d.rawInput.readSmall(4)
if err != nil {
println("CRC missing?", err)
return err
}
- if d.o.ignoreChecksum {
- return nil
- }
+ want := binary.LittleEndian.Uint32(buf[:4])
+ got := uint32(d.crc.Sum64())
- var tmp [4]byte
- got := d.crc.Sum64()
- // Flip to match file order.
- tmp[0] = byte(got >> 0)
- tmp[1] = byte(got >> 8)
- tmp[2] = byte(got >> 16)
- tmp[3] = byte(got >> 24)
-
- if !bytes.Equal(tmp[:], want) {
+ if got != want {
if debugDecoder {
- println("CRC Check Failed:", tmp[:], "!=", want)
+ printf("CRC check failed: got %08x, want %08x\n", got, want)
}
return ErrCRCMismatch
}
if debugDecoder {
- println("CRC ok", tmp[:])
+ printf("CRC ok %08x\n", got)
}
return nil
}
-// consumeCRC reads the checksum data if the frame has one.
+// consumeCRC skips over the checksum, assuming the frame has one.
func (d *frameDec) consumeCRC() error {
- if d.HasCheckSum {
- _, err := d.rawInput.readSmall(4)
- if err != nil {
- println("CRC missing?", err)
- return err
- }
+ _, err := d.rawInput.readSmall(4)
+ if err != nil {
+ println("CRC missing?", err)
}
-
- return nil
+ return err
}
-// runDecoder will create a sync decoder that will decode a block of data.
+// runDecoder will run the decoder for the remainder of the frame.
func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
saved := d.history.b
@@ -353,12 +339,23 @@
// Store input length, so we only check new data.
crcStart := len(dst)
d.history.decoders.maxSyncLen = 0
+ if d.o.limitToCap {
+ d.history.decoders.maxSyncLen = uint64(cap(dst) - len(dst))
+ }
if d.FrameContentSize != fcsUnknown {
- d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
+ if !d.o.limitToCap || d.FrameContentSize+uint64(len(dst)) < d.history.decoders.maxSyncLen {
+ d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
+ }
if d.history.decoders.maxSyncLen > d.o.maxDecodedSize {
+ if debugDecoder {
+ println("maxSyncLen:", d.history.decoders.maxSyncLen, "> maxDecodedSize:", d.o.maxDecodedSize)
+ }
return dst, ErrDecoderSizeExceeded
}
- if uint64(cap(dst)) < d.history.decoders.maxSyncLen {
+ if debugDecoder {
+ println("maxSyncLen:", d.history.decoders.maxSyncLen)
+ }
+ if !d.o.limitToCap && uint64(cap(dst)) < d.history.decoders.maxSyncLen {
// Alloc for output
dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc)
copy(dst2, dst)
@@ -378,7 +375,13 @@
if err != nil {
break
}
- if uint64(len(d.history.b)) > d.o.maxDecodedSize {
+ if uint64(len(d.history.b)-crcStart) > d.o.maxDecodedSize {
+ println("runDecoder: maxDecodedSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.o.maxDecodedSize)
+ err = ErrDecoderSizeExceeded
+ break
+ }
+ if d.o.limitToCap && len(d.history.b) > cap(dst) {
+ println("runDecoder: cap exceeded", uint64(len(d.history.b)), ">", cap(dst))
err = ErrDecoderSizeExceeded
break
}
@@ -402,15 +405,8 @@
if d.o.ignoreChecksum {
err = d.consumeCRC()
} else {
- var n int
- n, err = d.crc.Write(dst[crcStart:])
- if err == nil {
- if n != len(dst)-crcStart {
- err = io.ErrShortWrite
- } else {
- err = d.checkCRC()
- }
- }
+ d.crc.Write(dst[crcStart:])
+ err = d.checkCRC()
}
}
}
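
Switching `frameMagic` from a `[]byte` var to a string constant lets the signature check compare with `string(signature[:]) != frameMagic`; the compiler recognizes this pattern and performs the comparison without allocating. The pattern in isolation:

```go
const frameMagic = "\x28\xb5\x2f\xfd" // as defined above

// isZstdFrame reports whether b starts with the zstd frame magic.
// A []byte-to-string conversion used only in a comparison does not allocate.
func isZstdFrame(b []byte) bool {
	return len(b) >= 4 && string(b[:4]) == frameMagic
}
```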
diff --git a/vendor/github.com/klauspost/compress/zstd/frameenc.go b/vendor/github.com/klauspost/compress/zstd/frameenc.go
index 4ef7f5a..667ca06 100644
--- a/vendor/github.com/klauspost/compress/zstd/frameenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/frameenc.go
@@ -22,7 +22,7 @@
const maxHeaderSize = 14
-func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
+func (f frameHeader) appendTo(dst []byte) []byte {
dst = append(dst, frameMagic...)
var fhd uint8
if f.Checksum {
@@ -76,7 +76,7 @@
if f.SingleSegment {
dst = append(dst, uint8(f.ContentSize))
}
- // Unless SingleSegment is set, framessizes < 256 are nto stored.
+ // Unless SingleSegment is set, frame sizes < 256 are not stored.
case 1:
f.ContentSize -= 256
dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8))
@@ -88,7 +88,7 @@
default:
panic("invalid fcs")
}
- return dst, nil
+ return dst
}
const skippableFrameHeader = 4 + 4
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go
index c881d28..d04a829 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go
@@ -21,7 +21,8 @@
// buildDtable_asm is an x86 assembly implementation of fseDecoder.buildDtable.
// Function returns non-zero exit code on error.
-// go:noescape
+//
+//go:noescape
func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
// please keep in sync with _generate/gen_fse.go
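
The directive fix above matters because Go compiler directives are only recognized with no space after `//`; `// go:noescape` is an ordinary comment and is silently ignored. A compilable illustration using the `//go:noinline` directive:

```go
package main

// go:noinline   <- ignored: the space makes this a plain comment
//go:noinline
func add(a, b int) int { return a + b }

func main() { _ = add(1, 2) }
```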
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s
index da32b44..bcde398 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s
@@ -1,7 +1,6 @@
// Code generated by command: go run gen_fse.go -out ../fse_decoder_amd64.s -pkg=zstd. DO NOT EDIT.
//go:build !appengine && !noasm && gc && !noasm
-// +build !appengine,!noasm,gc,!noasm
// func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
TEXT ·buildDtable_asm(SB), $0-24
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go
index 332e51f..8adfebb 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go
@@ -20,10 +20,9 @@
if v == -1 {
s.dt[highThreshold].setAddBits(uint8(i))
highThreshold--
- symbolNext[i] = 1
- } else {
- symbolNext[i] = uint16(v)
+ v = 1
}
+ symbolNext[i] = uint16(v)
}
}
@@ -35,10 +34,12 @@
for ss, v := range s.norm[:s.symbolLen] {
for i := 0; i < int(v); i++ {
s.dt[position].setAddBits(uint8(ss))
- position = (position + step) & tableMask
- for position > highThreshold {
+ for {
// lowprob area
position = (position + step) & tableMask
+ if position <= highThreshold {
+ break
+ }
}
}
}
diff --git a/vendor/github.com/klauspost/compress/zstd/history.go b/vendor/github.com/klauspost/compress/zstd/history.go
index 28b4015..0916485 100644
--- a/vendor/github.com/klauspost/compress/zstd/history.go
+++ b/vendor/github.com/klauspost/compress/zstd/history.go
@@ -37,26 +37,23 @@
h.ignoreBuffer = 0
h.error = false
h.recentOffsets = [3]int{1, 4, 8}
- if f := h.decoders.litLengths.fse; f != nil && !f.preDefined {
- fseDecoderPool.Put(f)
- }
- if f := h.decoders.offsets.fse; f != nil && !f.preDefined {
- fseDecoderPool.Put(f)
- }
- if f := h.decoders.matchLengths.fse; f != nil && !f.preDefined {
- fseDecoderPool.Put(f)
- }
+ h.decoders.freeDecoders()
h.decoders = sequenceDecs{br: h.decoders.br}
- if h.huffTree != nil {
- if h.dict == nil || h.dict.litEnc != h.huffTree {
- huffDecoderPool.Put(h.huffTree)
- }
- }
+ h.freeHuffDecoder()
h.huffTree = nil
h.dict = nil
//printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b))
}
+func (h *history) freeHuffDecoder() {
+ if h.huffTree != nil {
+ if h.dict == nil || h.dict.litEnc != h.huffTree {
+ huffDecoderPool.Put(h.huffTree)
+ h.huffTree = nil
+ }
+ }
+}
+
func (h *history) setDict(dict *dict) {
if dict == nil {
return
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
index 69aa3bb..777290d 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
@@ -2,12 +2,7 @@
VENDORED: Go to [github.com/cespare/xxhash](https://github.com/cespare/xxhash) for original package.
-
-[](https://godoc.org/github.com/cespare/xxhash)
-[](https://travis-ci.org/cespare/xxhash)
-
-xxhash is a Go implementation of the 64-bit
-[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a
+xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a
high-quality hashing algorithm that is much faster than anything in the Go
standard library.
@@ -28,31 +23,49 @@
func (*Digest) Sum64() uint64
```
-This implementation provides a fast pure-Go implementation and an even faster
-assembly implementation for amd64.
+The package is written with optimized pure Go and also contains even faster
+assembly implementations for amd64 and arm64. If desired, the `purego` build tag
+opts into using the Go code even on those architectures.
+
+[xxHash]: http://cyan4973.github.io/xxHash/
+
+## Compatibility
+
+This package is in a module and the latest code is in version 2 of the module.
+You need a version of Go with at least "minimal module compatibility" to use
+github.com/cespare/xxhash/v2:
+
+* 1.9.7+ for Go 1.9
+* 1.10.3+ for Go 1.10
+* Go 1.11 or later
+
+I recommend using the latest release of Go.
## Benchmarks
Here are some quick benchmarks comparing the pure-Go and assembly
implementations of Sum64.
-| input size | purego | asm |
-| --- | --- | --- |
-| 5 B | 979.66 MB/s | 1291.17 MB/s |
-| 100 B | 7475.26 MB/s | 7973.40 MB/s |
-| 4 KB | 17573.46 MB/s | 17602.65 MB/s |
-| 10 MB | 17131.46 MB/s | 17142.16 MB/s |
+| input size | purego | asm |
+| ---------- | --------- | --------- |
+| 4 B | 1.3 GB/s | 1.2 GB/s |
+| 16 B | 2.9 GB/s | 3.5 GB/s |
+| 100 B | 6.9 GB/s | 8.1 GB/s |
+| 4 KB | 11.7 GB/s | 16.7 GB/s |
+| 10 MB | 12.0 GB/s | 17.3 GB/s |
-These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using
-the following commands under Go 1.11.2:
+These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C
+CPU using the following commands under Go 1.19.2:
```
-$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes'
-$ go test -benchtime 10s -bench '/xxhash,direct,bytes'
+benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$')
+benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$')
```
## Projects using this package
- [InfluxDB](https://github.com/influxdata/influxdb)
- [Prometheus](https://github.com/prometheus/prometheus)
+- [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
- [FreeCache](https://github.com/coocood/freecache)
+- [FastCache](https://github.com/VictoriaMetrics/fastcache)
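
For reference, the public API listed in this README is used as below. The vendored copy here is internal to the zstd package, so the sketch imports the upstream module path:

```go
package main

import (
	"fmt"

	"github.com/cespare/xxhash/v2"
)

func main() {
	// One-shot hashing.
	fmt.Printf("%016x\n", xxhash.Sum64([]byte("hello")))

	// Streaming via the Digest type from the API listing above.
	d := xxhash.New()
	d.Write([]byte("hel"))
	d.Write([]byte("lo"))
	fmt.Printf("%016x\n", d.Sum64())
}
```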
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
index 2c112a0..fc40c82 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
@@ -18,19 +18,11 @@
prime5 uint64 = 2870177450012600261
)
-// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
-// possible in the Go code is worth a small (but measurable) performance boost
-// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
-// convenience in the Go code in a few places where we need to intentionally
-// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
-// result overflows a uint64).
-var (
- prime1v = prime1
- prime2v = prime2
- prime3v = prime3
- prime4v = prime4
- prime5v = prime5
-)
+// Store the primes in an array as well.
+//
+// The consts are used when possible in Go code to avoid MOVs but we need a
+// contiguous array for the assembly code.
+var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}
// Digest implements hash.Hash64.
type Digest struct {
@@ -52,10 +44,10 @@
// Reset clears the Digest's state so that it can be reused.
func (d *Digest) Reset() {
- d.v1 = prime1v + prime2
+ d.v1 = primes[0] + prime2
d.v2 = prime2
d.v3 = 0
- d.v4 = -prime1v
+ d.v4 = -primes[0]
d.total = 0
d.n = 0
}
@@ -71,21 +63,23 @@
n = len(b)
d.total += uint64(n)
+ memleft := d.mem[d.n&(len(d.mem)-1):]
+
if d.n+n < 32 {
// This new data doesn't even fill the current block.
- copy(d.mem[d.n:], b)
+ copy(memleft, b)
d.n += n
return
}
if d.n > 0 {
// Finish off the partial block.
- copy(d.mem[d.n:], b)
+ c := copy(memleft, b)
d.v1 = round(d.v1, u64(d.mem[0:8]))
d.v2 = round(d.v2, u64(d.mem[8:16]))
d.v3 = round(d.v3, u64(d.mem[16:24]))
d.v4 = round(d.v4, u64(d.mem[24:32]))
- b = b[32-d.n:]
+ b = b[c:]
d.n = 0
}
@@ -135,21 +129,20 @@
h += d.total
- i, end := 0, d.n
- for ; i+8 <= end; i += 8 {
- k1 := round(0, u64(d.mem[i:i+8]))
+ b := d.mem[:d.n&(len(d.mem)-1)]
+ for ; len(b) >= 8; b = b[8:] {
+ k1 := round(0, u64(b[:8]))
h ^= k1
h = rol27(h)*prime1 + prime4
}
- if i+4 <= end {
- h ^= uint64(u32(d.mem[i:i+4])) * prime1
+ if len(b) >= 4 {
+ h ^= uint64(u32(b[:4])) * prime1
h = rol23(h)*prime2 + prime3
- i += 4
+ b = b[4:]
}
- for i < end {
- h ^= uint64(d.mem[i]) * prime5
+ for ; len(b) > 0; b = b[1:] {
+ h ^= uint64(b[0]) * prime5
h = rol11(h) * prime1
- i++
}
h ^= h >> 33
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
index cea1785..ddb63aa 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
@@ -1,3 +1,4 @@
+//go:build !appengine && gc && !purego && !noasm
// +build !appengine
// +build gc
// +build !purego
@@ -5,212 +6,205 @@
#include "textflag.h"
-// Register allocation:
-// AX h
-// SI pointer to advance through b
-// DX n
-// BX loop end
-// R8 v1, k1
-// R9 v2
-// R10 v3
-// R11 v4
-// R12 tmp
-// R13 prime1v
-// R14 prime2v
-// DI prime4v
+// Registers:
+#define h AX
+#define d AX
+#define p SI // pointer to advance through b
+#define n DX
+#define end BX // loop end
+#define v1 R8
+#define v2 R9
+#define v3 R10
+#define v4 R11
+#define x R12
+#define prime1 R13
+#define prime2 R14
+#define prime4 DI
-// round reads from and advances the buffer pointer in SI.
-// It assumes that R13 has prime1v and R14 has prime2v.
-#define round(r) \
- MOVQ (SI), R12 \
- ADDQ $8, SI \
- IMULQ R14, R12 \
- ADDQ R12, r \
- ROLQ $31, r \
- IMULQ R13, r
+#define round(acc, x) \
+ IMULQ prime2, x \
+ ADDQ x, acc \
+ ROLQ $31, acc \
+ IMULQ prime1, acc
-// mergeRound applies a merge round on the two registers acc and val.
-// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v.
-#define mergeRound(acc, val) \
- IMULQ R14, val \
- ROLQ $31, val \
- IMULQ R13, val \
- XORQ val, acc \
- IMULQ R13, acc \
- ADDQ DI, acc
+// round0 performs the operation x = round(0, x).
+#define round0(x) \
+ IMULQ prime2, x \
+ ROLQ $31, x \
+ IMULQ prime1, x
+
+// mergeRound applies a merge round on the two registers acc and x.
+// It assumes that prime1, prime2, and prime4 have been loaded.
+#define mergeRound(acc, x) \
+ round0(x) \
+ XORQ x, acc \
+ IMULQ prime1, acc \
+ ADDQ prime4, acc
+
+// blockLoop processes as many 32-byte blocks as possible,
+// updating v1, v2, v3, and v4. It assumes that there is at least one block
+// to process.
+#define blockLoop() \
+loop: \
+ MOVQ +0(p), x \
+ round(v1, x) \
+ MOVQ +8(p), x \
+ round(v2, x) \
+ MOVQ +16(p), x \
+ round(v3, x) \
+ MOVQ +24(p), x \
+ round(v4, x) \
+ ADDQ $32, p \
+ CMPQ p, end \
+ JLE loop
// func Sum64(b []byte) uint64
-TEXT ·Sum64(SB), NOSPLIT, $0-32
+TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
// Load fixed primes.
- MOVQ ·prime1v(SB), R13
- MOVQ ·prime2v(SB), R14
- MOVQ ·prime4v(SB), DI
+ MOVQ ·primes+0(SB), prime1
+ MOVQ ·primes+8(SB), prime2
+ MOVQ ·primes+24(SB), prime4
// Load slice.
- MOVQ b_base+0(FP), SI
- MOVQ b_len+8(FP), DX
- LEAQ (SI)(DX*1), BX
+ MOVQ b_base+0(FP), p
+ MOVQ b_len+8(FP), n
+ LEAQ (p)(n*1), end
// The first loop limit will be len(b)-32.
- SUBQ $32, BX
+ SUBQ $32, end
// Check whether we have at least one block.
- CMPQ DX, $32
+ CMPQ n, $32
JLT noBlocks
// Set up initial state (v1, v2, v3, v4).
- MOVQ R13, R8
- ADDQ R14, R8
- MOVQ R14, R9
- XORQ R10, R10
- XORQ R11, R11
- SUBQ R13, R11
+ MOVQ prime1, v1
+ ADDQ prime2, v1
+ MOVQ prime2, v2
+ XORQ v3, v3
+ XORQ v4, v4
+ SUBQ prime1, v4
- // Loop until SI > BX.
-blockLoop:
- round(R8)
- round(R9)
- round(R10)
- round(R11)
+ blockLoop()
- CMPQ SI, BX
- JLE blockLoop
+ MOVQ v1, h
+ ROLQ $1, h
+ MOVQ v2, x
+ ROLQ $7, x
+ ADDQ x, h
+ MOVQ v3, x
+ ROLQ $12, x
+ ADDQ x, h
+ MOVQ v4, x
+ ROLQ $18, x
+ ADDQ x, h
- MOVQ R8, AX
- ROLQ $1, AX
- MOVQ R9, R12
- ROLQ $7, R12
- ADDQ R12, AX
- MOVQ R10, R12
- ROLQ $12, R12
- ADDQ R12, AX
- MOVQ R11, R12
- ROLQ $18, R12
- ADDQ R12, AX
-
- mergeRound(AX, R8)
- mergeRound(AX, R9)
- mergeRound(AX, R10)
- mergeRound(AX, R11)
+ mergeRound(h, v1)
+ mergeRound(h, v2)
+ mergeRound(h, v3)
+ mergeRound(h, v4)
JMP afterBlocks
noBlocks:
- MOVQ ·prime5v(SB), AX
+ MOVQ ·primes+32(SB), h
afterBlocks:
- ADDQ DX, AX
+ ADDQ n, h
- // Right now BX has len(b)-32, and we want to loop until SI > len(b)-8.
- ADDQ $24, BX
+ ADDQ $24, end
+ CMPQ p, end
+ JG try4
- CMPQ SI, BX
- JG fourByte
+loop8:
+ MOVQ (p), x
+ ADDQ $8, p
+ round0(x)
+ XORQ x, h
+ ROLQ $27, h
+ IMULQ prime1, h
+ ADDQ prime4, h
-wordLoop:
- // Calculate k1.
- MOVQ (SI), R8
- ADDQ $8, SI
- IMULQ R14, R8
- ROLQ $31, R8
- IMULQ R13, R8
+ CMPQ p, end
+ JLE loop8
- XORQ R8, AX
- ROLQ $27, AX
- IMULQ R13, AX
- ADDQ DI, AX
+try4:
+ ADDQ $4, end
+ CMPQ p, end
+ JG try1
- CMPQ SI, BX
- JLE wordLoop
+ MOVL (p), x
+ ADDQ $4, p
+ IMULQ prime1, x
+ XORQ x, h
-fourByte:
- ADDQ $4, BX
- CMPQ SI, BX
- JG singles
+ ROLQ $23, h
+ IMULQ prime2, h
+ ADDQ ·primes+16(SB), h
- MOVL (SI), R8
- ADDQ $4, SI
- IMULQ R13, R8
- XORQ R8, AX
-
- ROLQ $23, AX
- IMULQ R14, AX
- ADDQ ·prime3v(SB), AX
-
-singles:
- ADDQ $4, BX
- CMPQ SI, BX
+try1:
+ ADDQ $4, end
+ CMPQ p, end
JGE finalize
-singlesLoop:
- MOVBQZX (SI), R12
- ADDQ $1, SI
- IMULQ ·prime5v(SB), R12
- XORQ R12, AX
+loop1:
+ MOVBQZX (p), x
+ ADDQ $1, p
+ IMULQ ·primes+32(SB), x
+ XORQ x, h
+ ROLQ $11, h
+ IMULQ prime1, h
- ROLQ $11, AX
- IMULQ R13, AX
-
- CMPQ SI, BX
- JL singlesLoop
+ CMPQ p, end
+ JL loop1
finalize:
- MOVQ AX, R12
- SHRQ $33, R12
- XORQ R12, AX
- IMULQ R14, AX
- MOVQ AX, R12
- SHRQ $29, R12
- XORQ R12, AX
- IMULQ ·prime3v(SB), AX
- MOVQ AX, R12
- SHRQ $32, R12
- XORQ R12, AX
+ MOVQ h, x
+ SHRQ $33, x
+ XORQ x, h
+ IMULQ prime2, h
+ MOVQ h, x
+ SHRQ $29, x
+ XORQ x, h
+ IMULQ ·primes+16(SB), h
+ MOVQ h, x
+ SHRQ $32, x
+ XORQ x, h
- MOVQ AX, ret+24(FP)
+ MOVQ h, ret+24(FP)
RET
-// writeBlocks uses the same registers as above except that it uses AX to store
-// the d pointer.
-
// func writeBlocks(d *Digest, b []byte) int
-TEXT ·writeBlocks(SB), NOSPLIT, $0-40
+TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
// Load fixed primes needed for round.
- MOVQ ·prime1v(SB), R13
- MOVQ ·prime2v(SB), R14
+ MOVQ ·primes+0(SB), prime1
+ MOVQ ·primes+8(SB), prime2
// Load slice.
- MOVQ b_base+8(FP), SI
- MOVQ b_len+16(FP), DX
- LEAQ (SI)(DX*1), BX
- SUBQ $32, BX
+ MOVQ b_base+8(FP), p
+ MOVQ b_len+16(FP), n
+ LEAQ (p)(n*1), end
+ SUBQ $32, end
// Load vN from d.
- MOVQ d+0(FP), AX
- MOVQ 0(AX), R8 // v1
- MOVQ 8(AX), R9 // v2
- MOVQ 16(AX), R10 // v3
- MOVQ 24(AX), R11 // v4
+ MOVQ s+0(FP), d
+ MOVQ 0(d), v1
+ MOVQ 8(d), v2
+ MOVQ 16(d), v3
+ MOVQ 24(d), v4
// We don't need to check the loop condition here; this function is
// always called with at least one block of data to process.
-blockLoop:
- round(R8)
- round(R9)
- round(R10)
- round(R11)
-
- CMPQ SI, BX
- JLE blockLoop
+ blockLoop()
// Copy vN back to d.
- MOVQ R8, 0(AX)
- MOVQ R9, 8(AX)
- MOVQ R10, 16(AX)
- MOVQ R11, 24(AX)
+ MOVQ v1, 0(d)
+ MOVQ v2, 8(d)
+ MOVQ v3, 16(d)
+ MOVQ v4, 24(d)
- // The number of bytes written is SI minus the old base pointer.
- SUBQ b_base+8(FP), SI
- MOVQ SI, ret+32(FP)
+ // The number of bytes written is p minus the old base pointer.
+ SUBQ b_base+8(FP), p
+ MOVQ p, ret+32(FP)
RET
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s
index 4d64a17..ae7d4d3 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s
@@ -1,13 +1,17 @@
-// +build gc,!purego,!noasm
+//go:build !appengine && gc && !purego && !noasm
+// +build !appengine
+// +build gc
+// +build !purego
+// +build !noasm
#include "textflag.h"
-// Register allocation.
+// Registers:
#define digest R1
-#define h R2 // Return value.
-#define p R3 // Input pointer.
-#define len R4
-#define nblocks R5 // len / 32.
+#define h R2 // return value
+#define p R3 // input pointer
+#define n R4 // input length
+#define nblocks R5 // n / 32
#define prime1 R7
#define prime2 R8
#define prime3 R9
@@ -25,60 +29,52 @@
#define round(acc, x) \
MADD prime2, acc, x, acc \
ROR $64-31, acc \
- MUL prime1, acc \
+ MUL prime1, acc
-// x = round(0, x).
+// round0 performs the operation x = round(0, x).
#define round0(x) \
MUL prime2, x \
ROR $64-31, x \
- MUL prime1, x \
+ MUL prime1, x
-#define mergeRound(x) \
- round0(x) \
- EOR x, h \
- MADD h, prime4, prime1, h \
+#define mergeRound(acc, x) \
+ round0(x) \
+ EOR x, acc \
+ MADD acc, prime4, prime1, acc
-// Update v[1-4] with 32-byte blocks. Assumes len >= 32.
-#define blocksLoop() \
- LSR $5, len, nblocks \
- PCALIGN $16 \
- loop: \
- LDP.P 32(p), (x1, x2) \
- round(v1, x1) \
- LDP -16(p), (x3, x4) \
- round(v2, x2) \
- SUB $1, nblocks \
- round(v3, x3) \
- round(v4, x4) \
- CBNZ nblocks, loop \
-
-// The primes are repeated here to ensure that they're stored
-// in a contiguous array, so we can load them with LDP.
-DATA primes<> +0(SB)/8, $11400714785074694791
-DATA primes<> +8(SB)/8, $14029467366897019727
-DATA primes<>+16(SB)/8, $1609587929392839161
-DATA primes<>+24(SB)/8, $9650029242287828579
-DATA primes<>+32(SB)/8, $2870177450012600261
-GLOBL primes<>(SB), NOPTR+RODATA, $40
+// blockLoop processes as many 32-byte blocks as possible,
+// updating v1, v2, v3, and v4. It assumes that n >= 32.
+#define blockLoop() \
+ LSR $5, n, nblocks \
+ PCALIGN $16 \
+ loop: \
+ LDP.P 16(p), (x1, x2) \
+ LDP.P 16(p), (x3, x4) \
+ round(v1, x1) \
+ round(v2, x2) \
+ round(v3, x3) \
+ round(v4, x4) \
+ SUB $1, nblocks \
+ CBNZ nblocks, loop
// func Sum64(b []byte) uint64
-TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
- LDP b_base+0(FP), (p, len)
+TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
+ LDP b_base+0(FP), (p, n)
- LDP primes<> +0(SB), (prime1, prime2)
- LDP primes<>+16(SB), (prime3, prime4)
- MOVD primes<>+32(SB), prime5
+ LDP ·primes+0(SB), (prime1, prime2)
+ LDP ·primes+16(SB), (prime3, prime4)
+ MOVD ·primes+32(SB), prime5
- CMP $32, len
- CSEL LO, prime5, ZR, h // if len < 32 { h = prime5 } else { h = 0 }
- BLO afterLoop
+ CMP $32, n
+ CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 }
+ BLT afterLoop
ADD prime1, prime2, v1
MOVD prime2, v2
MOVD $0, v3
NEG prime1, v4
- blocksLoop()
+ blockLoop()
ROR $64-1, v1, x1
ROR $64-7, v2, x2
@@ -88,71 +84,75 @@
ADD x3, x4
ADD x2, x4, h
- mergeRound(v1)
- mergeRound(v2)
- mergeRound(v3)
- mergeRound(v4)
+ mergeRound(h, v1)
+ mergeRound(h, v2)
+ mergeRound(h, v3)
+ mergeRound(h, v4)
afterLoop:
- ADD len, h
+ ADD n, h
- TBZ $4, len, try8
+ TBZ $4, n, try8
LDP.P 16(p), (x1, x2)
round0(x1)
+
+ // NOTE: here and below, sequencing the EOR after the ROR (using a
+ // rotated register) is worth a small but measurable speedup for small
+ // inputs.
ROR $64-27, h
EOR x1 @> 64-27, h, h
MADD h, prime4, prime1, h
round0(x2)
ROR $64-27, h
- EOR x2 @> 64-27, h
+ EOR x2 @> 64-27, h, h
MADD h, prime4, prime1, h
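
The rotated-register trick is sound because rotation distributes over XOR: folding the input into an already-rotated h equals rotating the combined value. In Go terms (bits.RotateLeft64 from math/bits):

    // Identity behind `ROR $64-27, h` + `EOR x @> 64-27, h, h`:
    // a and b below are equal for every h and x.
    a := bits.RotateLeft64(h^x, 27)
    b := bits.RotateLeft64(h, 27) ^ bits.RotateLeft64(x, 27)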
try8:
- TBZ $3, len, try4
+ TBZ $3, n, try4
MOVD.P 8(p), x1
round0(x1)
ROR $64-27, h
- EOR x1 @> 64-27, h
+ EOR x1 @> 64-27, h, h
MADD h, prime4, prime1, h
try4:
- TBZ $2, len, try2
+ TBZ $2, n, try2
MOVWU.P 4(p), x2
MUL prime1, x2
ROR $64-23, h
- EOR x2 @> 64-23, h
+ EOR x2 @> 64-23, h, h
MADD h, prime3, prime2, h
try2:
- TBZ $1, len, try1
+ TBZ $1, n, try1
MOVHU.P 2(p), x3
AND $255, x3, x1
LSR $8, x3, x2
MUL prime5, x1
ROR $64-11, h
- EOR x1 @> 64-11, h
+ EOR x1 @> 64-11, h, h
MUL prime1, h
MUL prime5, x2
ROR $64-11, h
- EOR x2 @> 64-11, h
+ EOR x2 @> 64-11, h, h
MUL prime1, h
try1:
- TBZ $0, len, end
+ TBZ $0, n, finalize
MOVBU (p), x4
MUL prime5, x4
ROR $64-11, h
- EOR x4 @> 64-11, h
+ EOR x4 @> 64-11, h, h
MUL prime1, h
-end:
+finalize:
EOR h >> 33, h
MUL prime2, h
EOR h >> 29, h
@@ -162,25 +162,23 @@
MOVD h, ret+24(FP)
RET
-// func writeBlocks(d *Digest, b []byte) int
-//
-// Assumes len(b) >= 32.
-TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40
- LDP primes<>(SB), (prime1, prime2)
+// func writeBlocks(s *Digest, b []byte) int
+TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
+ LDP ·primes+0(SB), (prime1, prime2)
// Load state. Assume v[1-4] are stored contiguously.
- MOVD d+0(FP), digest
+ MOVD s+0(FP), digest
LDP 0(digest), (v1, v2)
LDP 16(digest), (v3, v4)
- LDP b_base+8(FP), (p, len)
+ LDP b_base+8(FP), (p, n)
- blocksLoop()
+ blockLoop()
// Store updated state.
STP (v1, v2), 0(digest)
STP (v3, v4), 16(digest)
- BIC $31, len
- MOVD len, ret+32(FP)
+ BIC $31, n
+ MOVD n, ret+32(FP)
RET
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go
index 1a1fac9..d4221ed 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go
@@ -13,4 +13,4 @@
func Sum64(b []byte) uint64
//go:noescape
-func writeBlocks(d *Digest, b []byte) int
+func writeBlocks(s *Digest, b []byte) int
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
index 209cb4a..0be16ce 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
@@ -15,10 +15,10 @@
var h uint64
if n >= 32 {
- v1 := prime1v + prime2
+ v1 := primes[0] + prime2
v2 := prime2
v3 := uint64(0)
- v4 := -prime1v
+ v4 := -primes[0]
for len(b) >= 32 {
v1 = round(v1, u64(b[0:8:len(b)]))
v2 = round(v2, u64(b[8:16:len(b)]))
@@ -37,19 +37,18 @@
h += uint64(n)
- i, end := 0, len(b)
- for ; i+8 <= end; i += 8 {
- k1 := round(0, u64(b[i:i+8:len(b)]))
+ for ; len(b) >= 8; b = b[8:] {
+ k1 := round(0, u64(b[:8]))
h ^= k1
h = rol27(h)*prime1 + prime4
}
- if i+4 <= end {
- h ^= uint64(u32(b[i:i+4:len(b)])) * prime1
+ if len(b) >= 4 {
+ h ^= uint64(u32(b[:4])) * prime1
h = rol23(h)*prime2 + prime3
- i += 4
+ b = b[4:]
}
- for ; i < end; i++ {
- h ^= uint64(b[i]) * prime5
+ for ; len(b) > 0; b = b[1:] {
+ h ^= uint64(b[0]) * prime5
h = rol11(h) * prime1
}
diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go
new file mode 100644
index 0000000..f41932b
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go
@@ -0,0 +1,16 @@
+//go:build amd64 && !appengine && !noasm && gc
+// +build amd64,!appengine,!noasm,gc
+
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package zstd
+
+// matchLen returns how many bytes match in a and b.
+//
+// It assumes that:
+//
+// len(a) <= len(b) and len(a) > 0
+//
+//go:noescape
+func matchLen(a []byte, b []byte) int
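
A hypothetical call site for this contract, since matchLen itself does no length checking (the names here are illustrative, not package API):

    // commonPrefix clamps its arguments so that matchLen's
    // precondition len(a) <= len(b), len(a) > 0 holds.
    func commonPrefix(prev, cur []byte) int {
        if len(prev) > len(cur) {
            prev = prev[:len(cur)]
        }
        if len(prev) == 0 {
            return 0
        }
        return matchLen(prev, cur)
    }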
diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s
new file mode 100644
index 0000000..0782b86
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s
@@ -0,0 +1,66 @@
+// Copied from S2 implementation.
+
+//go:build !appengine && !noasm && gc
+
+#include "textflag.h"
+
+// func matchLen(a []byte, b []byte) int
+TEXT ·matchLen(SB), NOSPLIT, $0-56
+ MOVQ a_base+0(FP), AX
+ MOVQ b_base+24(FP), CX
+ MOVQ a_len+8(FP), DX
+
+ // matchLen
+ XORL SI, SI
+ CMPL DX, $0x08
+ JB matchlen_match4_standalone
+
+matchlen_loopback_standalone:
+ MOVQ (AX)(SI*1), BX
+ XORQ (CX)(SI*1), BX
+ JZ matchlen_loop_standalone
+
+#ifdef GOAMD64_v3
+ TZCNTQ BX, BX
+#else
+ BSFQ BX, BX
+#endif
+ SHRL $0x03, BX
+ LEAL (SI)(BX*1), SI
+ JMP gen_match_len_end
+
+matchlen_loop_standalone:
+ LEAL -8(DX), DX
+ LEAL 8(SI), SI
+ CMPL DX, $0x08
+ JAE matchlen_loopback_standalone
+
+matchlen_match4_standalone:
+ CMPL DX, $0x04
+ JB matchlen_match2_standalone
+ MOVL (AX)(SI*1), BX
+ CMPL (CX)(SI*1), BX
+ JNE matchlen_match2_standalone
+ LEAL -4(DX), DX
+ LEAL 4(SI), SI
+
+matchlen_match2_standalone:
+ CMPL DX, $0x02
+ JB matchlen_match1_standalone
+ MOVW (AX)(SI*1), BX
+ CMPW (CX)(SI*1), BX
+ JNE matchlen_match1_standalone
+ LEAL -2(DX), DX
+ LEAL 2(SI), SI
+
+matchlen_match1_standalone:
+ CMPL DX, $0x01
+ JB gen_match_len_end
+ MOVB (AX)(SI*1), BL
+ CMPB (CX)(SI*1), BL
+ JNE gen_match_len_end
+ INCL SI
+
+gen_match_len_end:
+ MOVQ SI, ret+48(FP)
+ RET
diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_generic.go b/vendor/github.com/klauspost/compress/zstd/matchlen_generic.go
new file mode 100644
index 0000000..bea1779
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/matchlen_generic.go
@@ -0,0 +1,38 @@
+//go:build !amd64 || appengine || !gc || noasm
+// +build !amd64 appengine !gc noasm
+
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package zstd
+
+import (
+ "math/bits"
+
+ "github.com/klauspost/compress/internal/le"
+)
+
+// matchLen returns the maximum common prefix length of a and b.
+// a must be the shorter of the two.
+func matchLen(a, b []byte) (n int) {
+ left := len(a)
+ for left >= 8 {
+ diff := le.Load64(a, n) ^ le.Load64(b, n)
+ if diff != 0 {
+ return n + bits.TrailingZeros64(diff)>>3
+ }
+ n += 8
+ left -= 8
+ }
+ a = a[n:]
+ b = b[n:]
+
+ for i := range a {
+ if a[i] != b[i] {
+ break
+ }
+ n++
+ }
+ return n
+
+}
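
The fast path relies on le.Load64 being little-endian: the first byte that differs sets the lowest nonzero bit of diff, so dividing the trailing-zero count by eight gives the number of equal leading bytes. A worked example:

    a := []byte("abcdeXgh")
    b := []byte("abcdeYgh")
    diff := le.Load64(a, 0) ^ le.Load64(b, 0) // nonzero only in byte 5 ('X'^'Y')
    n := bits.TrailingZeros64(diff) >> 3      // 40 >> 3 == 5 matching bytes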
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go
index df04472..9a7de82 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go
@@ -99,6 +99,21 @@
return nil
}
+func (s *sequenceDecs) freeDecoders() {
+ if f := s.litLengths.fse; f != nil && !f.preDefined {
+ fseDecoderPool.Put(f)
+ s.litLengths.fse = nil
+ }
+ if f := s.offsets.fse; f != nil && !f.preDefined {
+ fseDecoderPool.Put(f)
+ s.offsets.fse = nil
+ }
+ if f := s.matchLengths.fse; f != nil && !f.preDefined {
+ fseDecoderPool.Put(f)
+ s.matchLengths.fse = nil
+ }
+}
+
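freeDecoders hands the three FSE decoders back to a shared pool so later blocks can reuse their allocations; predefined decoders are shared package state and are deliberately skipped. A minimal sketch of the pool this presumes, assuming the conventional sync.Pool shape for the fseDecoderPool named above:

    var fseDecoderPool = sync.Pool{
        New: func() interface{} { return &fseDecoder{} },
    }
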
// execute will execute the decoded sequence with the provided history.
// The sequence must be evaluated before being sent.
func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
@@ -221,13 +236,16 @@
maxBlockSize = s.windowSize
}
+ if debugDecoder {
+ println("decodeSync: decoding", seqs, "sequences", br.remain(), "bits remain on stream")
+ }
for i := seqs - 1; i >= 0; i-- {
if br.overread() {
- printf("reading sequence %d, exceeded available data\n", seqs-i)
+ printf("reading sequence %d, exceeded available data. Overread by %d\n", seqs-i, -br.remain())
return io.ErrUnexpectedEOF
}
var ll, mo, ml int
- if br.off > 4+((maxOffsetBits+16+16)>>3) {
+ if br.cursor > 4+((maxOffsetBits+16+16)>>3) {
// inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
@@ -299,7 +317,7 @@
}
size := ll + ml + len(out)
if size-startSize > maxBlockSize {
- return fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
+ return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
if size > cap(out) {
// Not enough size, which can happen under high volume block streaming conditions
@@ -409,9 +427,8 @@
}
}
- // Check if space for literals
- if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
- return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
+ if size := len(s.literals) + len(out) - startSize; size > maxBlockSize {
+ return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
// Add final literals
@@ -435,18 +452,13 @@
// extra bits are stored in reverse order.
br.fill()
- if s.maxBits <= 32 {
- mo += br.getBits(moB)
- ml += br.getBits(mlB)
- ll += br.getBits(llB)
- } else {
- mo += br.getBits(moB)
+ mo += br.getBits(moB)
+ if s.maxBits > 32 {
br.fill()
- // matchlength+literal length, max 32 bits
- ml += br.getBits(mlB)
- ll += br.getBits(llB)
-
}
+ // matchlength+literal length, max 32 bits
+ ml += br.getBits(mlB)
+ ll += br.getBits(llB)
mo = s.adjustOffset(mo, ll, moB)
return
}
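
The reordering above is safe because, in the zstd format, the match-length and literal-length codes each carry at most 16 extra bits, so the two trailing getBits calls consume at most 32 bits between them; only the offset bits can push the total past what a single fill guarantees, which is precisely the s.maxBits > 32 case that triggers the second br.fill().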
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
index 7598c10..c59f17e 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
@@ -5,6 +5,7 @@
import (
"fmt"
+ "io"
"github.com/klauspost/compress/internal/cpuinfo"
)
@@ -32,18 +33,22 @@
// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
+//
//go:noescape
func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
+//
//go:noescape
func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer.
+//
//go:noescape
func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer.
+//
//go:noescape
func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
@@ -130,20 +135,23 @@
return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
ctx.ll, ctx.litRemain+ctx.ll)
+ case errorOverread:
+ return true, io.ErrUnexpectedEOF
+
case errorNotEnoughSpace:
size := ctx.outPosition + ctx.ll + ctx.ml
if debugDecoder {
println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
}
- return true, fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
+ return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
default:
- return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode)
+ return true, fmt.Errorf("sequenceDecs_decode returned erroneous code %d", errCode)
}
s.seqSize += ctx.litRemain
if s.seqSize > maxBlockSize {
- return true, fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
+ return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
err := br.close()
if err != nil {
@@ -198,23 +206,30 @@
// error reported when capacity of `out` is too small
const errorNotEnoughSpace = 5
+// error reported when bits are overread.
+const errorOverread = 6
+
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
+//
//go:noescape
func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
+//
//go:noescape
func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
+//
//go:noescape
func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
+//
//go:noescape
func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
@@ -239,6 +254,10 @@
litRemain: len(s.literals),
}
+ if debugDecoder {
+ println("decode: decoding", len(seqs), "sequences", br.remain(), "bits remain on stream")
+ }
+
s.seqSize = 0
lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
var errCode int
@@ -269,9 +288,11 @@
case errorNotEnoughLiterals:
ll := ctx.seqs[i].ll
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
+ case errorOverread:
+ return io.ErrUnexpectedEOF
}
- return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
+ return fmt.Errorf("sequenceDecs_decode_amd64 returned erroneous code %d", errCode)
}
if ctx.litRemain < 0 {
@@ -281,7 +302,10 @@
s.seqSize += ctx.litRemain
if s.seqSize > maxBlockSize {
- return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
+ return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
+ }
+ if debugDecoder {
+ println("decode: ", br.remain(), "bits remain on stream. code:", errCode)
}
err := br.close()
if err != nil {
@@ -308,10 +332,12 @@
// Returns false if a match offset is too big.
//
// Please refer to seqdec_generic.go for the reference implementation.
+//
//go:noescape
func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
// Same as above, but with safe memcopies
+//
//go:noescape
func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
index 27e7677..a708ca6 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
@@ -1,16 +1,15 @@
// Code generated by command: go run gen.go -out ../seqdec_amd64.s -pkg=zstd. DO NOT EDIT.
//go:build !appengine && !noasm && gc && !noasm
-// +build !appengine,!noasm,gc,!noasm
// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV
TEXT ·sequenceDecs_decode_amd64(SB), $8-32
- MOVQ br+8(FP), AX
- MOVQ 32(AX), DX
- MOVBQZX 40(AX), BX
- MOVQ 24(AX), SI
- MOVQ (AX), AX
+ MOVQ br+8(FP), CX
+ MOVQ 24(CX), DX
+ MOVBQZX 40(CX), BX
+ MOVQ (CX), AX
+ MOVQ 32(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@@ -39,7 +38,7 @@
sequenceDecs_decode_amd64_fill_byte_by_byte:
CMPQ SI, $0x00
- JLE sequenceDecs_decode_amd64_fill_end
+ JLE sequenceDecs_decode_amd64_fill_check_overread
CMPQ BX, $0x07
JLE sequenceDecs_decode_amd64_fill_end
SHLQ $0x08, DX
@@ -50,6 +49,10 @@
ORQ AX, DX
JMP sequenceDecs_decode_amd64_fill_byte_by_byte
+sequenceDecs_decode_amd64_fill_check_overread:
+ CMPQ BX, $0x40
+ JA error_overread
+
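This new label is the asm counterpart of the Go-side bitReader.overread() check: bitsRead (register BX in this function) can only exceed 64 (0x40) if the byte-by-byte fill ran out of input, and every fill loop in this file now funnels that case to error_overread. In Go terms:

    // bitsRead > 64 means more bits were consumed than the
    // stream supplied, i.e. the reader has been overread.
    func (b *bitReader) overread() bool { return b.bitsRead > 64 }
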
sequenceDecs_decode_amd64_fill_end:
// Update offset
MOVQ R9, AX
@@ -106,7 +109,7 @@
sequenceDecs_decode_amd64_fill_2_byte_by_byte:
CMPQ SI, $0x00
- JLE sequenceDecs_decode_amd64_fill_2_end
+ JLE sequenceDecs_decode_amd64_fill_2_check_overread
CMPQ BX, $0x07
JLE sequenceDecs_decode_amd64_fill_2_end
SHLQ $0x08, DX
@@ -117,6 +120,10 @@
ORQ AX, DX
JMP sequenceDecs_decode_amd64_fill_2_byte_by_byte
+sequenceDecs_decode_amd64_fill_2_check_overread:
+ CMPQ BX, $0x40
+ JA error_overread
+
sequenceDecs_decode_amd64_fill_2_end:
// Update literal length
MOVQ DI, AX
@@ -150,8 +157,7 @@
// Update Literal Length State
MOVBQZX DI, R14
- SHRQ $0x10, DI
- MOVWQZX DI, DI
+ SHRL $0x10, DI
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
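
This SHRL replacement recurs throughout the file: the old SHRQ $0x10 + MOVWQZX pair extracted the 16-bit field at bits 16..31 of the state word in two instructions, while a single 32-bit SHRL $0x10 yields the identical zero-extended value, since shifting a 32-bit register discards bits 32..63 and shifts in zeros:

    // Both forms yield the 16-bit field at bits 16..31, zero-extended.
    v1 := (state >> 16) & 0xFFFF      // SHRQ $0x10 + MOVWQZX
    v2 := uint64(uint32(state) >> 16) // SHRL $0x10
    // v1 == v2 for every state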
@@ -170,8 +176,7 @@
// Update Match Length State
MOVBQZX R8, R14
- SHRQ $0x10, R8
- MOVWQZX R8, R8
+ SHRL $0x10, R8
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -190,8 +195,7 @@
// Update Offset State
MOVBQZX R9, R14
- SHRQ $0x10, R9
- MOVWQZX R9, R9
+ SHRL $0x10, R9
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -294,9 +298,9 @@
MOVQ R12, 152(AX)
MOVQ R13, 160(AX)
MOVQ br+8(FP), AX
- MOVQ DX, 32(AX)
+ MOVQ DX, 24(AX)
MOVB BL, 40(AX)
- MOVQ SI, 24(AX)
+ MOVQ SI, 32(AX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -321,18 +325,19 @@
MOVQ $0x00000004, ret+24(FP)
RET
- // Return with not enough output space error
- MOVQ $0x00000005, ret+24(FP)
+ // Return with overread error
+error_overread:
+ MOVQ $0x00000006, ret+24(FP)
RET
// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV
TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
- MOVQ br+8(FP), AX
- MOVQ 32(AX), DX
- MOVBQZX 40(AX), BX
- MOVQ 24(AX), SI
- MOVQ (AX), AX
+ MOVQ br+8(FP), CX
+ MOVQ 24(CX), DX
+ MOVBQZX 40(CX), BX
+ MOVQ (CX), AX
+ MOVQ 32(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@@ -361,7 +366,7 @@
sequenceDecs_decode_56_amd64_fill_byte_by_byte:
CMPQ SI, $0x00
- JLE sequenceDecs_decode_56_amd64_fill_end
+ JLE sequenceDecs_decode_56_amd64_fill_check_overread
CMPQ BX, $0x07
JLE sequenceDecs_decode_56_amd64_fill_end
SHLQ $0x08, DX
@@ -372,6 +377,10 @@
ORQ AX, DX
JMP sequenceDecs_decode_56_amd64_fill_byte_by_byte
+sequenceDecs_decode_56_amd64_fill_check_overread:
+ CMPQ BX, $0x40
+ JA error_overread
+
sequenceDecs_decode_56_amd64_fill_end:
// Update offset
MOVQ R9, AX
@@ -447,8 +456,7 @@
// Update Literal Length State
MOVBQZX DI, R14
- SHRQ $0x10, DI
- MOVWQZX DI, DI
+ SHRL $0x10, DI
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -467,8 +475,7 @@
// Update Match Length State
MOVBQZX R8, R14
- SHRQ $0x10, R8
- MOVWQZX R8, R8
+ SHRL $0x10, R8
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -487,8 +494,7 @@
// Update Offset State
MOVBQZX R9, R14
- SHRQ $0x10, R9
- MOVWQZX R9, R9
+ SHRL $0x10, R9
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -591,9 +597,9 @@
MOVQ R12, 152(AX)
MOVQ R13, 160(AX)
MOVQ br+8(FP), AX
- MOVQ DX, 32(AX)
+ MOVQ DX, 24(AX)
MOVB BL, 40(AX)
- MOVQ SI, 24(AX)
+ MOVQ SI, 32(AX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -618,18 +624,19 @@
MOVQ $0x00000004, ret+24(FP)
RET
- // Return with not enough output space error
- MOVQ $0x00000005, ret+24(FP)
+ // Return with overread error
+error_overread:
+ MOVQ $0x00000006, ret+24(FP)
RET
// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
- MOVQ br+8(FP), CX
- MOVQ 32(CX), AX
- MOVBQZX 40(CX), DX
- MOVQ 24(CX), BX
- MOVQ (CX), CX
+ MOVQ br+8(FP), BX
+ MOVQ 24(BX), AX
+ MOVBQZX 40(BX), DX
+ MOVQ (BX), CX
+ MOVQ 32(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -658,7 +665,7 @@
sequenceDecs_decode_bmi2_fill_byte_by_byte:
CMPQ BX, $0x00
- JLE sequenceDecs_decode_bmi2_fill_end
+ JLE sequenceDecs_decode_bmi2_fill_check_overread
CMPQ DX, $0x07
JLE sequenceDecs_decode_bmi2_fill_end
SHLQ $0x08, AX
@@ -669,6 +676,10 @@
ORQ CX, AX
JMP sequenceDecs_decode_bmi2_fill_byte_by_byte
+sequenceDecs_decode_bmi2_fill_check_overread:
+ CMPQ DX, $0x40
+ JA error_overread
+
sequenceDecs_decode_bmi2_fill_end:
// Update offset
MOVQ $0x00000808, CX
@@ -709,7 +720,7 @@
sequenceDecs_decode_bmi2_fill_2_byte_by_byte:
CMPQ BX, $0x00
- JLE sequenceDecs_decode_bmi2_fill_2_end
+ JLE sequenceDecs_decode_bmi2_fill_2_check_overread
CMPQ DX, $0x07
JLE sequenceDecs_decode_bmi2_fill_2_end
SHLQ $0x08, AX
@@ -720,6 +731,10 @@
ORQ CX, AX
JMP sequenceDecs_decode_bmi2_fill_2_byte_by_byte
+sequenceDecs_decode_bmi2_fill_2_check_overread:
+ CMPQ DX, $0x40
+ JA error_overread
+
sequenceDecs_decode_bmi2_fill_2_end:
// Update literal length
MOVQ $0x00000808, CX
@@ -751,11 +766,10 @@
BZHIQ R14, R15, R15
// Update Offset State
- BZHIQ R8, R15, CX
- SHRXQ R8, R15, R15
- MOVQ $0x00001010, R14
- BEXTRQ R14, R8, R8
- ADDQ CX, R8
+ BZHIQ R8, R15, CX
+ SHRXQ R8, R15, R15
+ SHRL $0x10, R8
+ ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@@ -763,11 +777,10 @@
MOVQ (CX)(R8*8), R8
// Update Match Length State
- BZHIQ DI, R15, CX
- SHRXQ DI, R15, R15
- MOVQ $0x00001010, R14
- BEXTRQ R14, DI, DI
- ADDQ CX, DI
+ BZHIQ DI, R15, CX
+ SHRXQ DI, R15, R15
+ SHRL $0x10, DI
+ ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@@ -775,10 +788,9 @@
MOVQ (CX)(DI*8), DI
// Update Literal Length State
- BZHIQ SI, R15, CX
- MOVQ $0x00001010, R14
- BEXTRQ R14, SI, SI
- ADDQ CX, SI
+ BZHIQ SI, R15, CX
+ SHRL $0x10, SI
+ ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX
@@ -871,9 +883,9 @@
MOVQ R11, 152(CX)
MOVQ R12, 160(CX)
MOVQ br+8(FP), CX
- MOVQ AX, 32(CX)
+ MOVQ AX, 24(CX)
MOVB DL, 40(CX)
- MOVQ BX, 24(CX)
+ MOVQ BX, 32(CX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -898,18 +910,19 @@
MOVQ $0x00000004, ret+24(FP)
RET
- // Return with not enough output space error
- MOVQ $0x00000005, ret+24(FP)
+ // Return with overread error
+error_overread:
+ MOVQ $0x00000006, ret+24(FP)
RET
// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
- MOVQ br+8(FP), CX
- MOVQ 32(CX), AX
- MOVBQZX 40(CX), DX
- MOVQ 24(CX), BX
- MOVQ (CX), CX
+ MOVQ br+8(FP), BX
+ MOVQ 24(BX), AX
+ MOVBQZX 40(BX), DX
+ MOVQ (BX), CX
+ MOVQ 32(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -938,7 +951,7 @@
sequenceDecs_decode_56_bmi2_fill_byte_by_byte:
CMPQ BX, $0x00
- JLE sequenceDecs_decode_56_bmi2_fill_end
+ JLE sequenceDecs_decode_56_bmi2_fill_check_overread
CMPQ DX, $0x07
JLE sequenceDecs_decode_56_bmi2_fill_end
SHLQ $0x08, AX
@@ -949,6 +962,10 @@
ORQ CX, AX
JMP sequenceDecs_decode_56_bmi2_fill_byte_by_byte
+sequenceDecs_decode_56_bmi2_fill_check_overread:
+ CMPQ DX, $0x40
+ JA error_overread
+
sequenceDecs_decode_56_bmi2_fill_end:
// Update offset
MOVQ $0x00000808, CX
@@ -1006,11 +1023,10 @@
BZHIQ R14, R15, R15
// Update Offset State
- BZHIQ R8, R15, CX
- SHRXQ R8, R15, R15
- MOVQ $0x00001010, R14
- BEXTRQ R14, R8, R8
- ADDQ CX, R8
+ BZHIQ R8, R15, CX
+ SHRXQ R8, R15, R15
+ SHRL $0x10, R8
+ ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@@ -1018,11 +1034,10 @@
MOVQ (CX)(R8*8), R8
// Update Match Length State
- BZHIQ DI, R15, CX
- SHRXQ DI, R15, R15
- MOVQ $0x00001010, R14
- BEXTRQ R14, DI, DI
- ADDQ CX, DI
+ BZHIQ DI, R15, CX
+ SHRXQ DI, R15, R15
+ SHRL $0x10, DI
+ ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@@ -1030,10 +1045,9 @@
MOVQ (CX)(DI*8), DI
// Update Literal Length State
- BZHIQ SI, R15, CX
- MOVQ $0x00001010, R14
- BEXTRQ R14, SI, SI
- ADDQ CX, SI
+ BZHIQ SI, R15, CX
+ SHRL $0x10, SI
+ ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX
@@ -1126,9 +1140,9 @@
MOVQ R11, 152(CX)
MOVQ R12, 160(CX)
MOVQ br+8(FP), CX
- MOVQ AX, 32(CX)
+ MOVQ AX, 24(CX)
MOVB DL, 40(CX)
- MOVQ BX, 24(CX)
+ MOVQ BX, 32(CX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -1153,8 +1167,9 @@
MOVQ $0x00000004, ret+24(FP)
RET
- // Return with not enough output space error
- MOVQ $0x00000005, ret+24(FP)
+ // Return with overread error
+error_overread:
+ MOVQ $0x00000006, ret+24(FP)
RET
// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
@@ -1390,8 +1405,7 @@
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
- MOVQ 80(AX), CX
- SUBQ CX, SI
+ SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET
@@ -1403,8 +1417,7 @@
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
- MOVQ 80(AX), CX
- SUBQ CX, SI
+ SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET
@@ -1748,8 +1761,7 @@
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
- MOVQ 80(AX), CX
- SUBQ CX, SI
+ SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET
@@ -1761,8 +1773,7 @@
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
- MOVQ 80(AX), CX
- SUBQ CX, SI
+ SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET
@@ -1774,11 +1785,11 @@
// func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: CMOV, SSE
TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32
- MOVQ br+8(FP), AX
- MOVQ 32(AX), DX
- MOVBQZX 40(AX), BX
- MOVQ 24(AX), SI
- MOVQ (AX), AX
+ MOVQ br+8(FP), CX
+ MOVQ 24(CX), DX
+ MOVBQZX 40(CX), BX
+ MOVQ (CX), AX
+ MOVQ 32(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@@ -1803,7 +1814,7 @@
MOVQ 40(SP), AX
ADDQ AX, 48(SP)
- // Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+ // Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
ADDQ R10, 32(SP)
// outBase += outPosition
@@ -1825,7 +1836,7 @@
sequenceDecs_decodeSync_amd64_fill_byte_by_byte:
CMPQ SI, $0x00
- JLE sequenceDecs_decodeSync_amd64_fill_end
+ JLE sequenceDecs_decodeSync_amd64_fill_check_overread
CMPQ BX, $0x07
JLE sequenceDecs_decodeSync_amd64_fill_end
SHLQ $0x08, DX
@@ -1836,6 +1847,10 @@
ORQ AX, DX
JMP sequenceDecs_decodeSync_amd64_fill_byte_by_byte
+sequenceDecs_decodeSync_amd64_fill_check_overread:
+ CMPQ BX, $0x40
+ JA error_overread
+
sequenceDecs_decodeSync_amd64_fill_end:
// Update offset
MOVQ R9, AX
@@ -1892,7 +1907,7 @@
sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte:
CMPQ SI, $0x00
- JLE sequenceDecs_decodeSync_amd64_fill_2_end
+ JLE sequenceDecs_decodeSync_amd64_fill_2_check_overread
CMPQ BX, $0x07
JLE sequenceDecs_decodeSync_amd64_fill_2_end
SHLQ $0x08, DX
@@ -1903,6 +1918,10 @@
ORQ AX, DX
JMP sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte
+sequenceDecs_decodeSync_amd64_fill_2_check_overread:
+ CMPQ BX, $0x40
+ JA error_overread
+
sequenceDecs_decodeSync_amd64_fill_2_end:
// Update literal length
MOVQ DI, AX
@@ -1936,8 +1955,7 @@
// Update Literal Length State
MOVBQZX DI, R13
- SHRQ $0x10, DI
- MOVWQZX DI, DI
+ SHRL $0x10, DI
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -1956,8 +1974,7 @@
// Update Match Length State
MOVBQZX R8, R13
- SHRQ $0x10, R8
- MOVWQZX R8, R8
+ SHRL $0x10, R8
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -1976,8 +1993,7 @@
// Update Offset State
MOVBQZX R9, R13
- SHRQ $0x10, R9
- MOVWQZX R9, R9
+ SHRL $0x10, R9
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -2264,9 +2280,9 @@
loop_finished:
MOVQ br+8(FP), AX
- MOVQ DX, 32(AX)
+ MOVQ DX, 24(AX)
MOVB BL, 40(AX)
- MOVQ SI, 24(AX)
+ MOVQ SI, 32(AX)
// Update the context
MOVQ ctx+16(FP), AX
@@ -2312,6 +2328,11 @@
MOVQ $0x00000004, ret+24(FP)
RET
+ // Return with overread error
+error_overread:
+ MOVQ $0x00000006, ret+24(FP)
+ RET
+
// Return with not enough output space error
error_not_enough_space:
MOVQ ctx+16(FP), AX
@@ -2326,11 +2347,11 @@
// func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: BMI, BMI2, CMOV, SSE
TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32
- MOVQ br+8(FP), CX
- MOVQ 32(CX), AX
- MOVBQZX 40(CX), DX
- MOVQ 24(CX), BX
- MOVQ (CX), CX
+ MOVQ br+8(FP), BX
+ MOVQ 24(BX), AX
+ MOVBQZX 40(BX), DX
+ MOVQ (BX), CX
+ MOVQ 32(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -2355,7 +2376,7 @@
MOVQ 40(SP), CX
ADDQ CX, 48(SP)
- // Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+ // Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
ADDQ R9, 32(SP)
// outBase += outPosition
@@ -2377,7 +2398,7 @@
sequenceDecs_decodeSync_bmi2_fill_byte_by_byte:
CMPQ BX, $0x00
- JLE sequenceDecs_decodeSync_bmi2_fill_end
+ JLE sequenceDecs_decodeSync_bmi2_fill_check_overread
CMPQ DX, $0x07
JLE sequenceDecs_decodeSync_bmi2_fill_end
SHLQ $0x08, AX
@@ -2388,6 +2409,10 @@
ORQ CX, AX
JMP sequenceDecs_decodeSync_bmi2_fill_byte_by_byte
+sequenceDecs_decodeSync_bmi2_fill_check_overread:
+ CMPQ DX, $0x40
+ JA error_overread
+
sequenceDecs_decodeSync_bmi2_fill_end:
// Update offset
MOVQ $0x00000808, CX
@@ -2428,7 +2453,7 @@
sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte:
CMPQ BX, $0x00
- JLE sequenceDecs_decodeSync_bmi2_fill_2_end
+ JLE sequenceDecs_decodeSync_bmi2_fill_2_check_overread
CMPQ DX, $0x07
JLE sequenceDecs_decodeSync_bmi2_fill_2_end
SHLQ $0x08, AX
@@ -2439,6 +2464,10 @@
ORQ CX, AX
JMP sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte
+sequenceDecs_decodeSync_bmi2_fill_2_check_overread:
+ CMPQ DX, $0x40
+ JA error_overread
+
sequenceDecs_decodeSync_bmi2_fill_2_end:
// Update literal length
MOVQ $0x00000808, CX
@@ -2470,11 +2499,10 @@
BZHIQ R13, R14, R14
// Update Offset State
- BZHIQ R8, R14, CX
- SHRXQ R8, R14, R14
- MOVQ $0x00001010, R13
- BEXTRQ R13, R8, R8
- ADDQ CX, R8
+ BZHIQ R8, R14, CX
+ SHRXQ R8, R14, R14
+ SHRL $0x10, R8
+ ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@@ -2482,11 +2510,10 @@
MOVQ (CX)(R8*8), R8
// Update Match Length State
- BZHIQ DI, R14, CX
- SHRXQ DI, R14, R14
- MOVQ $0x00001010, R13
- BEXTRQ R13, DI, DI
- ADDQ CX, DI
+ BZHIQ DI, R14, CX
+ SHRXQ DI, R14, R14
+ SHRL $0x10, DI
+ ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@@ -2494,10 +2521,9 @@
MOVQ (CX)(DI*8), DI
// Update Literal Length State
- BZHIQ SI, R14, CX
- MOVQ $0x00001010, R13
- BEXTRQ R13, SI, SI
- ADDQ CX, SI
+ BZHIQ SI, R14, CX
+ SHRL $0x10, SI
+ ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX
@@ -2774,9 +2800,9 @@
loop_finished:
MOVQ br+8(FP), CX
- MOVQ AX, 32(CX)
+ MOVQ AX, 24(CX)
MOVB DL, 40(CX)
- MOVQ BX, 24(CX)
+ MOVQ BX, 32(CX)
// Update the context
MOVQ ctx+16(FP), AX
@@ -2822,6 +2848,11 @@
MOVQ $0x00000004, ret+24(FP)
RET
+ // Return with overread error
+error_overread:
+ MOVQ $0x00000006, ret+24(FP)
+ RET
+
// Return with not enough output space error
error_not_enough_space:
MOVQ ctx+16(FP), AX
@@ -2836,11 +2867,11 @@
// func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: CMOV, SSE
TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32
- MOVQ br+8(FP), AX
- MOVQ 32(AX), DX
- MOVBQZX 40(AX), BX
- MOVQ 24(AX), SI
- MOVQ (AX), AX
+ MOVQ br+8(FP), CX
+ MOVQ 24(CX), DX
+ MOVBQZX 40(CX), BX
+ MOVQ (CX), AX
+ MOVQ 32(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@@ -2865,7 +2896,7 @@
MOVQ 40(SP), AX
ADDQ AX, 48(SP)
- // Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+ // Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
ADDQ R10, 32(SP)
// outBase += outPosition
@@ -2887,7 +2918,7 @@
sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte:
CMPQ SI, $0x00
- JLE sequenceDecs_decodeSync_safe_amd64_fill_end
+ JLE sequenceDecs_decodeSync_safe_amd64_fill_check_overread
CMPQ BX, $0x07
JLE sequenceDecs_decodeSync_safe_amd64_fill_end
SHLQ $0x08, DX
@@ -2898,6 +2929,10 @@
ORQ AX, DX
JMP sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte
+sequenceDecs_decodeSync_safe_amd64_fill_check_overread:
+ CMPQ BX, $0x40
+ JA error_overread
+
sequenceDecs_decodeSync_safe_amd64_fill_end:
// Update offset
MOVQ R9, AX
@@ -2954,7 +2989,7 @@
sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte:
CMPQ SI, $0x00
- JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end
+ JLE sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread
CMPQ BX, $0x07
JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end
SHLQ $0x08, DX
@@ -2965,6 +3000,10 @@
ORQ AX, DX
JMP sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte
+sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread:
+ CMPQ BX, $0x40
+ JA error_overread
+
sequenceDecs_decodeSync_safe_amd64_fill_2_end:
// Update literal length
MOVQ DI, AX
@@ -2998,8 +3037,7 @@
// Update Literal Length State
MOVBQZX DI, R13
- SHRQ $0x10, DI
- MOVWQZX DI, DI
+ SHRL $0x10, DI
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -3018,8 +3056,7 @@
// Update Match Length State
MOVBQZX R8, R13
- SHRQ $0x10, R8
- MOVWQZX R8, R8
+ SHRL $0x10, R8
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -3038,8 +3075,7 @@
// Update Offset State
MOVBQZX R9, R13
- SHRQ $0x10, R9
- MOVWQZX R9, R9
+ SHRL $0x10, R9
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -3428,9 +3464,9 @@
loop_finished:
MOVQ br+8(FP), AX
- MOVQ DX, 32(AX)
+ MOVQ DX, 24(AX)
MOVB BL, 40(AX)
- MOVQ SI, 24(AX)
+ MOVQ SI, 32(AX)
// Update the context
MOVQ ctx+16(FP), AX
@@ -3476,6 +3512,11 @@
MOVQ $0x00000004, ret+24(FP)
RET
+ // Return with overread error
+error_overread:
+ MOVQ $0x00000006, ret+24(FP)
+ RET
+
// Return with not enough output space error
error_not_enough_space:
MOVQ ctx+16(FP), AX
@@ -3490,11 +3531,11 @@
// func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: BMI, BMI2, CMOV, SSE
TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32
- MOVQ br+8(FP), CX
- MOVQ 32(CX), AX
- MOVBQZX 40(CX), DX
- MOVQ 24(CX), BX
- MOVQ (CX), CX
+ MOVQ br+8(FP), BX
+ MOVQ 24(BX), AX
+ MOVBQZX 40(BX), DX
+ MOVQ (BX), CX
+ MOVQ 32(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -3519,7 +3560,7 @@
MOVQ 40(SP), CX
ADDQ CX, 48(SP)
- // Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+ // Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
ADDQ R9, 32(SP)
// outBase += outPosition
@@ -3541,7 +3582,7 @@
sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte:
CMPQ BX, $0x00
- JLE sequenceDecs_decodeSync_safe_bmi2_fill_end
+ JLE sequenceDecs_decodeSync_safe_bmi2_fill_check_overread
CMPQ DX, $0x07
JLE sequenceDecs_decodeSync_safe_bmi2_fill_end
SHLQ $0x08, AX
@@ -3552,6 +3593,10 @@
ORQ CX, AX
JMP sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte
+sequenceDecs_decodeSync_safe_bmi2_fill_check_overread:
+ CMPQ DX, $0x40
+ JA error_overread
+
sequenceDecs_decodeSync_safe_bmi2_fill_end:
// Update offset
MOVQ $0x00000808, CX
@@ -3592,7 +3637,7 @@
sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte:
CMPQ BX, $0x00
- JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end
+ JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread
CMPQ DX, $0x07
JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end
SHLQ $0x08, AX
@@ -3603,6 +3648,10 @@
ORQ CX, AX
JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte
+sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread:
+ CMPQ DX, $0x40
+ JA error_overread
+
sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
// Update literal length
MOVQ $0x00000808, CX
@@ -3634,11 +3683,10 @@
BZHIQ R13, R14, R14
// Update Offset State
- BZHIQ R8, R14, CX
- SHRXQ R8, R14, R14
- MOVQ $0x00001010, R13
- BEXTRQ R13, R8, R8
- ADDQ CX, R8
+ BZHIQ R8, R14, CX
+ SHRXQ R8, R14, R14
+ SHRL $0x10, R8
+ ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@@ -3646,11 +3694,10 @@
MOVQ (CX)(R8*8), R8
// Update Match Length State
- BZHIQ DI, R14, CX
- SHRXQ DI, R14, R14
- MOVQ $0x00001010, R13
- BEXTRQ R13, DI, DI
- ADDQ CX, DI
+ BZHIQ DI, R14, CX
+ SHRXQ DI, R14, R14
+ SHRL $0x10, DI
+ ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@@ -3658,10 +3705,9 @@
MOVQ (CX)(DI*8), DI
// Update Literal Length State
- BZHIQ SI, R14, CX
- MOVQ $0x00001010, R13
- BEXTRQ R13, SI, SI
- ADDQ CX, SI
+ BZHIQ SI, R14, CX
+ SHRL $0x10, SI
+ ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX
@@ -4040,9 +4086,9 @@
loop_finished:
MOVQ br+8(FP), CX
- MOVQ AX, 32(CX)
+ MOVQ AX, 24(CX)
MOVB DL, 40(CX)
- MOVQ BX, 24(CX)
+ MOVQ BX, 32(CX)
// Update the context
MOVQ ctx+16(FP), AX
@@ -4088,6 +4134,11 @@
MOVQ $0x00000004, ret+24(FP)
RET
+ // Return with overread error
+error_overread:
+ MOVQ $0x00000006, ret+24(FP)
+ RET
+
// Return with not enough output space error
error_not_enough_space:
MOVQ ctx+16(FP), AX
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go
index c3452bc..7cec219 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go
@@ -29,7 +29,7 @@
}
for i := range seqs {
var ll, mo, ml int
- if br.off > 4+((maxOffsetBits+16+16)>>3) {
+ if br.cursor > 4+((maxOffsetBits+16+16)>>3) {
// inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
@@ -111,7 +111,7 @@
}
s.seqSize += ll + ml
if s.seqSize > maxBlockSize {
- return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
+ return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
litRemain -= ll
if litRemain < 0 {
@@ -149,7 +149,7 @@
}
s.seqSize += litRemain
if s.seqSize > maxBlockSize {
- return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
+ return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
err := br.close()
if err != nil {
diff --git a/vendor/github.com/klauspost/compress/zstd/seqenc.go b/vendor/github.com/klauspost/compress/zstd/seqenc.go
index 8014174..65045ea 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqenc.go
@@ -69,7 +69,6 @@
func llCode(litLength uint32) uint8 {
const llDeltaCode = 19
if litLength <= 63 {
- // Compiler insists on bounds check (Go 1.12)
return llCodeTable[litLength&63]
}
return uint8(highBit(litLength)) + llDeltaCode
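
The deleted comments referred to a long-obsolete compiler limitation, but the &63 and &127 masks stay: they are what lets the compiler prove the index is in range and elide the bounds check, independently of the surrounding if. The idiom in isolation (table is an illustrative name):

    var table [64]uint8

    func lookup(i uint32) uint8 {
        return table[i&63] // index is provably < 64; no bounds check emitted
    }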
@@ -102,7 +101,6 @@
func mlCode(mlBase uint32) uint8 {
const mlDeltaCode = 36
if mlBase <= 127 {
- // Compiler insists on bounds check (Go 1.12)
return mlCodeTable[mlBase&127]
}
return uint8(highBit(mlBase)) + mlDeltaCode
diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go
index 9e1baad..a17381b 100644
--- a/vendor/github.com/klauspost/compress/zstd/snappy.go
+++ b/vendor/github.com/klauspost/compress/zstd/snappy.go
@@ -95,10 +95,9 @@
var written int64
var readHeader bool
{
- var header []byte
- var n int
- header, r.err = frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
+ header := frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
+ var n int
n, r.err = w.Write(header)
if r.err != nil {
return written, r.err
@@ -198,7 +197,7 @@
n, r.err = w.Write(r.block.output)
if r.err != nil {
- return written, err
+ return written, r.err
}
written += int64(n)
continue
@@ -240,7 +239,7 @@
}
n, r.err = w.Write(r.block.output)
if r.err != nil {
- return written, err
+ return written, r.err
}
written += int64(n)
continue
diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go
index 3eb3f1c..6252b46 100644
--- a/vendor/github.com/klauspost/compress/zstd/zstd.go
+++ b/vendor/github.com/klauspost/compress/zstd/zstd.go
@@ -5,11 +5,11 @@
import (
"bytes"
- "encoding/binary"
"errors"
"log"
"math"
- "math/bits"
+
+ "github.com/klauspost/compress/internal/le"
)
// enable debug printing
@@ -36,9 +36,6 @@
// zstdMinMatch is the minimum zstd match length.
const zstdMinMatch = 3
-// Reset the buffer offset when reaching this.
-const bufferReset = math.MaxInt32 - MaxWindowSize
-
// fcsUnknown is used for unknown frame content size.
const fcsUnknown = math.MaxUint64
@@ -75,7 +72,6 @@
ErrDecoderSizeExceeded = errors.New("decompressed size exceeds configured limit")
// ErrUnknownDictionary is returned if the dictionary ID is unknown.
- // For the time being dictionaries are not supported.
ErrUnknownDictionary = errors.New("unknown dictionary")
// ErrFrameSizeExceeded is returned if the stated frame size is exceeded.
@@ -93,6 +89,10 @@
// Close has been called.
ErrDecoderClosed = errors.New("decoder used after Close")
+ // ErrEncoderClosed will be returned if the Encoder was used after
+ // Close has been called.
+ ErrEncoderClosed = errors.New("encoder used after Close")
+
// ErrDecoderNilInput is returned when a nil Reader was provided
// and an operation other than Reset/DecodeAll/Close was attempted.
ErrDecoderNilInput = errors.New("nil input provided as reader")
@@ -110,38 +110,12 @@
}
}
-// matchLen returns the maximum length.
-// a must be the shortest of the two.
-// The function also returns whether all bytes matched.
-func matchLen(a, b []byte) int {
- b = b[:len(a)]
- for i := 0; i < len(a)-7; i += 8 {
- if diff := load64(a, i) ^ load64(b, i); diff != 0 {
- return i + (bits.TrailingZeros64(diff) >> 3)
- }
- }
-
- checked := (len(a) >> 3) << 3
- a = a[checked:]
- b = b[checked:]
- for i := range a {
- if a[i] != b[i] {
- return i + checked
- }
- }
- return len(a) + checked
-}
-
func load3232(b []byte, i int32) uint32 {
- return binary.LittleEndian.Uint32(b[i:])
+ return le.Load32(b, i)
}
func load6432(b []byte, i int32) uint64 {
- return binary.LittleEndian.Uint64(b[i:])
-}
-
-func load64(b []byte, i int) uint64 {
- return binary.LittleEndian.Uint64(b[i:])
+ return le.Load64(b, i)
}
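
load3232 and load6432 now delegate to the internal le package, which provides little-endian loads with unsafe and safe variants selected by build tags. A sketch of the assumed safe fallback, with Indexer standing for a generic constraint over the integer index types used here (int and int32):

    func Load32[I Indexer](b []byte, i I) uint32 {
        return binary.LittleEndian.Uint32(b[i:])
    }

    func Load64[I Indexer](b []byte, i I) uint64 {
        return binary.LittleEndian.Uint64(b[i:])
    }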
type byter interface {