[VOL-5486] Update deprecated vendored dependency versions
Change-Id: I3e03ea246020547ae75fa92ce8cf5cbba7e8f3bb
Signed-off-by: Abhay Kumar <abhay.kumar@radisys.com>
diff --git a/vendor/github.com/klauspost/compress/huff0/bitreader.go b/vendor/github.com/klauspost/compress/huff0/bitreader.go
index 504a7be..bfc7a52 100644
--- a/vendor/github.com/klauspost/compress/huff0/bitreader.go
+++ b/vendor/github.com/klauspost/compress/huff0/bitreader.go
@@ -6,10 +6,11 @@
package huff0
import (
- "encoding/binary"
"errors"
"fmt"
"io"
+
+ "github.com/klauspost/compress/internal/le"
)
// bitReader reads a bitstream in reverse.
@@ -46,7 +47,7 @@
return nil
}
-// peekBitsFast requires that at least one bit is requested every time.
+// peekByteFast requires that at least one byte is requested every time.
// There are no checks if the buffer is filled.
func (b *bitReaderBytes) peekByteFast() uint8 {
got := uint8(b.value >> 56)
@@ -66,9 +67,7 @@
}
// 2 bounds checks.
- v := b.in[b.off-4 : b.off]
- v = v[:4]
- low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ low := le.Load32(b.in, b.off-4)
b.value |= uint64(low) << (b.bitsRead - 32)
b.bitsRead -= 32
b.off -= 4
@@ -77,7 +76,7 @@
// fillFastStart() assumes the bitReaderBytes is empty and there is at least 8 bytes to read.
func (b *bitReaderBytes) fillFastStart() {
// Do single re-slice to avoid bounds checks.
- b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.value = le.Load64(b.in, b.off-8)
b.bitsRead = 0
b.off -= 8
}
@@ -87,10 +86,8 @@
if b.bitsRead < 32 {
return
}
- if b.off > 4 {
- v := b.in[b.off-4:]
- v = v[:4]
- low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ if b.off >= 4 {
+ low := le.Load32(b.in, b.off-4)
b.value |= uint64(low) << (b.bitsRead - 32)
b.bitsRead -= 32
b.off -= 4
@@ -177,10 +174,7 @@
return
}
- // 2 bounds checks.
- v := b.in[b.off-4 : b.off]
- v = v[:4]
- low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ low := le.Load32(b.in, b.off-4)
b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
b.bitsRead -= 32
b.off -= 4
@@ -188,8 +182,7 @@
// fillFastStart() assumes the bitReaderShifted is empty and there is at least 8 bytes to read.
func (b *bitReaderShifted) fillFastStart() {
- // Do single re-slice to avoid bounds checks.
- b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.value = le.Load64(b.in, b.off-8)
b.bitsRead = 0
b.off -= 8
}
@@ -200,9 +193,7 @@
return
}
if b.off > 4 {
- v := b.in[b.off-4:]
- v = v[:4]
- low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ low := le.Load32(b.in, b.off-4)
b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
b.bitsRead -= 32
b.off -= 4
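Note on the new imports: the github.com/klauspost/compress/internal/le package is not part of this hunk. As a rough sketch (names taken from the calls above, exact signatures assumed), such little-endian load helpers boil down to:

    package le

    import "encoding/binary"

    // Load32 returns the little-endian uint32 starting at offset i.
    // Callers guarantee i+4 <= len(b).
    func Load32(b []byte, i uint) uint32 {
        return binary.LittleEndian.Uint32(b[i:])
    }

    // Load64 returns the little-endian uint64 starting at offset i.
    // Callers guarantee i+8 <= len(b).
    func Load64(b []byte, i uint) uint64 {
        return binary.LittleEndian.Uint64(b[i:])
    }

Also note the fill() guard loosened from b.off > 4 to b.off >= 4, so a remaining block of exactly four bytes is handled by the fast 32-bit load as well.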
diff --git a/vendor/github.com/klauspost/compress/huff0/bitwriter.go b/vendor/github.com/klauspost/compress/huff0/bitwriter.go
index ec71f7a..0ebc9aa 100644
--- a/vendor/github.com/klauspost/compress/huff0/bitwriter.go
+++ b/vendor/github.com/klauspost/compress/huff0/bitwriter.go
@@ -13,14 +13,6 @@
out []byte
}
-// bitMask16 is bitmasks. Has extra to avoid bounds check.
-var bitMask16 = [32]uint16{
- 0, 1, 3, 7, 0xF, 0x1F,
- 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
- 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF,
- 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
- 0xFFFF, 0xFFFF} /* up to 16 bits */
-
// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
@@ -60,6 +52,22 @@
b.nBits += encA.nBits + encB.nBits
}
+// encFourSymbols adds up to 32 bits from four symbols.
+// It will not check if there is space for them,
+// so the caller must ensure that b has been flushed recently.
+func (b *bitWriter) encFourSymbols(encA, encB, encC, encD cTableEntry) {
+ bitsA := encA.nBits
+ bitsB := bitsA + encB.nBits
+ bitsC := bitsB + encC.nBits
+ bitsD := bitsC + encD.nBits
+ combined := uint64(encA.val) |
+ (uint64(encB.val) << (bitsA & 63)) |
+ (uint64(encC.val) << (bitsB & 63)) |
+ (uint64(encD.val) << (bitsC & 63))
+ b.bitContainer |= combined << (b.nBits & 63)
+ b.nBits += bitsD
+}
+
// flush32 will flush out, so there are at least 32 bits available for writing.
func (b *bitWriter) flush32() {
if b.nBits < 32 {
@@ -86,10 +94,9 @@
// close will write the alignment bit and write the final byte(s)
// to the output.
-func (b *bitWriter) close() error {
+func (b *bitWriter) close() {
// End mark
b.addBits16Clean(1, 1)
// flush until next byte.
b.flushAlign()
- return nil
}
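For reference, the new encFourSymbols has the same effect as four consecutive addBits16Clean calls; a sketch of that long-hand form (using the cTableEntry fields referenced above) is:

    // Long-hand equivalent of encFourSymbols (sketch). Like the packed
    // version it does not check for space, so the caller must have
    // flushed recently enough that all four symbols fit.
    func (b *bitWriter) encFourSymbolsReference(encA, encB, encC, encD cTableEntry) {
        b.addBits16Clean(encA.val, encA.nBits)
        b.addBits16Clean(encB.val, encB.nBits)
        b.addBits16Clean(encC.val, encC.nBits)
        b.addBits16Clean(encD.val, encD.nBits)
    }

Packing all four shifts into one combined value updates b.bitContainer and b.nBits once instead of four times, keeping the intermediate bit counts in registers.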
diff --git a/vendor/github.com/klauspost/compress/huff0/bytereader.go b/vendor/github.com/klauspost/compress/huff0/bytereader.go
deleted file mode 100644
index 4dcab8d..0000000
--- a/vendor/github.com/klauspost/compress/huff0/bytereader.go
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright 2018 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
-
-package huff0
-
-// byteReader provides a byte reader that reads
-// little endian values from a byte stream.
-// The input stream is manually advanced.
-// The reader performs no bounds checks.
-type byteReader struct {
- b []byte
- off int
-}
-
-// init will initialize the reader and set the input.
-func (b *byteReader) init(in []byte) {
- b.b = in
- b.off = 0
-}
-
-// Int32 returns a little endian int32 starting at current offset.
-func (b byteReader) Int32() int32 {
- v3 := int32(b.b[b.off+3])
- v2 := int32(b.b[b.off+2])
- v1 := int32(b.b[b.off+1])
- v0 := int32(b.b[b.off])
- return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
-}
-
-// Uint32 returns a little endian uint32 starting at current offset.
-func (b byteReader) Uint32() uint32 {
- v3 := uint32(b.b[b.off+3])
- v2 := uint32(b.b[b.off+2])
- v1 := uint32(b.b[b.off+1])
- v0 := uint32(b.b[b.off])
- return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
-}
-
-// remain will return the number of bytes remaining.
-func (b byteReader) remain() int {
- return len(b.b) - b.off
-}
diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go
index 4d14542..84aa3d1 100644
--- a/vendor/github.com/klauspost/compress/huff0/compress.go
+++ b/vendor/github.com/klauspost/compress/huff0/compress.go
@@ -227,10 +227,10 @@
}
func (s *Scratch) compress1X(src []byte) ([]byte, error) {
- return s.compress1xDo(s.Out, src)
+ return s.compress1xDo(s.Out, src), nil
}
-func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
+func (s *Scratch) compress1xDo(dst, src []byte) []byte {
var bw = bitWriter{out: dst}
// N is length divisible by 4.
@@ -248,8 +248,7 @@
tmp := src[n : n+4]
// tmp should be len 4
bw.flush32()
- bw.encTwoSymbols(cTable, tmp[3], tmp[2])
- bw.encTwoSymbols(cTable, tmp[1], tmp[0])
+ bw.encFourSymbols(cTable[tmp[3]], cTable[tmp[2]], cTable[tmp[1]], cTable[tmp[0]])
}
} else {
for ; n >= 0; n -= 4 {
@@ -261,8 +260,8 @@
bw.encTwoSymbols(cTable, tmp[1], tmp[0])
}
}
- err := bw.close()
- return bw.out, err
+ bw.close()
+ return bw.out
}
var sixZeros [6]byte
@@ -284,12 +283,8 @@
}
src = src[len(toDo):]
- var err error
idx := len(s.Out)
- s.Out, err = s.compress1xDo(s.Out, toDo)
- if err != nil {
- return nil, err
- }
+ s.Out = s.compress1xDo(s.Out, toDo)
if len(s.Out)-idx > math.MaxUint16 {
// We cannot store the size in the jump table
return nil, ErrIncompressible
@@ -316,7 +311,6 @@
segmentSize := (len(src) + 3) / 4
var wg sync.WaitGroup
- var errs [4]error
wg.Add(4)
for i := 0; i < 4; i++ {
toDo := src
@@ -327,15 +321,12 @@
// Separate goroutine for each block.
go func(i int) {
- s.tmpOut[i], errs[i] = s.compress1xDo(s.tmpOut[i][:0], toDo)
+ s.tmpOut[i] = s.compress1xDo(s.tmpOut[i][:0], toDo)
wg.Done()
}(i)
}
wg.Wait()
for i := 0; i < 4; i++ {
- if errs[i] != nil {
- return nil, errs[i]
- }
o := s.tmpOut[i]
if len(o) > math.MaxUint16 {
// We cannot store the size in the jump table
@@ -359,35 +350,36 @@
// Does not update s.clearCount.
func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
reuse = true
+ _ = s.count // Assert that s != nil to speed up the following loop.
for _, v := range in {
s.count[v]++
}
m := uint32(0)
if len(s.prevTable) > 0 {
for i, v := range s.count[:] {
+ if v == 0 {
+ continue
+ }
if v > m {
m = v
}
- if v > 0 {
- s.symbolLen = uint16(i) + 1
- if i >= len(s.prevTable) {
- reuse = false
- } else {
- if s.prevTable[i].nBits == 0 {
- reuse = false
- }
- }
+ s.symbolLen = uint16(i) + 1
+ if i >= len(s.prevTable) {
+ reuse = false
+ } else if s.prevTable[i].nBits == 0 {
+ reuse = false
}
}
return int(m), reuse
}
for i, v := range s.count[:] {
+ if v == 0 {
+ continue
+ }
if v > m {
m = v
}
- if v > 0 {
- s.symbolLen = uint16(i) + 1
- }
+ s.symbolLen = uint16(i) + 1
}
return int(m), false
}
@@ -424,7 +416,7 @@
// minTableLog provides the minimum logSize to safely represent a distribution.
func (s *Scratch) minTableLog() uint8 {
- minBitsSrc := highBit32(uint32(s.br.remain())) + 1
+ minBitsSrc := highBit32(uint32(s.srcLen)) + 1
minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2
if minBitsSrc < minBitsSymbols {
return uint8(minBitsSrc)
@@ -436,7 +428,7 @@
func (s *Scratch) optimalTableLog() {
tableLog := s.TableLog
minBits := s.minTableLog()
- maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 1
+ maxBitsSrc := uint8(highBit32(uint32(s.srcLen-1))) - 1
if maxBitsSrc < tableLog {
// Accuracy can be reduced
tableLog = maxBitsSrc
@@ -484,34 +476,35 @@
// Different from reference implementation.
huffNode0 := s.nodes[0 : huffNodesLen+1]
- for huffNode[nonNullRank].count == 0 {
+ for huffNode[nonNullRank].count() == 0 {
nonNullRank--
}
lowS := int16(nonNullRank)
nodeRoot := nodeNb + lowS - 1
lowN := nodeNb
- huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count
- huffNode[lowS].parent, huffNode[lowS-1].parent = uint16(nodeNb), uint16(nodeNb)
+ huffNode[nodeNb].setCount(huffNode[lowS].count() + huffNode[lowS-1].count())
+ huffNode[lowS].setParent(nodeNb)
+ huffNode[lowS-1].setParent(nodeNb)
nodeNb++
lowS -= 2
for n := nodeNb; n <= nodeRoot; n++ {
- huffNode[n].count = 1 << 30
+ huffNode[n].setCount(1 << 30)
}
// fake entry, strong barrier
- huffNode0[0].count = 1 << 31
+ huffNode0[0].setCount(1 << 31)
// create parents
for nodeNb <= nodeRoot {
var n1, n2 int16
- if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+ if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() {
n1 = lowS
lowS--
} else {
n1 = lowN
lowN++
}
- if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+ if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() {
n2 = lowS
lowS--
} else {
@@ -519,18 +512,19 @@
lowN++
}
- huffNode[nodeNb].count = huffNode0[n1+1].count + huffNode0[n2+1].count
- huffNode0[n1+1].parent, huffNode0[n2+1].parent = uint16(nodeNb), uint16(nodeNb)
+ huffNode[nodeNb].setCount(huffNode0[n1+1].count() + huffNode0[n2+1].count())
+ huffNode0[n1+1].setParent(nodeNb)
+ huffNode0[n2+1].setParent(nodeNb)
nodeNb++
}
// distribute weights (unlimited tree height)
- huffNode[nodeRoot].nbBits = 0
+ huffNode[nodeRoot].setNbBits(0)
for n := nodeRoot - 1; n >= startNode; n-- {
- huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+ huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1)
}
for n := uint16(0); n <= nonNullRank; n++ {
- huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+ huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1)
}
s.actualTableLog = s.setMaxHeight(int(nonNullRank))
maxNbBits := s.actualTableLog
@@ -542,7 +536,7 @@
var nbPerRank [tableLogMax + 1]uint16
var valPerRank [16]uint16
for _, v := range huffNode[:nonNullRank+1] {
- nbPerRank[v.nbBits]++
+ nbPerRank[v.nbBits()]++
}
// determine stating value per rank
{
@@ -557,7 +551,7 @@
// push nbBits per symbol, symbol order
for _, v := range huffNode[:nonNullRank+1] {
- s.cTable[v.symbol].nBits = v.nbBits
+ s.cTable[v.symbol()].nBits = v.nbBits()
}
// assign value within rank, symbol order
@@ -603,12 +597,12 @@
pos := rank[r].current
rank[r].current++
prev := nodes[(pos-1)&huffNodesMask]
- for pos > rank[r].base && c > prev.count {
+ for pos > rank[r].base && c > prev.count() {
nodes[pos&huffNodesMask] = prev
pos--
prev = nodes[(pos-1)&huffNodesMask]
}
- nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)}
+ nodes[pos&huffNodesMask] = makeNodeElt(c, byte(n))
}
}
@@ -617,7 +611,7 @@
huffNode := s.nodes[1 : huffNodesLen+1]
//huffNode = huffNode[: huffNodesLen]
- largestBits := huffNode[lastNonNull].nbBits
+ largestBits := huffNode[lastNonNull].nbBits()
// early exit : no elt > maxNbBits
if largestBits <= maxNbBits {
@@ -627,14 +621,14 @@
baseCost := int(1) << (largestBits - maxNbBits)
n := uint32(lastNonNull)
- for huffNode[n].nbBits > maxNbBits {
- totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits))
- huffNode[n].nbBits = maxNbBits
+ for huffNode[n].nbBits() > maxNbBits {
+ totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits()))
+ huffNode[n].setNbBits(maxNbBits)
n--
}
// n stops at huffNode[n].nbBits <= maxNbBits
- for huffNode[n].nbBits == maxNbBits {
+ for huffNode[n].nbBits() == maxNbBits {
n--
}
// n end at index of smallest symbol using < maxNbBits
@@ -655,10 +649,10 @@
{
currentNbBits := maxNbBits
for pos := int(n); pos >= 0; pos-- {
- if huffNode[pos].nbBits >= currentNbBits {
+ if huffNode[pos].nbBits() >= currentNbBits {
continue
}
- currentNbBits = huffNode[pos].nbBits // < maxNbBits
+ currentNbBits = huffNode[pos].nbBits() // < maxNbBits
rankLast[maxNbBits-currentNbBits] = uint32(pos)
}
}
@@ -675,8 +669,8 @@
if lowPos == noSymbol {
break
}
- highTotal := huffNode[highPos].count
- lowTotal := 2 * huffNode[lowPos].count
+ highTotal := huffNode[highPos].count()
+ lowTotal := 2 * huffNode[lowPos].count()
if highTotal <= lowTotal {
break
}
@@ -692,13 +686,14 @@
// this rank is no longer empty
rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]
}
- huffNode[rankLast[nBitsToDecrease]].nbBits++
+ huffNode[rankLast[nBitsToDecrease]].setNbBits(1 +
+ huffNode[rankLast[nBitsToDecrease]].nbBits())
if rankLast[nBitsToDecrease] == 0 {
/* special case, reached largest symbol */
rankLast[nBitsToDecrease] = noSymbol
} else {
rankLast[nBitsToDecrease]--
- if huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease {
+ if huffNode[rankLast[nBitsToDecrease]].nbBits() != maxNbBits-nBitsToDecrease {
rankLast[nBitsToDecrease] = noSymbol /* this rank is now empty */
}
}
@@ -706,15 +701,15 @@
for totalCost < 0 { /* Sometimes, cost correction overshoot */
if rankLast[1] == noSymbol { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
- for huffNode[n].nbBits == maxNbBits {
+ for huffNode[n].nbBits() == maxNbBits {
n--
}
- huffNode[n+1].nbBits--
+ huffNode[n+1].setNbBits(huffNode[n+1].nbBits() - 1)
rankLast[1] = n + 1
totalCost++
continue
}
- huffNode[rankLast[1]+1].nbBits--
+ huffNode[rankLast[1]+1].setNbBits(huffNode[rankLast[1]+1].nbBits() - 1)
rankLast[1]++
totalCost++
}
@@ -722,9 +717,26 @@
return maxNbBits
}
-type nodeElt struct {
- count uint32
- parent uint16
- symbol byte
- nbBits uint8
+// A nodeElt is the fields
+//
+// count uint32
+// parent uint16
+// symbol byte
+// nbBits uint8
+//
+// in some order, all squashed into an integer so that the compiler
+// always loads and stores entire nodeElts instead of separate fields.
+type nodeElt uint64
+
+func makeNodeElt(count uint32, symbol byte) nodeElt {
+ return nodeElt(count) | nodeElt(symbol)<<48
}
+
+func (e *nodeElt) count() uint32 { return uint32(*e) }
+func (e *nodeElt) parent() uint16 { return uint16(*e >> 32) }
+func (e *nodeElt) symbol() byte { return byte(*e >> 48) }
+func (e *nodeElt) nbBits() uint8 { return uint8(*e >> 56) }
+
+func (e *nodeElt) setCount(c uint32) { *e = (*e)&0xffffffff00000000 | nodeElt(c) }
+func (e *nodeElt) setParent(p int16) { *e = (*e)&0xffff0000ffffffff | nodeElt(uint16(p))<<32 }
+func (e *nodeElt) setNbBits(n uint8) { *e = (*e)&0x00ffffffffffffff | nodeElt(n)<<56 }
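The packed layout is count in bits 0-31, parent in bits 32-47, symbol in bits 48-55 and nbBits in bits 56-63, so each node is read and written as one 64-bit value. A small round-trip sketch using the accessors above (symbol is only set at construction, hence no setter):

    // Round-trip sketch for the packed nodeElt layout.
    func nodeEltExample() {
        e := makeNodeElt(1234, 0x5a) // count = 1234, symbol = 0x5a
        e.setParent(7)
        e.setNbBits(11)
        _, _, _, _ = e.count(), e.parent(), e.symbol(), e.nbBits() // 1234, 7, 0x5a, 11
    }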
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go
index c0c48bd..0f56b02 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@@ -61,7 +61,7 @@
b, err := fse.Decompress(in[:iSize], s.fse)
s.fse.Out = nil
if err != nil {
- return s, nil, err
+ return s, nil, fmt.Errorf("fse decompress returned: %w", err)
}
if len(b) > 255 {
return s, nil, errors.New("corrupt input: output table too large")
@@ -253,7 +253,7 @@
switch d.actualTableLog {
case 8:
- const shift = 8 - 8
+ const shift = 0
for br.off >= 4 {
br.fillFast()
v := dt[uint8(br.value>>(56+shift))]
@@ -763,17 +763,20 @@
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 1")
}
- copy(out, buf[0][:])
- copy(out[dstEvery:], buf[1][:])
- copy(out[dstEvery*2:], buf[2][:])
- copy(out[dstEvery*3:], buf[3][:])
- out = out[bufoff:]
- decoded += bufoff * 4
// There must at least be 3 buffers left.
- if len(out) < dstEvery*3 {
+ if len(out)-bufoff < dstEvery*3 {
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 2")
}
+ //copy(out, buf[0][:])
+ //copy(out[dstEvery:], buf[1][:])
+ //copy(out[dstEvery*2:], buf[2][:])
+ *(*[bufoff]byte)(out) = buf[0]
+ *(*[bufoff]byte)(out[dstEvery:]) = buf[1]
+ *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
+ *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
+ out = out[bufoff:]
+ decoded += bufoff * 4
}
}
if off > 0 {
@@ -997,17 +1000,22 @@
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 1")
}
- copy(out, buf[0][:])
- copy(out[dstEvery:], buf[1][:])
- copy(out[dstEvery*2:], buf[2][:])
- copy(out[dstEvery*3:], buf[3][:])
- out = out[bufoff:]
- decoded += bufoff * 4
// There must at least be 3 buffers left.
- if len(out) < dstEvery*3 {
+ if len(out)-bufoff < dstEvery*3 {
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 2")
}
+
+ //copy(out, buf[0][:])
+ //copy(out[dstEvery:], buf[1][:])
+ //copy(out[dstEvery*2:], buf[2][:])
+ // copy(out[dstEvery*3:], buf[3][:])
+ *(*[bufoff]byte)(out) = buf[0]
+ *(*[bufoff]byte)(out[dstEvery:]) = buf[1]
+ *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
+ *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
+ out = out[bufoff:]
+ decoded += bufoff * 4
}
}
if off > 0 {
@@ -1128,7 +1136,7 @@
errs++
}
if errs > 0 {
- fmt.Fprintf(w, "%d errros in base, stopping\n", errs)
+ fmt.Fprintf(w, "%d errors in base, stopping\n", errs)
continue
}
// Ensure that all combinations are covered.
@@ -1144,7 +1152,7 @@
errs++
}
if errs > 20 {
- fmt.Fprintf(w, "%d errros, stopping\n", errs)
+ fmt.Fprintf(w, "%d errors, stopping\n", errs)
break
}
}
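The copy change in the two hunks above relies on Go 1.17's slice-to-array-pointer conversion: *(*[bufoff]byte)(out) = buf[0] gives the compiler a fixed-size copy and panics if len(out) < bufoff, which is why the stream-overrun length check now runs before the copies instead of after. A minimal illustration of the pattern (size 4 chosen just for the example):

    // Fixed-size copy via slice-to-array-pointer conversion (Go 1.17+).
    // The conversion panics if len(dst) < 4, so callers check the
    // length first, exactly as the decoder does above.
    func copyFirst4(dst []byte, src [4]byte) {
        *(*[4]byte)(dst) = src
    }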
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
index 9f3e9f7..ba7e8e6 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
@@ -14,12 +14,14 @@
// decompress4x_main_loop_x86 is an x86 assembler implementation
// of Decompress4X when tablelog > 8.
+//
//go:noescape
func decompress4x_main_loop_amd64(ctx *decompress4xContext)
// decompress4x_8b_loop_x86 is an x86 assembler implementation
// of Decompress4X when tablelog <= 8 which decodes 4 entries
// per loop.
+//
//go:noescape
func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
@@ -145,11 +147,13 @@
// decompress4x_main_loop_x86 is an x86 assembler implementation
// of Decompress1X when tablelog > 8.
+//
//go:noescape
func decompress1x_main_loop_amd64(ctx *decompress1xContext)
// decompress4x_main_loop_x86 is an x86 with BMI2 assembler implementation
// of Decompress1X when tablelog > 8.
+//
//go:noescape
func decompress1x_main_loop_bmi2(ctx *decompress1xContext)
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
index dd1a5ae..c4c7ab2 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
@@ -1,364 +1,352 @@
// Code generated by command: go run gen.go -out ../decompress_amd64.s -pkg=huff0. DO NOT EDIT.
//go:build amd64 && !appengine && !noasm && gc
-// +build amd64,!appengine,!noasm,gc
// func decompress4x_main_loop_amd64(ctx *decompress4xContext)
TEXT ·decompress4x_main_loop_amd64(SB), $0-8
- XORQ DX, DX
-
// Preload values
MOVQ ctx+0(FP), AX
MOVBQZX 8(AX), DI
- MOVQ 16(AX), SI
- MOVQ 48(AX), BX
- MOVQ 24(AX), R9
- MOVQ 32(AX), R10
- MOVQ (AX), R11
+ MOVQ 16(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 24(AX), R8
+ MOVQ 32(AX), R9
+ MOVQ (AX), R10
// Main loop
main_loop:
- MOVQ SI, R8
- CMPQ R8, BX
+ XORL DX, DX
+ CMPQ BX, SI
SETGE DL
// br0.fillFast32()
- MOVQ 32(R11), R12
- MOVBQZX 40(R11), R13
- CMPQ R13, $0x20
+ MOVQ 32(R10), R11
+ MOVBQZX 40(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill0
- MOVQ 24(R11), AX
- SUBQ $0x20, R13
+ MOVQ 24(R10), AX
+ SUBQ $0x20, R12
SUBQ $0x04, AX
- MOVQ (R11), R14
+ MOVQ (R10), R13
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (AX)(R14*1), R14
- MOVQ R13, CX
- SHLQ CL, R14
- MOVQ AX, 24(R11)
- ORQ R14, R12
+ MOVL (AX)(R13*1), R13
+ MOVQ R12, CX
+ SHLQ CL, R13
+ MOVQ AX, 24(R10)
+ ORQ R13, R11
- // exhausted = exhausted || (br0.off < 4)
- CMPQ AX, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br0.off < 4)
+ CMPQ AX, $0x04
+ ADCB $+0, DL
skip_fill0:
// val0 := br0.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br0.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br0.peekTopBits(peekBits)
MOVQ DI, CX
- MOVQ R12, R14
- SHRQ CL, R14
+ MOVQ R11, R13
+ SHRQ CL, R13
// v1 := table[val1&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br0.advance(uint8(v1.entry))
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- MOVW AX, (R8)
+ MOVW AX, (BX)
// update the bitreader structure
- MOVQ R12, 32(R11)
- MOVB R13, 40(R11)
- ADDQ R9, R8
+ MOVQ R11, 32(R10)
+ MOVB R12, 40(R10)
// br1.fillFast32()
- MOVQ 80(R11), R12
- MOVBQZX 88(R11), R13
- CMPQ R13, $0x20
+ MOVQ 80(R10), R11
+ MOVBQZX 88(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill1
- MOVQ 72(R11), AX
- SUBQ $0x20, R13
+ MOVQ 72(R10), AX
+ SUBQ $0x20, R12
SUBQ $0x04, AX
- MOVQ 48(R11), R14
+ MOVQ 48(R10), R13
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (AX)(R14*1), R14
- MOVQ R13, CX
- SHLQ CL, R14
- MOVQ AX, 72(R11)
- ORQ R14, R12
+ MOVL (AX)(R13*1), R13
+ MOVQ R12, CX
+ SHLQ CL, R13
+ MOVQ AX, 72(R10)
+ ORQ R13, R11
- // exhausted = exhausted || (br1.off < 4)
- CMPQ AX, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br1.off < 4)
+ CMPQ AX, $0x04
+ ADCB $+0, DL
skip_fill1:
// val0 := br1.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br1.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br1.peekTopBits(peekBits)
MOVQ DI, CX
- MOVQ R12, R14
- SHRQ CL, R14
+ MOVQ R11, R13
+ SHRQ CL, R13
// v1 := table[val1&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br1.advance(uint8(v1.entry))
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- MOVW AX, (R8)
+ MOVW AX, (BX)(R8*1)
// update the bitreader structure
- MOVQ R12, 80(R11)
- MOVB R13, 88(R11)
- ADDQ R9, R8
+ MOVQ R11, 80(R10)
+ MOVB R12, 88(R10)
// br2.fillFast32()
- MOVQ 128(R11), R12
- MOVBQZX 136(R11), R13
- CMPQ R13, $0x20
+ MOVQ 128(R10), R11
+ MOVBQZX 136(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill2
- MOVQ 120(R11), AX
- SUBQ $0x20, R13
+ MOVQ 120(R10), AX
+ SUBQ $0x20, R12
SUBQ $0x04, AX
- MOVQ 96(R11), R14
+ MOVQ 96(R10), R13
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (AX)(R14*1), R14
- MOVQ R13, CX
- SHLQ CL, R14
- MOVQ AX, 120(R11)
- ORQ R14, R12
+ MOVL (AX)(R13*1), R13
+ MOVQ R12, CX
+ SHLQ CL, R13
+ MOVQ AX, 120(R10)
+ ORQ R13, R11
- // exhausted = exhausted || (br2.off < 4)
- CMPQ AX, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br2.off < 4)
+ CMPQ AX, $0x04
+ ADCB $+0, DL
skip_fill2:
// val0 := br2.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br2.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br2.peekTopBits(peekBits)
MOVQ DI, CX
- MOVQ R12, R14
- SHRQ CL, R14
+ MOVQ R11, R13
+ SHRQ CL, R13
// v1 := table[val1&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br2.advance(uint8(v1.entry))
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- MOVW AX, (R8)
+ MOVW AX, (BX)(R8*2)
// update the bitreader structure
- MOVQ R12, 128(R11)
- MOVB R13, 136(R11)
- ADDQ R9, R8
+ MOVQ R11, 128(R10)
+ MOVB R12, 136(R10)
// br3.fillFast32()
- MOVQ 176(R11), R12
- MOVBQZX 184(R11), R13
- CMPQ R13, $0x20
+ MOVQ 176(R10), R11
+ MOVBQZX 184(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill3
- MOVQ 168(R11), AX
- SUBQ $0x20, R13
+ MOVQ 168(R10), AX
+ SUBQ $0x20, R12
SUBQ $0x04, AX
- MOVQ 144(R11), R14
+ MOVQ 144(R10), R13
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (AX)(R14*1), R14
- MOVQ R13, CX
- SHLQ CL, R14
- MOVQ AX, 168(R11)
- ORQ R14, R12
+ MOVL (AX)(R13*1), R13
+ MOVQ R12, CX
+ SHLQ CL, R13
+ MOVQ AX, 168(R10)
+ ORQ R13, R11
- // exhausted = exhausted || (br3.off < 4)
- CMPQ AX, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br3.off < 4)
+ CMPQ AX, $0x04
+ ADCB $+0, DL
skip_fill3:
// val0 := br3.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br3.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br3.peekTopBits(peekBits)
MOVQ DI, CX
- MOVQ R12, R14
- SHRQ CL, R14
+ MOVQ R11, R13
+ SHRQ CL, R13
// v1 := table[val1&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br3.advance(uint8(v1.entry))
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- MOVW AX, (R8)
+ LEAQ (R8)(R8*2), CX
+ MOVW AX, (BX)(CX*1)
// update the bitreader structure
- MOVQ R12, 176(R11)
- MOVB R13, 184(R11)
- ADDQ $0x02, SI
+ MOVQ R11, 176(R10)
+ MOVB R12, 184(R10)
+ ADDQ $0x02, BX
TESTB DL, DL
JZ main_loop
MOVQ ctx+0(FP), AX
- SUBQ 16(AX), SI
- SHLQ $0x02, SI
- MOVQ SI, 40(AX)
+ SUBQ 16(AX), BX
+ SHLQ $0x02, BX
+ MOVQ BX, 40(AX)
RET
// func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8
- XORQ DX, DX
-
// Preload values
MOVQ ctx+0(FP), CX
MOVBQZX 8(CX), DI
MOVQ 16(CX), BX
MOVQ 48(CX), SI
- MOVQ 24(CX), R9
- MOVQ 32(CX), R10
- MOVQ (CX), R11
+ MOVQ 24(CX), R8
+ MOVQ 32(CX), R9
+ MOVQ (CX), R10
// Main loop
main_loop:
- MOVQ BX, R8
- CMPQ R8, SI
+ XORL DX, DX
+ CMPQ BX, SI
SETGE DL
// br0.fillFast32()
- MOVQ 32(R11), R12
- MOVBQZX 40(R11), R13
- CMPQ R13, $0x20
+ MOVQ 32(R10), R11
+ MOVBQZX 40(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill0
- MOVQ 24(R11), R14
- SUBQ $0x20, R13
- SUBQ $0x04, R14
- MOVQ (R11), R15
+ MOVQ 24(R10), R13
+ SUBQ $0x20, R12
+ SUBQ $0x04, R13
+ MOVQ (R10), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (R14)(R15*1), R15
- MOVQ R13, CX
- SHLQ CL, R15
- MOVQ R14, 24(R11)
- ORQ R15, R12
+ MOVL (R13)(R14*1), R14
+ MOVQ R12, CX
+ SHLQ CL, R14
+ MOVQ R13, 24(R10)
+ ORQ R14, R11
- // exhausted = exhausted || (br0.off < 4)
- CMPQ R14, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br0.off < 4)
+ CMPQ R13, $0x04
+ ADCB $+0, DL
skip_fill0:
// val0 := br0.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br0.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br0.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v1 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br0.advance(uint8(v1.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// val2 := br0.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v2 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br0.advance(uint8(v2.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val3 := br0.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v3 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br0.advance(uint8(v3.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// these four writes get coalesced
@@ -366,88 +354,86 @@
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
- MOVL AX, (R8)
+ MOVL AX, (BX)
// update the bitreader structure
- MOVQ R12, 32(R11)
- MOVB R13, 40(R11)
- ADDQ R9, R8
+ MOVQ R11, 32(R10)
+ MOVB R12, 40(R10)
// br1.fillFast32()
- MOVQ 80(R11), R12
- MOVBQZX 88(R11), R13
- CMPQ R13, $0x20
+ MOVQ 80(R10), R11
+ MOVBQZX 88(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill1
- MOVQ 72(R11), R14
- SUBQ $0x20, R13
- SUBQ $0x04, R14
- MOVQ 48(R11), R15
+ MOVQ 72(R10), R13
+ SUBQ $0x20, R12
+ SUBQ $0x04, R13
+ MOVQ 48(R10), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (R14)(R15*1), R15
- MOVQ R13, CX
- SHLQ CL, R15
- MOVQ R14, 72(R11)
- ORQ R15, R12
+ MOVL (R13)(R14*1), R14
+ MOVQ R12, CX
+ SHLQ CL, R14
+ MOVQ R13, 72(R10)
+ ORQ R14, R11
- // exhausted = exhausted || (br1.off < 4)
- CMPQ R14, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br1.off < 4)
+ CMPQ R13, $0x04
+ ADCB $+0, DL
skip_fill1:
// val0 := br1.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br1.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br1.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v1 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br1.advance(uint8(v1.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// val2 := br1.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v2 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br1.advance(uint8(v2.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val3 := br1.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v3 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br1.advance(uint8(v3.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// these four writes get coalesced
@@ -455,88 +441,86 @@
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
- MOVL AX, (R8)
+ MOVL AX, (BX)(R8*1)
// update the bitreader structure
- MOVQ R12, 80(R11)
- MOVB R13, 88(R11)
- ADDQ R9, R8
+ MOVQ R11, 80(R10)
+ MOVB R12, 88(R10)
// br2.fillFast32()
- MOVQ 128(R11), R12
- MOVBQZX 136(R11), R13
- CMPQ R13, $0x20
+ MOVQ 128(R10), R11
+ MOVBQZX 136(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill2
- MOVQ 120(R11), R14
- SUBQ $0x20, R13
- SUBQ $0x04, R14
- MOVQ 96(R11), R15
+ MOVQ 120(R10), R13
+ SUBQ $0x20, R12
+ SUBQ $0x04, R13
+ MOVQ 96(R10), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (R14)(R15*1), R15
- MOVQ R13, CX
- SHLQ CL, R15
- MOVQ R14, 120(R11)
- ORQ R15, R12
+ MOVL (R13)(R14*1), R14
+ MOVQ R12, CX
+ SHLQ CL, R14
+ MOVQ R13, 120(R10)
+ ORQ R14, R11
- // exhausted = exhausted || (br2.off < 4)
- CMPQ R14, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br2.off < 4)
+ CMPQ R13, $0x04
+ ADCB $+0, DL
skip_fill2:
// val0 := br2.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br2.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br2.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v1 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br2.advance(uint8(v1.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// val2 := br2.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v2 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br2.advance(uint8(v2.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val3 := br2.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v3 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br2.advance(uint8(v3.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// these four writes get coalesced
@@ -544,88 +528,86 @@
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
- MOVL AX, (R8)
+ MOVL AX, (BX)(R8*2)
// update the bitreader structure
- MOVQ R12, 128(R11)
- MOVB R13, 136(R11)
- ADDQ R9, R8
+ MOVQ R11, 128(R10)
+ MOVB R12, 136(R10)
// br3.fillFast32()
- MOVQ 176(R11), R12
- MOVBQZX 184(R11), R13
- CMPQ R13, $0x20
+ MOVQ 176(R10), R11
+ MOVBQZX 184(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill3
- MOVQ 168(R11), R14
- SUBQ $0x20, R13
- SUBQ $0x04, R14
- MOVQ 144(R11), R15
+ MOVQ 168(R10), R13
+ SUBQ $0x20, R12
+ SUBQ $0x04, R13
+ MOVQ 144(R10), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (R14)(R15*1), R15
- MOVQ R13, CX
- SHLQ CL, R15
- MOVQ R14, 168(R11)
- ORQ R15, R12
+ MOVL (R13)(R14*1), R14
+ MOVQ R12, CX
+ SHLQ CL, R14
+ MOVQ R13, 168(R10)
+ ORQ R14, R11
- // exhausted = exhausted || (br3.off < 4)
- CMPQ R14, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br3.off < 4)
+ CMPQ R13, $0x04
+ ADCB $+0, DL
skip_fill3:
// val0 := br3.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br3.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br3.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v1 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br3.advance(uint8(v1.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// val2 := br3.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v2 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br3.advance(uint8(v2.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val3 := br3.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v3 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br3.advance(uint8(v3.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// these four writes get coalesced
@@ -633,11 +615,12 @@
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
- MOVL AX, (R8)
+ LEAQ (R8)(R8*2), CX
+ MOVL AX, (BX)(CX*1)
// update the bitreader structure
- MOVQ R12, 176(R11)
- MOVB R13, 184(R11)
+ MOVQ R11, 176(R10)
+ MOVB R12, 184(R10)
ADDQ $0x04, BX
TESTB DL, DL
JZ main_loop
@@ -653,7 +636,7 @@
MOVQ 16(CX), DX
MOVQ 24(CX), BX
CMPQ BX, $0x04
- JB error_max_decoded_size_exeeded
+ JB error_max_decoded_size_exceeded
LEAQ (DX)(BX*1), BX
MOVQ (CX), SI
MOVQ (SI), R8
@@ -668,7 +651,7 @@
// Check if we have room for 4 bytes in the output buffer
LEAQ 4(DX), CX
CMPQ CX, BX
- JGE error_max_decoded_size_exeeded
+ JGE error_max_decoded_size_exceeded
// Decode 4 values
CMPQ R11, $0x20
@@ -745,7 +728,7 @@
RET
// Report error
-error_max_decoded_size_exeeded:
+error_max_decoded_size_exceeded:
MOVQ ctx+0(FP), AX
MOVQ $-1, CX
MOVQ CX, 40(AX)
@@ -758,7 +741,7 @@
MOVQ 16(CX), DX
MOVQ 24(CX), BX
CMPQ BX, $0x04
- JB error_max_decoded_size_exeeded
+ JB error_max_decoded_size_exceeded
LEAQ (DX)(BX*1), BX
MOVQ (CX), SI
MOVQ (SI), R8
@@ -773,7 +756,7 @@
// Check if we have room for 4 bytes in the output buffer
LEAQ 4(DX), CX
CMPQ CX, BX
- JGE error_max_decoded_size_exeeded
+ JGE error_max_decoded_size_exceeded
// Decode 4 values
CMPQ R11, $0x20
@@ -840,7 +823,7 @@
RET
// Report error
-error_max_decoded_size_exeeded:
+error_max_decoded_size_exceeded:
MOVQ ctx+0(FP), AX
MOVQ $-1, CX
MOVQ CX, 40(AX)
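In the assembler changes above, the per-stream exhaustion test now uses the carry flag: CMPQ off, $0x04 sets CF exactly when off < 4, and ADCB $+0, DL adds that carry into DL, replacing the old SETLT/ORB pair. The flag effectively becomes a counter, but the loop-exit test (TESTB DL, DL) is unchanged; a sketch of the value it accumulates, in Go terms:

    // What the CMPQ/ADCB pair accumulates (sketch): exhausted ends up
    // non-zero once any stream has fewer than four bytes left, which is
    // all the JZ main_loop test needs.
    func accumulateExhausted(exhausted uint8, off uint) uint8 {
        if off < 4 {
            exhausted++
        }
        return exhausted
    }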
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_generic.go b/vendor/github.com/klauspost/compress/huff0/decompress_generic.go
index 4f6f37c..908c17d 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress_generic.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_generic.go
@@ -122,17 +122,21 @@
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 1")
}
- copy(out, buf[0][:])
- copy(out[dstEvery:], buf[1][:])
- copy(out[dstEvery*2:], buf[2][:])
- copy(out[dstEvery*3:], buf[3][:])
- out = out[bufoff:]
- decoded += bufoff * 4
// There must at least be 3 buffers left.
- if len(out) < dstEvery*3 {
+ if len(out)-bufoff < dstEvery*3 {
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 2")
}
+ //copy(out, buf[0][:])
+ //copy(out[dstEvery:], buf[1][:])
+ //copy(out[dstEvery*2:], buf[2][:])
+ //copy(out[dstEvery*3:], buf[3][:])
+ *(*[bufoff]byte)(out) = buf[0]
+ *(*[bufoff]byte)(out[dstEvery:]) = buf[1]
+ *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
+ *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
+ out = out[bufoff:]
+ decoded += bufoff * 4
}
}
if off > 0 {
diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go
index e8ad17a..77ecd68 100644
--- a/vendor/github.com/klauspost/compress/huff0/huff0.go
+++ b/vendor/github.com/klauspost/compress/huff0/huff0.go
@@ -88,7 +88,7 @@
// Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded.
MaxDecodedSize int
- br byteReader
+ srcLen int
// MaxSymbolValue will override the maximum symbol value of the next block.
MaxSymbolValue uint8
@@ -170,7 +170,7 @@
if s.fse == nil {
s.fse = &fse.Scratch{}
}
- s.br.init(in)
+ s.srcLen = len(in)
return s, nil
}
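With bytereader.go removed, the byteReader kept by Scratch was only ever consulted for the remaining input length while sizing the table, which at that point is effectively just len(in); srcLen now records that directly. The table-log sizing in compress.go then works off srcLen, e.g. (sketch mirroring minTableLog above, with illustrative numbers; highBit32 is the index of the highest set bit):

    // For a 1000-byte input with 64 distinct symbols:
    //   minBitsSrc     = highBit32(1000) + 1 = 9 + 1 = 10
    //   minBitsSymbols = highBit32(63)   + 2 = 5 + 2 = 7
    // so the minimum usable table log is 7.
    func minTableLogSketch(srcLen int, symbolLen uint16) uint8 {
        minBitsSrc := highBit32(uint32(srcLen)) + 1
        minBitsSymbols := highBit32(uint32(symbolLen-1)) + 2
        if minBitsSrc < minBitsSymbols {
            return uint8(minBitsSrc)
        }
        return uint8(minBitsSymbols)
    }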