package flate

import (
	"io"
	"math"
	"sync"

	"github.com/klauspost/compress/internal/le"
)

const (
	maxStatelessBlock = math.MaxInt16
	// The dictionary is taken from the tail of the previous block
	// (at most maxStatelessBlock bytes), so limit its size.
	maxStatelessDict = 8 << 10

	slTableBits  = 13
	slTableSize  = 1 << slTableBits
	slTableShift = 32 - slTableBits
)

type statelessWriter struct {
	dst    io.Writer
	closed bool
}

func (s *statelessWriter) Close() error {
	if s.closed {
		return nil
	}
	s.closed = true
	// Emit EOF block
	return StatelessDeflate(s.dst, nil, true, nil)
}

func (s *statelessWriter) Write(p []byte) (n int, err error) {
	err = StatelessDeflate(s.dst, p, false, nil)
	if err != nil {
		return 0, err
	}
	return len(p), nil
}

func (s *statelessWriter) Reset(w io.Writer) {
	s.dst = w
	s.closed = false
}

// NewStatelessWriter will do compression without maintaining any state
// between Write calls.
// No memory is kept between Write calls, so both compression ratio and
// speed will be suboptimal.
// Because of this, the size of each Write call will affect the output size.
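//
// A minimal usage sketch, assuming dst is an io.Writer and data is a
// []byte (both names are illustrative only):
//
//	w := NewStatelessWriter(dst)
//	_, _ = w.Write(data)
//	_ = w.Close() // emits the final EOF block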
func NewStatelessWriter(dst io.Writer) io.WriteCloser {
	return &statelessWriter{dst: dst}
}

// bitWriterPool contains bit writers that can be reused.
var bitWriterPool = sync.Pool{
	New: func() interface{} {
		return newHuffmanBitWriter(nil)
	},
}

// StatelessDeflate allows compressing directly to a Writer without retaining state.
// Everything will be flushed before this function returns.
// Up to 8KB of an optional dictionary can be given; it is presumed to precede the block.
// Longer dictionaries will be truncated and will still produce valid output.
// Sending a nil dictionary is perfectly fine.
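//
// A minimal sketch of single-shot use with a dictionary (out, data and
// dict are illustrative names, not part of this package):
//
//	if err := StatelessDeflate(out, data, true, dict); err != nil {
//		// handle the write error
//	}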
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
	var dst tokens
	bw := bitWriterPool.Get().(*huffmanBitWriter)
	bw.reset(out)
	defer func() {
		// don't keep a reference to our output
		bw.reset(nil)
		bitWriterPool.Put(bw)
	}()
	if eof && len(in) == 0 {
		// Just write an EOF block.
		// Could be faster...
		bw.writeStoredHeader(0, true)
		bw.flush()
		return bw.err
	}

	// Truncate dict
	if len(dict) > maxStatelessDict {
		dict = dict[len(dict)-maxStatelessDict:]
	}

	// For subsequent iterations, keep a shallow dict reference to avoid alloc+copy.
	var inDict []byte

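	// Split the input into blocks of at most maxStatelessBlock bytes;
	// the dictionary prepended to each block counts toward that limit.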
	for len(in) > 0 {
		todo := in
		if len(inDict) > 0 {
			if len(todo) > maxStatelessBlock-maxStatelessDict {
				todo = todo[:maxStatelessBlock-maxStatelessDict]
			}
		} else if len(todo) > maxStatelessBlock-len(dict) {
			todo = todo[:maxStatelessBlock-len(dict)]
		}
		inOrg := in
		in = in[len(todo):]
		uncompressed := todo
		if len(dict) > 0 {
			// combine dict and source
			bufLen := len(todo) + len(dict)
			combined := make([]byte, bufLen)
			copy(combined, dict)
			copy(combined[len(dict):], todo)
			todo = combined
		}
		// Compress
		if len(inDict) == 0 {
			statelessEnc(&dst, todo, int16(len(dict)))
		} else {
			statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
		}
		isEof := eof && len(in) == 0

		if dst.n == 0 {
			bw.writeStoredHeader(len(uncompressed), isEof)
			if bw.err != nil {
				return bw.err
			}
			bw.writeBytes(uncompressed)
		} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
			// If matching removed less than 1/16th of the input, Huffman compress the block.
			bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
		} else {
			bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
		}
		if len(in) > 0 {
			// Retain a dict if we have more
			inDict = inOrg[len(uncompressed)-maxStatelessDict:]
			dict = nil
			dst.Reset()
		}
		if bw.err != nil {
			return bw.err
		}
	}
	if !eof {
		// Align, only a stored block can do that.
		bw.writeStoredHeader(0, false)
	}
	bw.flush()
	return bw.err
}

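// hashSL is a multiplicative hash of the four input bytes in u, keeping
// the top slTableBits bits as the table index. The constant is a large
// odd multiplier, as used in Snappy-derived encoders.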
func hashSL(u uint32) uint32 {
	return (u * 0x1e35a7bd) >> slTableShift
}

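// load3216 does an unaligned little-endian load of 4 bytes at offset i.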
func load3216(b []byte, i int16) uint32 {
	return le.Load32(b, i)
}

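// load6416 does an unaligned little-endian load of 8 bytes at offset i.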
func load6416(b []byte, i int16) uint64 {
	return le.Load64(b, i)
}

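// statelessEnc encodes src into dst using a stack-allocated hash table.
// The first startAt bytes are treated as an already-emitted dictionary:
// they are indexed as match candidates but are not encoded themselves.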
func statelessEnc(dst *tokens, src []byte, startAt int16) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	type tableEntry struct {
		offset int16
	}

	var table [slTableSize]tableEntry

	// This check isn't in the Snappy implementation, but there the caller,
	// rather than the callee, handles this case.
	if len(src)-int(startAt) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = 0
		return
	}
	// Index until startAt
	if startAt > 0 {
		cv := load3232(src, 0)
		for i := int16(0); i < startAt; i++ {
			table[hashSL(cv)] = tableEntry{offset: i}
			cv = (cv >> 8) | (uint32(src[i+4]) << 24)
		}
	}

	s := startAt + 1
	nextEmit := startAt
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int16(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	cv := load3216(src, s)

	for {
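		// skipLog and doEvery control the match search speed: the step to
		// the next candidate grows with the distance since the last emit,
		// so incompressible input is skipped over increasingly quickly.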
		const skipLog = 5
		const doEvery = 2

		nextS := s
		var candidate tableEntry
		for {
			nextHash := hashSL(cv)
			candidate = table[nextHash]
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit || nextS <= 0 {
				goto emitRemainder
			}

			now := load6416(src, nextS)
			table[nextHash] = tableEntry{offset: s}
			nextHash = hashSL(uint32(now))

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}

			// Do one right away...
			cv = uint32(now)
			s = nextS
			nextS++
			candidate = table[nextHash]
			now >>= 8
			table[nextHash] = tableEntry{offset: s}

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}
			cv = uint32(now)
			s = nextS
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset
			l := int16(matchLen(src[s+4:], src[t+4:]) + 4)

			// Extend backwards
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
			if nextEmit < s {
				// Equivalent to emitLiteral(dst, src[nextEmit:s]), inlined.
				for _, v := range src[nextEmit:s] {
					dst.tokens[dst.n] = token(v)
					dst.litHist[v]++
					dst.n++
				}
			}

			// Save the match found
			dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			if nextS >= s {
				s = nextS + 1
			}
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6416(src, s-2)
			o := s - 2
			prevHash := hashSL(uint32(x))
			table[prevHash] = tableEntry{offset: o}
			x >>= 16
			currHash := hashSL(uint32(x))
			candidate = table[currHash]
			table[currHash] = tableEntry{offset: o + 2}

			if uint32(x) != load3216(src, candidate.offset) {
				cv = uint32(x >> 8)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}
		emitLiteral(dst, src[nextEmit:])
	}
}