blob: 78bddf1ceed747148b3b9ef8113fcf3296fedb60 [file] [log] [blame]
khenaidoo26721882021-08-11 17:42:52 -04001// Package xxhash implements the 64-bit variant of xxHash (XXH64) as described
2// at http://cyan4973.github.io/xxHash/.
3package xxhash
4
5import (
6 "encoding/binary"
7 "errors"
8 "math/bits"
9)
10
11const (
12 prime1 uint64 = 11400714785074694791
13 prime2 uint64 = 14029467366897019727
14 prime3 uint64 = 1609587929392839161
15 prime4 uint64 = 9650029242287828579
16 prime5 uint64 = 2870177450012600261
17)
18
Abhay Kumar40252eb2025-10-13 13:25:53 +000019// Store the primes in an array as well.
20//
21// The consts are used when possible in Go code to avoid MOVs but we need a
22// contiguous array for the assembly code.
23var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}
khenaidoo26721882021-08-11 17:42:52 -040024
25// Digest implements hash.Hash64.
Abhay Kumar40252eb2025-10-13 13:25:53 +000026//
27// Note that a zero-valued Digest is not ready to receive writes.
28// Call Reset or create a Digest using New before calling other methods.
khenaidoo26721882021-08-11 17:42:52 -040029type Digest struct {
30 v1 uint64
31 v2 uint64
32 v3 uint64
33 v4 uint64
34 total uint64
35 mem [32]byte
36 n int // how much of mem is used
37}
38
Abhay Kumar40252eb2025-10-13 13:25:53 +000039// New creates a new Digest with a zero seed.
khenaidoo26721882021-08-11 17:42:52 -040040func New() *Digest {
Abhay Kumar40252eb2025-10-13 13:25:53 +000041 return NewWithSeed(0)
42}
43
44// NewWithSeed creates a new Digest with the given seed.
45func NewWithSeed(seed uint64) *Digest {
khenaidoo26721882021-08-11 17:42:52 -040046 var d Digest
Abhay Kumar40252eb2025-10-13 13:25:53 +000047 d.ResetWithSeed(seed)
khenaidoo26721882021-08-11 17:42:52 -040048 return &d
49}
50
51// Reset clears the Digest's state so that it can be reused.
Abhay Kumar40252eb2025-10-13 13:25:53 +000052// It uses a seed value of zero.
khenaidoo26721882021-08-11 17:42:52 -040053func (d *Digest) Reset() {
Abhay Kumar40252eb2025-10-13 13:25:53 +000054 d.ResetWithSeed(0)
55}
56
57// ResetWithSeed clears the Digest's state so that it can be reused.
58// It uses the given seed to initialize the state.
59func (d *Digest) ResetWithSeed(seed uint64) {
60 d.v1 = seed + prime1 + prime2
61 d.v2 = seed + prime2
62 d.v3 = seed
63 d.v4 = seed - prime1
khenaidoo26721882021-08-11 17:42:52 -040064 d.total = 0
65 d.n = 0
66}
67
68// Size always returns 8 bytes.
69func (d *Digest) Size() int { return 8 }
70
71// BlockSize always returns 32 bytes.
72func (d *Digest) BlockSize() int { return 32 }
73
74// Write adds more data to d. It always returns len(b), nil.
75func (d *Digest) Write(b []byte) (n int, err error) {
76 n = len(b)
77 d.total += uint64(n)
78
Abhay Kumar40252eb2025-10-13 13:25:53 +000079 memleft := d.mem[d.n&(len(d.mem)-1):]
80
khenaidoo26721882021-08-11 17:42:52 -040081 if d.n+n < 32 {
82 // This new data doesn't even fill the current block.
Abhay Kumar40252eb2025-10-13 13:25:53 +000083 copy(memleft, b)
khenaidoo26721882021-08-11 17:42:52 -040084 d.n += n
85 return
86 }
87
88 if d.n > 0 {
89 // Finish off the partial block.
Abhay Kumar40252eb2025-10-13 13:25:53 +000090 c := copy(memleft, b)
khenaidoo26721882021-08-11 17:42:52 -040091 d.v1 = round(d.v1, u64(d.mem[0:8]))
92 d.v2 = round(d.v2, u64(d.mem[8:16]))
93 d.v3 = round(d.v3, u64(d.mem[16:24]))
94 d.v4 = round(d.v4, u64(d.mem[24:32]))
Abhay Kumar40252eb2025-10-13 13:25:53 +000095 b = b[c:]
khenaidoo26721882021-08-11 17:42:52 -040096 d.n = 0
97 }
98
99 if len(b) >= 32 {
100 // One or more full blocks left.
101 nw := writeBlocks(d, b)
102 b = b[nw:]
103 }
104
105 // Store any remaining partial block.
106 copy(d.mem[:], b)
107 d.n = len(b)
108
109 return
110}
111
112// Sum appends the current hash to b and returns the resulting slice.
113func (d *Digest) Sum(b []byte) []byte {
114 s := d.Sum64()
115 return append(
116 b,
117 byte(s>>56),
118 byte(s>>48),
119 byte(s>>40),
120 byte(s>>32),
121 byte(s>>24),
122 byte(s>>16),
123 byte(s>>8),
124 byte(s),
125 )
126}
127
128// Sum64 returns the current hash.
129func (d *Digest) Sum64() uint64 {
130 var h uint64
131
132 if d.total >= 32 {
133 v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4
134 h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
135 h = mergeRound(h, v1)
136 h = mergeRound(h, v2)
137 h = mergeRound(h, v3)
138 h = mergeRound(h, v4)
139 } else {
140 h = d.v3 + prime5
141 }
142
143 h += d.total
144
Abhay Kumar40252eb2025-10-13 13:25:53 +0000145 b := d.mem[:d.n&(len(d.mem)-1)]
146 for ; len(b) >= 8; b = b[8:] {
147 k1 := round(0, u64(b[:8]))
khenaidoo26721882021-08-11 17:42:52 -0400148 h ^= k1
149 h = rol27(h)*prime1 + prime4
150 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000151 if len(b) >= 4 {
152 h ^= uint64(u32(b[:4])) * prime1
khenaidoo26721882021-08-11 17:42:52 -0400153 h = rol23(h)*prime2 + prime3
Abhay Kumar40252eb2025-10-13 13:25:53 +0000154 b = b[4:]
khenaidoo26721882021-08-11 17:42:52 -0400155 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000156 for ; len(b) > 0; b = b[1:] {
157 h ^= uint64(b[0]) * prime5
khenaidoo26721882021-08-11 17:42:52 -0400158 h = rol11(h) * prime1
khenaidoo26721882021-08-11 17:42:52 -0400159 }
160
161 h ^= h >> 33
162 h *= prime2
163 h ^= h >> 29
164 h *= prime3
165 h ^= h >> 32
166
167 return h
168}
169
170const (
171 magic = "xxh\x06"
172 marshaledSize = len(magic) + 8*5 + 32
173)
174
175// MarshalBinary implements the encoding.BinaryMarshaler interface.
176func (d *Digest) MarshalBinary() ([]byte, error) {
177 b := make([]byte, 0, marshaledSize)
178 b = append(b, magic...)
179 b = appendUint64(b, d.v1)
180 b = appendUint64(b, d.v2)
181 b = appendUint64(b, d.v3)
182 b = appendUint64(b, d.v4)
183 b = appendUint64(b, d.total)
184 b = append(b, d.mem[:d.n]...)
185 b = b[:len(b)+len(d.mem)-d.n]
186 return b, nil
187}
188
189// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface.
190func (d *Digest) UnmarshalBinary(b []byte) error {
191 if len(b) < len(magic) || string(b[:len(magic)]) != magic {
192 return errors.New("xxhash: invalid hash state identifier")
193 }
194 if len(b) != marshaledSize {
195 return errors.New("xxhash: invalid hash state size")
196 }
197 b = b[len(magic):]
198 b, d.v1 = consumeUint64(b)
199 b, d.v2 = consumeUint64(b)
200 b, d.v3 = consumeUint64(b)
201 b, d.v4 = consumeUint64(b)
202 b, d.total = consumeUint64(b)
203 copy(d.mem[:], b)
khenaidoo26721882021-08-11 17:42:52 -0400204 d.n = int(d.total % uint64(len(d.mem)))
205 return nil
206}
207
208func appendUint64(b []byte, x uint64) []byte {
209 var a [8]byte
210 binary.LittleEndian.PutUint64(a[:], x)
211 return append(b, a[:]...)
212}
213
214func consumeUint64(b []byte) ([]byte, uint64) {
215 x := u64(b)
216 return b[8:], x
217}
218
219func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) }
220func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) }
221
222func round(acc, input uint64) uint64 {
223 acc += input * prime2
224 acc = rol31(acc)
225 acc *= prime1
226 return acc
227}
228
229func mergeRound(acc, val uint64) uint64 {
230 val = round(0, val)
231 acc ^= val
232 acc = acc*prime1 + prime4
233 return acc
234}
235
236func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) }
237func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) }
238func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) }
239func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) }
240func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) }
241func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) }
242func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) }
243func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) }