blob: 0c1e28c106ff2c7e63cbb8d703727b4cffb92dae [file] [log] [blame]
Abhay Kumar40252eb2025-10-13 13:25:53 +00001package bbolt
2
3import (
4 "bytes"
5 "fmt"
6 "sort"
7
8 "go.etcd.io/bbolt/errors"
9 "go.etcd.io/bbolt/internal/common"
10)
11
12// Cursor represents an iterator that can traverse over all key/value pairs in a bucket
13// in lexicographical order.
14// Cursors see nested buckets with value == nil.
15// Cursors can be obtained from a transaction and are valid as long as the transaction is open.
16//
17// Keys and values returned from the cursor are only valid for the life of the transaction.
18//
19// Changing data while traversing with a cursor may cause it to be invalidated
20// and return unexpected keys and/or values. You must reposition your cursor
21// after mutating data.
22type Cursor struct {
23 bucket *Bucket
24 stack []elemRef
25}
26
27// Bucket returns the bucket that this cursor was created from.
28func (c *Cursor) Bucket() *Bucket {
29 return c.bucket
30}
31
32// First moves the cursor to the first item in the bucket and returns its key and value.
33// If the bucket is empty then a nil key and value are returned.
34// The returned key and value are only valid for the life of the transaction.
35func (c *Cursor) First() (key []byte, value []byte) {
36 common.Assert(c.bucket.tx.db != nil, "tx closed")
37 k, v, flags := c.first()
38 if (flags & uint32(common.BucketLeafFlag)) != 0 {
39 return k, nil
40 }
41 return k, v
42}
43
44func (c *Cursor) first() (key []byte, value []byte, flags uint32) {
45 c.stack = c.stack[:0]
46 p, n := c.bucket.pageNode(c.bucket.RootPage())
47 c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
48 c.goToFirstElementOnTheStack()
49
50 // If we land on an empty page then move to the next value.
51 // https://github.com/boltdb/bolt/issues/450
52 if c.stack[len(c.stack)-1].count() == 0 {
53 c.next()
54 }
55
56 k, v, flags := c.keyValue()
57 if (flags & uint32(common.BucketLeafFlag)) != 0 {
58 return k, nil, flags
59 }
60 return k, v, flags
61}
62
63// Last moves the cursor to the last item in the bucket and returns its key and value.
64// If the bucket is empty then a nil key and value are returned.
65// The returned key and value are only valid for the life of the transaction.
66func (c *Cursor) Last() (key []byte, value []byte) {
67 common.Assert(c.bucket.tx.db != nil, "tx closed")
68 c.stack = c.stack[:0]
69 p, n := c.bucket.pageNode(c.bucket.RootPage())
70 ref := elemRef{page: p, node: n}
71 ref.index = ref.count() - 1
72 c.stack = append(c.stack, ref)
73 c.last()
74
75 // If this is an empty page (calling Delete may result in empty pages)
76 // we call prev to find the last page that is not empty
77 for len(c.stack) > 1 && c.stack[len(c.stack)-1].count() == 0 {
78 c.prev()
79 }
80
81 if len(c.stack) == 0 {
82 return nil, nil
83 }
84
85 k, v, flags := c.keyValue()
86 if (flags & uint32(common.BucketLeafFlag)) != 0 {
87 return k, nil
88 }
89 return k, v
90}
91
92// Next moves the cursor to the next item in the bucket and returns its key and value.
93// If the cursor is at the end of the bucket then a nil key and value are returned.
94// The returned key and value are only valid for the life of the transaction.
95func (c *Cursor) Next() (key []byte, value []byte) {
96 common.Assert(c.bucket.tx.db != nil, "tx closed")
97 k, v, flags := c.next()
98 if (flags & uint32(common.BucketLeafFlag)) != 0 {
99 return k, nil
100 }
101 return k, v
102}
103
104// Prev moves the cursor to the previous item in the bucket and returns its key and value.
105// If the cursor is at the beginning of the bucket then a nil key and value are returned.
106// The returned key and value are only valid for the life of the transaction.
107func (c *Cursor) Prev() (key []byte, value []byte) {
108 common.Assert(c.bucket.tx.db != nil, "tx closed")
109 k, v, flags := c.prev()
110 if (flags & uint32(common.BucketLeafFlag)) != 0 {
111 return k, nil
112 }
113 return k, v
114}
115
116// Seek moves the cursor to a given key using a b-tree search and returns it.
117// If the key does not exist then the next key is used. If no keys
118// follow, a nil key is returned.
119// The returned key and value are only valid for the life of the transaction.
120func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
121 common.Assert(c.bucket.tx.db != nil, "tx closed")
122
123 k, v, flags := c.seek(seek)
124
125 // If we ended up after the last element of a page then move to the next one.
126 if ref := &c.stack[len(c.stack)-1]; ref.index >= ref.count() {
127 k, v, flags = c.next()
128 }
129
130 if k == nil {
131 return nil, nil
132 } else if (flags & uint32(common.BucketLeafFlag)) != 0 {
133 return k, nil
134 }
135 return k, v
136}
137
138// Delete removes the current key/value under the cursor from the bucket.
139// Delete fails if current key/value is a bucket or if the transaction is not writable.
140func (c *Cursor) Delete() error {
141 if c.bucket.tx.db == nil {
142 return errors.ErrTxClosed
143 } else if !c.bucket.Writable() {
144 return errors.ErrTxNotWritable
145 }
146
147 key, _, flags := c.keyValue()
148 // Return an error if current value is a bucket.
149 if (flags & common.BucketLeafFlag) != 0 {
150 return errors.ErrIncompatibleValue
151 }
152 c.node().del(key)
153
154 return nil
155}
156
157// seek moves the cursor to a given key and returns it.
158// If the key does not exist then the next key is used.
159func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
160 // Start from root page/node and traverse to correct page.
161 c.stack = c.stack[:0]
162 c.search(seek, c.bucket.RootPage())
163
164 // If this is a bucket then return a nil value.
165 return c.keyValue()
166}
167
168// first moves the cursor to the first leaf element under the last page in the stack.
169func (c *Cursor) goToFirstElementOnTheStack() {
170 for {
171 // Exit when we hit a leaf page.
172 var ref = &c.stack[len(c.stack)-1]
173 if ref.isLeaf() {
174 break
175 }
176
177 // Keep adding pages pointing to the first element to the stack.
178 var pgId common.Pgid
179 if ref.node != nil {
180 pgId = ref.node.inodes[ref.index].Pgid()
181 } else {
182 pgId = ref.page.BranchPageElement(uint16(ref.index)).Pgid()
183 }
184 p, n := c.bucket.pageNode(pgId)
185 c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
186 }
187}
188
189// last moves the cursor to the last leaf element under the last page in the stack.
190func (c *Cursor) last() {
191 for {
192 // Exit when we hit a leaf page.
193 ref := &c.stack[len(c.stack)-1]
194 if ref.isLeaf() {
195 break
196 }
197
198 // Keep adding pages pointing to the last element in the stack.
199 var pgId common.Pgid
200 if ref.node != nil {
201 pgId = ref.node.inodes[ref.index].Pgid()
202 } else {
203 pgId = ref.page.BranchPageElement(uint16(ref.index)).Pgid()
204 }
205 p, n := c.bucket.pageNode(pgId)
206
207 var nextRef = elemRef{page: p, node: n}
208 nextRef.index = nextRef.count() - 1
209 c.stack = append(c.stack, nextRef)
210 }
211}
212
213// next moves to the next leaf element and returns the key and value.
214// If the cursor is at the last leaf element then it stays there and returns nil.
215func (c *Cursor) next() (key []byte, value []byte, flags uint32) {
216 for {
217 // Attempt to move over one element until we're successful.
218 // Move up the stack as we hit the end of each page in our stack.
219 var i int
220 for i = len(c.stack) - 1; i >= 0; i-- {
221 elem := &c.stack[i]
222 if elem.index < elem.count()-1 {
223 elem.index++
224 break
225 }
226 }
227
228 // If we've hit the root page then stop and return. This will leave the
229 // cursor on the last element of the last page.
230 if i == -1 {
231 return nil, nil, 0
232 }
233
234 // Otherwise start from where we left off in the stack and find the
235 // first element of the first leaf page.
236 c.stack = c.stack[:i+1]
237 c.goToFirstElementOnTheStack()
238
239 // If this is an empty page then restart and move back up the stack.
240 // https://github.com/boltdb/bolt/issues/450
241 if c.stack[len(c.stack)-1].count() == 0 {
242 continue
243 }
244
245 return c.keyValue()
246 }
247}
248
249// prev moves the cursor to the previous item in the bucket and returns its key and value.
250// If the cursor is at the beginning of the bucket then a nil key and value are returned.
251func (c *Cursor) prev() (key []byte, value []byte, flags uint32) {
252 // Attempt to move back one element until we're successful.
253 // Move up the stack as we hit the beginning of each page in our stack.
254 for i := len(c.stack) - 1; i >= 0; i-- {
255 elem := &c.stack[i]
256 if elem.index > 0 {
257 elem.index--
258 break
259 }
260 // If we've hit the beginning, we should stop moving the cursor,
261 // and stay at the first element, so that users can continue to
262 // iterate over the elements in reverse direction by calling `Next`.
263 // We should return nil in such case.
264 // Refer to https://github.com/etcd-io/bbolt/issues/733
265 if len(c.stack) == 1 {
266 c.first()
267 return nil, nil, 0
268 }
269 c.stack = c.stack[:i]
270 }
271
272 // If we've hit the end then return nil.
273 if len(c.stack) == 0 {
274 return nil, nil, 0
275 }
276
277 // Move down the stack to find the last element of the last leaf under this branch.
278 c.last()
279 return c.keyValue()
280}
281
282// search recursively performs a binary search against a given page/node until it finds a given key.
283func (c *Cursor) search(key []byte, pgId common.Pgid) {
284 p, n := c.bucket.pageNode(pgId)
285 if p != nil && !p.IsBranchPage() && !p.IsLeafPage() {
286 panic(fmt.Sprintf("invalid page type: %d: %x", p.Id(), p.Flags()))
287 }
288 e := elemRef{page: p, node: n}
289 c.stack = append(c.stack, e)
290
291 // If we're on a leaf page/node then find the specific node.
292 if e.isLeaf() {
293 c.nsearch(key)
294 return
295 }
296
297 if n != nil {
298 c.searchNode(key, n)
299 return
300 }
301 c.searchPage(key, p)
302}
303
304func (c *Cursor) searchNode(key []byte, n *node) {
305 var exact bool
306 index := sort.Search(len(n.inodes), func(i int) bool {
307 // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
308 // sort.Search() finds the lowest index where f() != -1 but we need the highest index.
309 ret := bytes.Compare(n.inodes[i].Key(), key)
310 if ret == 0 {
311 exact = true
312 }
313 return ret != -1
314 })
315 if !exact && index > 0 {
316 index--
317 }
318 c.stack[len(c.stack)-1].index = index
319
320 // Recursively search to the next page.
321 c.search(key, n.inodes[index].Pgid())
322}
323
324func (c *Cursor) searchPage(key []byte, p *common.Page) {
325 // Binary search for the correct range.
326 inodes := p.BranchPageElements()
327
328 var exact bool
329 index := sort.Search(int(p.Count()), func(i int) bool {
330 // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
331 // sort.Search() finds the lowest index where f() != -1 but we need the highest index.
332 ret := bytes.Compare(inodes[i].Key(), key)
333 if ret == 0 {
334 exact = true
335 }
336 return ret != -1
337 })
338 if !exact && index > 0 {
339 index--
340 }
341 c.stack[len(c.stack)-1].index = index
342
343 // Recursively search to the next page.
344 c.search(key, inodes[index].Pgid())
345}
346
347// nsearch searches the leaf node on the top of the stack for a key.
348func (c *Cursor) nsearch(key []byte) {
349 e := &c.stack[len(c.stack)-1]
350 p, n := e.page, e.node
351
352 // If we have a node then search its inodes.
353 if n != nil {
354 index := sort.Search(len(n.inodes), func(i int) bool {
355 return bytes.Compare(n.inodes[i].Key(), key) != -1
356 })
357 e.index = index
358 return
359 }
360
361 // If we have a page then search its leaf elements.
362 inodes := p.LeafPageElements()
363 index := sort.Search(int(p.Count()), func(i int) bool {
364 return bytes.Compare(inodes[i].Key(), key) != -1
365 })
366 e.index = index
367}
368
369// keyValue returns the key and value of the current leaf element.
370func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
371 ref := &c.stack[len(c.stack)-1]
372
373 // If the cursor is pointing to the end of page/node then return nil.
374 if ref.count() == 0 || ref.index >= ref.count() {
375 return nil, nil, 0
376 }
377
378 // Retrieve value from node.
379 if ref.node != nil {
380 inode := &ref.node.inodes[ref.index]
381 return inode.Key(), inode.Value(), inode.Flags()
382 }
383
384 // Or retrieve value from page.
385 elem := ref.page.LeafPageElement(uint16(ref.index))
386 return elem.Key(), elem.Value(), elem.Flags()
387}
388
389// node returns the node that the cursor is currently positioned on.
390func (c *Cursor) node() *node {
391 common.Assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack")
392
393 // If the top of the stack is a leaf node then just return it.
394 if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() {
395 return ref.node
396 }
397
398 // Start from root and traverse down the hierarchy.
399 var n = c.stack[0].node
400 if n == nil {
401 n = c.bucket.node(c.stack[0].page.Id(), nil)
402 }
403 for _, ref := range c.stack[:len(c.stack)-1] {
404 common.Assert(!n.isLeaf, "expected branch node")
405 n = n.childAt(ref.index)
406 }
407 common.Assert(n.isLeaf, "expected leaf node")
408 return n
409}
410
411// elemRef represents a reference to an element on a given page/node.
412type elemRef struct {
413 page *common.Page
414 node *node
415 index int
416}
417
418// isLeaf returns whether the ref is pointing at a leaf page/node.
419func (r *elemRef) isLeaf() bool {
420 if r.node != nil {
421 return r.node.isLeaf
422 }
423 return r.page.IsLeafPage()
424}
425
426// count returns the number of inodes or page elements.
427func (r *elemRef) count() int {
428 if r.node != nil {
429 return len(r.node.inodes)
430 }
431 return int(r.page.Count())
432}