| Abhay Kumar | 40252eb | 2025-10-13 13:25:53 +0000 | [diff] [blame^] | 1 | package bbolt |
| 2 | |
| 3 | import ( |
| 4 | "bytes" |
| 5 | "fmt" |
| 6 | "sort" |
| 7 | |
| 8 | "go.etcd.io/bbolt/errors" |
| 9 | "go.etcd.io/bbolt/internal/common" |
| 10 | ) |
| 11 | |
| 12 | // Cursor represents an iterator that can traverse over all key/value pairs in a bucket |
| 13 | // in lexicographical order. |
| 14 | // Cursors see nested buckets with value == nil. |
| 15 | // Cursors can be obtained from a transaction and are valid as long as the transaction is open. |
| 16 | // |
| 17 | // Keys and values returned from the cursor are only valid for the life of the transaction. |
| 18 | // |
| 19 | // Changing data while traversing with a cursor may cause it to be invalidated |
| 20 | // and return unexpected keys and/or values. You must reposition your cursor |
| 21 | // after mutating data. |
| 22 | type Cursor struct { |
| 23 | bucket *Bucket |
| 24 | stack []elemRef |
| 25 | } |
| 26 | |
| 27 | // Bucket returns the bucket that this cursor was created from. |
| 28 | func (c *Cursor) Bucket() *Bucket { |
| 29 | return c.bucket |
| 30 | } |
| 31 | |
| 32 | // First moves the cursor to the first item in the bucket and returns its key and value. |
| 33 | // If the bucket is empty then a nil key and value are returned. |
| 34 | // The returned key and value are only valid for the life of the transaction. |
| 35 | func (c *Cursor) First() (key []byte, value []byte) { |
| 36 | common.Assert(c.bucket.tx.db != nil, "tx closed") |
| 37 | k, v, flags := c.first() |
| 38 | if (flags & uint32(common.BucketLeafFlag)) != 0 { |
| 39 | return k, nil |
| 40 | } |
| 41 | return k, v |
| 42 | } |
| 43 | |
| 44 | func (c *Cursor) first() (key []byte, value []byte, flags uint32) { |
| 45 | c.stack = c.stack[:0] |
| 46 | p, n := c.bucket.pageNode(c.bucket.RootPage()) |
| 47 | c.stack = append(c.stack, elemRef{page: p, node: n, index: 0}) |
| 48 | c.goToFirstElementOnTheStack() |
| 49 | |
| 50 | // If we land on an empty page then move to the next value. |
| 51 | // https://github.com/boltdb/bolt/issues/450 |
| 52 | if c.stack[len(c.stack)-1].count() == 0 { |
| 53 | c.next() |
| 54 | } |
| 55 | |
| 56 | k, v, flags := c.keyValue() |
| 57 | if (flags & uint32(common.BucketLeafFlag)) != 0 { |
| 58 | return k, nil, flags |
| 59 | } |
| 60 | return k, v, flags |
| 61 | } |
| 62 | |
| 63 | // Last moves the cursor to the last item in the bucket and returns its key and value. |
| 64 | // If the bucket is empty then a nil key and value are returned. |
| 65 | // The returned key and value are only valid for the life of the transaction. |
| 66 | func (c *Cursor) Last() (key []byte, value []byte) { |
| 67 | common.Assert(c.bucket.tx.db != nil, "tx closed") |
| 68 | c.stack = c.stack[:0] |
| 69 | p, n := c.bucket.pageNode(c.bucket.RootPage()) |
| 70 | ref := elemRef{page: p, node: n} |
| 71 | ref.index = ref.count() - 1 |
| 72 | c.stack = append(c.stack, ref) |
| 73 | c.last() |
| 74 | |
| 75 | // If this is an empty page (calling Delete may result in empty pages) |
| 76 | // we call prev to find the last page that is not empty |
| 77 | for len(c.stack) > 1 && c.stack[len(c.stack)-1].count() == 0 { |
| 78 | c.prev() |
| 79 | } |
| 80 | |
| 81 | if len(c.stack) == 0 { |
| 82 | return nil, nil |
| 83 | } |
| 84 | |
| 85 | k, v, flags := c.keyValue() |
| 86 | if (flags & uint32(common.BucketLeafFlag)) != 0 { |
| 87 | return k, nil |
| 88 | } |
| 89 | return k, v |
| 90 | } |
| 91 | |
| 92 | // Next moves the cursor to the next item in the bucket and returns its key and value. |
| 93 | // If the cursor is at the end of the bucket then a nil key and value are returned. |
| 94 | // The returned key and value are only valid for the life of the transaction. |
| 95 | func (c *Cursor) Next() (key []byte, value []byte) { |
| 96 | common.Assert(c.bucket.tx.db != nil, "tx closed") |
| 97 | k, v, flags := c.next() |
| 98 | if (flags & uint32(common.BucketLeafFlag)) != 0 { |
| 99 | return k, nil |
| 100 | } |
| 101 | return k, v |
| 102 | } |
| 103 | |
| 104 | // Prev moves the cursor to the previous item in the bucket and returns its key and value. |
| 105 | // If the cursor is at the beginning of the bucket then a nil key and value are returned. |
| 106 | // The returned key and value are only valid for the life of the transaction. |
| 107 | func (c *Cursor) Prev() (key []byte, value []byte) { |
| 108 | common.Assert(c.bucket.tx.db != nil, "tx closed") |
| 109 | k, v, flags := c.prev() |
| 110 | if (flags & uint32(common.BucketLeafFlag)) != 0 { |
| 111 | return k, nil |
| 112 | } |
| 113 | return k, v |
| 114 | } |
| 115 | |
| 116 | // Seek moves the cursor to a given key using a b-tree search and returns it. |
| 117 | // If the key does not exist then the next key is used. If no keys |
| 118 | // follow, a nil key is returned. |
| 119 | // The returned key and value are only valid for the life of the transaction. |
| 120 | func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) { |
| 121 | common.Assert(c.bucket.tx.db != nil, "tx closed") |
| 122 | |
| 123 | k, v, flags := c.seek(seek) |
| 124 | |
| 125 | // If we ended up after the last element of a page then move to the next one. |
| 126 | if ref := &c.stack[len(c.stack)-1]; ref.index >= ref.count() { |
| 127 | k, v, flags = c.next() |
| 128 | } |
| 129 | |
| 130 | if k == nil { |
| 131 | return nil, nil |
| 132 | } else if (flags & uint32(common.BucketLeafFlag)) != 0 { |
| 133 | return k, nil |
| 134 | } |
| 135 | return k, v |
| 136 | } |
| 137 | |
| 138 | // Delete removes the current key/value under the cursor from the bucket. |
| 139 | // Delete fails if current key/value is a bucket or if the transaction is not writable. |
| 140 | func (c *Cursor) Delete() error { |
| 141 | if c.bucket.tx.db == nil { |
| 142 | return errors.ErrTxClosed |
| 143 | } else if !c.bucket.Writable() { |
| 144 | return errors.ErrTxNotWritable |
| 145 | } |
| 146 | |
| 147 | key, _, flags := c.keyValue() |
| 148 | // Return an error if current value is a bucket. |
| 149 | if (flags & common.BucketLeafFlag) != 0 { |
| 150 | return errors.ErrIncompatibleValue |
| 151 | } |
| 152 | c.node().del(key) |
| 153 | |
| 154 | return nil |
| 155 | } |
| 156 | |
| 157 | // seek moves the cursor to a given key and returns it. |
| 158 | // If the key does not exist then the next key is used. |
| 159 | func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) { |
| 160 | // Start from root page/node and traverse to correct page. |
| 161 | c.stack = c.stack[:0] |
| 162 | c.search(seek, c.bucket.RootPage()) |
| 163 | |
| 164 | // If this is a bucket then return a nil value. |
| 165 | return c.keyValue() |
| 166 | } |
| 167 | |
| 168 | // first moves the cursor to the first leaf element under the last page in the stack. |
| 169 | func (c *Cursor) goToFirstElementOnTheStack() { |
| 170 | for { |
| 171 | // Exit when we hit a leaf page. |
| 172 | var ref = &c.stack[len(c.stack)-1] |
| 173 | if ref.isLeaf() { |
| 174 | break |
| 175 | } |
| 176 | |
| 177 | // Keep adding pages pointing to the first element to the stack. |
| 178 | var pgId common.Pgid |
| 179 | if ref.node != nil { |
| 180 | pgId = ref.node.inodes[ref.index].Pgid() |
| 181 | } else { |
| 182 | pgId = ref.page.BranchPageElement(uint16(ref.index)).Pgid() |
| 183 | } |
| 184 | p, n := c.bucket.pageNode(pgId) |
| 185 | c.stack = append(c.stack, elemRef{page: p, node: n, index: 0}) |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | // last moves the cursor to the last leaf element under the last page in the stack. |
| 190 | func (c *Cursor) last() { |
| 191 | for { |
| 192 | // Exit when we hit a leaf page. |
| 193 | ref := &c.stack[len(c.stack)-1] |
| 194 | if ref.isLeaf() { |
| 195 | break |
| 196 | } |
| 197 | |
| 198 | // Keep adding pages pointing to the last element in the stack. |
| 199 | var pgId common.Pgid |
| 200 | if ref.node != nil { |
| 201 | pgId = ref.node.inodes[ref.index].Pgid() |
| 202 | } else { |
| 203 | pgId = ref.page.BranchPageElement(uint16(ref.index)).Pgid() |
| 204 | } |
| 205 | p, n := c.bucket.pageNode(pgId) |
| 206 | |
| 207 | var nextRef = elemRef{page: p, node: n} |
| 208 | nextRef.index = nextRef.count() - 1 |
| 209 | c.stack = append(c.stack, nextRef) |
| 210 | } |
| 211 | } |
| 212 | |
| 213 | // next moves to the next leaf element and returns the key and value. |
| 214 | // If the cursor is at the last leaf element then it stays there and returns nil. |
| 215 | func (c *Cursor) next() (key []byte, value []byte, flags uint32) { |
| 216 | for { |
| 217 | // Attempt to move over one element until we're successful. |
| 218 | // Move up the stack as we hit the end of each page in our stack. |
| 219 | var i int |
| 220 | for i = len(c.stack) - 1; i >= 0; i-- { |
| 221 | elem := &c.stack[i] |
| 222 | if elem.index < elem.count()-1 { |
| 223 | elem.index++ |
| 224 | break |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | // If we've hit the root page then stop and return. This will leave the |
| 229 | // cursor on the last element of the last page. |
| 230 | if i == -1 { |
| 231 | return nil, nil, 0 |
| 232 | } |
| 233 | |
| 234 | // Otherwise start from where we left off in the stack and find the |
| 235 | // first element of the first leaf page. |
| 236 | c.stack = c.stack[:i+1] |
| 237 | c.goToFirstElementOnTheStack() |
| 238 | |
| 239 | // If this is an empty page then restart and move back up the stack. |
| 240 | // https://github.com/boltdb/bolt/issues/450 |
| 241 | if c.stack[len(c.stack)-1].count() == 0 { |
| 242 | continue |
| 243 | } |
| 244 | |
| 245 | return c.keyValue() |
| 246 | } |
| 247 | } |
| 248 | |
| 249 | // prev moves the cursor to the previous item in the bucket and returns its key and value. |
| 250 | // If the cursor is at the beginning of the bucket then a nil key and value are returned. |
| 251 | func (c *Cursor) prev() (key []byte, value []byte, flags uint32) { |
| 252 | // Attempt to move back one element until we're successful. |
| 253 | // Move up the stack as we hit the beginning of each page in our stack. |
| 254 | for i := len(c.stack) - 1; i >= 0; i-- { |
| 255 | elem := &c.stack[i] |
| 256 | if elem.index > 0 { |
| 257 | elem.index-- |
| 258 | break |
| 259 | } |
| 260 | // If we've hit the beginning, we should stop moving the cursor, |
| 261 | // and stay at the first element, so that users can continue to |
| 262 | // iterate over the elements in reverse direction by calling `Next`. |
| 263 | // We should return nil in such case. |
| 264 | // Refer to https://github.com/etcd-io/bbolt/issues/733 |
| 265 | if len(c.stack) == 1 { |
| 266 | c.first() |
| 267 | return nil, nil, 0 |
| 268 | } |
| 269 | c.stack = c.stack[:i] |
| 270 | } |
| 271 | |
| 272 | // If we've hit the end then return nil. |
| 273 | if len(c.stack) == 0 { |
| 274 | return nil, nil, 0 |
| 275 | } |
| 276 | |
| 277 | // Move down the stack to find the last element of the last leaf under this branch. |
| 278 | c.last() |
| 279 | return c.keyValue() |
| 280 | } |
| 281 | |
| 282 | // search recursively performs a binary search against a given page/node until it finds a given key. |
| 283 | func (c *Cursor) search(key []byte, pgId common.Pgid) { |
| 284 | p, n := c.bucket.pageNode(pgId) |
| 285 | if p != nil && !p.IsBranchPage() && !p.IsLeafPage() { |
| 286 | panic(fmt.Sprintf("invalid page type: %d: %x", p.Id(), p.Flags())) |
| 287 | } |
| 288 | e := elemRef{page: p, node: n} |
| 289 | c.stack = append(c.stack, e) |
| 290 | |
| 291 | // If we're on a leaf page/node then find the specific node. |
| 292 | if e.isLeaf() { |
| 293 | c.nsearch(key) |
| 294 | return |
| 295 | } |
| 296 | |
| 297 | if n != nil { |
| 298 | c.searchNode(key, n) |
| 299 | return |
| 300 | } |
| 301 | c.searchPage(key, p) |
| 302 | } |
| 303 | |
| 304 | func (c *Cursor) searchNode(key []byte, n *node) { |
| 305 | var exact bool |
| 306 | index := sort.Search(len(n.inodes), func(i int) bool { |
| 307 | // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now. |
| 308 | // sort.Search() finds the lowest index where f() != -1 but we need the highest index. |
| 309 | ret := bytes.Compare(n.inodes[i].Key(), key) |
| 310 | if ret == 0 { |
| 311 | exact = true |
| 312 | } |
| 313 | return ret != -1 |
| 314 | }) |
| 315 | if !exact && index > 0 { |
| 316 | index-- |
| 317 | } |
| 318 | c.stack[len(c.stack)-1].index = index |
| 319 | |
| 320 | // Recursively search to the next page. |
| 321 | c.search(key, n.inodes[index].Pgid()) |
| 322 | } |
| 323 | |
| 324 | func (c *Cursor) searchPage(key []byte, p *common.Page) { |
| 325 | // Binary search for the correct range. |
| 326 | inodes := p.BranchPageElements() |
| 327 | |
| 328 | var exact bool |
| 329 | index := sort.Search(int(p.Count()), func(i int) bool { |
| 330 | // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now. |
| 331 | // sort.Search() finds the lowest index where f() != -1 but we need the highest index. |
| 332 | ret := bytes.Compare(inodes[i].Key(), key) |
| 333 | if ret == 0 { |
| 334 | exact = true |
| 335 | } |
| 336 | return ret != -1 |
| 337 | }) |
| 338 | if !exact && index > 0 { |
| 339 | index-- |
| 340 | } |
| 341 | c.stack[len(c.stack)-1].index = index |
| 342 | |
| 343 | // Recursively search to the next page. |
| 344 | c.search(key, inodes[index].Pgid()) |
| 345 | } |
| 346 | |
| 347 | // nsearch searches the leaf node on the top of the stack for a key. |
| 348 | func (c *Cursor) nsearch(key []byte) { |
| 349 | e := &c.stack[len(c.stack)-1] |
| 350 | p, n := e.page, e.node |
| 351 | |
| 352 | // If we have a node then search its inodes. |
| 353 | if n != nil { |
| 354 | index := sort.Search(len(n.inodes), func(i int) bool { |
| 355 | return bytes.Compare(n.inodes[i].Key(), key) != -1 |
| 356 | }) |
| 357 | e.index = index |
| 358 | return |
| 359 | } |
| 360 | |
| 361 | // If we have a page then search its leaf elements. |
| 362 | inodes := p.LeafPageElements() |
| 363 | index := sort.Search(int(p.Count()), func(i int) bool { |
| 364 | return bytes.Compare(inodes[i].Key(), key) != -1 |
| 365 | }) |
| 366 | e.index = index |
| 367 | } |
| 368 | |
| 369 | // keyValue returns the key and value of the current leaf element. |
| 370 | func (c *Cursor) keyValue() ([]byte, []byte, uint32) { |
| 371 | ref := &c.stack[len(c.stack)-1] |
| 372 | |
| 373 | // If the cursor is pointing to the end of page/node then return nil. |
| 374 | if ref.count() == 0 || ref.index >= ref.count() { |
| 375 | return nil, nil, 0 |
| 376 | } |
| 377 | |
| 378 | // Retrieve value from node. |
| 379 | if ref.node != nil { |
| 380 | inode := &ref.node.inodes[ref.index] |
| 381 | return inode.Key(), inode.Value(), inode.Flags() |
| 382 | } |
| 383 | |
| 384 | // Or retrieve value from page. |
| 385 | elem := ref.page.LeafPageElement(uint16(ref.index)) |
| 386 | return elem.Key(), elem.Value(), elem.Flags() |
| 387 | } |
| 388 | |
| 389 | // node returns the node that the cursor is currently positioned on. |
| 390 | func (c *Cursor) node() *node { |
| 391 | common.Assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack") |
| 392 | |
| 393 | // If the top of the stack is a leaf node then just return it. |
| 394 | if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() { |
| 395 | return ref.node |
| 396 | } |
| 397 | |
| 398 | // Start from root and traverse down the hierarchy. |
| 399 | var n = c.stack[0].node |
| 400 | if n == nil { |
| 401 | n = c.bucket.node(c.stack[0].page.Id(), nil) |
| 402 | } |
| 403 | for _, ref := range c.stack[:len(c.stack)-1] { |
| 404 | common.Assert(!n.isLeaf, "expected branch node") |
| 405 | n = n.childAt(ref.index) |
| 406 | } |
| 407 | common.Assert(n.isLeaf, "expected leaf node") |
| 408 | return n |
| 409 | } |
| 410 | |
| 411 | // elemRef represents a reference to an element on a given page/node. |
| 412 | type elemRef struct { |
| 413 | page *common.Page |
| 414 | node *node |
| 415 | index int |
| 416 | } |
| 417 | |
| 418 | // isLeaf returns whether the ref is pointing at a leaf page/node. |
| 419 | func (r *elemRef) isLeaf() bool { |
| 420 | if r.node != nil { |
| 421 | return r.node.isLeaf |
| 422 | } |
| 423 | return r.page.IsLeafPage() |
| 424 | } |
| 425 | |
| 426 | // count returns the number of inodes or page elements. |
| 427 | func (r *elemRef) count() int { |
| 428 | if r.node != nil { |
| 429 | return len(r.node.inodes) |
| 430 | } |
| 431 | return int(r.page.Count()) |
| 432 | } |