blob: ee808967c5731851dacf6e1d2ff4ec352f616600 [file] [log] [blame]
Abhay Kumara61c5222025-11-10 07:32:50 +00001package common
2
3import (
4 "fmt"
5 "os"
6 "sort"
7 "unsafe"
8)
9
10const PageHeaderSize = unsafe.Sizeof(Page{})
11
12const MinKeysPerPage = 2
13
14const BranchPageElementSize = unsafe.Sizeof(branchPageElement{})
15const LeafPageElementSize = unsafe.Sizeof(leafPageElement{})
16const pgidSize = unsafe.Sizeof(Pgid(0))
17
18const (
19 BranchPageFlag = 0x01
20 LeafPageFlag = 0x02
21 MetaPageFlag = 0x04
22 FreelistPageFlag = 0x10
23)
24
25const (
26 BucketLeafFlag = 0x01
27)
28
29type Pgid uint64
30
31type Page struct {
32 id Pgid
33 flags uint16
34 count uint16
35 overflow uint32
36}
37
38func NewPage(id Pgid, flags, count uint16, overflow uint32) *Page {
39 return &Page{
40 id: id,
41 flags: flags,
42 count: count,
43 overflow: overflow,
44 }
45}
46
47// Typ returns a human-readable page type string used for debugging.
48func (p *Page) Typ() string {
49 if p.IsBranchPage() {
50 return "branch"
51 } else if p.IsLeafPage() {
52 return "leaf"
53 } else if p.IsMetaPage() {
54 return "meta"
55 } else if p.IsFreelistPage() {
56 return "freelist"
57 }
58 return fmt.Sprintf("unknown<%02x>", p.flags)
59}
60
61func (p *Page) IsBranchPage() bool {
62 return p.flags == BranchPageFlag
63}
64
65func (p *Page) IsLeafPage() bool {
66 return p.flags == LeafPageFlag
67}
68
69func (p *Page) IsMetaPage() bool {
70 return p.flags == MetaPageFlag
71}
72
73func (p *Page) IsFreelistPage() bool {
74 return p.flags == FreelistPageFlag
75}
76
77// Meta returns a pointer to the metadata section of the page.
78func (p *Page) Meta() *Meta {
79 return (*Meta)(UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))
80}
81
82func (p *Page) FastCheck(id Pgid) {
83 Assert(p.id == id, "Page expected to be: %v, but self identifies as %v", id, p.id)
84 // Only one flag of page-type can be set.
85 Assert(p.IsBranchPage() ||
86 p.IsLeafPage() ||
87 p.IsMetaPage() ||
88 p.IsFreelistPage(),
89 "page %v: has unexpected type/flags: %x", p.id, p.flags)
90}
91
92// LeafPageElement retrieves the leaf node by index
93func (p *Page) LeafPageElement(index uint16) *leafPageElement {
94 return (*leafPageElement)(UnsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
95 LeafPageElementSize, int(index)))
96}
97
98// LeafPageElements retrieves a list of leaf nodes.
99func (p *Page) LeafPageElements() []leafPageElement {
100 if p.count == 0 {
101 return nil
102 }
103 data := UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
104 elems := unsafe.Slice((*leafPageElement)(data), int(p.count))
105 return elems
106}
107
108// BranchPageElement retrieves the branch node by index
109func (p *Page) BranchPageElement(index uint16) *branchPageElement {
110 return (*branchPageElement)(UnsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
111 unsafe.Sizeof(branchPageElement{}), int(index)))
112}
113
114// BranchPageElements retrieves a list of branch nodes.
115func (p *Page) BranchPageElements() []branchPageElement {
116 if p.count == 0 {
117 return nil
118 }
119 data := UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
120 elems := unsafe.Slice((*branchPageElement)(data), int(p.count))
121 return elems
122}
123
124func (p *Page) FreelistPageCount() (int, int) {
125 Assert(p.IsFreelistPage(), fmt.Sprintf("can't get freelist page count from a non-freelist page: %2x", p.flags))
126
127 // If the page.count is at the max uint16 value (64k) then it's considered
128 // an overflow and the size of the freelist is stored as the first element.
129 var idx, count = 0, int(p.count)
130 if count == 0xFFFF {
131 idx = 1
132 c := *(*Pgid)(UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))
133 count = int(c)
134 if count < 0 {
135 panic(fmt.Sprintf("leading element count %d overflows int", c))
136 }
137 }
138
139 return idx, count
140}
141
142func (p *Page) FreelistPageIds() []Pgid {
143 Assert(p.IsFreelistPage(), fmt.Sprintf("can't get freelist page IDs from a non-freelist page: %2x", p.flags))
144
145 idx, count := p.FreelistPageCount()
146
147 if count == 0 {
148 return nil
149 }
150
151 data := UnsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), pgidSize, idx)
152 ids := unsafe.Slice((*Pgid)(data), count)
153
154 return ids
155}
156
157// dump writes n bytes of the page to STDERR as hex output.
158func (p *Page) hexdump(n int) {
159 buf := UnsafeByteSlice(unsafe.Pointer(p), 0, 0, n)
160 fmt.Fprintf(os.Stderr, "%x\n", buf)
161}
162
163func (p *Page) PageElementSize() uintptr {
164 if p.IsLeafPage() {
165 return LeafPageElementSize
166 }
167 return BranchPageElementSize
168}
169
170func (p *Page) Id() Pgid {
171 return p.id
172}
173
174func (p *Page) SetId(target Pgid) {
175 p.id = target
176}
177
178func (p *Page) Flags() uint16 {
179 return p.flags
180}
181
182func (p *Page) SetFlags(v uint16) {
183 p.flags = v
184}
185
186func (p *Page) Count() uint16 {
187 return p.count
188}
189
190func (p *Page) SetCount(target uint16) {
191 p.count = target
192}
193
194func (p *Page) Overflow() uint32 {
195 return p.overflow
196}
197
198func (p *Page) SetOverflow(target uint32) {
199 p.overflow = target
200}
201
202func (p *Page) String() string {
203 return fmt.Sprintf("ID: %d, Type: %s, count: %d, overflow: %d", p.id, p.Typ(), p.count, p.overflow)
204}
205
206type Pages []*Page
207
208func (s Pages) Len() int { return len(s) }
209func (s Pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
210func (s Pages) Less(i, j int) bool { return s[i].id < s[j].id }
211
212// branchPageElement represents a node on a branch page.
213type branchPageElement struct {
214 pos uint32
215 ksize uint32
216 pgid Pgid
217}
218
219func (n *branchPageElement) Pos() uint32 {
220 return n.pos
221}
222
223func (n *branchPageElement) SetPos(v uint32) {
224 n.pos = v
225}
226
227func (n *branchPageElement) Ksize() uint32 {
228 return n.ksize
229}
230
231func (n *branchPageElement) SetKsize(v uint32) {
232 n.ksize = v
233}
234
235func (n *branchPageElement) Pgid() Pgid {
236 return n.pgid
237}
238
239func (n *branchPageElement) SetPgid(v Pgid) {
240 n.pgid = v
241}
242
243// Key returns a byte slice of the node key.
244func (n *branchPageElement) Key() []byte {
245 return UnsafeByteSlice(unsafe.Pointer(n), 0, int(n.pos), int(n.pos)+int(n.ksize))
246}
247
248// leafPageElement represents a node on a leaf page.
249type leafPageElement struct {
250 flags uint32
251 pos uint32
252 ksize uint32
253 vsize uint32
254}
255
256func NewLeafPageElement(flags, pos, ksize, vsize uint32) *leafPageElement {
257 return &leafPageElement{
258 flags: flags,
259 pos: pos,
260 ksize: ksize,
261 vsize: vsize,
262 }
263}
264
265func (n *leafPageElement) Flags() uint32 {
266 return n.flags
267}
268
269func (n *leafPageElement) SetFlags(v uint32) {
270 n.flags = v
271}
272
273func (n *leafPageElement) Pos() uint32 {
274 return n.pos
275}
276
277func (n *leafPageElement) SetPos(v uint32) {
278 n.pos = v
279}
280
281func (n *leafPageElement) Ksize() uint32 {
282 return n.ksize
283}
284
285func (n *leafPageElement) SetKsize(v uint32) {
286 n.ksize = v
287}
288
289func (n *leafPageElement) Vsize() uint32 {
290 return n.vsize
291}
292
293func (n *leafPageElement) SetVsize(v uint32) {
294 n.vsize = v
295}
296
297// Key returns a byte slice of the node key.
298func (n *leafPageElement) Key() []byte {
299 i := int(n.pos)
300 j := i + int(n.ksize)
301 return UnsafeByteSlice(unsafe.Pointer(n), 0, i, j)
302}
303
304// Value returns a byte slice of the node value.
305func (n *leafPageElement) Value() []byte {
306 i := int(n.pos) + int(n.ksize)
307 j := i + int(n.vsize)
308 return UnsafeByteSlice(unsafe.Pointer(n), 0, i, j)
309}
310
311func (n *leafPageElement) IsBucketEntry() bool {
312 return n.flags&uint32(BucketLeafFlag) != 0
313}
314
315func (n *leafPageElement) Bucket() *InBucket {
316 if n.IsBucketEntry() {
317 return LoadBucket(n.Value())
318 } else {
319 return nil
320 }
321}
322
323// PageInfo represents human readable information about a page.
324type PageInfo struct {
325 ID int
326 Type string
327 Count int
328 OverflowCount int
329}
330
331type Pgids []Pgid
332
333func (s Pgids) Len() int { return len(s) }
334func (s Pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
335func (s Pgids) Less(i, j int) bool { return s[i] < s[j] }
336
337// Merge returns the sorted union of a and b.
338func (a Pgids) Merge(b Pgids) Pgids {
339 // Return the opposite slice if one is nil.
340 if len(a) == 0 {
341 return b
342 }
343 if len(b) == 0 {
344 return a
345 }
346 merged := make(Pgids, len(a)+len(b))
347 Mergepgids(merged, a, b)
348 return merged
349}
350
351// Mergepgids copies the sorted union of a and b into dst.
352// If dst is too small, it panics.
353func Mergepgids(dst, a, b Pgids) {
354 if len(dst) < len(a)+len(b) {
355 panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b)))
356 }
357 // Copy in the opposite slice if one is nil.
358 if len(a) == 0 {
359 copy(dst, b)
360 return
361 }
362 if len(b) == 0 {
363 copy(dst, a)
364 return
365 }
366
367 // Merged will hold all elements from both lists.
368 merged := dst[:0]
369
370 // Assign lead to the slice with a lower starting value, follow to the higher value.
371 lead, follow := a, b
372 if b[0] < a[0] {
373 lead, follow = b, a
374 }
375
376 // Continue while there are elements in the lead.
377 for len(lead) > 0 {
378 // Merge largest prefix of lead that is ahead of follow[0].
379 n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] })
380 merged = append(merged, lead[:n]...)
381 if n >= len(lead) {
382 break
383 }
384
385 // Swap lead and follow.
386 lead, follow = follow, lead[n:]
387 }
388
389 // Append what's left in follow.
390 _ = append(merged, follow...)
391}