-
Notifications
You must be signed in to change notification settings - Fork 275
/
Copy pathbdb.py
210 lines (183 loc) · 8.13 KB
/
bdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
#!/usr/bin/env python3
# Copyright (c) 2020-2021 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
"""
Utilities for working directly with the wallet's BDB database file
This is specific to the configuration of BDB used in this project:
- Outer database contains single subdatabase named 'main'
- btree
- btree internal, leaf and overflow pages
Each key-value pair is two entries in a btree leaf, which optionally refers to overflow pages
if the data doesn't fit into a single page. The first entry is the key, the one that follows
is the value. And so on. Note that the entry data is itself not in the correct order. Instead
entry offsets are stored in the correct order and those offsets are needed to then retrieve
the data itself. Note that this implementation currently only supports reading databases that
are in the same endianness as the host.
Page format can be found in BDB source code dbinc/db_page.h
`db_dump -da wallet.dat` is useful to see the data in a wallet.dat BDB file
"""
import struct
# Important constants
# Number of bytes dump_bdb_kv reads from the start of the file before the
# page size is known; the page size is taken from bytes 20..24 of that prefix.
PAGE_HEADER_SIZE = 26
# Index (page number) of the outer database's metadata page
OUTER_META_PAGE = 0
# Page type values (compared against the 'pg_type' byte of a page header)
BTREE_INTERNAL = 3
BTREE_LEAF = 5
OVERFLOW_DATA = 7
BTREE_META = 9
# Record type values (compared against the type byte of an entry's 3-byte record header)
RECORD_KEYDATA = 1
RECORD_OVERFLOW_DATA = 3
# Some magic numbers for sanity checking (verified by dump_meta_page)
BTREE_MAGIC = 0x053162
DB_VERSION = 9
# Key that the single entry pair in the outer database's root leaf page must carry
SUBDATABASE_NAME = b'main'
# Parses a single internal, leaf or overflow page (raw bytes) into a dict.
# The dict carries the page header fields ('pgno', 'prev_pgno', 'next_pgno',
# 'hf_offset', 'level', 'pg_type'), the tuple of 'entry_offsets', and an
# 'entries' list whose items depend on the page type:
# internal page [BTREE_INTERNAL]:
# - 'page_num': referenced page number (used to find further pages to process)
# leaf page [BTREE_LEAF]:
# - 'record_type': record type, must be RECORD_KEYDATA or RECORD_OVERFLOW_DATA
# - 'data': binary data (key or value payload), if record type is RECORD_KEYDATA
# - 'page_num': referenced overflow page number, if record type is RECORD_OVERFLOW_DATA
# overflow page [OVERFLOW_DATA]:
# - 'data': binary data (part of key or value payload)
# Any other page type, or an unexpected record type, trips an assertion.
def dump_page(data):
    # Fixed 26-byte page header, decoded with native byte order and alignment.
    header_fields = struct.unpack('QIIIHHBB', data[0:26])
    _, pgno, prev_pgno, next_pgno, entries, hf_offset, level, pg_type = header_fields

    # One 16-bit offset per entry follows the header directly.
    entry_offsets = struct.unpack(str(entries) + 'H', data[26:26 + entries * 2])

    page_info = {
        'pgno': pgno,
        'prev_pgno': prev_pgno,
        'next_pgno': next_pgno,
        'hf_offset': hf_offset,
        'level': level,
        'pg_type': pg_type,
        'entry_offsets': entry_offsets,
        'entries': [],
    }

    assert pg_type in (BTREE_INTERNAL, BTREE_LEAF, OVERFLOW_DATA)

    if pg_type == OVERFLOW_DATA:
        # An overflow page holds exactly one blob placed right after the
        # header; its length is read from the hf_offset header field.
        assert entries == 1
        page_info['entries'].append({'data': data[26:26 + hf_offset]})
        return page_info

    for start in entry_offsets:
        # Every record begins with a 3-byte header: 16-bit length + type byte.
        body = start + 3
        e_len, record_type = struct.unpack('HB', data[start:body])

        entry = {}
        if pg_type == BTREE_INTERNAL:
            assert record_type == RECORD_KEYDATA
            # 9 bytes after the record header: skipped byte, page number,
            # and a trailing 32-bit value that is not used here.
            entry['page_num'] = struct.unpack('=BII', data[body:body + 9])[1]
        elif pg_type == BTREE_LEAF:
            assert record_type in (RECORD_KEYDATA, RECORD_OVERFLOW_DATA)
            entry['record_type'] = record_type
            if record_type == RECORD_KEYDATA:
                # Payload is stored inline, e_len bytes long.
                entry['data'] = data[body:body + e_len]
            elif record_type == RECORD_OVERFLOW_DATA:
                # Payload lives on overflow pages; only the first page
                # number is recorded here.
                entry['page_num'] = struct.unpack('=BII', data[body:body + 9])[1]

        page_info['entries'].append(entry)

    return page_info
# Parses a btree metadata page (raw bytes, at least 512 bytes are consumed)
# into a dict of its named header fields.
# Sanity checks the magic value, page type and version via assertions before
# the btree-specific part of the page is decoded.
# The 'uid', 'iv' and 'chksum' fields are returned as hex-encoded bytes.
def dump_meta_page(page):
    # General metadata: first 72 bytes. A name of None marks a value that is
    # decoded but not reported.
    general_names = (
        None, 'pgno', 'magic', 'version', 'pagesize', 'encrypt_alg',
        'pg_type', 'metaflags', None, 'free', 'last_pgno', 'nparts',
        'key_count', 'record_count', 'flags', 'uid',
    )
    general_values = struct.unpack('QIIIIBBBBIIIIII20s', page[0:72])
    metadata = {
        name: value
        for name, value in zip(general_names, general_values)
        if name is not None
    }
    metadata['uid'] = metadata['uid'].hex().encode()

    assert metadata['magic'] == BTREE_MAGIC, 'bdb magic does not match bdb btree magic'
    assert metadata['pg_type'] == BTREE_META, 'Metadata page is not a btree metadata page'
    assert metadata['version'] == DB_VERSION, 'Database too new'

    # Btree metadata: bytes 72..512 of the page.
    btree_names = (
        None, 'minkey', 're_len', 're_pad', 'root', None,
        'crypto_magic', None, 'iv', 'chksum',
    )
    btree_values = struct.unpack('IIIII368sI12s16s20s', page[72:512])
    metadata.update(
        (name, value)
        for name, value in zip(btree_names, btree_values)
        if name is not None
    )
    for blob_field in ('iv', 'chksum'):
        metadata[blob_field] = metadata[blob_field].hex().encode()

    return metadata
# Given the dict that dump_page returns for a leaf page, collect its key-value
# pairs into a dict mapping key bytes to value bytes.
# - page_data: parsed leaf page; each entry carries 'record_type' and either
#   'data' (RECORD_KEYDATA) or 'page_num' (RECORD_OVERFLOW_DATA)
# - pages: list of raw pages indexed by page number, used to follow overflow chains
# Entries come in pairs: even-indexed entries are keys, odd-indexed entries are
# the values belonging to the key just before them. A key without a following
# value entry maps to b''.
def extract_kv_pairs(page_data, pages):
    out = {}
    last_key = None
    for i, entry in enumerate(page_data['entries']):
        if entry['record_type'] == RECORD_KEYDATA:
            data = entry['data']
        elif entry['record_type'] == RECORD_OVERFLOW_DATA:
            # Payload is spread over a chain of overflow pages linked through
            # 'next_pgno'; a next page number of 0 terminates the chain.
            chunks = []
            next_page = entry['page_num']
            while next_page != 0:
                opage_info = dump_page(pages[next_page])
                chunks.append(opage_info['entries'][0]['data'])
                next_page = opage_info['next_pgno']
            data = b''.join(chunks)
        else:
            data = b''

        # By virtue of these all being pairs, even number entries are keys, and odd are values
        if i % 2 == 0:
            # Use the assembled payload, not entry['data']: overflow entries
            # have no 'data' field, so a key stored on overflow pages would
            # otherwise raise KeyError.
            out[data] = b''
            last_key = data
        else:
            out[last_key] = data
    return out
# Reads the BDB file at `filename` and returns a dict of all key-value pairs
# (bytes -> bytes) stored in its 'main' subdatabase.
# Structural expectations (page size, meta page contents, layout of the outer
# root page, page numbers in range) are enforced with assertions.
def dump_bdb_kv(filename):
    with open(filename, 'rb') as bdb_file:
        # The page size sits at bytes 20..24 of the first page, so a short
        # prefix is enough to learn how large each page is.
        first_page = bdb_file.read(PAGE_HEADER_SIZE)
        pagesize, = struct.unpack('I', first_page[20:24])
        assert pagesize in (512, 1024, 2048, 4096, 8192, 16384, 32768, 65536)

        # Complete the first page now that its size is known.
        first_page += bdb_file.read(pagesize - PAGE_HEADER_SIZE)
        assert len(first_page) == pagesize

        # Slurp the remaining pages until read() signals EOF with b''.
        pages = [first_page]
        pages.extend(iter(lambda: bdb_file.read(pagesize), b''))

    # The outer database's root must be a leaf holding exactly one pair:
    # the subdatabase name and the 4-byte page number of its meta page.
    outer_meta_info = dump_meta_page(pages[OUTER_META_PAGE])
    root_page_info = dump_page(pages[outer_meta_info['root']])
    assert root_page_info['pg_type'] == BTREE_LEAF
    assert len(root_page_info['entries']) == 2
    name_entry, meta_pgno_entry = root_page_info['entries']
    assert name_entry['data'] == SUBDATABASE_NAME
    assert len(meta_pgno_entry['data']) == 4
    inner_meta_page = int.from_bytes(meta_pgno_entry['data'], 'big')
    inner_meta_info = dump_meta_page(pages[inner_meta_page])

    # Walk the inner btree depth-first using an explicit stack of page numbers.
    kv = {}
    pending = [inner_meta_info['root']]
    while pending:
        curr_page_no = pending.pop()
        assert curr_page_no <= outer_meta_info['last_pgno']
        info = dump_page(pages[curr_page_no])
        assert info['pg_type'] in (BTREE_INTERNAL, BTREE_LEAF)
        if info['pg_type'] == BTREE_INTERNAL:
            # Internal pages only point at further pages to visit.
            pending.extend(entry['page_num'] for entry in info['entries'])
        elif info['pg_type'] == BTREE_LEAF:
            # Leaf pages contribute actual records; later pages win on
            # duplicate keys.
            kv.update(extract_kv_pairs(info, pages))
    return kv