From cb2f7b7a638dc4f337b40e7bb322c5a006f327b0 Mon Sep 17 00:00:00 2001 From: Benjamin Wang Date: Wed, 4 Sep 2024 12:49:12 +0100 Subject: [PATCH] Support scan-keys command to scan the key space starting a specific revision Signed-off-by: Benjamin Wang --- tools/etcd-dump-db/README.md | 10 +++ tools/etcd-dump-db/main.go | 28 +++++++ tools/etcd-dump-db/meta.go | 40 ++++++++++ tools/etcd-dump-db/page.go | 68 ++++++++++++++++ tools/etcd-dump-db/scan.go | 150 +++++++++++++++++++++++++++++++++++ tools/etcd-dump-db/utils.go | 27 ++++++- 6 files changed, 322 insertions(+), 1 deletion(-) create mode 100644 tools/etcd-dump-db/meta.go create mode 100644 tools/etcd-dump-db/page.go create mode 100644 tools/etcd-dump-db/scan.go diff --git a/tools/etcd-dump-db/README.md b/tools/etcd-dump-db/README.md index 2def87ba4ee..b176096f0a6 100644 --- a/tools/etcd-dump-db/README.md +++ b/tools/etcd-dump-db/README.md @@ -102,3 +102,13 @@ key="\x00\x00\x00\x00\x005@x_\x00\x00\x00\x00\x00\x00\x00\tt", value="\n\x153640 key="\x00\x00\x00\x00\x005@x_\x00\x00\x00\x00\x00\x00\x00\bt", value="\n\x153640412599896088633_8" key="\x00\x00\x00\x00\x005@x_\x00\x00\x00\x00\x00\x00\x00\at", value="\n\x153640412599896088633_7" ``` + +#### scan-keys [data dir or db file path] [start revision] + +Scans all the key-value pairs starting from a specific revision in the key space. It works even if the db is corrupted. 
+ +``` +$ ./etcd-dump-db scan-keys ~/tmp/etcd/778/db.db 16589739 2>/dev/null | grep "/registry/configmaps/istio-system/istio-namespace-controller-election" +pageID=1306, index=5/5, rev={Revision:{Main:16589739 Sub:0} tombstone:false}, value=[key "/registry/configmaps/istio-system/istio-namespace-controller-election" | val "k8s\x00\n\x0f\n\x02v1\x12\tConfigMap\x12\xeb\x03\n\xe8\x03\n#istio-namespace-controller-election\x12\x00\x1a\fistio-system\"\x00*$bb696087-260d-4167-bf06-17d3361f9b5f2\x008\x00B\b\b\x9e\xbe\xed\xb5\x06\x10\x00b\xe6\x01\n(control-plane.alpha.kubernetes.io/leader\x12\xb9\x01{\"holderIdentity\":\"istiod-d56968787-txq2d\",\"holderKey\":\"default\",\"leaseDurationSeconds\":30,\"acquireTime\":\"2024-08-13T13:26:54Z\",\"renewTime\":\"2024-08-27T06:16:13Z\",\"leaderTransitions\":0}\x8a\x01\x90\x01\n\x0fpilot-discovery\x12\x06Update\x1a\x02v1\"\b\b\xad\u07b5\xb6\x06\x10\x002\bFieldsV1:[\nY{\"f:metadata\":{\"f:annotations\":{\".\":{},\"f:control-plane.alpha.kubernetes.io/leader\":{}}}}B\x00\x1a\x00\"\x00" | created 9612546 | mod 16589739 | ver 157604] +pageID=4737, index=4/4, rev={Revision:{Main:16589786 Sub:0} tombstone:false}, value=[key "/registry/configmaps/istio-system/istio-namespace-controller-election" | val "k8s\x00\n\x0f\n\x02v1\x12\tConfigMap\x12\xeb\x03\n\xe8\x03\n#istio-namespace-controller-election\x12\x00\x1a\fistio-system\"\x00*$bb696087-260d-4167-bf06-17d3361f9b5f2\x008\x00B\b\b\x9e\xbe\xed\xb5\x06\x10\x00b\xe6\x01\n(control-plane.alpha.kubernetes.io/leader\x12\xb9\x01{\"holderIdentity\":\"istiod-d56968787-txq2d\",\"holderKey\":\"default\",\"leaseDurationSeconds\":30,\"acquireTime\":\"2024-08-13T13:26:54Z\",\"renewTime\":\"2024-08-27T06:16:21Z\",\"leaderTransitions\":0}\x8a\x01\x90\x01\n\x0fpilot-discovery\x12\x06Update\x1a\x02v1\"\b\b\xb5\u07b5\xb6\x06\x10\x002\bFieldsV1:[\nY{\"f:metadata\":{\"f:annotations\":{\".\":{},\"f:control-plane.alpha.kubernetes.io/leader\":{}}}}B\x00\x1a\x00\"\x00" | created 9612546 | mod 16589786 | ver 157605] 
+``` \ No newline at end of file diff --git a/tools/etcd-dump-db/main.go b/tools/etcd-dump-db/main.go index 95e6c439265..89c2f2137e6 100644 --- a/tools/etcd-dump-db/main.go +++ b/tools/etcd-dump-db/main.go @@ -19,6 +19,7 @@ import ( "log" "os" "path/filepath" + "strconv" "strings" "time" @@ -40,6 +41,11 @@ var ( Short: "iterate-bucket lists key-value pairs in reverse order.", Run: iterateBucketCommandFunc, } + scanKeySpaceCommand = &cobra.Command{ + Use: "scan-keys [data dir or db file path] [start revision]", + Short: "scan-keys scans all the key-value pairs starting from a specific revision in the key space.", + Run: scanKeysCommandFunc, + } getHashCommand = &cobra.Command{ Use: "hash [data dir or db file path]", Short: "hash computes the hash of db file.", @@ -58,6 +64,7 @@ func init() { rootCommand.AddCommand(listBucketCommand) rootCommand.AddCommand(iterateBucketCommand) + rootCommand.AddCommand(scanKeySpaceCommand) rootCommand.AddCommand(getHashCommand) } @@ -107,6 +114,27 @@ func iterateBucketCommandFunc(_ *cobra.Command, args []string) { } } +func scanKeysCommandFunc(_ *cobra.Command, args []string) { + if len(args) != 2 { + log.Fatalf("Must provide 2 arguments (got %v)", args) + } + dp := args[0] + if !strings.HasSuffix(dp, "db") { + dp = filepath.Join(snapDir(dp), "db") + } + if !existFileOrDir(dp) { + log.Fatalf("%q does not exist", dp) + } + startRev, err := strconv.ParseInt(args[1], 10, 64) + if err != nil { + log.Fatal(err) + } + err = scanKeys(dp, startRev) + if err != nil { + log.Fatal(err) + } +} + func getHashCommandFunc(_ *cobra.Command, args []string) { if len(args) < 1 { log.Fatalf("Must provide at least 1 argument (got %v)", args) diff --git a/tools/etcd-dump-db/meta.go b/tools/etcd-dump-db/meta.go new file mode 100644 index 00000000000..30860c8af8b --- /dev/null +++ b/tools/etcd-dump-db/meta.go @@ -0,0 +1,40 @@ +// Copyright 2024 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this 
file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import "unsafe" + +const magic uint32 = 0xED0CDAED + +type inBucket struct { + root uint64 // page id of the bucket's root-level page + sequence uint64 // monotonically incrementing, used by NextSequence() +} + +type meta struct { + magic uint32 + version uint32 + pageSize uint32 + flags uint32 + root inBucket + freelist uint64 + pgid uint64 + txid uint64 + checksum uint64 +} + +func loadPageMeta(buf []byte) *meta { + return (*meta)(unsafe.Pointer(&buf[pageHeaderSize])) +} diff --git a/tools/etcd-dump-db/page.go b/tools/etcd-dump-db/page.go new file mode 100644 index 00000000000..0b6c9c67b19 --- /dev/null +++ b/tools/etcd-dump-db/page.go @@ -0,0 +1,68 @@ +// Copyright 2024 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import "unsafe" + +const pageHeaderSize = unsafe.Sizeof(page{}) +const leafPageElementSize = unsafe.Sizeof(leafPageElement{}) +const pageMaxAllocSize = 0xFFFFFFF + +const ( + leafPageFlag = 0x02 +) + +type page struct { + id uint64 + flags uint16 + count uint16 + overflow uint32 +} + +func (p *page) isLeafPage() bool { + return p.flags == leafPageFlag +} + +func loadPage(buf []byte) *page { + return (*page)(unsafe.Pointer(&buf[0])) +} + +// leafPageElement retrieves the leaf node by index +func (p *page) leafPageElement(index uint16) *leafPageElement { + return (*leafPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), + leafPageElementSize, int(index))) +} + +// leafPageElement represents a node on a leaf page. +type leafPageElement struct { + flags uint32 + pos uint32 + ksize uint32 + vsize uint32 +} + +// Key returns a byte slice of the node key. +func (n *leafPageElement) key() []byte { + i := int(n.pos) + j := i + int(n.ksize) + return unsafeByteSlice(unsafe.Pointer(n), 0, i, j) +} + +// Value returns a byte slice of the node value. +func (n *leafPageElement) value() []byte { + i := int(n.pos) + int(n.ksize) + j := i + int(n.vsize) + return unsafeByteSlice(unsafe.Pointer(n), 0, i, j) +} diff --git a/tools/etcd-dump-db/scan.go b/tools/etcd-dump-db/scan.go new file mode 100644 index 00000000000..3898459d260 --- /dev/null +++ b/tools/etcd-dump-db/scan.go @@ -0,0 +1,150 @@ +// Copyright 2024 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" + "io" + "os" + + "go.etcd.io/etcd/server/v3/storage/mvcc" +) + +func scanKeys(dbPath string, startRev int64) error { + pgSize, hwm, err := readPageAndHWMSize(dbPath) + if err != nil { + return fmt.Errorf("failed to read page and HWM size: %w", err) + } + + for pageID := uint64(2); pageID < hwm; { + p, _, err := readPage(dbPath, pgSize, pageID) + if err != nil { + fmt.Fprintf(os.Stderr, "Reading page %d failed: %v. Continuting...\n", pageID, err) + pageID++ + continue + } + + if !p.isLeafPage() { + pageID++ + continue + } + + for i := uint16(0); i < p.count; i++ { + e := p.leafPageElement(i) + + rev, err := bytesToBucketKey(e.key()) + if err != nil { + if exceptionCheck(e.key()) { + break + } + fmt.Fprintf(os.Stderr, "Decoding revision failed, pageID: %d, index: %d, key: %x, error: %v\n", pageID, i, string(e.key()), err) + continue + } + + if startRev != 0 && rev.Main < startRev { + continue + } + + fmt.Printf("pageID=%d, index=%d/%d, ", pageID, i, p.count-1) + keyDecoder(e.key(), e.value()) + } + + pageID += uint64(p.overflow) + 1 + } + return nil +} + +func bytesToBucketKey(key []byte) (rev mvcc.BucketKey, err error) { + defer func() { + if r := recover(); r != nil { + err = fmt.Errorf("BytesToBucketKey failed: %v", r) + } + }() + rev = mvcc.BytesToBucketKey(key) + return rev, err +} + +func readPageAndHWMSize(dbPath string) (uint64, uint64, error) { + f, err := os.Open(dbPath) + if err != nil { + return 0, 0, err + } + defer f.Close() + + // read 4KB chunk + buf := make([]byte, 4096) + if _, err := io.ReadFull(f, buf); err != nil { + return 0, 0, err + } + + m := loadPageMeta(buf) + if m.magic != magic { + return 0, 0, fmt.Errorf("the Meta Page has wrong (unexpected) magic") + } + + return uint64(m.pageSize), m.pgid, nil +} + +func readPage(dbPath string, pageSize uint64, pageID uint64) (*page, []byte, error) { + f, err 
:= os.Open(dbPath) + if err != nil { + return nil, nil, err + } + defer f.Close() + + buf := make([]byte, pageSize) + if _, err := f.ReadAt(buf, int64(pageID*pageSize)); err != nil { + return nil, nil, err + } + + p := loadPage(buf) + if p.id != pageID { + return nil, nil, fmt.Errorf("unexpected page id: %d, wanted: %d", p.id, pageID) + } + + if p.overflow == 0 { + return p, buf, nil + } + + buf = make([]byte, (uint64(p.overflow)+1)*pageSize) + if _, err := f.ReadAt(buf, int64(pageID*pageSize)); err != nil { + return nil, nil, err + } + + p = loadPage(buf) + if p.id != pageID { + return nil, nil, fmt.Errorf("unexpected page id: %d, wanted: %d", p.id, pageID) + } + + return p, buf, nil +} + +func exceptionCheck(key []byte) bool { + whiteKeyList := map[string]struct{}{ + "alarm": {}, + "auth": {}, + "authRoles": {}, + "authUsers": {}, + "cluster": {}, + "key": {}, + "lease": {}, + "members": {}, + "members_removed": {}, + "meta": {}, + } + + _, ok := whiteKeyList[string(key)] + return ok +} diff --git a/tools/etcd-dump-db/utils.go b/tools/etcd-dump-db/utils.go index 3af585a84d6..184cb5181c4 100644 --- a/tools/etcd-dump-db/utils.go +++ b/tools/etcd-dump-db/utils.go @@ -14,9 +14,34 @@ package main -import "os" +import ( + "os" + "unsafe" +) func existFileOrDir(name string) bool { _, err := os.Stat(name) return err == nil } + +func unsafeAdd(base unsafe.Pointer, offset uintptr) unsafe.Pointer { + return unsafe.Pointer(uintptr(base) + offset) +} + +func unsafeIndex(base unsafe.Pointer, offset uintptr, elemsz uintptr, n int) unsafe.Pointer { + return unsafe.Pointer(uintptr(base) + offset + uintptr(n)*elemsz) +} + +func unsafeByteSlice(base unsafe.Pointer, offset uintptr, i, j int) []byte { + // See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices + // + // This memory is not allocated from C, but it is unmanaged by Go's + // garbage collector and should behave similarly, and the compiler + // should produce similar code. 
Note that this conversion allows a + // subslice to begin after the base address, with an optional offset, + // while the URL above does not cover this case and only slices from + // index 0. However, the wiki never says that the address must be to + // the beginning of a C allocation (or even that malloc was used at + // all), so this is believed to be correct. + return (*[pageMaxAllocSize]byte)(unsafeAdd(base, offset))[i:j:j] +}