Skip to content

Commit

Permalink
Python ledger optimisations: Add a --quiet flag, and reuse calculated values in MerkleTree (#6702)
Browse files Browse the repository at this point in the history

Co-authored-by: Amaury Chamayou <[email protected]>
  • Loading branch information
eddyashton and achamayou authored Dec 12, 2024
1 parent 41b1f97 commit 44a5155
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 49 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## [6.0.0-dev9]

[6.0.0-dev9]: https://github.com/microsoft/CCF/releases/tag/6.0.0-dev9

### Changed

- The `read_ledger.py` tool now has a `--quiet` option which avoids printing anything per-transaction. Together with other performance improvements, this should make the tool more useful for verifying the integrity of large ledgers.

## [6.0.0-dev8]

[6.0.0-dev8]: https://github.com/microsoft/CCF/releases/tag/6.0.0-dev8
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "ccf"
version = "6.0.0-dev8"
version = "6.0.0-dev9"
authors = [
{ name="CCF Team", email="[email protected]" },
]
Expand Down
77 changes: 46 additions & 31 deletions python/src/ccf/merkletree.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Licensed under the Apache 2.0 License.

from hashlib import sha256
import math


class MerkleTree(object):
Expand All @@ -10,18 +11,22 @@ class MerkleTree(object):
"""

def __init__(self):
self.levels = None
self.reset_tree()

def reset_tree(self):
self.leaves = list()
self.levels = None
self._levels = [[]]
self._root = None

def add_leaf(self, values: bytes, do_hash=True):
digest = values
if do_hash:
digest = sha256(values).digest()
self.leaves.append(digest)
self._levels[0].append(digest)
self._root = None # Need to recalculate

@property
def leaves(self):
    """Return the leaf digests, which are stored as the first level (``self._levels[0]``)."""
    return self._levels[0]

def get_leaf(self, index: int) -> bytes:
    """Return the leaf stored at position ``index`` in ``self.leaves``."""
    return self.leaves[index]
Expand All @@ -30,46 +35,56 @@ def get_leaf_count(self) -> int:
return len(self.leaves)

def get_merkle_root(self) -> bytes:
# Always make tree before getting root
self._make_tree()
assert (
self.levels is not None
), "Unexpected error while getting root. MerkleTree has no levels."
if self._root is None:
# Make tree before getting root if root not already calculated
self._make_tree()
assert (
self._levels is not None
), "Unexpected error while getting root. MerkleTree has no levels."
self._root = self._levels[-1][0]

return self.levels[0][0]
return self._root

def _calculate_next_level(self):
solo_leaf = None
# number of leaves on the level
number_of_leaves_on_current_level = len(self.levels[0])
def _recalculate_level(self, level):
assert len(self._levels) > level - 1
prev_level = self._levels[level - 1]
number_of_leaves_on_prev_level = len(prev_level)

assert (
number_of_leaves_on_current_level > 1
number_of_leaves_on_prev_level > 1
), "Merkle Tree should have more than one leaf at every level"

solo_leaf = None

if (
number_of_leaves_on_current_level % 2 == 1
number_of_leaves_on_prev_level % 2 == 1
): # if odd number of leaves on the level
# Get the solo leaf (last leaf in-case the leaves are odd numbered)
solo_leaf = self.levels[0][-1]
number_of_leaves_on_current_level -= 1
solo_leaf = prev_level[-1]
number_of_leaves_on_prev_level -= 1

if not len(self._levels) > level:
self._levels.append([])

# Reuse existing level as much as possible
current_level = self._levels[level]

# Since we may have copied a solo-leaf to the rightmost node last time, pop and re-calculate it
if len(current_level):
current_level.pop(-1)

done = len(current_level)

new_level = []
for left_node, right_node in zip(
self.levels[0][0:number_of_leaves_on_current_level:2],
self.levels[0][1:number_of_leaves_on_current_level:2],
prev_level[done * 2 : number_of_leaves_on_prev_level : 2],
prev_level[done * 2 + 1 : number_of_leaves_on_prev_level : 2],
):
new_level.append(sha256(left_node + right_node).digest())
current_level.append(sha256(left_node + right_node).digest())
if solo_leaf is not None:
new_level.append(solo_leaf)
self.levels = [
new_level,
] + self.levels # prepend new level
current_level.append(solo_leaf)

def _make_tree(self):
if self.get_leaf_count() > 0:
self.levels = [
self.leaves,
]
while len(self.levels[0]) > 1:
self._calculate_next_level()
num_levels = 1 + math.ceil(math.log(self.get_leaf_count(), 2))
for level in range(1, num_levels):
self._recalculate_level(level)
57 changes: 41 additions & 16 deletions python/src/ccf/read_ledger.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,17 @@
import json
import re
import argparse
from enum import Enum, auto

from loguru import logger as LOG


class PrintMode(Enum):
    """Selects what is printed for each transaction while a ledger is read."""

    # Print nothing per transaction.
    Quiet = auto()
    # Print only "<view>.<seqno> <write set digest hex>" per transaction.
    Digests = auto()
    # Dump each transaction via dump_entry.
    Contents = auto()


def indent(n):
    """Return an indentation string: the indent unit repeated ``n`` times."""
    return " " * n

Expand Down Expand Up @@ -129,15 +136,16 @@ def dump_entry(entry, table_filter, tables_format_rules):

def run(
paths,
print_mode: PrintMode,
is_snapshot=False,
tables=None,
uncommitted=False,
tables_regex=None,
insecure_skip_verification=False,
uncommitted=False,
tables_format_rules=None,
digests_only=None,
):
table_filter = re.compile(tables_regex) if tables_regex is not None else None

# Extend and compile rules
table_filter = re.compile(tables) if tables is not None else None
tables_format_rules = tables_format_rules or []
tables_format_rules.extend(default_tables_format_rules)
tables_format_rules = [
Expand Down Expand Up @@ -170,11 +178,13 @@ def run(
f"chunk {chunk.filename()} ({'' if chunk.is_committed() else 'un'}committed)"
)
for transaction in chunk:
if digests_only:
if print_mode == PrintMode.Quiet:
pass
elif print_mode == PrintMode.Digests:
print(
f"{transaction.gcm_header.view}.{transaction.gcm_header.seqno} {transaction.get_write_set_digest().hex()}"
)
else:
elif print_mode == PrintMode.Contents:
dump_entry(transaction, table_filter, tables_format_rules)
except Exception as e:
LOG.exception(f"Error parsing ledger: {e}")
Expand Down Expand Up @@ -216,37 +226,52 @@ def main():
action="store_true",
)
parser.add_argument(
"--uncommitted", help="Also parse uncommitted ledger files", action="store_true"
)

display_options = parser.add_mutually_exclusive_group()
display_options.add_argument(
"-q",
"--quiet",
help="Don't print transaction digests or contents",
action="store_true",
)
display_options.add_argument(
"-d",
"--digests-only",
help="Only print transaction digests",
action="store_true",
)
parser.add_argument(
display_options.add_argument(
"-t",
"--tables",
help="Regex filter for tables to display",
type=str,
default=None,
)
parser.add_argument(
"--uncommitted", help="Also parse uncommitted ledger files", action="store_true"
)

parser.add_argument(
"--insecure-skip-verification",
help="INSECURE: skip ledger Merkle tree integrity verification",
action="store_true",
default=False,
)

args = parser.parse_args()

print_mode = PrintMode.Contents
if args.quiet:
print_mode = PrintMode.Quiet
elif args.digests_only:
print_mode = PrintMode.Digests

if not run(
args.paths,
args.snapshot,
args.tables,
args.uncommitted,
args.insecure_skip_verification,
None,
args.digests_only,
print_mode,
is_snapshot=args.snapshot,
tables_regex=args.tables,
insecure_skip_verification=args.insecure_skip_verification,
uncommitted=args.uncommitted,
):
sys.exit(1)

Expand Down
7 changes: 6 additions & 1 deletion tests/governance_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,11 +222,16 @@ def fmt_str(data: bytes) -> str:
primary, backups = network.find_nodes()
for node in (primary, *backups):
ledger_dirs = node.remote.ledger_paths()
assert ccf.read_ledger.run(paths=ledger_dirs, tables_format_rules=format_rule)
assert ccf.read_ledger.run(
paths=ledger_dirs,
print_mode=ccf.read_ledger.PrintMode.Contents,
tables_format_rules=format_rule,
)

snapshot_dir = network.get_committed_snapshots(primary)
assert ccf.read_ledger.run(
paths=[os.path.join(snapshot_dir, os.listdir(snapshot_dir)[-1])],
print_mode=ccf.read_ledger.PrintMode.Contents,
is_snapshot=True,
tables_format_rules=format_rule,
)
Expand Down

0 comments on commit 44a5155

Please sign in to comment.