Cherry-pick upstream snapshot commits to 1.3.x (#1533)

* core/state/snapshot: fix binary iterator (#20970)

* core/state/snapshot: implement storage iterator (#20971)

* core/state/snapshot: implement storage iterator

* core/state/snapshot, tests: implement helper function

* core/state/snapshot: fix storage issue

If an account is deleted in tx_1 but recreated in tx_2, it can happen
that in this diff layer both destructedSet and storageData record the
account. In this case, the storage iterator should iterate the slots
belonging to the new account, but disable further iteration into deeper
layers (which belong to the old account).
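
To make the rule concrete, here is a minimal sketch with simplified,
hypothetical types (the real code works with hashes and layered
iterators, not plain string maps):

    package snapshot

    // diffLayer is a toy stand-in for the real diff layer: it records both
    // the accounts destructed in this layer and the storage written by a
    // possibly recreated account.
    type diffLayer struct {
        parent        *diffLayer
        destructedSet map[string]struct{}          // accounts wiped in this layer
        storageData   map[string]map[string][]byte // account -> slot -> value
    }

    // storageSlots gathers the slots visible for an account at this layer.
    func (dl *diffLayer) storageSlots(account string) map[string][]byte {
        slots := make(map[string][]byte)
        for k, v := range dl.storageData[account] { // slots of the (re)created account
            slots[k] = v
        }
        if _, destructed := dl.destructedSet[account]; destructed {
            return slots // account was deleted here: never descend into older layers
        }
        if dl.parent != nil {
            for k, v := range dl.parent.storageSlots(account) {
                if _, ok := slots[k]; !ok { // shallower layers take precedence
                    slots[k] = v
                }
            }
        }
        return slots
    }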

* core/state/snapshot: address peter and martin's comment

* core/state: address comments

* core/state/snapshot: fix test

* core/state/snapshot: fix journal nil deserialization

* core/state/snapshot: fix trie generator reporter (#21004)

# Conflicts:
#	go.sum

* tests: cleanup snapshot generator goroutine leak

* core/state/snapshot: release iterator after verification

* core/state/snapshot: don't create storage list for non-existing accounts

* core/state/snapshot: fix typo (#21037)

* cmd/utils: grant snapshot cache to trie if disabled (#21416)

* cmd/utils: grant snapshot cache to trie if disabled

* eth: fix up default non-mainnet cache distribution
Conflicts:
	eth/config.go

* Fix default DatabaseCache value.

In PR #18087 (https://github.com/ethereum/go-ethereum/pull/18087/files),
the trie cache was split between Clean and Dirty. Since the total cache
is supposed to be 1GB, and the Dirty cache was assigned part of the
DatabaseCache, the DatabaseCache default had to be updated as well so
that the caches still sum to 1GB.
This was missed in an earlier merge and is fixed here.
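
The arithmetic being fixed, in sketch form (the percentages are
illustrative, not geth's exact defaults):

    package main

    import "fmt"

    func main() {
        // Whatever budget the dirty trie cache takes out of DatabaseCache must
        // be subtracted there too, or the shares no longer sum to the total.
        const totalMB = 1024
        trieCleanMB := totalMB * 25 / 100
        trieDirtyMB := totalMB * 25 / 100
        databaseMB := totalMB - trieCleanMB - trieDirtyMB // shrinks accordingly
        fmt.Println(trieCleanMB+trieDirtyMB+databaseMB == totalMB) // prints true
    }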

* core/state/snapshot: reduce disk layer depth during generation

* core/state/snapshot: stop generator if it hits missing trie nodes (#21649)

* core/state/snapshot: exit Geth if generator hits missing trie nodes

* core/state/snapshot: error instead of hard die on generator fault

* core/state/snapshot: don't enable logging on the tests

* core/state: disable snapshot iteration if it's not fully constructed (#21682)

* core/state/snapshot: add diskRoot function

* core/state/snapshot: disable iteration if the snapshot is generating

* core/state/snapshot: simplify the function

* core/state: panic for undefined layer

* Fix imports of snapshot/generate_test

* Fix tests from core/state/snapshot/generate_test

trie.Database.Commit now accepts 3 parameters, but the commit that
adds that change was not cherry-picked.
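
For reference, the three-parameter form is, to the best of our reading
of the upstream code around this cherry-pick (verify against the
vendored trie package), Commit(node common.Hash, report bool, callback
func(common.Hash)) error, so the mechanical fix at a test call site
looks like:

    // Old two-parameter call site:
    //   triedb.Commit(root, false)
    // New call site, passing nil for the added node callback:
    if err := triedb.Commit(root, false, nil); err != nil {
        t.Fatalf("failed to commit trie: %v", err)
    }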

* core: improve snapshot journal recovery (#21594)

* core/state/snapshot: introduce snapshot journal version

* core: update the disk layer in an atomic way

* core: persist the disk layer generator periodically

* core/state/snapshot: improve logging

* core/state/snapshot: forcibly ensure the legacy snapshot is matched

* core/state/snapshot: add debug logs

* core, tests: fix tests and special recovery case

* core: polish

* core: add more blockchain tests for snapshot recovery

* core/state: fix comment

* core: add recovery flag for snapshot

* core: add restart after start-after-crash tests

* core/rawdb: fix imports

* core: fix tests

* core: remove log

* core/state/snapshot: fix snapshot

* core: avoid callbacks in SetHead

* core: fix setHead cornercase where the threshold root has state

* core: small docs for the test cases

Co-authored-by: Péter Szilágyi <[email protected]>
Conflicts:
	core/blockchain.go
	core/blockchain_repair_test.go
	core/blockchain_sethead_test.go
	core/rawdb/accessors_snapshot.go
	core/state/snapshot/disklayer_test.go
	core/state/snapshot/snapshot.go

* core/state/snapshot: fix journal recovery from generating old journal (#21775)

* core/state/snapshot: print warning if failed to resolve journal

* core/state/snapshot: fix snapshot recovery

When we encounter a snapshot journal consisting of:
- a disk layer generator in the new format
- a diff layer journal in the old format

the base layer should be returned without error.
The broken diff layers can be reconstructed later,
but we definitely don't want to regenerate the
huge disk layer.
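
A simplified sketch of that recovery rule (the helper names here are
hypothetical, not the real journal loaders):

    package snapshot

    import "log"

    type layer interface{}

    // Stubs standing in for the real loaders.
    func loadDiskLayer() (layer, []byte, error)                    { return struct{}{}, nil, nil }
    func loadDiffLayers(base layer, journal []byte) (layer, error) { return base, nil }

    // loadSnapshotSketch: a decode failure in the (possibly old-format)
    // diff-layer journal must not discard the disk layer, because diff layers
    // can be rebuilt from recent blocks far more cheaply than regenerating
    // the disk layer from the state trie.
    func loadSnapshotSketch() (layer, error) {
        base, journal, err := loadDiskLayer() // new-format generator plus the rest of the journal
        if err != nil {
            return nil, err // no disk layer: the snapshot must be regenerated anyway
        }
        snap, err := loadDiffLayers(base, journal)
        if err != nil {
            log.Printf("Failed to load journalled diff layers: %v", err)
            return base, nil // keep the disk layer; diff layers are rebuilt later
        }
        return snap, nil
    }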

* core: add tests

* core/state/snapshot: update generator marker in sync with flushes

* core/state/snapshot: gethring -> gathering typo (#22104)

* snapshot, trie: fixed typos, mostly in snapshot pkg (#22133)

Conflicts:
	trie/database.go

* core/state/snapshot: add generation logs to storage too

* core/state/snapshot: write snapshot generator in batch (#22163)

* core/state/snapshot: write snapshot generator in batch

* core: refactor the tests

* core: update tests

* core: update tests
Conflicts:
	core/blockchain_snapshot_test.go

* core/state: maintain one more diff layer (#21730)

* core/state: maintain one more diff layer

* core/state: address comment

* snapshot: merge loops for better performance (#22160)

* cmd/utils: enable snapshots by default

Conflicts:
	cmd/utils/flags.go

* core/state/snapshot: ensure Cap retains a min number of layers

Conflicts:
	core/state/snapshot/snapshot_test.go
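
For context, Cap is the call that flattens excess diff layers into the
disk layer. A typical call site (assuming the upstream signature
Cap(root common.Hash, layers int) error and the usual retention of 128
layers) looks roughly like:

    // Flatten everything beyond the 128 most recent diff layers into the
    // disk layer; the fix above guards against Cap collapsing more layers
    // than intended.
    if err := bc.snaps.Cap(block.Root(), 128); err != nil {
        log.Error("Failed to cap snapshot tree", "root", block.Root(), "layers", 128, "err", err)
    }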

* core/state: copy the snap when copying the state (#22340)

* core/state: copy the snap when copying the state

* core/state: deep-copy snap stuff during state Copy
Conflicts:
	core/state/statedb.go
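
The gist of the deep copy (field names as in upstream statedb.go around
this change; verify against the vendored file) is that the snapshot
scratch maps must not be shared between the two StateDB instances:

    // Inside (s *StateDB) Copy(): sharing these maps would let writes on one
    // StateDB leak into the other's snapshot view, so each map is re-made.
    if s.snap != nil {
        state.snap = s.snap
        state.snapDestructs = make(map[common.Hash]struct{})
        for k, v := range s.snapDestructs {
            state.snapDestructs[k] = v
        }
        state.snapAccounts = make(map[common.Hash][]byte)
        for k, v := range s.snapAccounts {
            state.snapAccounts[k] = v
        }
        state.snapStorage = make(map[common.Hash]map[common.Hash][]byte)
        for k, v := range s.snapStorage {
            storage := make(map[common.Hash][]byte, len(v))
            for kk, vv := range v {
                storage[kk] = vv
            }
            state.snapStorage[k] = storage
        }
    }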

* core/state/snapshot: fix panic on missing parent

* core/state/snapshot, ethdb: track deletions more accurately (#22582)

* core/state/snapshot, ethdb: track deletions more accurately

* core/state/snapshot: don't reset the iterator, leveldb's screwy

* ethdb: don't mess with the insert batches for now

* core/state/snapshot: fix data race in diff layer (#22540)

* Fix lint and fix cherry-picked test

* Fix NewBlockChain in tests (the commit that adds a param was not cherry-picked)

* Fix test

* Remove 'brew update' from ios build in CI (#1531)

We previously added brew update in #1503, but it's no longer necessary and gives a different error now, so this removes it.

* Fix non-deterministic test TestReorgSideEvent.

This has already been done on master in commit cae0c99. Here we apply part of that commit to fix the test's non-determinism.

Co-authored-by: gary rong <[email protected]>
Co-authored-by: Péter Szilágyi <[email protected]>
Co-authored-by: Melvin Junhee Woo <[email protected]>
Co-authored-by: Martin Holst Swende <[email protected]>
Co-authored-by: Edgar Aroutiounian <[email protected]>
Co-authored-by: Or Neeman <[email protected]>
7 people authored May 6, 2021
1 parent e7acdda commit 1136a3a
Showing 36 changed files with 3,601 additions and 497 deletions.
1 change: 0 additions & 1 deletion .circleci/config.yml
@@ -116,7 +116,6 @@ jobs:
       - run:
           name: Setup Go language
           command: |
-            brew update
             brew install [email protected]
             brew link [email protected]
             # Check that homebrew installed the expected go version
2 changes: 1 addition & 1 deletion cmd/evm/staterunner.go
@@ -95,7 +95,7 @@ func stateTestCmd(ctx *cli.Context) error {
 	for _, st := range test.Subtests() {
 		// Run the test and aggregate the result
 		result := &StatetestResult{Name: key, Fork: st.Fork, Pass: true}
-		state, err := test.Run(st, cfg, false)
+		_, state, err := test.Run(st, cfg, false)
 		// print state root for evmlab tracing
 		if ctx.GlobalBool(MachineFlag.Name) && state != nil {
 			fmt.Fprintf(os.Stderr, "{\"stateRoot\": \"%x\"}\n", state.IntermediateRoot(false))
9 changes: 5 additions & 4 deletions cmd/utils/flags.go
@@ -214,9 +214,9 @@
 		Usage: `Blockchain garbage collection mode ("full", "archive")`,
 		Value: "full",
 	}
-	SnapshotFlag = cli.BoolFlag{
+	SnapshotFlag = cli.BoolTFlag{
 		Name:  "snapshot",
-		Usage: `Enables snapshot-database mode -- experimental work in progress feature`,
+		Usage: `Enables snapshot-database mode (default = enable)`,
 	}
 	LightKDFFlag = cli.BoolFlag{
 		Name: "lightkdf",
@@ -1693,7 +1693,8 @@ func SetEthConfig(ctx *cli.Context, stack *node.Node, cfg *eth.Config) {
 	if ctx.GlobalIsSet(CacheFlag.Name) || ctx.GlobalIsSet(CacheSnapshotFlag.Name) {
 		cfg.SnapshotCache = ctx.GlobalInt(CacheFlag.Name) * ctx.GlobalInt(CacheSnapshotFlag.Name) / 100
 	}
-	if !ctx.GlobalIsSet(SnapshotFlag.Name) {
+	if !ctx.GlobalBool(SnapshotFlag.Name) {
+		cfg.TrieCleanCache += cfg.SnapshotCache
 		cfg.SnapshotCache = 0 // Disabled
 	}
 	if ctx.GlobalIsSet(DocRootFlag.Name) {
@@ -1944,7 +1945,7 @@ func MakeChain(ctx *cli.Context, stack *node.Node) (chain *core.BlockChain, chai
 		TrieTimeLimit: eth.DefaultConfig.TrieTimeout,
 		SnapshotLimit: eth.DefaultConfig.SnapshotCache,
 	}
-	if !ctx.GlobalIsSet(SnapshotFlag.Name) {
+	if !ctx.GlobalBool(SnapshotFlag.Name) {
 		cache.SnapshotLimit = 0 // Disabled
 	}
 	if ctx.GlobalIsSet(CacheFlag.Name) || ctx.GlobalIsSet(CacheTrieFlag.Name) {
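
For context on the flag change above (not part of the diff): in
urfave/cli v1, which geth vendors, a BoolTFlag defaults to true, which
is why the guards switch from GlobalIsSet (was the flag passed?) to
GlobalBool (is it true?). A minimal standalone illustration:

    package main

    import (
        "fmt"
        "os"

        cli "gopkg.in/urfave/cli.v1"
    )

    // Demonstrates the cli.BoolTFlag semantics relied on above: the flag
    // reads true unless the user explicitly passes --snapshot=false.
    func main() {
        app := cli.NewApp()
        app.Flags = []cli.Flag{cli.BoolTFlag{Name: "snapshot"}}
        app.Action = func(ctx *cli.Context) error {
            fmt.Println(ctx.GlobalBool("snapshot")) // true by default
            return nil
        }
        if err := app.Run(os.Args); err != nil {
            fmt.Fprintln(os.Stderr, err)
        }
    }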
97 changes: 84 additions & 13 deletions core/blockchain.go
@@ -196,9 +196,10 @@ type BlockChain struct {
 	processor Processor // Block transaction processor interface
 	vmConfig  vm.Config

-	badBlocks       *lru.Cache                     // Bad block cache
-	shouldPreserve  func(*types.Block) bool        // Function used to determine whether should preserve the given block.
-	terminateInsert func(common.Hash, uint64) bool // Testing hook used to terminate ancient receipt chain insertion.
+	badBlocks          *lru.Cache                     // Bad block cache
+	shouldPreserve     func(*types.Block) bool        // Function used to determine whether should preserve the given block.
+	terminateInsert    func(common.Hash, uint64) bool // Testing hook used to terminate ancient receipt chain insertion.
+	writeLegacyJournal bool                           // Testing flag used to flush the snapshot journal in legacy format.
 }

 // NewBlockChain returns a fully initialised block chain using information
@@ -262,9 +263,29 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par
 	// Make sure the state associated with the block is available
 	head := bc.CurrentBlock()
 	if _, err := state.New(head.Root(), bc.stateCache, bc.snaps); err != nil {
-		log.Warn("Head state missing, repairing", "number", head.Number(), "hash", head.Hash())
-		if err := bc.SetHead(head.NumberU64()); err != nil {
-			return nil, err
+		// Head state is missing, before the state recovery, find out the
+		// disk layer point of snapshot(if it's enabled). Make sure the
+		// rewound point is lower than disk layer.
+		var diskRoot common.Hash
+		if bc.cacheConfig.SnapshotLimit > 0 {
+			diskRoot = rawdb.ReadSnapshotRoot(bc.db)
+		}
+		if diskRoot != (common.Hash{}) {
+			log.Warn("Head state missing, repairing", "number", head.Number(), "hash", head.Hash(), "snaproot", diskRoot)
+
+			snapDisk, err := bc.SetHeadBeyondRoot(head.NumberU64(), diskRoot)
+			if err != nil {
+				return nil, err
+			}
+			// Chain rewound, persist old snapshot number to indicate recovery procedure
+			if snapDisk != 0 {
+				rawdb.WriteSnapshotRecoveryNumber(bc.db, snapDisk)
+			}
+		} else {
+			log.Warn("Head state missing, repairing", "number", head.Number(), "hash", head.Hash())
+			if err := bc.SetHead(head.NumberU64()); err != nil {
+				return nil, err
+			}
 		}
 	}
 	// Ensure that a previous crash in SetHead doesn't leave extra ancients
@@ -320,7 +341,18 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par
 	}
 	// Load any existing snapshot, regenerating it if loading failed
 	if bc.cacheConfig.SnapshotLimit > 0 {
-		bc.snaps = snapshot.New(bc.db, bc.stateCache.TrieDB(), bc.cacheConfig.SnapshotLimit, bc.CurrentBlock().Root(), !bc.cacheConfig.SnapshotWait)
+		// If the chain was rewound past the snapshot persistent layer (causing
+		// a recovery block number to be persisted to disk), check if we're still
+		// in recovery mode and in that case, don't invalidate the snapshot on a
+		// head mismatch.
+		var recover bool
+
+		head := bc.CurrentBlock()
+		if layer := rawdb.ReadSnapshotRecoveryNumber(bc.db); layer != nil && *layer > head.NumberU64() {
+			log.Warn("Enabling snapshot recovery", "chainhead", head.NumberU64(), "diskbase", *layer)
+			recover = true
+		}
+		bc.snaps = snapshot.New(bc.db, bc.stateCache.TrieDB(), bc.cacheConfig.SnapshotLimit, head.Root(), !bc.cacheConfig.SnapshotWait, recover)
 	}
 	// Take ownership of this particular state
 	go bc.update()
@@ -416,9 +448,25 @@ func (bc *BlockChain) loadLastState() error {
 // was fast synced or full synced and in which state, the method will try to
 // delete minimal data from disk whilst retaining chain consistency.
 func (bc *BlockChain) SetHead(head uint64) error {
+	_, err := bc.SetHeadBeyondRoot(head, common.Hash{})
+	return err
+}
+
+// SetHeadBeyondRoot rewinds the local chain to a new head with the extra condition
+// that the rewind must pass the specified state root. This method is meant to be
+// used when rewiding with snapshots enabled to ensure that we go back further than
+// persistent disk layer. Depending on whether the node was fast synced or full, and
+// in which state, the method will try to delete minimal data from disk whilst
+// retaining chain consistency.
+//
+// The method returns the block number where the requested root cap was found.
+func (bc *BlockChain) SetHeadBeyondRoot(head uint64, root common.Hash) (uint64, error) {
 	bc.chainmu.Lock()
 	defer bc.chainmu.Unlock()

+	// Track the block number of the requested root hash
+	var rootNumber uint64 // (no root == always 0)
+
 	// Retrieve the last pivot block to short circuit rollbacks beyond it and the
 	// current freezer limit to start nuking id underflown
 	pivot := rawdb.ReadLastPivotNumber(bc.db)
@@ -434,8 +482,16 @@ func (bc *BlockChain) SetHead(head uint64) error {
 			log.Error("Gap in the chain, rewinding to genesis", "number", header.Number, "hash", header.Hash())
 			newHeadBlock = bc.genesisBlock
 		} else {
-			// Block exists, keep rewinding until we find one with state
+			// Block exists, keep rewinding until we find one with state,
+			// keeping rewinding until we exceed the optional threshold
+			// root hash
+			beyondRoot := (root == common.Hash{}) // Flag whether we're beyond the requested root (no root, always true)
+
 			for {
+				// If a root threshold was requested but not yet crossed, check
+				if root != (common.Hash{}) && !beyondRoot && newHeadBlock.Root() == root {
+					beyondRoot, rootNumber = true, newHeadBlock.NumberU64()
+				}
 				if _, err := state.New(newHeadBlock.Root(), bc.stateCache, bc.snaps); err != nil {
 					log.Info("Block state missing, rewinding further", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash())
 					if pivot == nil || newHeadBlock.NumberU64() > *pivot {
@@ -446,8 +502,12 @@ func (bc *BlockChain) SetHead(head uint64) error {
 						newHeadBlock = bc.genesisBlock
 					}
 				}
-				log.Info("Rewound to block with state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash())
-				break
+				if beyondRoot || newHeadBlock.NumberU64() == 0 {
+					log.Info("Rewound to block with state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash())
+					break
+				}
+				log.Info("Skipping block with threshold state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash(), "root", newHeadBlock.Root())
+				newHeadBlock = bc.GetBlock(newHeadBlock.ParentHash(), newHeadBlock.NumberU64()-1) // Keep rewinding
 			}
 		}
 		rawdb.WriteHeadBlockHash(db, newHeadBlock.Hash())
@@ -528,7 +588,7 @@ func (bc *BlockChain) SetHead(head uint64) error {
 	bc.txLookupCache.Purge()
 	bc.futureBlocks.Purge()

-	return bc.loadLastState()
+	return rootNumber, bc.loadLastState()
 }

 // FastSyncCommitHead sets the current head block to the one defined by the hash
// FastSyncCommitHead sets the current head block to the one defined by the hash
Expand Down Expand Up @@ -571,6 +631,11 @@ func (bc *BlockChain) Snapshot() *snapshot.Tree {
return bc.snaps
}

// Add this to solve conflicts due to cherry-picking
func (bc *BlockChain) Snapshots() *snapshot.Tree {
return bc.Snapshot()
}

// CurrentFastBlock retrieves the current fast-sync head block of the canonical
// chain. The block is retrieved from the blockchain's internal cache.
func (bc *BlockChain) CurrentFastBlock() *types.Block {
@@ -880,8 +945,14 @@ func (bc *BlockChain) Stop() {
 	var snapBase common.Hash
 	if bc.snaps != nil {
 		var err error
-		if snapBase, err = bc.snaps.Journal(bc.CurrentBlock().Root()); err != nil {
-			log.Error("Failed to journal state snapshot", "err", err)
+		if bc.writeLegacyJournal {
+			if snapBase, err = bc.snaps.LegacyJournal(bc.CurrentBlock().Root()); err != nil {
+				log.Error("Failed to journal state snapshot", "err", err)
+			}
+		} else {
+			if snapBase, err = bc.snaps.Journal(bc.CurrentBlock().Root()); err != nil {
+				log.Error("Failed to journal state snapshot", "err", err)
+			}
 		}
 	}
 	// Ensure the state of a recent block is also stored to disk before exiting.