From 72076bd9a6a538e5d439327dae836a6c034a532f Mon Sep 17 00:00:00 2001 From: andrew <44451818+afostr@users.noreply.github.com> Date: Thu, 28 Nov 2024 07:30:04 -0600 Subject: [PATCH] Fixes for rotation and activation logic fixes for global sync basic fix to make sure we sync globals again after we get all the account data extract a more simple core from calculateToAccept to support better test and sim options first pass on syncing logic changes needs some follow up consideration and testing update the sync floor and max add logic some updated logs and wip debug global sync log updates and some new clamping Support for better activation catch up. logging and comment improvements. a few more updates and cleanup ITN3 rotation safe settings --- src/config/server.ts | 5 + src/logger/index.ts | 36 +++++ src/p2p/Active.ts | 1 + src/p2p/CycleAutoScale.ts | 34 ++-- src/p2p/CycleCreator.ts | 1 + src/p2p/Join/v2/syncFinished.ts | 24 ++- src/p2p/ModeSystemFuncs.ts | 263 +++++++++++++++++++++++++------ src/p2p/Rotation.ts | 33 ++-- src/shardus/shardus-types.ts | 13 +- src/state-manager/AccountSync.ts | 43 ++++- 10 files changed, 368 insertions(+), 85 deletions(-) diff --git a/src/config/server.ts b/src/config/server.ts index 78a12dcd5..8cea42377 100644 --- a/src/config/server.ts +++ b/src/config/server.ts @@ -134,7 +134,12 @@ const SERVER_CONFIG: StrictServerConfiguration = { rotationPercentActive: 0.001, //rotate 0.1% of active nodes per cycle when in a steady processing state rotationMaxAddPercent: 0.1, rotationMaxRemovePercent: 0.05, + syncFloorEnabled: false, //DEBUG=true, ITN initially false for rotation safety + syncingMaxAddPercent: 0.2, + syncingDesiredMinCount: 50, //Debug=5, ITN = 50 allowActivePerCycle: 7, + allowActivePerCycleRecover: 4, + activeRecoveryEnabled: false, //Debug=true, ITN initially false for rotation safety useProxyForDownCheck: false, numCheckerNodes: 1, minChecksForDown: 1, diff --git a/src/logger/index.ts b/src/logger/index.ts index d9813ae8d..bfabe9280 100644 --- a/src/logger/index.ts +++ b/src/logger/index.ts @@ -28,6 +28,8 @@ interface Logger { _playbackLogger: any + _mainLogger: any + _seenAddresses: any _shortStrings: any _playbackOwner_host: any @@ -114,6 +116,10 @@ export type LogFlags = { txCancel: boolean // extra logging for TXs that get canceled getLocalOrRemote: boolean // special logging for getLocalOrRemote + + verboseNestedCounters: boolean // extra logging for nested counters + + node_rotation_debug: boolean // extra logging for node rotation math } export let logFlags: LogFlags = { @@ -153,6 +159,10 @@ export let logFlags: LogFlags = { txCancel: false, getLocalOrRemote: false, + + verboseNestedCounters: false, + + node_rotation_debug: false, } const filePath1 = path.join(process.cwd(), 'data-logs', 'cycleRecords1.txt') @@ -217,6 +227,7 @@ class Logger { this.getLogger('main').info('Logger initialized.') this._playbackLogger = this.getLogger('playback') + this._mainLogger = this.getLogger('main') this.setupLogControlValues() @@ -674,6 +685,31 @@ class Logger { console.log(`base logFlags: ` + Utils.safeStringify(logFlags)) } + + mainLog(level, key: string, message:string ): void { + //initially this will just go to a main log but we could but this in + //a json blob with the key and send it to a different logging service + this._mainLogger[level](key + ' ' + message) + } + + mainLog_debug(key: string, message:string ): void { + //note will change the key to be an array later and remove the DBG prefix + this.mainLog('debug', 'DBG_' + key, message) + } + + 
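+  // Usage sketch (illustrative; assumes a Logger instance `logger` and the logFlags object are
+  // in scope at the call site). The key convention used across this patch is the function name
+  // in caps, some context, and an incrementing int so keys stay unique, e.g.:
+  //
+  //   /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('MYROTATIONSTEP_1', logger.combine('active:', active, 'desired:', desired))
+  //
+  // combine() below safeStringify-s object arguments and joins everything with spaces, so mixed
+  // string/number/object arguments can be passed without manual formatting.
+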
combine(...args: any[]): string { + return args + .map((arg) => { + if (typeof arg === 'object') { + return Utils.safeStringify(arg) + } else { + return String(arg) + } + }) + .join(' ') + } + + } export default Logger diff --git a/src/p2p/Active.ts b/src/p2p/Active.ts index 54d519708..3f7d6be3a 100644 --- a/src/p2p/Active.ts +++ b/src/p2p/Active.ts @@ -142,6 +142,7 @@ export function updateRecord( const activatedPublicKeys = [] if (NodeList.readyByTimeAndIdOrder.length > 0) { + // ITN3 example if processing this will pick allowActivePerCycle = 1 nodes const selectedNodes = selectNodesFromReadyList(_prev.mode) for (const node of selectedNodes) { /* prettier-ignore */ nestedCountersInstance.countEvent('p2p', `active:updateRecord node added to activated`) diff --git a/src/p2p/CycleAutoScale.ts b/src/p2p/CycleAutoScale.ts index 70b882852..0a5ddf766 100644 --- a/src/p2p/CycleAutoScale.ts +++ b/src/p2p/CycleAutoScale.ts @@ -79,7 +79,7 @@ export function init() { } export function reset() { - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log( 'Resetting auto-scale module', `Cycle ${CycleCreator.currentCycle}, Quarter: ${CycleCreator.currentQuarter}`) + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('RESET_1', logger.combine('Resetting auto-scale module', `Cycle ${CycleCreator.currentCycle}, Quarter: ${CycleCreator.currentQuarter}`)) scalingRequested = false scalingRequestsCollector = new Map() requestedScalingType = null @@ -147,7 +147,7 @@ export function requestNetworkUpsize() { return } - console.log('DBG', 'UPSIZE!') + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('REQUESTNETWORKUPSIZE_1', 'CycleAutoScale: UPSIZE!') _requestNetworkScaling(P2P.CycleAutoScaleTypes.ScaleType.UP) } @@ -161,7 +161,7 @@ export function requestNetworkDownsize() { return } - console.log('DBG', 'DOWNSIZE!') + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('REQUESTNETWORKDOWNSIZE_1', 'CycleAutoScale: DOWNSIZE!') _requestNetworkScaling(P2P.CycleAutoScaleTypes.ScaleType.DOWN) } @@ -296,13 +296,7 @@ function _checkScaling() { // If we haven't approved an scale type, check if we should scale down if (!changed) { - // if (approvedScalingType === P2P.CycleAutoScaleTypes.ScaleType.DOWN) { - // warn( - // 'Already set to scale down for this cycle. No need to scale down anymore.' - // ) - // return - // } - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("CycleAutoScale: scale up not approved") + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('CHECKSCALING_1', 'CycleAutoScale: scale up not approved') if (scaleDownRequests.length >= requiredVotes) { approvedScalingType = P2P.CycleAutoScaleTypes.ScaleType.DOWN changed = true @@ -341,7 +335,7 @@ function _checkScaling() { error(new Error(`Invalid scaling flag after changing flag. 
Flag: ${approvedScalingType}`)) return } - console.log('newDesired', newDesired) + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('CHECKSCALING_2', logger.combine('newDesired', newDesired)) } function setDesiredCount(count: number) { @@ -390,18 +384,18 @@ function setAndGetTargetCount(prevRecord: P2P.CycleCreatorTypes.CycleRecord): nu } } } else if (prevRecord.mode === 'processing') { - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("CycleAutoScale: in processing") + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('SETANDGETTARGETCOUNT_PROCESSING_1', "CycleAutoScale: in processing") if (enterSafety(active) === false && enterRecovery(active) === false) { - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("CycleAutoScale: not in safety") + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('SETANDGETTARGETCOUNT_PROCESSING_2', "CycleAutoScale: not in safety") let addRem = (desired - prevRecord.target) * 0.1 - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log(`addRem: ${addRem}, desired: ${desired}, prevTarget: ${prevRecord.target}`) + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('SETANDGETTARGETCOUNT_PROCESSING_3', `addRem: ${addRem}, desired: ${desired}, prevTarget: ${prevRecord.target}`) if (addRem > active * 0.01) { addRem = active * 0.01 } if (addRem < 0 - active * 0.005) { addRem = 0 - active * 0.005 } - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log(`CycleAutoScale: prev target is ${prevRecord.target} and addRem is ${addRem}`) + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('SETANDGETTARGETCOUNT_PROCESSING_4', `CycleAutoScale: prev target is ${prevRecord.target} and addRem is ${addRem}`) targetCount = prevRecord.target + addRem // may want to swap config values to values from cycle record if (targetCount < config.p2p.minNodes) { @@ -422,7 +416,13 @@ function setAndGetTargetCount(prevRecord: P2P.CycleCreatorTypes.CycleRecord): nu targetCount = config.p2p.minNodes + config.p2p.extraNodesToAddInRestart } else if (prevRecord.mode === 'restart') { // In restart mode, all the nodes remain in 'selected?' mode until the desired number of nodes are reached - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("CycleAutoScale: in restart") + + //Instruction: I have a bunch of logs like this but I want to upgrade them to call logger.mainLog_debug. + /* prettier-ignore */ //if (logFlags && logFlags.verbose) console.log("CycleAutoScale: in restart") + //Instruction: here is what I want the log to look like. Note the first argument is a unique key. 
The funciton name in all capps followed by some context and then an int that incrments so we dont have dupes will work + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('SETANDGETTARGETCOUNT_RESTART_1',"CycleAutoScale: in restart") + + if (syncing < desired + config.p2p.extraNodesToAddInRestart) { /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("CycleAutoScale: entered syncing < desired") let add = ~~(0.5 * syncing) // Add 50% more nodes on each cycle @@ -439,7 +439,7 @@ function setAndGetTargetCount(prevRecord: P2P.CycleCreatorTypes.CycleRecord): nu /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("CycleAutoScale: in Self.isFirst condition") targetCount = config.p2p.formingNodesPerCycle } - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("CycleAutoScale: target count is ", targetCount) + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('SETANDGETTARGETCOUNT_1', logger.combine('CycleAutoScale: target count is', targetCount)) return targetCount } diff --git a/src/p2p/CycleCreator.ts b/src/p2p/CycleCreator.ts index 815e4faf0..64911742c 100644 --- a/src/p2p/CycleCreator.ts +++ b/src/p2p/CycleCreator.ts @@ -294,6 +294,7 @@ function updateScaleFactor() { scaleFactorSyncBoost = 1 } + //ITN3 example numbers (128 / 5) * (640 / 100) = 25.6 * 6.4 = 163.84 scaleFactor = Math.max((consensusRange / consenusParSize) * (activeNodeCount / networkParSize), 1) } diff --git a/src/p2p/Join/v2/syncFinished.ts b/src/p2p/Join/v2/syncFinished.ts index 2d591b989..34585c24d 100644 --- a/src/p2p/Join/v2/syncFinished.ts +++ b/src/p2p/Join/v2/syncFinished.ts @@ -124,14 +124,30 @@ export function isNodeSelectedReadyList(nodeId: string): boolean { export function selectNodesFromReadyList(mode: string): P2P.NodeListTypes.Node[] { if (mode === 'processing') { + let nodesToAllowActive = config.p2p.allowActivePerCycle + + if(config.p2p.activeRecoveryEnabled){ + // check if we are below desired allow more nodes to join + if(CycleChain.newest != null){ + const active = CycleChain.newest.active + const desired = CycleChain.newest.desired + const deficit = desired - active + if(deficit > 0){ + // This code is rotation safe because if allowActivePerCycleRecover is set to 1 + // and allowActivePerCycle is set to 1 we will have the same boost + const boost = Math.min(config.p2p.allowActivePerCycleRecover, deficit) + // apply the boost + nodesToAllowActive = Math.max(nodesToAllowActive, boost) + } + } + } + if (config.debug.readyNodeDelay > 0) { nestedCountersInstance.countEvent('p2p', `selectNodesFromReadyList: only returning nodes from the ready list that were added at least ${config.debug.readyNodeDelay} seconds ago`) - return NodeList.readyByTimeAndIdOrder - .slice(0, config.p2p.allowActivePerCycle) - .filter((node) => CycleChain.newest.start >= node.readyTimestamp + config.debug.readyNodeDelay) + return NodeList.readyByTimeAndIdOrder.slice(0, config.p2p.allowActivePerCycle).filter((node) => CycleChain.newest.start >= node.readyTimestamp + config.debug.readyNodeDelay) } - return NodeList.readyByTimeAndIdOrder.slice(0, config.p2p.allowActivePerCycle) + return NodeList.readyByTimeAndIdOrder.slice(0, nodesToAllowActive) } else { if (mode === 'forming' && isFirst && NodeList.activeByIdOrder.length === 0) return NodeList.readyByTimeAndIdOrder diff --git a/src/p2p/ModeSystemFuncs.ts b/src/p2p/ModeSystemFuncs.ts index b54d87299..3384445e0 100644 --- a/src/p2p/ModeSystemFuncs.ts +++ b/src/p2p/ModeSystemFuncs.ts @@ -1,7 +1,7 @@ import * as 
NodeList from './NodeList' import * as Self from './Self' import { enterRecovery, enterSafety, enterProcessing, enterShutdown } from './Modes' -import { config } from './Context' +import { config, logger } from './Context' import { targetCount } from './CycleAutoScale' import { nestedCountersInstance } from '../utils/nestedCounters' import { P2P } from '@shardus/types' @@ -20,27 +20,54 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor const active = NodeList.activeByIdOrder.length const syncing = NodeList.byJoinOrder.length - NodeList.activeByIdOrder.length // For now, we are using the desired value from the previous cycle. In the future, we should look at using the next desired value - const desired = prevRecord.desired + const desired = prevRecord?.desired const target = targetCount + const mode = prevRecord?.mode + const hasPrevRecord = prevRecord != null + const counter = prevRecord?.counter + const lost_count = prevRecord?.lost?.length + + /* prettier-ignore */ if (config.debug.verboseNestedCounters || (logFlags?.verboseNestedCounters)) nestedCountersInstance.countEvent( 'p2p', `desired: ${desired}, target: ${target}, active: ${active}, syncing: ${syncing}` ) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATETOACCEPTV2_1', logger.combine(`calculateToAcceptV2 prevCounter: ${counter}, desired: ${desired}, target: ${target}, active: ${active}, syncing: ${syncing}`, 'calculateToAcceptV2_prevCounter')) + + if(hasPrevRecord === false){ + return { add: 0, remove: 0 } + } - /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent( - 'p2p', - `desired: ${desired}, target: ${target}, active: ${active}, syncing: ${syncing}` - ) - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log(`prevCounter: ${prevRecord.counter}, desired: ${desired}, target: ${target}, active: ${active}, syncing: ${syncing}`) - - let add = 0 - let remove = 0 + return calculateAddRemove(mode, active, syncing, desired, target, counter, lost_count) +} - if (prevRecord) { - if (prevRecord.mode === 'forming') { +function calculateAddRemove( + mode: string, + active: number, + syncing: number, + desired: number, + target: number, + counter: number, + lost_count: number + ): ToAcceptResult { + let add = 0 + let remove = 0 + + // we can make desiredSyncingNodeCount dynamic later. 
it could be based on the average sync time and the desired rotaiton rate + const desiredSyncingNodeCount = config.p2p.syncingDesiredMinCount + const useNewSyncingDesiredCount = config.p2p.syncFloorEnabled + const syncingMaxAddPercent = config.p2p.syncingMaxAddPercent + const syncingCeilingBase = desiredSyncingNodeCount //config.p2p.syncingCeiling + + // going to re-evaluate this and make them adjustable + const syncingCeilingProcessing = syncingCeilingBase * 2 + const syncingCeilingSafety = syncingCeilingBase * 4 + const syncingCeilingRecovery = syncingCeilingBase * 4 + + if (mode === 'forming') { if (Self.isFirst && active < 1) { add = target remove = 0 return { add, remove } } else if (active != desired) { let addRem = target - (active + syncing) - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log(`under forming active != desired; addRem: ${addRem}`) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_FORMING_1', logger.combine(`under forming active != desired; addRem: ${addRem}`, 'forming_active_not_desired')) if (addRem > 0) { add = Math.ceil(addRem) remove = 0 @@ -61,24 +88,26 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor } } } - } else if (prevRecord.mode === 'restart') { + } else if (mode === 'restart') { if (syncing < desired + config.p2p.extraNodesToAddInRestart) { const addRem = target + config.p2p.extraNodesToAddInRestart - syncing - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log(`under restart active != desired; addRem: ${addRem}`) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_RESTART_1', logger.combine(`under restart active != desired; addRem: ${addRem}`, 'restart_active_not_desired')) if (addRem > 0) { add = Math.ceil(addRem) remove = 0 return { add, remove } } } - } else if (prevRecord.mode === 'processing') { + } else if (mode === 'processing') { if (enterSafety(active) === false && enterRecovery(active) === false) { - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("max rotated per cycle: ", config.p2p.maxRotatedPerCycle) + + + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_1', logger.combine("max rotated per cycle: ", config.p2p.maxRotatedPerCycle, 'max_rotated_per_cycle')) if (active !== ~~target) { // calculate nodes to add or remove - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("active not equal target") + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_2', logger.combine("active not equal target", 'active_not_equal_target')) let addRem = target - (active + syncing) - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("addRem ", addRem) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_3', logger.combine("addRem ", addRem, 'addRem')) if (addRem > 0) { if (addRem > active * config.p2p.rotationMaxAddPercent) { // limit nodes added to 10% of active; we are here because many were lost @@ -91,17 +120,29 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor add = Math.ceil(addRem) remove = 0 - /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateToAcceptV2 c:${prevRecord.counter} active !== ~~target, addRem > 0 add: ${add}, remove: ${remove}`) + //new logic here , but it 
is not a total replacment, it will only boost the add value + if (useNewSyncingDesiredCount){ + //if (add + syncing < desiredSyncingNodeCount){ //??? + // NEED to decide if "add" count should be considered with the syncing count! + + add = maintainSyncingFloor(desiredSyncingNodeCount, syncing, add) + add = clampMaxNodesToAdd(add, active, syncingMaxAddPercent) + add = maintainSyncingCeiling(syncingCeilingProcessing, syncing, add) + + } + const logMsg = 'active !== ~~target addRem > 0' + /* prettier-ignore */ if(logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_4', logger.combine(`calculateAddRemove: cycle:${counter} `, logMsg, `add: ${add} remove:${remove} active:${active} syncing:${syncing}`, 'calculateAddRemove_active_not_equal_target_addRem_greater_than_0')) + /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateAddRemove c:${counter} active !== ~~target, addRem > 0 add: ${add}, remove: ${remove}`) return { add, remove } } if (addRem < 0) { //Note that we got here earlier because syncing nodes were "counting against us" //now we will look at addRem where syncing nodes are not considered let toRemove = active - target // only remove the active nodes more than target - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log(`addRem in processing: ${toRemove}`) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_5', logger.combine(`addRem in processing: ${toRemove}`, 'addRem_in_processing')) if (toRemove > active * config.p2p.rotationMaxRemovePercent) { // limit nodes removed to 5% of active; this should not happen - console.log('unexpected addRem > 5% of active', toRemove, active, target, desired) + /* prettier-ignore */ if(logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_6', logger.combine('unexpected addRem > 5% of active', toRemove, active, target, desired, 'unexpected_addRem_greater_than_5_percent_of_active')) //~~ truncate the value of rnum i.e. 
fast Math.floor() toRemove = ~~(active * config.p2p.rotationMaxRemovePercent) if (toRemove === 0) { @@ -119,10 +160,50 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor } add = 0 remove = Math.ceil(toRemove) - /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateToAcceptV2 c:${prevRecord.counter} active !== ~~target, addRem < 0 (remove) add: ${add}, remove: ${remove}`) + + //new logic here , but it is not a total replacment, it will only boost the add value + if (useNewSyncingDesiredCount){ + // even though we may have too many nodes we should still keep nodes in the syncing pipleline + add = maintainSyncingFloor(desiredSyncingNodeCount, syncing, add) + add = clampMaxNodesToAdd(add, active, syncingMaxAddPercent) + add = maintainSyncingCeiling(syncingCeilingProcessing, syncing, add) + } + + const logMsg = 'active !== ~~target addRem < 0 tooremove > 0' + /* prettier-ignore */ if(logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_7', logger.combine(`calculateAddRemove: cycle:${counter} `, logMsg, `add: ${add} remove:${remove} active:${active} syncing:${syncing}`, 'calculateAddRemove_active_not_equal_target_addRem_less_than_0_tooremove_greater_than_0')) + + + /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateAddRemove c:${counter} active !== ~~target, addRem < 0 (remove) add: ${add}, remove: ${remove}`) return { add, remove } } else { - //this is a case where syncing nodes are counting against us and we need to take a careful look to allow + //this is a case where syncing nodes are counting against us + // for example we still have less active nodes than we target, but + // if you count the syncing nodes we have more. + // + + //New logic here. To be rotation safe and correct it is better to run fresh logic + if (useNewSyncingDesiredCount){ + add = 0 + add = maintainSyncingFloor(desiredSyncingNodeCount, syncing, add) + add = clampMaxNodesToAdd(add, active, syncingMaxAddPercent) + add = maintainSyncingCeiling(syncingCeilingProcessing, syncing, add) + //counter to other cases where we just add an go on, the best option here + // is do avoid all the proceeding logic if we are using the new + // syncingDesiredCount value + + const logMsg = 'active !== ~~target addRem too remove <= 0' + /* prettier-ignore */ if(logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_8', logger.combine(`calculateAddRemove: cycle:${counter} `, logMsg, `add: ${add} remove:${remove} active:${active} syncing:${syncing}`, 'calculateAddRemove_active_not_equal_target_addRem_too_remove_less_than_or_equal_to_0')) + + return { add, remove } + } + + // Logic below here was the old stopgap solution that is no longer + // scaling well with larger, longer lived networks! + // We have to keep it to be rotation safe! 
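+              // ("rotation safe" here: while syncFloorEnabled stays false the new branch above is
+              //  skipped, so upgraded and non-upgraded nodes should keep computing identical
+              //  add/remove values and still agree on the cycle record)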
+ // syncingDesiredCount will not be set to a value greater than 0 + // until the activation migration of 1.15.3 + + // we need to take a careful look to allow //some nodes to sync and go active (can look at median time ) // for now we will use an approximation that we want to rotate one per cycle @@ -145,14 +226,30 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor add = maxSyncing - syncing - /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateToAcceptV2 c:${prevRecord.counter} active !== ~~target, addRem < 0 (not-remove) add: ${add}, remove: ${remove}`) + /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateAddRemove c:${counter} active !== ~~target, addRem < 0 (not-remove) add: ${add}, remove: ${remove}`) return { add, remove } } } } } else if (config.p2p.maxRotatedPerCycle !== 0) { + //This is the case where active === target and we allow nodes to be rotated + + + //New logic here. To be rotation safe and correct it is better to run fresh logic + if (useNewSyncingDesiredCount){ + add = 0 + add = maintainSyncingFloor(desiredSyncingNodeCount, syncing, add) + add = clampMaxNodesToAdd(add, active, syncingMaxAddPercent) + add = maintainSyncingCeiling(syncingCeilingProcessing, syncing, add) + + const logMsg = 'active == ~~target 0' + /* prettier-ignore */ if(logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_9', logger.combine(`calculateAddRemove: cycle:${counter} `, logMsg, `add: ${add} remove:${remove} active:${active} syncing:${syncing}`, 'calculateAddRemove_active_equal_target_0')) + + return { add, remove } + } + //This essentially active === target and we have a non zero maxRotatedPerCycle - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("entered rotation") + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_10', logger.combine("entered rotation", 'entered_rotation')) let rnum = config.p2p.maxRotatedPerCycle // num to rotate per cycle; can be less than 1; like 0.5 for every other cycle; -1 for auto if (rnum < 0) { // rotate all nodes in 1000 cycles @@ -161,7 +258,7 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor if (rnum < 1) { //This is supposed to be true rnum % of the time, that does not work //the math is wrong. 
fortunately we can avoid this if maxRotatedPerCycle >= 1 - if (prevRecord.counter % (1 / rnum) === 0) { + if (counter % (1 / rnum) === 0) { // rotate every few cycles if less than 1000 nodes rnum = 1 } else { @@ -181,19 +278,40 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor rnum = config.p2p.rotationCountMultiply * rnum rnum = config.p2p.rotationCountAdd + rnum - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("rnum: ", rnum) - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("setting add to rnum") + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_11', logger.combine("rnum: ", rnum, 'rnum')) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_12', logger.combine("setting add to rnum", 'setting_add_to_rnum')) add = Math.ceil(rnum) remove = 0 } - /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateToAcceptV2 c:${prevRecord.counter} config.p2p.maxRotatedPerCycle !== 0 add: ${add}, remove: ${remove}`) - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log(`add: ${add}, remove: ${remove}`) + /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateAddRemove c:${counter} config.p2p.maxRotatedPerCycle !== 0 add: ${add}, remove: ${remove}`) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_13', logger.combine(`add: ${add}, remove: ${remove}`, 'add_remove')) return { add, remove } } } - } else if (prevRecord.mode === 'safety') { + } else if (mode === 'safety') { if (enterProcessing(active) === false && enterRecovery(active) === false) { + + //New logic here. To be rotation safe and correct it is better to run fresh logic + if (useNewSyncingDesiredCount){ + + + add = config.p2p.minNodes - (active + syncing) + add = Math.max(add, 0) // don't add negative nodes + + // the closer safety mode was getting to the goal the less syncing nodes were being maintained. + // it is much better to maintain the correct steady state of syncing nodes + // rather than letting it approach 0 + add = maintainSyncingFloor(desiredSyncingNodeCount, syncing, add) + add = clampMaxNodesToAdd(add, active, syncingMaxAddPercent) + add = maintainSyncingCeiling(syncingCeilingSafety, syncing, add) + const logMsg = 'safety' + /* prettier-ignore */ if(logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_SAFETY_1', logger.combine(`calculateAddRemove: cycle:${counter} `, logMsg, `add: ${add} remove:${remove} active:${active} syncing:${syncing}`, 'calculateAddRemove_safety')) + + return { add, remove } + } + + // since in safety mode, will use minNodes as the threshold to enter back into processing mode let addRem = 1.02 * config.p2p.minNodes - (active + syncing) // we try to overshoot min value by 2%; for slow syncing nodes if (addRem > active * 0.05) { @@ -204,17 +322,40 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor } // Is this needed for lost nodes? lost nodes didn't get removed in next cycle if they refuted // Or is the intention to use the removed nodes in the previous cycle? If so, we can also consider apoptosized nodes as well. 
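+      // worked example with hypothetical numbers: minNodes = 640, active = 560, syncing = 10, 2 lost
+      // addRem = 1.02 * 640 - 570 = 82.8, clamped to 5% of active = 28, plus 2 lost below = 30, so add = ceil(30) = 30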
- addRem += prevRecord.lost.length // compensate for nodes that were lost; though this could add more burden on existing nodes + addRem += lost_count // compensate for nodes that were lost; though this could add more burden on existing nodes + if (addRem > 0) { add = Math.ceil(addRem) remove = 0 return { add, remove } } } - } else if (prevRecord.mode === 'recovery') { + } else if (mode === 'recovery') { if (enterShutdown(active) === false) { + + //New logic here. To be rotation safe and correct it is better to run fresh logic + if (useNewSyncingDesiredCount){ + + + add = config.p2p.minNodes - (active + syncing) + add = Math.max(add, 0) // don't add negative nodes + + // the closer recover mode was getting to the goal the less syncing nodes were being maintained. + // it is much better to maintain the correct steady state of syncing nodes + // rather than letting it approach 0 + add = maintainSyncingFloor(desiredSyncingNodeCount, syncing, add) + add = clampMaxNodesToAdd(add, active, syncingMaxAddPercent) + add = maintainSyncingCeiling(syncingCeilingRecovery, syncing, add) + const logMsg = 'recovery' + /* prettier-ignore */ if(logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_RECOVERY_1', logger.combine(`calculateAddRemove: cycle:${counter} `, logMsg, `add: ${add} remove:${remove} active:${active} syncing:${syncing}`)) + + return { add, remove } + } + + const totalNodeCount = active + syncing let addRem = target - totalNodeCount + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_RECOVERY_1', `Recovery mode calculations addRem: ${addRem} active: ${active} syncing: ${syncing} target: ${target}`) if (addRem > totalNodeCount * 0.2) { addRem = ~~(totalNodeCount * 0.2) // Add 20% more nodes on each cycle if (addRem === 0) { @@ -227,18 +368,56 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor return { add, remove } } } - } else if (prevRecord.mode === 'restore') { + } else if (mode === 'restore') { const addRem = target - (active + syncing) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_RESTORE_1', `Restore mode calculations addRem: ${addRem} active: ${active} syncing: ${syncing} target: ${target}`) if (addRem > 0) { add = Math.ceil(addRem) return { add, remove } } } - } - /* prettier-ignore */ if (logFlags.verbose) console.log('add remove returned from default') + + /* prettier-ignore */ if (logFlags.verbose) logger.mainLog_debug('CALCULATEADDREMOVE_17', `add remove returned from default. 
add_remove_returned_from_default mode:${mode} add: ${add} remove:${remove} active:${active} syncing:${syncing}`) return { add, remove } } +function maintainSyncingFloor(desiredSyncingNodeCount: number, syncing: number, add: number) : number { + if (syncing < desiredSyncingNodeCount){ + const addtionalNodesToAdd = desiredSyncingNodeCount - (syncing - add) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('MAINTAINSYNCINGFLOOR_ADD', `maintainSyncingFloor syncing: ${syncing} desiredSyncingNodeCount: ${desiredSyncingNodeCount} add: ${add} (before) addtionalNodesToAdd: ${addtionalNodesToAdd}`) + add += addtionalNodesToAdd + } + return add +} + +function maintainSyncingCeiling(syncCeiling: number, syncing: number, add: number) : number { + if (syncing > syncCeiling){ + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('MAINTAINSYNCINGCEILING_CLAMP', `maintainSyncingCeiling syncing: ${syncing} syncCeiling: ${syncCeiling} add: ${add} (will be set to 0)`) + add = 0 + } + return add +} + +/** + * clam the amount of node we will suggest adding in a cycle to a percentage of the active nodes + * + * @param add + * @param active + * @param syncingMaxAddPercent + * @returns + */ +function clampMaxNodesToAdd(add: number, active: number, syncingMaxAddPercent: number): number { + const maxAdd = active * syncingMaxAddPercent + if (add > maxAdd) { + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CLAMP_MAX_NODES_TO_ADD', `clampMaxNodesToAdd add: ${add} active: ${active} maxAdd: ${maxAdd}`) + add = ~~(maxAdd) + if (add === 0) { + add = 1 + } + } + return add +} + // need to think about and maybe ask Omar about using prev record for determining mode, could use next record /** Returns the number of expired nodes and the list of removed nodes using calculateToAcceptV2 */ @@ -374,12 +553,4 @@ export function getExpiredRemovedV2( return { expired, removed } } -/** Returns a linearly interpolated value between `amountToShrink` and the same - * multiplied by a `scaleFactor`. 
The result depends on the - * `scaleInfluenceForShrink` */ -function getScaledAmountToShrink(): number { - const nonScaledAmount = config.p2p.amountToShrink - const scaledAmount = config.p2p.amountToShrink * CycleCreator.scaleFactor - const scaleInfluence = config.p2p.scaleInfluenceForShrink - return Math.floor(lerp(nonScaledAmount, scaledAmount, scaleInfluence)) -} + diff --git a/src/p2p/Rotation.ts b/src/p2p/Rotation.ts index b0b230511..c117d7af9 100644 --- a/src/p2p/Rotation.ts +++ b/src/p2p/Rotation.ts @@ -141,7 +141,7 @@ export function getExpiredRemoved( let scaleDownRemove = Math.max(active - desired, 0) //only let the scale factor impart a partial influence based on scaleInfluenceForShrink - const scaledAmountToShrink = getScaledAmountToShrink() + const scaledAmountToShrink = getScaledAmountToShrink() //ITN3 example = 36 //limit the scale down by scaledAmountToShrink if (scaleDownRemove > scaledAmountToShrink) { @@ -151,25 +151,13 @@ export function getExpiredRemoved( //maxActiveNodesToRemove is a percent of the active nodes that is set as a 0-1 value in maxShrinkMultiplier //this is to prevent the network from shrinking too fast //make sure the value is at least 1 + //ITN3 example: maxShrinkMultiplier: 0.02, active: 640 = floor(12.8) = 12 const maxActiveNodesToRemove = Math.max(Math.floor(config.p2p.maxShrinkMultiplier * active), 1) const cycle = CycleChain.newest.counter if (cycle > lastLoggedCycle && scaleDownRemove > 0) { lastLoggedCycle = cycle - info( - 'scale down dump:' + - Utils.safeStringify({ - cycle, - scaleFactor: CycleCreator.scaleFactor, - scaleDownRemove, - maxActiveNodesToRemove, - desired, - active, - scaledAmountToShrink, - maxRemove, - expired, - }) - ) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('GETEXPIREDREMOVED_DUMPNODES', 'scale down dump:' + Utils.safeStringify({ cycle, scaleFactor: CycleCreator.scaleFactor, scaleDownRemove, maxActiveNodesToRemove, desired, active, scaledAmountToShrink, maxRemove, expired, }) ) } // Allows the network to scale down even if node rotation is turned off @@ -186,6 +174,7 @@ export function getExpiredRemoved( // final clamp of max remove, but only if it is more than amountToShrink // to avoid messing up the calculation above this next part can only make maxRemove smaller. // maxActiveNodesToRemove is a percent of the active nodes that is set as a 0-1 value in maxShrinkMultiplier + // ITN3 example amountToShrink = 5. maxActiveNodesToRemove = 12 if (maxRemove > config.p2p.amountToShrink && maxRemove > maxActiveNodesToRemove) { // yes, this max could be baked in earlier, but I like it here for clarity maxRemove = Math.max(config.p2p.amountToShrink, maxActiveNodesToRemove) @@ -248,7 +237,19 @@ function error(...msg: string[]): void { /** Returns a linearly interpolated value between `amountToShrink` and the same * multiplied by a `scaleFactor`. The result depends on the -* `scaleInfluenceForShrink` */ +* `scaleInfluenceForShrink` +* +* ITN3 example numbers (128 / 5) * (640 / 100) = 25.6 * 6.4 = 163.84 +* config.p2p.amountToShrink 5 +* config.p2p.scaleInfluenceForShrink 0.2, +* +* Math.floor(lerp(163.84, 5, 0.2)) = 36! 
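+* (assuming the usual lerp(a, b, t) = a + (b - a) * t, the call below is
+* lerp(amountToShrink, amountToShrink * scaleFactor, scaleInfluenceForShrink), so an influence of
+* 0.2 puts the result 20% of the way from the plain amountToShrink toward the fully scaled amount)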
+* +* this is use as a max though onlty to clamp but not raise our amount to shrink +* this is for the scaled down remove case +* +* +*/ function getScaledAmountToShrink(): number { const nonScaledAmount = config.p2p.amountToShrink const scaledAmount = config.p2p.amountToShrink * CycleCreator.scaleFactor diff --git a/src/shardus/shardus-types.ts b/src/shardus/shardus-types.ts index 743cfb7b0..693e6b671 100644 --- a/src/shardus/shardus-types.ts +++ b/src/shardus/shardus-types.ts @@ -899,8 +899,19 @@ export interface ServerConfiguration { rotationMaxAddPercent: number /** not an actual percent but 0-1 value or multiplication */ rotationMaxRemovePercent: number - /** The max number of nodes added to `activated` list in cycleRecord each cycle */ + /** enable sync floor */ + syncFloorEnabled: boolean + /** additional support for more syncing nodes. not an actual percent but 0-1 value or multiplication */ + syncingMaxAddPercent: number + /** how many node should be syncing at any given time */ + syncingDesiredMinCount: number + /** The max number of nodes added to `activated` list in cycleRecord each cycle while processing */ allowActivePerCycle: number + /** The max number of nodes added to `activated` list in cycleRecord each cycle */ + allowActivePerCycleRecover: number + /** enable active node rotation recovery */ + activeRecoveryEnabled: boolean + /** should a checking node use a random proxy to run the down test */ useProxyForDownCheck: boolean /** The number of checker nodes to ask to investigate whether a node that is potentially lost */ numCheckerNodes: number diff --git a/src/state-manager/AccountSync.ts b/src/state-manager/AccountSync.ts index 0fe6ae945..a1a1e4ef5 100644 --- a/src/state-manager/AccountSync.ts +++ b/src/state-manager/AccountSync.ts @@ -319,6 +319,28 @@ class AccountSync { // this.profiler.scopedProfileSectionEnd('get_account_data3', responseSize) // } // ) + + Context.network.registerExternalGet('sync-globals', isDebugModeMiddleware, async (req, res) => { + try { + + const cycle = this.stateManager.currentCycleShardData.cycleNumber + const syncFromArchiver = false + + // need to review this , consider sync from archiver. + // consider "express version" that syncs to a specific hash + // todo actual endpoint with options + const syncTracker = this.createSyncTrackerByForGlobals(cycle, false, syncFromArchiver) + //this.globalAccountsSynced = false + + await syncTracker.syncStateDataGlobals() + this.syncTrackers.pop() + } catch(e) { + this.mainLogger.error(`sync-globals: Exception executing request: ${errorToStringFull(e)}`) + res.write('error') + } + res.write('ok') + res.end() + }) const getAccDataBinaryHandler: Route> = { name: InternalRouteEnum.binary_get_account_data, @@ -742,7 +764,7 @@ class AccountSync { if (keptGlobal === false && this.globalAccountsSynced === false && useGlobalAccounts === true) { this.createSyncTrackerByForGlobals(cycle, true) addedGlobal = true - } + } //init new non global trackers rangesToSync = this.initRangesToSync(nodeShardData, homePartition, 4, 4) @@ -751,6 +773,11 @@ class AccountSync { this.createSyncTrackerByRange(range, cycle, true) newTrackers++ } + + // sync globals again after all the non global data. 
// this is needed in case a global account changed during that time
+      this.createSyncTrackerByForGlobals(cycle, true)
+
       /* prettier-ignore */ nestedCountersInstance.countRareEvent('sync', `RETRYSYNC: lastCycle: ${lastCycle} cycle: ${cycle} ${Utils.safeStringify({keptGlobal, addedGlobal, cleared, kept, newTrackers })}`)
       /* prettier-ignore */ this.mainLogger.debug(`DATASYNC: RETRYSYNC lastCycle: lastCycle: ${lastCycle} cycle: ${cycle} ${Utils.safeStringify({keptGlobal, addedGlobal, cleared, kept, newTrackers })}`)
       continue //resume loop at top!
@@ -1515,6 +1542,20 @@ class AccountSync {
   setGlobalSyncFinished(): void {
     this.globalAccountsSynced = true
   }
+
+
+  reSyncGlobals(): void {
+
+    const cycle = this.stateManager.currentCycleShardData.cycleNumber
+    const syncFromArchiver = false
+
+    // need to review this; consider syncing from an archiver.
+    // consider an "express version" that syncs to a specific hash
+    // TODO: expose an actual endpoint with options
+    this.createSyncTrackerByForGlobals(cycle, false, syncFromArchiver)
+    //this.globalAccountsSynced = false
+  }
+
 }
 export default AccountSync
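
For reference, a small self-contained sketch of how the new syncing-floor helpers added in ModeSystemFuncs.ts shape the `add` value once `syncFloorEnabled` is turned on. The helper bodies restate the ones in the patch; the harness, constant names, and example numbers are illustrative assumptions using the ITN-style defaults from server.ts (syncingDesiredMinCount = 50, syncingMaxAddPercent = 0.2, processing ceiling = 2x the floor).

// Restatement of the helpers added in ModeSystemFuncs.ts (processing-mode values shown).
function maintainSyncingFloor(desiredSyncingNodeCount: number, syncing: number, add: number): number {
  // below the floor: boost add so more nodes enter the syncing pipeline
  if (syncing < desiredSyncingNodeCount) {
    add += desiredSyncingNodeCount - (syncing - add)
  }
  return add
}

function clampMaxNodesToAdd(add: number, active: number, syncingMaxAddPercent: number): number {
  // cap the suggested add at a fraction of the active node count (truncated), never clamping to 0
  const maxAdd = active * syncingMaxAddPercent
  if (add > maxAdd) {
    add = ~~maxAdd
    if (add === 0) add = 1
  }
  return add
}

function maintainSyncingCeiling(syncCeiling: number, syncing: number, add: number): number {
  // already enough nodes syncing: stop adding entirely this cycle
  if (syncing > syncCeiling) add = 0
  return add
}

// Example cycle in processing mode (active === target, rotation enabled)
const desiredSyncingMin = 50        // config.p2p.syncingDesiredMinCount
const syncingMaxAddPercent = 0.2    // config.p2p.syncingMaxAddPercent
const syncingCeilingProcessing = desiredSyncingMin * 2

const active = 640
const syncing = 12

let add = 0
add = maintainSyncingFloor(desiredSyncingMin, syncing, add)              // 0 + (50 - (12 - 0)) = 38
add = clampMaxNodesToAdd(add, active, syncingMaxAddPercent)              // 38 <= 128, unchanged
add = maintainSyncingCeiling(syncingCeilingProcessing, syncing, add)     // 12 <= 100, unchanged
console.log(`add this cycle: ${add}`)                                    // add this cycle: 38

If syncing were already above the ceiling (say 120 with a ceiling of 100), maintainSyncingCeiling would zero the add again; the ceiling is applied after the floor and the clamp in every branch, so it has the final say.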