diff --git a/src/config/server.ts b/src/config/server.ts index 78a12dcd5..8cea42377 100644 --- a/src/config/server.ts +++ b/src/config/server.ts @@ -134,7 +134,12 @@ const SERVER_CONFIG: StrictServerConfiguration = { rotationPercentActive: 0.001, //rotate 0.1% of active nodes per cycle when in a steady processing state rotationMaxAddPercent: 0.1, rotationMaxRemovePercent: 0.05, + syncFloorEnabled: false, //DEBUG=true, ITN initially false for rotation safety + syncingMaxAddPercent: 0.2, + syncingDesiredMinCount: 50, //Debug=5, ITN = 50 allowActivePerCycle: 7, + allowActivePerCycleRecover: 4, + activeRecoveryEnabled: false, //Debug=true, ITN initially false for rotation safety useProxyForDownCheck: false, numCheckerNodes: 1, minChecksForDown: 1, diff --git a/src/logger/index.ts b/src/logger/index.ts index d9813ae8d..bfabe9280 100644 --- a/src/logger/index.ts +++ b/src/logger/index.ts @@ -28,6 +28,8 @@ interface Logger { _playbackLogger: any + _mainLogger: any + _seenAddresses: any _shortStrings: any _playbackOwner_host: any @@ -114,6 +116,10 @@ export type LogFlags = { txCancel: boolean // extra logging for TXs that get canceled getLocalOrRemote: boolean // special logging for getLocalOrRemote + + verboseNestedCounters: boolean // extra logging for nested counters + + node_rotation_debug: boolean // extra logging for node rotation math } export let logFlags: LogFlags = { @@ -153,6 +159,10 @@ export let logFlags: LogFlags = { txCancel: false, getLocalOrRemote: false, + + verboseNestedCounters: false, + + node_rotation_debug: false, } const filePath1 = path.join(process.cwd(), 'data-logs', 'cycleRecords1.txt') @@ -217,6 +227,7 @@ class Logger { this.getLogger('main').info('Logger initialized.') this._playbackLogger = this.getLogger('playback') + this._mainLogger = this.getLogger('main') this.setupLogControlValues() @@ -674,6 +685,31 @@ class Logger { console.log(`base logFlags: ` + Utils.safeStringify(logFlags)) } + + mainLog(level, key: string, message:string ): void { + //initially this will just go to a main log but we could but this in + //a json blob with the key and send it to a different logging service + this._mainLogger[level](key + ' ' + message) + } + + mainLog_debug(key: string, message:string ): void { + //note will change the key to be an array later and remove the DBG prefix + this.mainLog('debug', 'DBG_' + key, message) + } + + combine(...args: any[]): string { + return args + .map((arg) => { + if (typeof arg === 'object') { + return Utils.safeStringify(arg) + } else { + return String(arg) + } + }) + .join(' ') + } + + } export default Logger diff --git a/src/p2p/Active.ts b/src/p2p/Active.ts index 54d519708..3f7d6be3a 100644 --- a/src/p2p/Active.ts +++ b/src/p2p/Active.ts @@ -142,6 +142,7 @@ export function updateRecord( const activatedPublicKeys = [] if (NodeList.readyByTimeAndIdOrder.length > 0) { + // ITN3 example if processing this will pick allowActivePerCycle = 1 nodes const selectedNodes = selectNodesFromReadyList(_prev.mode) for (const node of selectedNodes) { /* prettier-ignore */ nestedCountersInstance.countEvent('p2p', `active:updateRecord node added to activated`) diff --git a/src/p2p/CycleAutoScale.ts b/src/p2p/CycleAutoScale.ts index 70b882852..0a5ddf766 100644 --- a/src/p2p/CycleAutoScale.ts +++ b/src/p2p/CycleAutoScale.ts @@ -79,7 +79,7 @@ export function init() { } export function reset() { - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log( 'Resetting auto-scale module', `Cycle ${CycleCreator.currentCycle}, Quarter: 
${CycleCreator.currentQuarter}`) + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('RESET_1', logger.combine('Resetting auto-scale module', `Cycle ${CycleCreator.currentCycle}, Quarter: ${CycleCreator.currentQuarter}`)) scalingRequested = false scalingRequestsCollector = new Map() requestedScalingType = null @@ -147,7 +147,7 @@ export function requestNetworkUpsize() { return } - console.log('DBG', 'UPSIZE!') + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('REQUESTNETWORKUPSIZE_1', 'CycleAutoScale: UPSIZE!') _requestNetworkScaling(P2P.CycleAutoScaleTypes.ScaleType.UP) } @@ -161,7 +161,7 @@ export function requestNetworkDownsize() { return } - console.log('DBG', 'DOWNSIZE!') + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('REQUESTNETWORKDOWNSIZE_1', 'CycleAutoScale: DOWNSIZE!') _requestNetworkScaling(P2P.CycleAutoScaleTypes.ScaleType.DOWN) } @@ -296,13 +296,7 @@ function _checkScaling() { // If we haven't approved an scale type, check if we should scale down if (!changed) { - // if (approvedScalingType === P2P.CycleAutoScaleTypes.ScaleType.DOWN) { - // warn( - // 'Already set to scale down for this cycle. No need to scale down anymore.' - // ) - // return - // } - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("CycleAutoScale: scale up not approved") + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('CHECKSCALING_1', 'CycleAutoScale: scale up not approved') if (scaleDownRequests.length >= requiredVotes) { approvedScalingType = P2P.CycleAutoScaleTypes.ScaleType.DOWN changed = true @@ -341,7 +335,7 @@ function _checkScaling() { error(new Error(`Invalid scaling flag after changing flag. Flag: ${approvedScalingType}`)) return } - console.log('newDesired', newDesired) + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('CHECKSCALING_2', logger.combine('newDesired', newDesired)) } function setDesiredCount(count: number) { @@ -390,18 +384,18 @@ function setAndGetTargetCount(prevRecord: P2P.CycleCreatorTypes.CycleRecord): nu } } } else if (prevRecord.mode === 'processing') { - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("CycleAutoScale: in processing") + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('SETANDGETTARGETCOUNT_PROCESSING_1', "CycleAutoScale: in processing") if (enterSafety(active) === false && enterRecovery(active) === false) { - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("CycleAutoScale: not in safety") + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('SETANDGETTARGETCOUNT_PROCESSING_2', "CycleAutoScale: not in safety") let addRem = (desired - prevRecord.target) * 0.1 - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log(`addRem: ${addRem}, desired: ${desired}, prevTarget: ${prevRecord.target}`) + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('SETANDGETTARGETCOUNT_PROCESSING_3', `addRem: ${addRem}, desired: ${desired}, prevTarget: ${prevRecord.target}`) if (addRem > active * 0.01) { addRem = active * 0.01 } if (addRem < 0 - active * 0.005) { addRem = 0 - active * 0.005 } - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log(`CycleAutoScale: prev target is ${prevRecord.target} and addRem is ${addRem}`) + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('SETANDGETTARGETCOUNT_PROCESSING_4', `CycleAutoScale: prev target is ${prevRecord.target} and addRem is ${addRem}`) targetCount = prevRecord.target + addRem // may want to 
swap config values to values from cycle record if (targetCount < config.p2p.minNodes) { @@ -422,7 +416,13 @@ function setAndGetTargetCount(prevRecord: P2P.CycleCreatorTypes.CycleRecord): nu targetCount = config.p2p.minNodes + config.p2p.extraNodesToAddInRestart } else if (prevRecord.mode === 'restart') { // In restart mode, all the nodes remain in 'selected?' mode until the desired number of nodes are reached - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("CycleAutoScale: in restart") + + //Note: logs like this are being upgraded to call logger.mainLog_debug. + /* prettier-ignore */ //if (logFlags && logFlags.verbose) console.log("CycleAutoScale: in restart") + //Note: the first argument is a unique key: the function name in all caps, followed by some context and an int that increments so we don't have dupes. + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('SETANDGETTARGETCOUNT_RESTART_1',"CycleAutoScale: in restart") + + if (syncing < desired + config.p2p.extraNodesToAddInRestart) { /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("CycleAutoScale: entered syncing < desired") let add = ~~(0.5 * syncing) // Add 50% more nodes on each cycle @@ -439,7 +439,7 @@ function setAndGetTargetCount(prevRecord: P2P.CycleCreatorTypes.CycleRecord): nu /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("CycleAutoScale: in Self.isFirst condition") targetCount = config.p2p.formingNodesPerCycle } - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("CycleAutoScale: target count is ", targetCount) + /* prettier-ignore */ if (logFlags?.verbose) logger.mainLog_debug('SETANDGETTARGETCOUNT_1', logger.combine('CycleAutoScale: target count is', targetCount)) return targetCount } diff --git a/src/p2p/CycleCreator.ts b/src/p2p/CycleCreator.ts index 815e4faf0..64911742c 100644 --- a/src/p2p/CycleCreator.ts +++ b/src/p2p/CycleCreator.ts @@ -294,6 +294,7 @@ function updateScaleFactor() { scaleFactorSyncBoost = 1 } + //ITN3 example numbers (128 / 5) * (640 / 100) = 25.6 * 6.4 = 163.84 scaleFactor = Math.max((consensusRange / consenusParSize) * (activeNodeCount / networkParSize), 1) } diff --git a/src/p2p/Join/v2/syncFinished.ts b/src/p2p/Join/v2/syncFinished.ts index 2d591b989..34585c24d 100644 --- a/src/p2p/Join/v2/syncFinished.ts +++ b/src/p2p/Join/v2/syncFinished.ts @@ -124,14 +124,30 @@ export function isNodeSelectedReadyList(nodeId: string): boolean { export function selectNodesFromReadyList(mode: string): P2P.NodeListTypes.Node[] { if (mode === 'processing') { + let nodesToAllowActive = config.p2p.allowActivePerCycle + + if(config.p2p.activeRecoveryEnabled){ + // check if we are below desired; if so, allow more nodes to join + if(CycleChain.newest != null){ + const active = CycleChain.newest.active + const desired = CycleChain.newest.desired + const deficit = desired - active + if(deficit > 0){ + // This code is rotation safe because if allowActivePerCycleRecover is set to 1 + // and allowActivePerCycle is set to 1 we will have the same boost + const boost = Math.min(config.p2p.allowActivePerCycleRecover, deficit) + // apply the boost + nodesToAllowActive = Math.max(nodesToAllowActive, boost) + } + } + } + if (config.debug.readyNodeDelay > 0) { nestedCountersInstance.countEvent('p2p', `selectNodesFromReadyList: only returning nodes from the ready list that were added at least ${config.debug.readyNodeDelay} seconds ago`) - return 
NodeList.readyByTimeAndIdOrder - .slice(0, config.p2p.allowActivePerCycle) - .filter((node) => CycleChain.newest.start >= node.readyTimestamp + config.debug.readyNodeDelay) + return NodeList.readyByTimeAndIdOrder.slice(0, config.p2p.allowActivePerCycle).filter((node) => CycleChain.newest.start >= node.readyTimestamp + config.debug.readyNodeDelay) } - return NodeList.readyByTimeAndIdOrder.slice(0, config.p2p.allowActivePerCycle) + return NodeList.readyByTimeAndIdOrder.slice(0, nodesToAllowActive) } else { if (mode === 'forming' && isFirst && NodeList.activeByIdOrder.length === 0) return NodeList.readyByTimeAndIdOrder diff --git a/src/p2p/ModeSystemFuncs.ts b/src/p2p/ModeSystemFuncs.ts index b54d87299..3384445e0 100644 --- a/src/p2p/ModeSystemFuncs.ts +++ b/src/p2p/ModeSystemFuncs.ts @@ -1,7 +1,7 @@ import * as NodeList from './NodeList' import * as Self from './Self' import { enterRecovery, enterSafety, enterProcessing, enterShutdown } from './Modes' -import { config } from './Context' +import { config, logger } from './Context' import { targetCount } from './CycleAutoScale' import { nestedCountersInstance } from '../utils/nestedCounters' import { P2P } from '@shardus/types' @@ -20,27 +20,54 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor const active = NodeList.activeByIdOrder.length const syncing = NodeList.byJoinOrder.length - NodeList.activeByIdOrder.length // For now, we are using the desired value from the previous cycle. In the future, we should look at using the next desired value - const desired = prevRecord.desired + const desired = prevRecord?.desired const target = targetCount + const mode = prevRecord?.mode + const hasPrevRecord = prevRecord != null + const counter = prevRecord?.counter + const lost_count = prevRecord?.lost?.length + + /* prettier-ignore */ if (config.debug.verboseNestedCounters || (logFlags?.verboseNestedCounters)) nestedCountersInstance.countEvent( 'p2p', `desired: ${desired}, target: ${target}, active: ${active}, syncing: ${syncing}` ) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATETOACCEPTV2_1', logger.combine(`calculateToAcceptV2 prevCounter: ${counter}, desired: ${desired}, target: ${target}, active: ${active}, syncing: ${syncing}`, 'calculateToAcceptV2_prevCounter')) + + if(hasPrevRecord === false){ + return { add: 0, remove: 0 } + } - /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent( - 'p2p', - `desired: ${desired}, target: ${target}, active: ${active}, syncing: ${syncing}` - ) - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log(`prevCounter: ${prevRecord.counter}, desired: ${desired}, target: ${target}, active: ${active}, syncing: ${syncing}`) - - let add = 0 - let remove = 0 + return calculateAddRemove(mode, active, syncing, desired, target, counter, lost_count) +} - if (prevRecord) { - if (prevRecord.mode === 'forming') { +function calculateAddRemove( + mode: string, + active: number, + syncing: number, + desired: number, + target: number, + counter: number, + lost_count: number + ): ToAcceptResult { + let add = 0 + let remove = 0 + + // we can make desiredSyncingNodeCount dynamic later. 
it could be based on the average sync time and the desired rotaiton rate + const desiredSyncingNodeCount = config.p2p.syncingDesiredMinCount + const useNewSyncingDesiredCount = config.p2p.syncFloorEnabled + const syncingMaxAddPercent = config.p2p.syncingMaxAddPercent + const syncingCeilingBase = desiredSyncingNodeCount //config.p2p.syncingCeiling + + // going to re-evaluate this and make them adjustable + const syncingCeilingProcessing = syncingCeilingBase * 2 + const syncingCeilingSafety = syncingCeilingBase * 4 + const syncingCeilingRecovery = syncingCeilingBase * 4 + + if (mode === 'forming') { if (Self.isFirst && active < 1) { add = target remove = 0 return { add, remove } } else if (active != desired) { let addRem = target - (active + syncing) - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log(`under forming active != desired; addRem: ${addRem}`) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_FORMING_1', logger.combine(`under forming active != desired; addRem: ${addRem}`, 'forming_active_not_desired')) if (addRem > 0) { add = Math.ceil(addRem) remove = 0 @@ -61,24 +88,26 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor } } } - } else if (prevRecord.mode === 'restart') { + } else if (mode === 'restart') { if (syncing < desired + config.p2p.extraNodesToAddInRestart) { const addRem = target + config.p2p.extraNodesToAddInRestart - syncing - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log(`under restart active != desired; addRem: ${addRem}`) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_RESTART_1', logger.combine(`under restart active != desired; addRem: ${addRem}`, 'restart_active_not_desired')) if (addRem > 0) { add = Math.ceil(addRem) remove = 0 return { add, remove } } } - } else if (prevRecord.mode === 'processing') { + } else if (mode === 'processing') { if (enterSafety(active) === false && enterRecovery(active) === false) { - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("max rotated per cycle: ", config.p2p.maxRotatedPerCycle) + + + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_1', logger.combine("max rotated per cycle: ", config.p2p.maxRotatedPerCycle, 'max_rotated_per_cycle')) if (active !== ~~target) { // calculate nodes to add or remove - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("active not equal target") + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_2', logger.combine("active not equal target", 'active_not_equal_target')) let addRem = target - (active + syncing) - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("addRem ", addRem) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_3', logger.combine("addRem ", addRem, 'addRem')) if (addRem > 0) { if (addRem > active * config.p2p.rotationMaxAddPercent) { // limit nodes added to 10% of active; we are here because many were lost @@ -91,17 +120,29 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor add = Math.ceil(addRem) remove = 0 - /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateToAcceptV2 c:${prevRecord.counter} active !== ~~target, addRem > 0 add: ${add}, remove: ${remove}`) + //new logic here , but it 
is not a total replacment, it will only boost the add value + if (useNewSyncingDesiredCount){ + //if (add + syncing < desiredSyncingNodeCount){ //??? + // NEED to decide if "add" count should be considered with the syncing count! + + add = maintainSyncingFloor(desiredSyncingNodeCount, syncing, add) + add = clampMaxNodesToAdd(add, active, syncingMaxAddPercent) + add = maintainSyncingCeiling(syncingCeilingProcessing, syncing, add) + + } + const logMsg = 'active !== ~~target addRem > 0' + /* prettier-ignore */ if(logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_4', logger.combine(`calculateAddRemove: cycle:${counter} `, logMsg, `add: ${add} remove:${remove} active:${active} syncing:${syncing}`, 'calculateAddRemove_active_not_equal_target_addRem_greater_than_0')) + /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateAddRemove c:${counter} active !== ~~target, addRem > 0 add: ${add}, remove: ${remove}`) return { add, remove } } if (addRem < 0) { //Note that we got here earlier because syncing nodes were "counting against us" //now we will look at addRem where syncing nodes are not considered let toRemove = active - target // only remove the active nodes more than target - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log(`addRem in processing: ${toRemove}`) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_5', logger.combine(`addRem in processing: ${toRemove}`, 'addRem_in_processing')) if (toRemove > active * config.p2p.rotationMaxRemovePercent) { // limit nodes removed to 5% of active; this should not happen - console.log('unexpected addRem > 5% of active', toRemove, active, target, desired) + /* prettier-ignore */ if(logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_6', logger.combine('unexpected addRem > 5% of active', toRemove, active, target, desired, 'unexpected_addRem_greater_than_5_percent_of_active')) //~~ truncate the value of rnum i.e. 
fast Math.floor() toRemove = ~~(active * config.p2p.rotationMaxRemovePercent) if (toRemove === 0) { @@ -119,10 +160,50 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor } add = 0 remove = Math.ceil(toRemove) - /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateToAcceptV2 c:${prevRecord.counter} active !== ~~target, addRem < 0 (remove) add: ${add}, remove: ${remove}`) + + //new logic here, but it is not a total replacement, it will only boost the add value + if (useNewSyncingDesiredCount){ + // even though we may have too many nodes we should still keep nodes in the syncing pipeline + add = maintainSyncingFloor(desiredSyncingNodeCount, syncing, add) + add = clampMaxNodesToAdd(add, active, syncingMaxAddPercent) + add = maintainSyncingCeiling(syncingCeilingProcessing, syncing, add) + } + + const logMsg = 'active !== ~~target addRem < 0 toRemove > 0' + /* prettier-ignore */ if(logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_7', logger.combine(`calculateAddRemove: cycle:${counter} `, logMsg, `add: ${add} remove:${remove} active:${active} syncing:${syncing}`, 'calculateAddRemove_active_not_equal_target_addRem_less_than_0_toRemove_greater_than_0')) + + + /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateAddRemove c:${counter} active !== ~~target, addRem < 0 (remove) add: ${add}, remove: ${remove}`) return { add, remove } } else { - //this is a case where syncing nodes are counting against us and we need to take a careful look to allow + //this is a case where syncing nodes are counting against us + // for example we still have fewer active nodes than we target, but + // if you count the syncing nodes we have more. + // + + //New logic here. To be rotation safe and correct it is better to run fresh logic + if (useNewSyncingDesiredCount){ + add = 0 + add = maintainSyncingFloor(desiredSyncingNodeCount, syncing, add) + add = clampMaxNodesToAdd(add, active, syncingMaxAddPercent) + add = maintainSyncingCeiling(syncingCeilingProcessing, syncing, add) + //unlike the other cases where we just add and go on, the best option here + // is to avoid all the following logic if we are using the new + // syncingDesiredCount value + + const logMsg = 'active !== ~~target addRem toRemove <= 0' + /* prettier-ignore */ if(logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_8', logger.combine(`calculateAddRemove: cycle:${counter} `, logMsg, `add: ${add} remove:${remove} active:${active} syncing:${syncing}`, 'calculateAddRemove_active_not_equal_target_addRem_toRemove_less_than_or_equal_to_0')) + + return { add, remove } + } + + // Logic below here was the old stopgap solution that is no longer + // scaling well with larger, longer lived networks! + // We have to keep it to be rotation safe! 
+ // syncingDesiredCount will not be set to a value greater than 0 + // until the activation migration of 1.15.3 + + // we need to take a careful look to allow //some nodes to sync and go active (can look at median time ) // for now we will use an approximation that we want to rotate one per cycle @@ -145,14 +226,30 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor add = maxSyncing - syncing - /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateToAcceptV2 c:${prevRecord.counter} active !== ~~target, addRem < 0 (not-remove) add: ${add}, remove: ${remove}`) + /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateAddRemove c:${counter} active !== ~~target, addRem < 0 (not-remove) add: ${add}, remove: ${remove}`) return { add, remove } } } } } else if (config.p2p.maxRotatedPerCycle !== 0) { + //This is the case where active === target and we allow nodes to be rotated + + + //New logic here. To be rotation safe and correct it is better to run fresh logic + if (useNewSyncingDesiredCount){ + add = 0 + add = maintainSyncingFloor(desiredSyncingNodeCount, syncing, add) + add = clampMaxNodesToAdd(add, active, syncingMaxAddPercent) + add = maintainSyncingCeiling(syncingCeilingProcessing, syncing, add) + + const logMsg = 'active == ~~target 0' + /* prettier-ignore */ if(logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_9', logger.combine(`calculateAddRemove: cycle:${counter} `, logMsg, `add: ${add} remove:${remove} active:${active} syncing:${syncing}`, 'calculateAddRemove_active_equal_target_0')) + + return { add, remove } + } + //This essentially active === target and we have a non zero maxRotatedPerCycle - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("entered rotation") + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_10', logger.combine("entered rotation", 'entered_rotation')) let rnum = config.p2p.maxRotatedPerCycle // num to rotate per cycle; can be less than 1; like 0.5 for every other cycle; -1 for auto if (rnum < 0) { // rotate all nodes in 1000 cycles @@ -161,7 +258,7 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor if (rnum < 1) { //This is supposed to be true rnum % of the time, that does not work //the math is wrong. 
fortunately we can avoid this if maxRotatedPerCycle >= 1 - if (prevRecord.counter % (1 / rnum) === 0) { + if (counter % (1 / rnum) === 0) { // rotate every few cycles if less than 1000 nodes rnum = 1 } else { @@ -181,19 +278,40 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor rnum = config.p2p.rotationCountMultiply * rnum rnum = config.p2p.rotationCountAdd + rnum - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("rnum: ", rnum) - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log("setting add to rnum") + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_11', logger.combine("rnum: ", rnum, 'rnum')) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_12', logger.combine("setting add to rnum", 'setting_add_to_rnum')) add = Math.ceil(rnum) remove = 0 } - /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateToAcceptV2 c:${prevRecord.counter} config.p2p.maxRotatedPerCycle !== 0 add: ${add}, remove: ${remove}`) - /* prettier-ignore */ if (logFlags && logFlags.verbose) console.log(`add: ${add}, remove: ${remove}`) + /* prettier-ignore */ if (config.debug.verboseNestedCounters) nestedCountersInstance.countEvent('p2p', `calculateAddRemove c:${counter} config.p2p.maxRotatedPerCycle !== 0 add: ${add}, remove: ${remove}`) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_PROCESSING_13', logger.combine(`add: ${add}, remove: ${remove}`, 'add_remove')) return { add, remove } } } - } else if (prevRecord.mode === 'safety') { + } else if (mode === 'safety') { if (enterProcessing(active) === false && enterRecovery(active) === false) { + + //New logic here. To be rotation safe and correct it is better to run fresh logic + if (useNewSyncingDesiredCount){ + + + add = config.p2p.minNodes - (active + syncing) + add = Math.max(add, 0) // don't add negative nodes + + // the closer safety mode was getting to the goal the less syncing nodes were being maintained. + // it is much better to maintain the correct steady state of syncing nodes + // rather than letting it approach 0 + add = maintainSyncingFloor(desiredSyncingNodeCount, syncing, add) + add = clampMaxNodesToAdd(add, active, syncingMaxAddPercent) + add = maintainSyncingCeiling(syncingCeilingSafety, syncing, add) + const logMsg = 'safety' + /* prettier-ignore */ if(logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_SAFETY_1', logger.combine(`calculateAddRemove: cycle:${counter} `, logMsg, `add: ${add} remove:${remove} active:${active} syncing:${syncing}`, 'calculateAddRemove_safety')) + + return { add, remove } + } + + // since in safety mode, will use minNodes as the threshold to enter back into processing mode let addRem = 1.02 * config.p2p.minNodes - (active + syncing) // we try to overshoot min value by 2%; for slow syncing nodes if (addRem > active * 0.05) { @@ -204,17 +322,40 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor } // Is this needed for lost nodes? lost nodes didn't get removed in next cycle if they refuted // Or is the intention to use the removed nodes in the previous cycle? If so, we can also consider apoptosized nodes as well. 
- addRem += lost_count // compensate for nodes that were lost; though this could add more burden on existing nodes + addRem += lost_count // compensate for nodes that were lost; though this could add more burden on existing nodes + if (addRem > 0) { add = Math.ceil(addRem) remove = 0 return { add, remove } } } - } else if (prevRecord.mode === 'recovery') { + } else if (mode === 'recovery') { if (enterShutdown(active) === false) { + + //New logic here. To be rotation safe and correct it is better to run fresh logic + if (useNewSyncingDesiredCount){ + + + add = config.p2p.minNodes - (active + syncing) + add = Math.max(add, 0) // don't add negative nodes + + // the closer recovery mode was getting to the goal, the fewer syncing nodes were being maintained. + // it is much better to maintain the correct steady state of syncing nodes + // rather than letting it approach 0 + add = maintainSyncingFloor(desiredSyncingNodeCount, syncing, add) + add = clampMaxNodesToAdd(add, active, syncingMaxAddPercent) + add = maintainSyncingCeiling(syncingCeilingRecovery, syncing, add) + const logMsg = 'recovery' + /* prettier-ignore */ if(logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_RECOVERY_1', logger.combine(`calculateAddRemove: cycle:${counter} `, logMsg, `add: ${add} remove:${remove} active:${active} syncing:${syncing}`)) + + return { add, remove } + } + + const totalNodeCount = active + syncing let addRem = target - totalNodeCount + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_RECOVERY_2', `Recovery mode calculations addRem: ${addRem} active: ${active} syncing: ${syncing} target: ${target}`) if (addRem > totalNodeCount * 0.2) { addRem = ~~(totalNodeCount * 0.2) // Add 20% more nodes on each cycle if (addRem === 0) { @@ -227,18 +368,56 @@ export function calculateToAcceptV2(prevRecord: P2P.CycleCreatorTypes.CycleRecor return { add, remove } } } - } else if (prevRecord.mode === 'restore') { + } else if (mode === 'restore') { const addRem = target - (active + syncing) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CALCULATEADDREMOVE_RESTORE_1', `Restore mode calculations addRem: ${addRem} active: ${active} syncing: ${syncing} target: ${target}`) if (addRem > 0) { add = Math.ceil(addRem) return { add, remove } } } - } - /* prettier-ignore */ if (logFlags.verbose) console.log('add remove returned from default') + + /* prettier-ignore */ if (logFlags.verbose) logger.mainLog_debug('CALCULATEADDREMOVE_17', `add remove returned from default. 
add_remove_returned_from_default mode:${mode} add: ${add} remove:${remove} active:${active} syncing:${syncing}`) return { add, remove } } +function maintainSyncingFloor(desiredSyncingNodeCount: number, syncing: number, add: number) : number { + if (syncing < desiredSyncingNodeCount){ + const additionalNodesToAdd = desiredSyncingNodeCount - (syncing - add) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('MAINTAINSYNCINGFLOOR_ADD', `maintainSyncingFloor syncing: ${syncing} desiredSyncingNodeCount: ${desiredSyncingNodeCount} add: ${add} (before) additionalNodesToAdd: ${additionalNodesToAdd}`) + add += additionalNodesToAdd + } + return add +} + +function maintainSyncingCeiling(syncCeiling: number, syncing: number, add: number) : number { + if (syncing > syncCeiling){ + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('MAINTAINSYNCINGCEILING_CLAMP', `maintainSyncingCeiling syncing: ${syncing} syncCeiling: ${syncCeiling} add: ${add} (will be set to 0)`) + add = 0 + } + return add +} + +/** + * clamp the number of nodes we will suggest adding in a cycle to a percentage of the active nodes + * + * @param add + * @param active + * @param syncingMaxAddPercent + * @returns + */ +function clampMaxNodesToAdd(add: number, active: number, syncingMaxAddPercent: number): number { + const maxAdd = active * syncingMaxAddPercent + if (add > maxAdd) { + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('CLAMP_MAX_NODES_TO_ADD', `clampMaxNodesToAdd add: ${add} active: ${active} maxAdd: ${maxAdd}`) + add = ~~(maxAdd) + if (add === 0) { + add = 1 + } + } + return add +} + // need to think about and maybe ask Omar about using prev record for determining mode, could use next record /** Returns the number of expired nodes and the list of removed nodes using calculateToAcceptV2 */ @@ -374,12 +553,4 @@ export function getExpiredRemovedV2( return { expired, removed } } -/** Returns a linearly interpolated value between `amountToShrink` and the same - * multiplied by a `scaleFactor`. 
The result depends on the - * `scaleInfluenceForShrink` */ -function getScaledAmountToShrink(): number { - const nonScaledAmount = config.p2p.amountToShrink - const scaledAmount = config.p2p.amountToShrink * CycleCreator.scaleFactor - const scaleInfluence = config.p2p.scaleInfluenceForShrink - return Math.floor(lerp(nonScaledAmount, scaledAmount, scaleInfluence)) -} + diff --git a/src/p2p/Rotation.ts b/src/p2p/Rotation.ts index b0b230511..c117d7af9 100644 --- a/src/p2p/Rotation.ts +++ b/src/p2p/Rotation.ts @@ -141,7 +141,7 @@ export function getExpiredRemoved( let scaleDownRemove = Math.max(active - desired, 0) //only let the scale factor impart a partial influence based on scaleInfluenceForShrink - const scaledAmountToShrink = getScaledAmountToShrink() + const scaledAmountToShrink = getScaledAmountToShrink() //ITN3 example = 36 //limit the scale down by scaledAmountToShrink if (scaleDownRemove > scaledAmountToShrink) { @@ -151,25 +151,13 @@ export function getExpiredRemoved( //maxActiveNodesToRemove is a percent of the active nodes that is set as a 0-1 value in maxShrinkMultiplier //this is to prevent the network from shrinking too fast //make sure the value is at least 1 + //ITN3 example: maxShrinkMultiplier: 0.02, active: 640 = floor(12.8) = 12 const maxActiveNodesToRemove = Math.max(Math.floor(config.p2p.maxShrinkMultiplier * active), 1) const cycle = CycleChain.newest.counter if (cycle > lastLoggedCycle && scaleDownRemove > 0) { lastLoggedCycle = cycle - info( - 'scale down dump:' + - Utils.safeStringify({ - cycle, - scaleFactor: CycleCreator.scaleFactor, - scaleDownRemove, - maxActiveNodesToRemove, - desired, - active, - scaledAmountToShrink, - maxRemove, - expired, - }) - ) + /* prettier-ignore */ if (logFlags?.node_rotation_debug) logger.mainLog_debug('GETEXPIREDREMOVED_DUMPNODES', 'scale down dump:' + Utils.safeStringify({ cycle, scaleFactor: CycleCreator.scaleFactor, scaleDownRemove, maxActiveNodesToRemove, desired, active, scaledAmountToShrink, maxRemove, expired, }) ) } // Allows the network to scale down even if node rotation is turned off @@ -186,6 +174,7 @@ export function getExpiredRemoved( // final clamp of max remove, but only if it is more than amountToShrink // to avoid messing up the calculation above this next part can only make maxRemove smaller. // maxActiveNodesToRemove is a percent of the active nodes that is set as a 0-1 value in maxShrinkMultiplier + // ITN3 example amountToShrink = 5. maxActiveNodesToRemove = 12 if (maxRemove > config.p2p.amountToShrink && maxRemove > maxActiveNodesToRemove) { // yes, this max could be baked in earlier, but I like it here for clarity maxRemove = Math.max(config.p2p.amountToShrink, maxActiveNodesToRemove) @@ -248,7 +237,19 @@ function error(...msg: string[]): void { /** Returns a linearly interpolated value between `amountToShrink` and the same * multiplied by a `scaleFactor`. The result depends on the -* `scaleInfluenceForShrink` */ +* `scaleInfluenceForShrink` +* +* ITN3 example numbers (128 / 5) * (640 / 100) = 25.6 * 6.4 = 163.84 +* config.p2p.amountToShrink 5 +* config.p2p.scaleInfluenceForShrink 0.2, +* +* Math.floor(lerp(163.84, 5, 0.2)) = 36! 
+ * this is used as a max though, only to clamp but not raise our amount to shrink + * this is for the scaled down remove case + * + * + */ function getScaledAmountToShrink(): number { const nonScaledAmount = config.p2p.amountToShrink const scaledAmount = config.p2p.amountToShrink * CycleCreator.scaleFactor diff --git a/src/shardus/shardus-types.ts b/src/shardus/shardus-types.ts index 743cfb7b0..693e6b671 100644 --- a/src/shardus/shardus-types.ts +++ b/src/shardus/shardus-types.ts @@ -899,8 +899,19 @@ export interface ServerConfiguration { rotationMaxAddPercent: number /** not an actual percent but 0-1 value or multiplication */ rotationMaxRemovePercent: number - /** The max number of nodes added to `activated` list in cycleRecord each cycle */ + /** enable sync floor */ + syncFloorEnabled: boolean + /** additional support for more syncing nodes. not an actual percent but 0-1 value or multiplication */ + syncingMaxAddPercent: number + /** how many nodes should be syncing at any given time */ + syncingDesiredMinCount: number + /** The max number of nodes added to `activated` list in cycleRecord each cycle while processing */ allowActivePerCycle: number + /** The max number of nodes added to `activated` list in cycleRecord each cycle */ + allowActivePerCycleRecover: number + /** enable active node rotation recovery */ + activeRecoveryEnabled: boolean + /** should a checking node use a random proxy to run the down test */ useProxyForDownCheck: boolean /** The number of checker nodes to ask to investigate whether a node that is potentially lost */ numCheckerNodes: number diff --git a/src/state-manager/AccountSync.ts b/src/state-manager/AccountSync.ts index 0fe6ae945..a1a1e4ef5 100644 --- a/src/state-manager/AccountSync.ts +++ b/src/state-manager/AccountSync.ts @@ -319,6 +319,28 @@ class AccountSync { // this.profiler.scopedProfileSectionEnd('get_account_data3', responseSize) // } // ) + + Context.network.registerExternalGet('sync-globals', isDebugModeMiddleware, async (req, res) => { + try { + + const cycle = this.stateManager.currentCycleShardData.cycleNumber + const syncFromArchiver = false + + // need to review this, consider sync from archiver. + // consider "express version" that syncs to a specific hash + // todo actual endpoint with options + const syncTracker = this.createSyncTrackerByForGlobals(cycle, false, syncFromArchiver) + //this.globalAccountsSynced = false + + await syncTracker.syncStateDataGlobals() + this.syncTrackers.pop() + } catch(e) { + this.mainLogger.error(`sync-globals: Exception executing request: ${errorToStringFull(e)}`) + res.write('error'); res.end(); return + } + res.write('ok') + res.end() + }) const getAccDataBinaryHandler: Route> = { name: InternalRouteEnum.binary_get_account_data, @@ -742,7 +764,7 @@ class AccountSync { if (keptGlobal === false && this.globalAccountsSynced === false && useGlobalAccounts === true) { this.createSyncTrackerByForGlobals(cycle, true) addedGlobal = true - } + } //init new non global trackers rangesToSync = this.initRangesToSync(nodeShardData, homePartition, 4, 4) @@ -751,6 +773,11 @@ class AccountSync { this.createSyncTrackerByRange(range, cycle, true) newTrackers++ } + + // sync globals again after all the non global data. 
+ // this is needed in case the global account changed in that time + this.createSyncTrackerByForGlobals(cycle, true) + /* prettier-ignore */ nestedCountersInstance.countRareEvent('sync', `RETRYSYNC: lastCycle: ${lastCycle} cycle: ${cycle} ${Utils.safeStringify({keptGlobal, addedGlobal, cleared, kept, newTrackers })}`) /* prettier-ignore */ this.mainLogger.debug(`DATASYNC: RETRYSYNC lastCycle: lastCycle: ${lastCycle} cycle: ${cycle} ${Utils.safeStringify({keptGlobal, addedGlobal, cleared, kept, newTrackers })}`) continue //resume loop at top! @@ -1515,6 +1542,20 @@ class AccountSync { setGlobalSyncFinished(): void { this.globalAccountsSynced = true } + + + reSyncGlobals(): void { + + const cycle = this.stateManager.currentCycleShardData.cycleNumber + const syncFromArchiver = false + + // need to review this , consider sync from archiver. + // consider "express version" that syncs to a specific hash + // todo actual endpoint with options + this.createSyncTrackerByForGlobals(cycle, false, syncFromArchiver) + //this.globalAccountsSynced = false + } + } export default AccountSync
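Reviewer note: the following is a minimal, standalone TypeScript sketch (not part of the patch) of how the three helpers added to ModeSystemFuncs.ts compose in the processing path. The example state (640 active nodes, 10 syncing) and the ITN-style settings (syncingDesiredMinCount = 50, syncingMaxAddPercent = 0.2, ceiling = 2x the floor) are assumptions for illustration only.

// Sketch only: mirrors the maintainSyncingFloor / clampMaxNodesToAdd / maintainSyncingCeiling
// helpers from this PR, with logging stripped, so the add calculation can be checked in isolation.

function maintainSyncingFloor(desiredSyncingNodeCount: number, syncing: number, add: number): number {
  if (syncing < desiredSyncingNodeCount) {
    // boost add when the syncing pool is below the desired floor (formula copied from the PR)
    add += desiredSyncingNodeCount - (syncing - add)
  }
  return add
}

function clampMaxNodesToAdd(add: number, active: number, syncingMaxAddPercent: number): number {
  const maxAdd = active * syncingMaxAddPercent
  if (add > maxAdd) {
    add = ~~maxAdd // truncate, but never clamp all the way down to zero
    if (add === 0) add = 1
  }
  return add
}

function maintainSyncingCeiling(syncCeiling: number, syncing: number, add: number): number {
  // if too many nodes are already syncing, add nothing this cycle
  return syncing > syncCeiling ? 0 : add
}

// Assumed example state: 640 active, 10 syncing, floor 50, max add 20% of active, ceiling 2x the floor.
const active = 640
const syncing = 10
const desiredSyncingNodeCount = 50
const syncingMaxAddPercent = 0.2
const syncingCeilingProcessing = desiredSyncingNodeCount * 2

let add = 0
add = maintainSyncingFloor(desiredSyncingNodeCount, syncing, add) // 50 - (10 - 0) = 40
add = clampMaxNodesToAdd(add, active, syncingMaxAddPercent) // 40 <= 128, unchanged
add = maintainSyncingCeiling(syncingCeilingProcessing, syncing, add) // 10 <= 100, unchanged
console.log(add) // 40 nodes suggested for the syncing pipeline this cycle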
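Similarly, a sketch of the activeRecoveryEnabled boost in selectNodesFromReadyList. The cycle values (active = 630, desired = 640) and the allowActivePerCycle = 1 override are hypothetical (the Active.ts comment above suggests ITN3 runs with allowActivePerCycle = 1); allowActivePerCycleRecover = 4 matches the new default added to server.ts.

// Sketch only: the boost math from selectNodesFromReadyList when activeRecoveryEnabled is true.
// All values below are assumed for illustration.

const allowActivePerCycle = 1 // assumed ITN-style override (the default added in server.ts is 7)
const allowActivePerCycleRecover = 4 // new default added in server.ts
const activeNodes = 630 // hypothetical cycle record values
const desiredNodes = 640

let nodesToAllowActive = allowActivePerCycle

const deficit = desiredNodes - activeNodes // 10 nodes short of desired
if (deficit > 0) {
  // never boost above allowActivePerCycleRecover, and never drop below allowActivePerCycle
  const boost = Math.min(allowActivePerCycleRecover, deficit) // min(4, 10) = 4
  nodesToAllowActive = Math.max(nodesToAllowActive, boost) // max(1, 4) = 4
}

console.log(nodesToAllowActive) // 4 ready nodes may be selected to go active this cycle instead of 1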