diff --git a/bin/ethlambda/src/main.rs b/bin/ethlambda/src/main.rs index e7bc504f..b26481f2 100644 --- a/bin/ethlambda/src/main.rs +++ b/bin/ethlambda/src/main.rs @@ -131,6 +131,15 @@ struct CliOptions { /// Directory for RocksDB storage #[arg(long, default_value = "./data")] data_dir: PathBuf, + /// Disable the sync-gate's suppression of validator duties. + /// + /// By default a node that judges itself to be syncing (local head lagging + /// wall clock while the network still progresses) skips block proposal, + /// attestation production, and aggregate re-derivation. With this flag the + /// sync state is still tracked and exported via `lean_node_sync_status`, + /// but it no longer suppresses any duty: the gate becomes observe-only. + #[arg(long, default_value = "false")] + disable_duty_sync_gate: bool, } // Shadow single-steps execution in a discrete-event simulation, so the default @@ -284,6 +293,7 @@ async fn main() -> eyre::Result<()> { validator_keys, aggregator.clone(), attestation_committee_count, + !options.disable_duty_sync_gate, ); // Note: SwarmConfig.is_aggregator is intentionally a plain bool, not the diff --git a/crates/blockchain/src/lib.rs b/crates/blockchain/src/lib.rs index e2a32b1d..4360377d 100644 --- a/crates/blockchain/src/lib.rs +++ b/crates/blockchain/src/lib.rs @@ -81,6 +81,7 @@ impl BlockChain { validator_keys: HashMap, aggregator: AggregatorController, attestation_committee_count: u64, + gate_duties: bool, ) -> BlockChain { metrics::set_is_aggregator(aggregator.is_enabled()); metrics::set_node_sync_status(metrics::SyncStatus::Idle); @@ -106,7 +107,7 @@ impl BlockChain { last_tick_instant: None, attestation_committee_count, pre_merge_coverage: None, - sync_status: SyncStatusTracker::default(), + sync_status: SyncStatusTracker::new(gate_duties), } .start(); let time_until_genesis = (SystemTime::UNIX_EPOCH + Duration::from_secs(genesis_time)) @@ -172,7 +173,9 @@ pub struct BlockChainServer { /// Observability-only. pre_merge_coverage: Option, - /// Stateful sync heuristic used by `lean_node_sync_status`. + /// Stateful sync heuristic used by `lean_node_sync_status`. Also gates + /// validator duties while syncing, unless that gating was disabled at + /// startup via `--disable-duty-sync-gate` (then it is metric-only). sync_status: SyncStatusTracker, } diff --git a/crates/blockchain/src/sync_status.rs b/crates/blockchain/src/sync_status.rs index 48b20a97..02c71c9f 100644 --- a/crates/blockchain/src/sync_status.rs +++ b/crates/blockchain/src/sync_status.rs @@ -1,3 +1,5 @@ +use tracing::debug; + use crate::metrics::SyncStatus; /// Local head lag beyond which the node is considered to be syncing. @@ -12,12 +14,35 @@ const NETWORK_STALL_THRESHOLD: u64 = 8; /// Recovery band that prevents the sync status from flapping near the threshold. const SYNC_HYSTERESIS_BAND: u64 = 2; -#[derive(Default)] pub(crate) struct SyncStatusTracker { syncing: bool, + /// Whether the syncing state suppresses validator duties. + /// + /// When `false`, [`Self::update`] still tracks `syncing` and drives the + /// `lean_node_sync_status` metric, but [`Self::duties_allowed`] always + /// returns `true`: the gate is observe-only. Seeded from the CLI + /// `--disable-duty-sync-gate` flag (gating stays on by default). + gate_duties: bool, +} + +impl Default for SyncStatusTracker { + fn default() -> Self { + Self { + syncing: false, + gate_duties: true, + } + } } impl SyncStatusTracker { + /// Build a tracker, choosing whether the syncing state gates duties. + pub(crate) fn new(gate_duties: bool) -> Self { + Self { + gate_duties, + ..Self::default() + } + } + pub(crate) fn update( &mut self, current_slot: u64, @@ -26,6 +51,7 @@ impl SyncStatusTracker { ) -> SyncStatus { let head_lag = current_slot.saturating_sub(head_slot); let network_lag = current_slot.saturating_sub(max_seen_slot); + let was_syncing = self.syncing; if network_lag > NETWORK_STALL_THRESHOLD { self.syncing = false; @@ -35,6 +61,18 @@ impl SyncStatusTracker { self.syncing = head_lag > SYNC_LAG_THRESHOLD; } + if self.syncing != was_syncing { + debug!( + current_slot, + head_slot, + max_seen_slot, + head_lag, + network_lag, + syncing = self.syncing, + "Sync status changed" + ); + } + if self.syncing { SyncStatus::Syncing } else { @@ -43,7 +81,8 @@ impl SyncStatusTracker { } pub(crate) fn duties_allowed(&self) -> bool { - !self.syncing + // Gate disabled: the syncing state is observe-only, never suppresses duties. + !self.gate_duties || !self.syncing } }