From a9d8d268115ced061e2b7706142d3a4e63399474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Mon, 22 Jun 2026 19:15:11 -0300 Subject: [PATCH 1/3] feat(blockchain): add --disable-duty-sync-gate to ungate duties The sync-gate suppresses block proposal, attestation production, and aggregate re-derivation whenever a node judges itself to be syncing (local head lagging wall clock while the network still progresses). In practice this feedback loop has driven finality stalls on the devnets: dead/slow proposers create empty slots, head lag crosses the threshold, nodes flap into Syncing and stop attesting, which widens the head-finalized gap further. Add a `--disable-duty-sync-gate` flag (default off, so gating stays on) that makes the gate observe-only: `SyncStatusTracker::update` still tracks the syncing state and drives `lean_node_sync_status`, but `duties_allowed()` always returns true. This lets us A/B the hypothesis on a devnet without a rebuild while keeping the metric for observability. --- bin/ethlambda/src/main.rs | 10 ++++++ crates/blockchain/src/lib.rs | 7 +++-- crates/blockchain/src/sync_status.rs | 46 ++++++++++++++++++++++++++-- 3 files changed, 59 insertions(+), 4 deletions(-) diff --git a/bin/ethlambda/src/main.rs b/bin/ethlambda/src/main.rs index e7bc504f..b26481f2 100644 --- a/bin/ethlambda/src/main.rs +++ b/bin/ethlambda/src/main.rs @@ -131,6 +131,15 @@ struct CliOptions { /// Directory for RocksDB storage #[arg(long, default_value = "./data")] data_dir: PathBuf, + /// Disable the sync-gate's suppression of validator duties. + /// + /// By default a node that judges itself to be syncing (local head lagging + /// wall clock while the network still progresses) skips block proposal, + /// attestation production, and aggregate re-derivation. With this flag the + /// sync state is still tracked and exported via `lean_node_sync_status`, + /// but it no longer suppresses any duty: the gate becomes observe-only. + #[arg(long, default_value = "false")] + disable_duty_sync_gate: bool, } // Shadow single-steps execution in a discrete-event simulation, so the default @@ -284,6 +293,7 @@ async fn main() -> eyre::Result<()> { validator_keys, aggregator.clone(), attestation_committee_count, + !options.disable_duty_sync_gate, ); // Note: SwarmConfig.is_aggregator is intentionally a plain bool, not the diff --git a/crates/blockchain/src/lib.rs b/crates/blockchain/src/lib.rs index e2a32b1d..4360377d 100644 --- a/crates/blockchain/src/lib.rs +++ b/crates/blockchain/src/lib.rs @@ -81,6 +81,7 @@ impl BlockChain { validator_keys: HashMap, aggregator: AggregatorController, attestation_committee_count: u64, + gate_duties: bool, ) -> BlockChain { metrics::set_is_aggregator(aggregator.is_enabled()); metrics::set_node_sync_status(metrics::SyncStatus::Idle); @@ -106,7 +107,7 @@ impl BlockChain { last_tick_instant: None, attestation_committee_count, pre_merge_coverage: None, - sync_status: SyncStatusTracker::default(), + sync_status: SyncStatusTracker::new(gate_duties), } .start(); let time_until_genesis = (SystemTime::UNIX_EPOCH + Duration::from_secs(genesis_time)) @@ -172,7 +173,9 @@ pub struct BlockChainServer { /// Observability-only. pre_merge_coverage: Option, - /// Stateful sync heuristic used by `lean_node_sync_status`. + /// Stateful sync heuristic used by `lean_node_sync_status`. Also gates + /// validator duties while syncing, unless that gating was disabled at + /// startup via `--disable-duty-sync-gate` (then it is metric-only). sync_status: SyncStatusTracker, } diff --git a/crates/blockchain/src/sync_status.rs b/crates/blockchain/src/sync_status.rs index 48b20a97..f23dee9c 100644 --- a/crates/blockchain/src/sync_status.rs +++ b/crates/blockchain/src/sync_status.rs @@ -12,12 +12,35 @@ const NETWORK_STALL_THRESHOLD: u64 = 8; /// Recovery band that prevents the sync status from flapping near the threshold. const SYNC_HYSTERESIS_BAND: u64 = 2; -#[derive(Default)] pub(crate) struct SyncStatusTracker { syncing: bool, + /// Whether the syncing state suppresses validator duties. + /// + /// When `false`, [`Self::update`] still tracks `syncing` and drives the + /// `lean_node_sync_status` metric, but [`Self::duties_allowed`] always + /// returns `true`: the gate is observe-only. Seeded from the CLI + /// `--disable-duty-sync-gate` flag (gating stays on by default). + gate_duties: bool, +} + +impl Default for SyncStatusTracker { + fn default() -> Self { + Self { + syncing: false, + gate_duties: true, + } + } } impl SyncStatusTracker { + /// Build a tracker, choosing whether the syncing state gates duties. + pub(crate) fn new(gate_duties: bool) -> Self { + Self { + gate_duties, + ..Self::default() + } + } + pub(crate) fn update( &mut self, current_slot: u64, @@ -43,7 +66,8 @@ impl SyncStatusTracker { } pub(crate) fn duties_allowed(&self) -> bool { - !self.syncing + // Gate disabled: the syncing state is observe-only, never suppresses duties. + !self.gate_duties || !self.syncing } } @@ -108,4 +132,22 @@ mod tests { assert_eq!(tracker.update(15, 20, 20), SyncStatus::Synced); } + + #[test] + fn gating_on_by_default_suppresses_duties_while_syncing() { + let mut tracker = SyncStatusTracker::default(); + + assert_eq!(tracker.update(20, 0, 20), SyncStatus::Syncing); + assert!(!tracker.duties_allowed()); + } + + #[test] + fn disabled_gate_still_tracks_status_but_allows_duties() { + let mut tracker = SyncStatusTracker::new(false); + + // Status still tracked for the metric... + assert_eq!(tracker.update(20, 0, 20), SyncStatus::Syncing); + // ...but duties are never suppressed. + assert!(tracker.duties_allowed()); + } } From 729f368457898d69b26d038819faf909d04a92fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Mon, 22 Jun 2026 19:25:54 -0300 Subject: [PATCH 2/3] feat(blockchain): log duties run while syncing with gate disabled When --disable-duty-sync-gate is set, the node may believe it is syncing yet run duties anyway. Surface that counterfactual: at each of the three gate sites (propose, attest, reaggregate) emit a warn when the gate would have suppressed the duty had it been enabled. Makes the override visible and greppable during devnet experiments. Also drops the two unit tests added in the previous commit. --- crates/blockchain/src/lib.rs | 15 +++++++++++++++ crates/blockchain/src/sync_status.rs | 25 +++++++------------------ 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/crates/blockchain/src/lib.rs b/crates/blockchain/src/lib.rs index 4360377d..a8913639 100644 --- a/crates/blockchain/src/lib.rs +++ b/crates/blockchain/src/lib.rs @@ -265,6 +265,13 @@ impl BlockChainServer { // Now build and publish the block (after attestations have been accepted) if let Some(validator_id) = scheduled_proposer { if self.sync_status.duties_allowed() { + if self.sync_status.duty_gate_overridden() { + warn!( + %slot, + %validator_id, + "Proposing block while syncing: duty sync-gate disabled" + ); + } self.propose_block(slot, validator_id); } else { info!(%slot, %validator_id, "Skipping block proposal while syncing"); @@ -290,6 +297,11 @@ impl BlockChainServer { ); } if self.sync_status.duties_allowed() { + if self.sync_status.duty_gate_overridden() + && !self.key_manager.validator_ids().is_empty() + { + warn!(%slot, "Producing attestations while syncing: duty sync-gate disabled"); + } self.produce_attestations(slot, is_aggregator); } else if !self.key_manager.validator_ids().is_empty() { info!(%slot, "Skipping attestations while syncing"); @@ -739,6 +751,9 @@ impl BlockChainServer { // run when the chain is in sync — backfilling nodes must // not spam gossip with rederived aggregates. if self.sync_status.duties_allowed() { + if self.sync_status.duty_gate_overridden() { + warn!(%slot, "Re-aggregating while syncing: duty sync-gate disabled"); + } self.run_reaggregate_from_block(&block_for_reaggregate); } diff --git a/crates/blockchain/src/sync_status.rs b/crates/blockchain/src/sync_status.rs index f23dee9c..4a0a597a 100644 --- a/crates/blockchain/src/sync_status.rs +++ b/crates/blockchain/src/sync_status.rs @@ -69,6 +69,13 @@ impl SyncStatusTracker { // Gate disabled: the syncing state is observe-only, never suppresses duties. !self.gate_duties || !self.syncing } + + /// Whether a duty is running only because the gate is disabled: the node + /// believes it is syncing, so with gating on the duty would have been + /// suppressed. Lets call sites log the counterfactual suppression. + pub(crate) fn duty_gate_overridden(&self) -> bool { + !self.gate_duties && self.syncing + } } #[cfg(test)] @@ -132,22 +139,4 @@ mod tests { assert_eq!(tracker.update(15, 20, 20), SyncStatus::Synced); } - - #[test] - fn gating_on_by_default_suppresses_duties_while_syncing() { - let mut tracker = SyncStatusTracker::default(); - - assert_eq!(tracker.update(20, 0, 20), SyncStatus::Syncing); - assert!(!tracker.duties_allowed()); - } - - #[test] - fn disabled_gate_still_tracks_status_but_allows_duties() { - let mut tracker = SyncStatusTracker::new(false); - - // Status still tracked for the metric... - assert_eq!(tracker.update(20, 0, 20), SyncStatus::Syncing); - // ...but duties are never suppressed. - assert!(tracker.duties_allowed()); - } } From 238c44f971967d9ada5bff6a00ba02334b223724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Mon, 22 Jun 2026 19:32:28 -0300 Subject: [PATCH 3/3] feat(blockchain): log sync status transitions at debug level Replace the per-duty counterfactual logs with a single transition log in SyncStatusTracker::update: emit a debug line only when the syncing state actually flips, carrying the slot/lag context that drove the decision. Quieter than per-duty logging and applies whether or not the duty gate is enabled. --- crates/blockchain/src/lib.rs | 15 --------------- crates/blockchain/src/sync_status.rs | 22 +++++++++++++++------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/crates/blockchain/src/lib.rs b/crates/blockchain/src/lib.rs index a8913639..4360377d 100644 --- a/crates/blockchain/src/lib.rs +++ b/crates/blockchain/src/lib.rs @@ -265,13 +265,6 @@ impl BlockChainServer { // Now build and publish the block (after attestations have been accepted) if let Some(validator_id) = scheduled_proposer { if self.sync_status.duties_allowed() { - if self.sync_status.duty_gate_overridden() { - warn!( - %slot, - %validator_id, - "Proposing block while syncing: duty sync-gate disabled" - ); - } self.propose_block(slot, validator_id); } else { info!(%slot, %validator_id, "Skipping block proposal while syncing"); @@ -297,11 +290,6 @@ impl BlockChainServer { ); } if self.sync_status.duties_allowed() { - if self.sync_status.duty_gate_overridden() - && !self.key_manager.validator_ids().is_empty() - { - warn!(%slot, "Producing attestations while syncing: duty sync-gate disabled"); - } self.produce_attestations(slot, is_aggregator); } else if !self.key_manager.validator_ids().is_empty() { info!(%slot, "Skipping attestations while syncing"); @@ -751,9 +739,6 @@ impl BlockChainServer { // run when the chain is in sync — backfilling nodes must // not spam gossip with rederived aggregates. if self.sync_status.duties_allowed() { - if self.sync_status.duty_gate_overridden() { - warn!(%slot, "Re-aggregating while syncing: duty sync-gate disabled"); - } self.run_reaggregate_from_block(&block_for_reaggregate); } diff --git a/crates/blockchain/src/sync_status.rs b/crates/blockchain/src/sync_status.rs index 4a0a597a..02c71c9f 100644 --- a/crates/blockchain/src/sync_status.rs +++ b/crates/blockchain/src/sync_status.rs @@ -1,3 +1,5 @@ +use tracing::debug; + use crate::metrics::SyncStatus; /// Local head lag beyond which the node is considered to be syncing. @@ -49,6 +51,7 @@ impl SyncStatusTracker { ) -> SyncStatus { let head_lag = current_slot.saturating_sub(head_slot); let network_lag = current_slot.saturating_sub(max_seen_slot); + let was_syncing = self.syncing; if network_lag > NETWORK_STALL_THRESHOLD { self.syncing = false; @@ -58,6 +61,18 @@ impl SyncStatusTracker { self.syncing = head_lag > SYNC_LAG_THRESHOLD; } + if self.syncing != was_syncing { + debug!( + current_slot, + head_slot, + max_seen_slot, + head_lag, + network_lag, + syncing = self.syncing, + "Sync status changed" + ); + } + if self.syncing { SyncStatus::Syncing } else { @@ -69,13 +84,6 @@ impl SyncStatusTracker { // Gate disabled: the syncing state is observe-only, never suppresses duties. !self.gate_duties || !self.syncing } - - /// Whether a duty is running only because the gate is disabled: the node - /// believes it is syncing, so with gating on the duty would have been - /// suppressed. Lets call sites log the counterfactual suppression. - pub(crate) fn duty_gate_overridden(&self) -> bool { - !self.gate_duties && self.syncing - } } #[cfg(test)]