From 974eaad7f14ca7221e71244eb680d1272d3cfb2e Mon Sep 17 00:00:00 2001 From: diegokingston Date: Mon, 22 Jun 2026 12:38:06 -0300 Subject: [PATCH] perf(stark): skip fixed 0/1 muls in LogUp fingerprint accumulation In the fingerprint hot loop (prover aux-build + constraint-eval + verifier): - Bus-id term: alpha_powers[0] = alpha^0 = 1, so embed the bus id into the extension field directly instead of multiplying by 1 (drops one F*E mul per interaction per row, hoisted out of the row loop on the aux path). - Fixed-zero bus elements (the ~235 constant(0) used for bus-width padding) contribute nothing: skip the F*E multiply + accumulate entirely. Variable elements that happen to be zero on a row also benefit. Value-identical (field addition is exactly associative): stark lib 128/128 (default + parallel), prover bus/logup tests pass, clippy clean. Net effect on prove time is what we want to measure on the 32-core bench. --- crypto/stark/src/lookup.rs | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/crypto/stark/src/lookup.rs b/crypto/stark/src/lookup.rs index cdc68e7e0..d0eb01079 100644 --- a/crypto/stark/src/lookup.rs +++ b/crypto/stark/src/lookup.rs @@ -668,7 +668,11 @@ impl BusValue { } } } - *acc += &result * &alpha_powers[alpha_offset]; + // Fixed-zero bus elements (bus-width padding) contribute nothing — + // skip the F×E multiply. (α⁰ = 1 covers the bus-id term separately.) + if result != FieldElement::::zero() { + *acc += &result * &alpha_powers[alpha_offset]; + } 1 } } @@ -778,7 +782,11 @@ impl BusValue { } } } - *acc += result * &alpha_powers[alpha_offset]; + // Fixed-zero bus elements (bus-width padding) contribute nothing — + // skip the F×E multiply. + if result != FieldElement::::zero() { + *acc += result * &alpha_powers[alpha_offset]; + } 1 } } @@ -1465,10 +1473,6 @@ where .max() .unwrap_or(0); let alpha_powers = compute_alpha_powers(alpha, max_bus_elements); - let bus_ids: Vec> = interactions - .iter() - .map(|i| FieldElement::::from(i.bus_id)) - .collect(); let shifts = PackingShifts::::new(); let n = interactions.len(); @@ -1480,9 +1484,11 @@ where // Phase 1 — fingerprints, laid out as [int_0 rows…, int_1 rows…]. // fp[k*chunk_len + i] = interaction k at row chunk_start+i. let mut fingerprints: Vec> = Vec::with_capacity(n * chunk_len); - for (k, interaction) in interactions.iter().enumerate() { + for interaction in interactions.iter() { + // α⁰ = 1: the bus-id term needs no multiply — embed it into E once. + let bus_id_e = FieldElement::::from(interaction.bus_id); for row in chunk_start..chunk_start + chunk_len { - let mut lc = &bus_ids[k] * &alpha_powers[0]; + let mut lc = bus_id_e.clone(); let mut alpha_offset = 1; for bv in &interaction.values { alpha_offset += bv.accumulate_fingerprint( @@ -1502,7 +1508,8 @@ where if n == 1 { let interaction = interactions[0]; for (i, row) in (chunk_start..chunk_start + chunk_len).enumerate() { - let mut base_elements: Vec> = vec![bus_ids[0].clone()]; + let mut base_elements: Vec> = + vec![FieldElement::::from(interaction.bus_id)]; base_elements.extend( interaction .values @@ -1677,8 +1684,8 @@ fn compute_fingerprint_from_step, B: IsField>( alpha_powers: &[FieldElement], shifts: &PackingShifts, ) -> FieldElement { - let bus_id_f: FieldElement = FieldElement::from(interaction.bus_id); - let mut linear_combination = bus_id_f * &alpha_powers[0]; + // α⁰ = 1: the bus-id term needs no multiply — embed it into B directly. + let mut linear_combination = FieldElement::::from(interaction.bus_id); let mut alpha_idx = 1; for bv in &interaction.values { alpha_idx += bv.accumulate_fingerprint_from_step(