Skip to main content

rustriff_lib/services/analyzers/
latency_analyzer.rs

1//! Low-level benchmark utility for measuring DSP processor CPU cost.
2//!
3//! [`LatencyAnalyzer`] provides the timing primitives used by
4//! [`AudioLatencyMeasurementService`] to derive per-processor execution costs.
5//! It is intentionally a pure measurement tool with no knowledge of audio routing
6//! or the service layer.
7//!
8//! # Methodology
9//!
10//! Both functions run the processor (or a zero-work passthrough) over
11//! `iterations × block_size` samples, alternating between `+0.5` and `-0.5` inputs to
12//! exercise any branch-dependent code paths.  [`std::hint::black_box`] is used on each
13//! output to prevent the compiler from optimising the loop body away.
14//!
15//! The *net* cost reported by [`measure_effect_added_execution_us`] is:
16//!
17//! ```text
18//! net_us_per_sample = max(effect_us_per_sample − passthrough_us_per_sample, 0)
19//! ```
20//!
21//! Clamping to `≥ 0` prevents occasional negative readings caused by CPU scheduling
22//! noise on the passthrough run.
23//!
24//! [`AudioLatencyMeasurementService`]: crate::services::audio_latency_measurement_service::AudioLatencyMeasurementService
25//! [`measure_effect_added_execution_us`]: LatencyAnalyzer::measure_effect_added_execution_us
26
27use crate::domain::audio_processor::AudioProcessor;
28use std::hint::black_box;
29use std::time::Instant;
30
31/// Stateless benchmark utility for measuring DSP processor CPU execution cost.
32pub struct LatencyAnalyzer;
33
34impl LatencyAnalyzer {
35    /// Measures the average wall-clock execution time of a processor in µs per sample.
36    ///
37    /// Runs `effect` over `iterations × block_size` synthetic samples and returns the
38    /// mean time spent per sample.  The input alternates between `+0.5` and `-0.5` to
39    /// exercise both halves of any branch-dependent code, and [`black_box`] prevents
40    /// dead-code elimination of the loop body.
41    ///
42    /// Returns `0.0` immediately if `iterations × block_size` overflows or is zero.
43    ///
44    /// # Arguments
45    ///
46    /// * `effect` — The processor to benchmark.  Mutable because processors may carry
47    ///   internal filter state that updates on every sample.
48    /// * `iterations` — Number of full `block_size` passes to run.
49    /// * `block_size` — Samples per iteration.  Larger values reduce timer-call overhead
50    ///   relative to actual processing; 256–2 048 is a practical range.
51    ///
52    /// # Returns
53    ///
54    /// Total wall-clock time divided by total samples, in **microseconds per sample**.
55    pub fn measure_processor_execution_us<E: AudioProcessor>(
56        effect: &mut E,
57        iterations: usize,
58        block_size: usize,
59    ) -> f64 {
60        let total_samples = iterations.saturating_mul(block_size);
61        if total_samples == 0 {
62            return 0.0;
63        }
64
65        let started = Instant::now();
66        for sample_index in 0..total_samples {
67            let input_sample = if sample_index % 2 == 0 { 0.5 } else { -0.5 };
68            let processed_sample = effect.process(input_sample);
69            black_box(processed_sample);
70        }
71
72        let total_us = started.elapsed().as_secs_f64() * 1_000_000.0;
73        total_us / total_samples as f64
74    }
75
76    /// Measures the *net* CPU cost added by a processor, relative to a zero-work passthrough.
77    ///
78    /// Runs [`measure_processor_execution_us`] twice — once for a [`PassthroughProcessor`]
79    /// that simply returns its input unchanged, and once for `effect` — then subtracts the
80    /// baseline.  The passthrough baseline accounts for loop overhead, `Instant::now()` cost,
81    /// and `black_box` calls, so the returned value reflects only the processor's own work.
82    ///
83    /// The result is clamped to `≥ 0.0` to avoid negative readings from measurement noise
84    /// when the processor is extremely cheap (sub-nanosecond per sample).
85    ///
86    /// # Arguments
87    ///
88    /// * `effect` — The processor under test.
89    /// * `iterations` — Number of benchmark iterations (passed to
90    ///   [`measure_processor_execution_us`]).
91    /// * `block_size` — Samples per iteration.
92    ///
93    /// # Returns
94    ///
95    /// Net added execution cost in **microseconds per sample** (µs/sample), `≥ 0`.
96    ///
97    /// [`measure_processor_execution_us`]: LatencyAnalyzer::measure_processor_execution_us
98    /// [`PassthroughProcessor`]: PassthroughProcessor
99    pub fn measure_effect_added_execution_us<E: AudioProcessor>(
100        effect: &mut E,
101        iterations: usize,
102        block_size: usize,
103    ) -> f64 {
104        let mut passthrough = PassthroughProcessor;
105
106        let baseline_us =
107            Self::measure_processor_execution_us(&mut passthrough, iterations, block_size);
108        let effect_us = Self::measure_processor_execution_us(effect, iterations, block_size);
109
110        (effect_us - baseline_us).max(0.0)
111    }
112}
113
114/// Zero-work [`AudioProcessor`] used as the baseline in
115/// [`LatencyAnalyzer::measure_effect_added_execution_us`].
116///
117/// Returns every sample unchanged.  Its execution cost represents pure loop and
118/// timer overhead rather than any meaningful DSP work.
119struct PassthroughProcessor;
120
121impl AudioProcessor for PassthroughProcessor {
122    fn process(&mut self, sample: f32) -> f32 {
123        sample
124    }
125}
126
#[cfg(test)]
mod tests {
    use super::*;
    use crate::services::processors::fixed_delay::fixed_delay_processor::FixedDelayProcessor;

    /// Deliberately expensive processor: chains 64 scaled `sin` evaluations per
    /// sample so its cost stands out clearly against the passthrough baseline.
    struct BusyProcessor;

    impl AudioProcessor for BusyProcessor {
        fn process(&mut self, sample: f32) -> f32 {
            // Each step feeds the previous result back in, so the 64 evaluations
            // form a dependency chain rather than independent work.
            (0..64).fold(sample, |acc, _| (acc * 0.99).sin())
        }
    }

    /// With zero iterations there is nothing to time, so the analyzer must report
    /// exactly `0.0` regardless of the block size.
    #[test]
    fn zero_workload_returns_zero_microseconds() {
        let mut processor = FixedDelayProcessor::new(0);

        assert_eq!(
            LatencyAnalyzer::measure_processor_execution_us(&mut processor, 0, 1024),
            0.0
        );
    }

    /// A processor that does real work must show a strictly positive net cost once
    /// the passthrough baseline has been subtracted.
    #[test]
    fn busy_processor_adds_non_zero_execution_time() {
        let net_cost_us =
            LatencyAnalyzer::measure_effect_added_execution_us(&mut BusyProcessor, 128, 1024);

        assert!(net_cost_us > 0.0);
    }
}