rustriff_lib/services/analyzers/latency_analyzer.rs
1//! Low-level benchmark utility for measuring DSP processor CPU cost.
2//!
3//! [`LatencyAnalyzer`] provides the timing primitives used by
4//! [`AudioLatencyMeasurementService`] to derive per-processor execution costs.
5//! It is intentionally a pure measurement tool with no knowledge of audio routing
6//! or the service layer.
7//!
8//! # Methodology
9//!
10//! Both functions run the processor (or a zero-work passthrough) over
11//! `iterations × block_size` samples, alternating between `+0.5` and `-0.5` inputs to
12//! exercise any branch-dependent code paths. [`std::hint::black_box`] is used on each
13//! output to prevent the compiler from optimising the loop body away.
14//!
15//! The *net* cost reported by [`measure_effect_added_execution_us`] is:
16//!
17//! ```text
18//! net_us_per_sample = max(effect_us_per_sample − passthrough_us_per_sample, 0)
19//! ```
20//!
21//! Clamping to `≥ 0` prevents occasional negative readings caused by CPU scheduling
22//! noise on the passthrough run.
23//!
24//! [`AudioLatencyMeasurementService`]: crate::services::audio_latency_measurement_service::AudioLatencyMeasurementService
25//! [`measure_effect_added_execution_us`]: LatencyAnalyzer::measure_effect_added_execution_us
26
27use crate::domain::audio_processor::AudioProcessor;
28use std::hint::black_box;
29use std::time::Instant;
30
31/// Stateless benchmark utility for measuring DSP processor CPU execution cost.
32pub struct LatencyAnalyzer;
33
34impl LatencyAnalyzer {
35 /// Measures the average wall-clock execution time of a processor in µs per sample.
36 ///
37 /// Runs `effect` over `iterations × block_size` synthetic samples and returns the
38 /// mean time spent per sample. The input alternates between `+0.5` and `-0.5` to
39 /// exercise both halves of any branch-dependent code, and [`black_box`] prevents
40 /// dead-code elimination of the loop body.
41 ///
42 /// Returns `0.0` immediately if `iterations × block_size` overflows or is zero.
43 ///
44 /// # Arguments
45 ///
46 /// * `effect` — The processor to benchmark. Mutable because processors may carry
47 /// internal filter state that updates on every sample.
48 /// * `iterations` — Number of full `block_size` passes to run.
49 /// * `block_size` — Samples per iteration. Larger values reduce timer-call overhead
50 /// relative to actual processing; 256–2 048 is a practical range.
51 ///
52 /// # Returns
53 ///
54 /// Total wall-clock time divided by total samples, in **microseconds per sample**.
55 pub fn measure_processor_execution_us<E: AudioProcessor>(
56 effect: &mut E,
57 iterations: usize,
58 block_size: usize,
59 ) -> f64 {
60 let total_samples = iterations.saturating_mul(block_size);
61 if total_samples == 0 {
62 return 0.0;
63 }
64
65 let started = Instant::now();
66 for sample_index in 0..total_samples {
67 let input_sample = if sample_index % 2 == 0 { 0.5 } else { -0.5 };
68 let processed_sample = effect.process(input_sample);
69 black_box(processed_sample);
70 }
71
72 let total_us = started.elapsed().as_secs_f64() * 1_000_000.0;
73 total_us / total_samples as f64
74 }
75
76 /// Measures the *net* CPU cost added by a processor, relative to a zero-work passthrough.
77 ///
78 /// Runs [`measure_processor_execution_us`] twice — once for a [`PassthroughProcessor`]
79 /// that simply returns its input unchanged, and once for `effect` — then subtracts the
80 /// baseline. The passthrough baseline accounts for loop overhead, `Instant::now()` cost,
81 /// and `black_box` calls, so the returned value reflects only the processor's own work.
82 ///
83 /// The result is clamped to `≥ 0.0` to avoid negative readings from measurement noise
84 /// when the processor is extremely cheap (sub-nanosecond per sample).
85 ///
86 /// # Arguments
87 ///
88 /// * `effect` — The processor under test.
89 /// * `iterations` — Number of benchmark iterations (passed to
90 /// [`measure_processor_execution_us`]).
91 /// * `block_size` — Samples per iteration.
92 ///
93 /// # Returns
94 ///
95 /// Net added execution cost in **microseconds per sample** (µs/sample), `≥ 0`.
96 ///
97 /// [`measure_processor_execution_us`]: LatencyAnalyzer::measure_processor_execution_us
98 /// [`PassthroughProcessor`]: PassthroughProcessor
99 pub fn measure_effect_added_execution_us<E: AudioProcessor>(
100 effect: &mut E,
101 iterations: usize,
102 block_size: usize,
103 ) -> f64 {
104 let mut passthrough = PassthroughProcessor;
105
106 let baseline_us =
107 Self::measure_processor_execution_us(&mut passthrough, iterations, block_size);
108 let effect_us = Self::measure_processor_execution_us(effect, iterations, block_size);
109
110 (effect_us - baseline_us).max(0.0)
111 }
112}
113
/// Zero-work [`AudioProcessor`] used as the baseline in
/// [`LatencyAnalyzer::measure_effect_added_execution_us`].
///
/// Hands every sample back unchanged and holds no state. Timing it through
/// [`LatencyAnalyzer::measure_processor_execution_us`] therefore captures the cost
/// of the benchmark harness itself (the loop, the timer reads, and the `black_box`
/// calls) rather than any meaningful DSP work. That figure is subtracted from an
/// effect's measurement to obtain its net cost.
struct PassthroughProcessor;

impl AudioProcessor for PassthroughProcessor {
    /// Returns `sample` untouched; deliberately performs no computation so the
    /// baseline measurement contains harness overhead only.
    fn process(&mut self, sample: f32) -> f32 {
        sample
    }
}
126
#[cfg(test)]
mod tests {
    use super::*;
    use crate::services::processors::fixed_delay::fixed_delay_processor::FixedDelayProcessor;

    /// Deliberately expensive processor: 64 chained `sin` evaluations per sample, so
    /// its cost is clearly distinguishable from the passthrough baseline.
    struct BusyProcessor;

    impl AudioProcessor for BusyProcessor {
        fn process(&mut self, sample: f32) -> f32 {
            let mut value = sample;
            for _ in 0..64 {
                value = (value * 0.99).sin();
            }
            value
        }
    }

    /// Zero iterations means zero samples: the early-return path must yield exactly 0.0.
    #[test]
    fn zero_workload_returns_zero_microseconds() {
        let mut delay = FixedDelayProcessor::new(0);
        let measured = LatencyAnalyzer::measure_processor_execution_us(&mut delay, 0, 1024);

        assert_eq!(measured, 0.0);
    }

    /// A zero block size also means zero samples, regardless of the iteration count.
    #[test]
    fn zero_block_size_returns_zero_microseconds() {
        let mut passthrough = PassthroughProcessor;
        let measured =
            LatencyAnalyzer::measure_processor_execution_us(&mut passthrough, 128, 0);

        assert_eq!(measured, 0.0);
    }

    /// Measuring the passthrough against itself is pure noise; the clamp must keep the
    /// reported net cost finite and non-negative whichever run happened to be slower.
    #[test]
    fn net_cost_is_finite_and_never_negative() {
        let mut passthrough = PassthroughProcessor;
        let added_us =
            LatencyAnalyzer::measure_effect_added_execution_us(&mut passthrough, 16, 256);

        assert!(added_us.is_finite());
        assert!(added_us >= 0.0);
    }

    /// A processor doing real work must register a strictly positive net cost.
    #[test]
    fn busy_processor_adds_non_zero_execution_time() {
        let mut busy = BusyProcessor;
        let added_us = LatencyAnalyzer::measure_effect_added_execution_us(&mut busy, 128, 1024);

        assert!(added_us > 0.0);
    }
}