use std::time::Instant;
/// Sorts `times` in place (ascending) and returns the element at index
/// `len / 2`.
///
/// For an odd number of samples this is the true median; for an even number
/// it is the upper of the two middle elements (no averaging is performed).
///
/// # Panics
///
/// Panics if `times` is empty, because index 0 is out of bounds.
fn median_of(times: &mut [u64]) -> u64 {
    // Stability is irrelevant for plain integers, so the unstable sort
    // yields exactly the same ordering.
    times.sort_unstable();
    let mid = times.len() / 2;
    times[mid]
}
/// Times `acpu::matmul_f32` on square inputs of several sizes and prints, for
/// each size, the median wall-clock time of one call and the derived
/// throughput.
///
/// For every size `sz` the two input buffers hold `sz * sz` deterministic
/// values and `sz` is passed for all three dimension arguments. Results are
/// written to stderr, one line per size.
fn main() {
    // Number of timed repetitions per matrix size.
    const ITERS: usize = 100;

    for &sz in &[32usize, 48, 64, 96, 128] {
        let a: Vec<f32> = (0..sz * sz).map(|i| (i % 7) as f32 * 0.1).collect();
        let b: Vec<f32> = (0..sz * sz).map(|i| (i % 11) as f32 * 0.1).collect();
        let mut c = vec![0.0f32; sz * sz];

        // One untimed call before measuring, so first-call costs do not land
        // in the samples.
        acpu::matmul_f32(&a, &b, &mut c, sz, sz, sz);

        let mut samples: Vec<u64> = (0..ITERS)
            .map(|_| {
                // Reset the output outside the timed region.
                c.fill(0.0);
                let start = Instant::now();
                acpu::matmul_f32(&a, &b, &mut c, sz, sz, sz);
                // `c` is never read otherwise; mark it as observed so the
                // optimizer cannot discard or shrink the measured work.
                std::hint::black_box(&c);
                // Truncating cast is fine here: a u64 of nanoseconds covers
                // centuries.
                start.elapsed().as_nanos() as u64
            })
            .collect();

        let ns = median_of(&mut samples);
        // 2 * sz^3 operations: one multiply and one add per inner-product
        // term of an sz x sz x sz product.
        let ops = 2.0 * (sz as f64).powi(3);
        // Operations per nanosecond is numerically equal to GFLOPS.
        let gf = ops / ns as f64;
        eprintln!("{:>5}: {:>8} ns {:>8.2} GFLOPS", sz, ns, gf);
    }
}