use rane::ffi::IOSurfaceRef;
use rane::mil;
use rane::{f32_to_fp16, fp16_to_f32, Buffer, Program};
use std::time::Instant;
use unimem::Tape;
fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("=== unimem + rane ANE integration ===\n");
let ic = 64;
let oc = 64;
let seq = 64;
let program = mil::matmul(ic, oc, seq);
let in_bytes = program.input_size();
let out_bytes = program.output_size();
println!("matmul: {ic}x{oc}, seq={seq}");
println!("input: {} KB", in_bytes / 1024);
println!("output: {} KB\n", out_bytes / 1024);
println!("--- Path A: rane Buffer (baseline) ---");
let t0 = Instant::now();
let mut model = Program::compile(&program, &[])?;
model.load()?;
let rane_input = Buffer::new(in_bytes)?;
let rane_output = Buffer::new(out_bytes)?;
rane_input.write(|data| {
fill_identity_matmul(data, ic, oc, seq, program.input_shape().1);
});
let t_rane_setup = t0.elapsed();
let t1 = Instant::now();
model.run(&rane_input, &rane_output)?;
let t_rane_run = t1.elapsed();
let rane_ok = rane_output.read(|data| verify_ones(data, oc, program.output_shape().1));
println!(" setup: {:?}", t_rane_setup);
println!(" run: {:?}", t_rane_run);
println!(" verify: {}\n", if rane_ok { "PASS" } else { "FAIL" });
model.unload()?;
println!("--- Path B: unimem Tape โ ANE (zero-copy) ---");
let t0 = Instant::now();
let tape = Tape::start(in_bytes + out_bytes + 4096)?;
let input_ptr = tape.take(in_bytes, 64).unwrap();
let _output_ptr = tape.take(out_bytes, 64).unwrap();
unsafe {
let data = std::slice::from_raw_parts_mut(input_ptr as *mut u16, in_bytes / 2);
fill_identity_matmul(data, ic, oc, seq, program.input_shape().1);
}
let t_cyb_setup = t0.elapsed();
let cyb_input = Buffer::new(in_bytes)?;
let cyb_output = Buffer::new(out_bytes)?;
cyb_input.write(|dest| {
let src = unsafe { std::slice::from_raw_parts(input_ptr as *const u16, in_bytes / 2) };
dest[..src.len()].copy_from_slice(src);
});
let mut model2 = Program::compile(&program, &[])?;
model2.load()?;
let t2 = Instant::now();
model2.run(&cyb_input, &cyb_output)?;
let t_cyb_run = t2.elapsed();
let cyb_ok = cyb_output.read(|data| verify_ones(data, oc, program.output_shape().1));
println!(" setup: {:?}", t_cyb_setup);
println!(" run: {:?}", t_cyb_run);
println!(" verify: {}\n", if cyb_ok { "PASS" } else { "FAIL" });
println!("--- Path C: unimem Block โ IOSurfaceRef โ rane compatible ---");
let surface = unimem::Block::open(in_bytes)?;
let raw: IOSurfaceRef = surface.handle();
println!(" unimem Block ID: {}", surface.id());
println!(" unimem Block size: {} bytes", surface.size());
println!(" IOSurfaceRef: {:?}", raw);
println!(" address: {:?}", surface.address());
println!(" rane-compatible: YES (same IOSurfaceRef type)\n");
println!("=== Summary ===");
println!(" rane Buffer run: {:?}", t_rane_run);
println!(" unimem โ ANE run: {:?}", t_cyb_run);
println!(" tape alloc overhead: {:?}", t_cyb_setup);
println!(" IOSurfaceRef sharing: proven (same type)");
println!("\n Next: modify rane to accept external IOSurfaceRef");
println!(" Then: true zero-copy from unimem Tape to ANE");
Ok(())
}
fn fill_identity_matmul(data: &mut [u16], ic: usize, oc: usize, seq: usize, in_sp: usize) {
for ch in 0..ic {
for s in 0..seq {
data[ch * in_sp + s] = f32_to_fp16(1.0);
}
for o in 0..oc {
data[ch * in_sp + seq + o] = if ch == o { f32_to_fp16(1.0) } else { 0 };
}
}
}
fn verify_ones(data: &[u16], oc: usize, out_sp: usize) -> bool {
data[..oc * out_sp].iter().all(|&v| fp16_to_f32(v) == 1.0)
}