#[cfg(target_arch = "aarch64")]
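/// Encrypts one 16-byte block in place using the ARMv8 AES instructions
/// (AESE/AESMC). `round_keys` is the expanded key schedule.
///
/// Safety note: the AES intrinsics require FEAT_AES at runtime; executing
/// them on a CPU without it is undefined behavior (see the detection sketch
/// near the end of this file).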
pub fn aes_encrypt_block(block: &mut [u8; 16], round_keys: &[[u8; 16]]) {
assert!(
matches!(round_keys.len(), 11 | 13 | 15),
"round_keys length must be 11 (AES-128), 13 (AES-192), or 15 (AES-256)"
);
let nr = round_keys.len() - 1;
unsafe {
use core::arch::aarch64::*;
let mut state = vld1q_u8(block.as_ptr());
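// Each AESE performs AddRoundKey + SubBytes + ShiftRows; AESMC performs
// MixColumns. The final round omits MixColumns, so the last two round keys
// are handled separately after the loop.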
for rk_bytes in &round_keys[..nr - 1] {
let rk = vld1q_u8(rk_bytes.as_ptr());
state = vaeseq_u8(state, rk);
state = vaesmcq_u8(state);
}
let rk_penult = vld1q_u8(round_keys[nr - 1].as_ptr());
state = vaeseq_u8(state, rk_penult);
let rk_last = vld1q_u8(round_keys[nr].as_ptr());
state = veorq_u8(state, rk_last);
vst1q_u8(block.as_mut_ptr(), state);
}
}
#[cfg(target_arch = "aarch64")]
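/// Encrypts a slice of 16-byte blocks in place (each block independent,
/// ECB-style). Blocks are processed four at a time; the remainder falls
/// back to the single-block routine.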
pub fn aes_encrypt_blocks(blocks: &mut [[u8; 16]], round_keys: &[[u8; 16]]) {
assert!(
matches!(round_keys.len(), 11 | 13 | 15),
"round_keys length must be 11 (AES-128), 13 (AES-192), or 15 (AES-256)"
);
let nr = round_keys.len() - 1;
let len = blocks.len();
let mut i = 0;
unsafe {
use core::arch::aarch64::*;
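// Four independent states per iteration: with no data dependency between
// s0..s3, the AESE/AESMC chains can overlap in the pipeline.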
while i + 4 <= len {
let mut s0 = vld1q_u8(blocks[i].as_ptr());
let mut s1 = vld1q_u8(blocks[i + 1].as_ptr());
let mut s2 = vld1q_u8(blocks[i + 2].as_ptr());
let mut s3 = vld1q_u8(blocks[i + 3].as_ptr());
for rk_bytes in &round_keys[..nr - 1] {
let rk = vld1q_u8(rk_bytes.as_ptr());
s0 = vaeseq_u8(s0, rk);
s0 = vaesmcq_u8(s0);
s1 = vaeseq_u8(s1, rk);
s1 = vaesmcq_u8(s1);
s2 = vaeseq_u8(s2, rk);
s2 = vaesmcq_u8(s2);
s3 = vaeseq_u8(s3, rk);
s3 = vaesmcq_u8(s3);
}
let rk_pen = vld1q_u8(round_keys[nr - 1].as_ptr());
let rk_last = vld1q_u8(round_keys[nr].as_ptr());
s0 = veorq_u8(vaeseq_u8(s0, rk_pen), rk_last);
s1 = veorq_u8(vaeseq_u8(s1, rk_pen), rk_last);
s2 = veorq_u8(vaeseq_u8(s2, rk_pen), rk_last);
s3 = veorq_u8(vaeseq_u8(s3, rk_pen), rk_last);
vst1q_u8(blocks[i].as_mut_ptr(), s0);
vst1q_u8(blocks[i + 1].as_mut_ptr(), s1);
vst1q_u8(blocks[i + 2].as_mut_ptr(), s2);
vst1q_u8(blocks[i + 3].as_mut_ptr(), s3);
i += 4;
}
}
while i < len {
aes_encrypt_block(&mut blocks[i], round_keys);
i += 1;
}
}
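// SHA-256 round constants: the first 32 bits of the fractional parts of the
// cube roots of the first 64 primes (FIPS 180-4).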
#[rustfmt::skip]
const K256: [u32; 64] = [
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
];
#[cfg(target_arch = "aarch64")]
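/// Applies the SHA-256 compression function to `state` for one 64-byte
/// message block, using the ARMv8 SHA-2 instructions. Requires FEAT_SHA256
/// at runtime.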
pub fn sha256_compress(state: &mut [u32; 8], block: &[u8; 64]) {
unsafe {
use core::arch::aarch64::*;
let abcd_save = vld1q_u32(state.as_ptr());
let efgh_save = vld1q_u32(state.as_ptr().add(4));
let mut abcd = abcd_save;
let mut efgh = efgh_save;
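// Load the 16 message words; vrev32q_u8 byte-swaps each 32-bit lane
// because SHA-256 message words are big-endian.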
let mut w0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block.as_ptr())));
let mut w1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block.as_ptr().add(16))));
let mut w2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block.as_ptr().add(32))));
let mut w3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block.as_ptr().add(48))));
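// vsha256hq_u32 updates the a..d half of the state and vsha256h2q_u32 the
// e..h half; each invocation covers four rounds with one w+K vector.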
macro_rules! sha256_4rounds {
($w:expr, $k_offset:expr) => {
let k = vld1q_u32(K256.as_ptr().add($k_offset));
let wk = vaddq_u32($w, k);
let tmp = abcd;
abcd = vsha256hq_u32(abcd, efgh, wk);
efgh = vsha256h2q_u32(efgh, tmp, wk);
};
}
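// Rounds 0..15 consume the raw message words.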
sha256_4rounds!(w0, 0);
sha256_4rounds!(w1, 4);
sha256_4rounds!(w2, 8);
sha256_4rounds!(w3, 12);
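// Rounds 16..63 extend the message schedule on the fly: vsha256su0q and
// vsha256su1q together compute
// w[t] = sigma1(w[t-2]) + w[t-7] + sigma0(w[t-15]) + w[t-16]
// for four values of t at once.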
macro_rules! sha256_4rounds_sched {
($w0:expr, $w1:expr, $w2:expr, $w3:expr, $k_offset:expr) => {
$w0 = vsha256su1q_u32(vsha256su0q_u32($w0, $w1), $w2, $w3);
sha256_4rounds!($w0, $k_offset);
};
}
sha256_4rounds_sched!(w0, w1, w2, w3, 16);
sha256_4rounds_sched!(w1, w2, w3, w0, 20);
sha256_4rounds_sched!(w2, w3, w0, w1, 24);
sha256_4rounds_sched!(w3, w0, w1, w2, 28);
sha256_4rounds_sched!(w0, w1, w2, w3, 32);
sha256_4rounds_sched!(w1, w2, w3, w0, 36);
sha256_4rounds_sched!(w2, w3, w0, w1, 40);
sha256_4rounds_sched!(w3, w0, w1, w2, 44);
sha256_4rounds_sched!(w0, w1, w2, w3, 48);
sha256_4rounds_sched!(w1, w2, w3, w0, 52);
sha256_4rounds_sched!(w2, w3, w0, w1, 56);
sha256_4rounds_sched!(w3, w0, w1, w2, 60);
abcd = vaddq_u32(abcd, abcd_save);
efgh = vaddq_u32(efgh, efgh_save);
vst1q_u32(state.as_mut_ptr(), abcd);
vst1q_u32(state.as_mut_ptr().add(4), efgh);
}
}
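// A minimal sketch of how `sha256_compress` composes into a full digest:
// process the whole 64-byte blocks, then append the 0x80 marker, zero
// padding, and the 64-bit big-endian bit length (FIPS 180-4 padding).
// `sha256_digest` is an illustrative helper built on the routine above,
// not an existing API.
pub fn sha256_digest(msg: &[u8]) -> [u8; 32] {
    let mut state: [u32; 8] = [
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
    ];
    let mut block = [0u8; 64];
    let mut chunks = msg.chunks_exact(64);
    for chunk in &mut chunks {
        block.copy_from_slice(chunk);
        sha256_compress(&mut state, &block);
    }
    // Build the final one or two padded blocks on the stack.
    let rem = chunks.remainder();
    let mut tail = [0u8; 128];
    tail[..rem.len()].copy_from_slice(rem);
    tail[rem.len()] = 0x80;
    let tail_len = if rem.len() < 56 { 64 } else { 128 };
    tail[tail_len - 8..tail_len].copy_from_slice(&((msg.len() as u64) * 8).to_be_bytes());
    for chunk in tail[..tail_len].chunks_exact(64) {
        block.copy_from_slice(chunk);
        sha256_compress(&mut state, &block);
    }
    let mut out = [0u8; 32];
    for (i, word) in state.iter().enumerate() {
        out[4 * i..4 * i + 4].copy_from_slice(&word.to_be_bytes());
    }
    out
}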
#[cfg(target_arch = "aarch64")]
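/// Carry-less (polynomial, GF(2)[x]) multiplication of two 64-bit values
/// into a 128-bit product via PMULL, the core primitive of GHASH/GCM.
/// Requires FEAT_PMULL at runtime.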
pub fn pmull_64(a: u64, b: u64) -> u128 {
unsafe {
use core::arch::aarch64::*;
// vmull_p64 takes scalar p64 (u64) operands directly, so no vector
// round-trip through vdupq_n_u64/vgetq_lane_p64 is needed.
vmull_p64(a, b)
}
}
#[cfg(not(target_arch = "aarch64"))]
pub fn aes_encrypt_block(block: &mut [u8; 16], round_keys: &[[u8; 16]]) {
let _ = (block, round_keys);
unimplemented!("AES encrypt requires aarch64 with FEAT_AES");
}
#[cfg(not(target_arch = "aarch64"))]
pub fn aes_encrypt_blocks(blocks: &mut [[u8; 16]], round_keys: &[[u8; 16]]) {
let _ = (blocks, round_keys);
unimplemented!("AES encrypt requires aarch64 with FEAT_AES");
}
#[cfg(not(target_arch = "aarch64"))]
pub fn sha256_compress(state: &mut [u32; 8], block: &[u8; 64]) {
let _ = (state, block);
unimplemented!("SHA-256 compress requires aarch64 with FEAT_SHA256");
}
#[cfg(not(target_arch = "aarch64"))]
pub fn pmull_64(a: u64, b: u64) -> u128 {
let _ = (a, b);
unimplemented!("PMULL requires aarch64 with FEAT_PMULL");
}
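// A minimal sketch of runtime detection before calling the accelerated paths
// above (assumes `std` is available; `is_aarch64_feature_detected!` lives in
// `std::arch`). Executing the intrinsics on a CPU without the corresponding
// features is undefined behavior, so a dispatcher should check this first.
// The helper name is illustrative, not an existing API.
#[cfg(target_arch = "aarch64")]
pub fn crypto_extensions_available() -> bool {
    std::arch::is_aarch64_feature_detected!("aes")
        && std::arch::is_aarch64_feature_detected!("sha2")
        && std::arch::is_aarch64_feature_detected!("pmull")
}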
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_aes128_encrypt() {
let key: [u8; 16] = [
0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf,
0x4f, 0x3c,
];
let round_keys = aes128_key_expand(&key);
let mut block: [u8; 16] = [
0x32, 0x43, 0xf6, 0xa8, 0x88, 0x5a, 0x30, 0x8d, 0x31, 0x31, 0x98, 0xa2, 0xe0, 0x37,
0x07, 0x34,
];
let expected: [u8; 16] = [
0x39, 0x25, 0x84, 0x1d, 0x02, 0xdc, 0x09, 0xfb, 0xdc, 0x11, 0x85, 0x97, 0x19, 0x6a,
0x0b, 0x32,
];
aes_encrypt_block(&mut block, &round_keys);
assert_eq!(block, expected);
}
#[test]
fn test_aes128_encrypt_blocks() {
let key: [u8; 16] = [
0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf,
0x4f, 0x3c,
];
let round_keys = aes128_key_expand(&key);
let plain: [u8; 16] = [
0x32, 0x43, 0xf6, 0xa8, 0x88, 0x5a, 0x30, 0x8d, 0x31, 0x31, 0x98, 0xa2, 0xe0, 0x37,
0x07, 0x34,
];
let expected: [u8; 16] = [
0x39, 0x25, 0x84, 0x1d, 0x02, 0xdc, 0x09, 0xfb, 0xdc, 0x11, 0x85, 0x97, 0x19, 0x6a,
0x0b, 0x32,
];
let mut blocks = vec![plain; 5];
aes_encrypt_blocks(&mut blocks, &round_keys);
for b in &blocks {
assert_eq!(b, &expected);
}
}
#[test]
fn test_sha256_compress() {
let mut state: [u32; 8] = [
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab,
0x5be0cd19,
];
let mut block = [0u8; 64];
block[0] = 0x80;
sha256_compress(&mut state, &block);
let expected: [u32; 8] = [
0xe3b0c442, 0x98fc1c14, 0x9afbf4c8, 0x996fb924, 0x27ae41e4, 0x649b934c, 0xa495991b,
0x7852b855,
];
assert_eq!(state, expected);
}
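// Checks the illustrative `sha256_digest` helper (defined above) against
// the well-known digest of "abc".
#[test]
fn test_sha256_digest_abc() {
    let digest = sha256_digest(b"abc");
    let expected: [u8; 32] = [
        0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae,
        0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, 0xb4, 0x10, 0xff, 0x61,
        0xf2, 0x00, 0x15, 0xad,
    ];
    assert_eq!(digest, expected);
}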
#[test]
fn test_pmull_64() {
let r = pmull_64(1, 0xDEADBEEF);
assert_eq!(r as u64, 0xDEADBEEF);
let r = pmull_64(0, 0x12345678);
assert_eq!(r, 0);
let a = 0x123456789ABCDEF0u64;
let b = 0xFEDCBA9876543210u64;
assert_eq!(pmull_64(a, b), pmull_64(b, a));
let r = pmull_64(3, 3);
assert_eq!(r as u64, 5);
}
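// Reference AES-128 key expansion (FIPS 197 §5.2), used only by the tests
// to build the 11-entry round-key schedule.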
fn aes128_key_expand(key: &[u8; 16]) -> Vec<[u8; 16]> {
let rcon: [u8; 10] = [0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36];
let mut w = vec![0u32; 44];
for i in 0..4 {
w[i] = u32::from_be_bytes([key[4 * i], key[4 * i + 1], key[4 * i + 2], key[4 * i + 3]]);
}
for i in 4..44 {
let mut tmp = w[i - 1];
if i % 4 == 0 {
tmp = tmp.rotate_left(8);
tmp = sub_word(tmp) ^ ((rcon[i / 4 - 1] as u32) << 24);
}
w[i] = w[i - 4] ^ tmp;
}
let mut round_keys = Vec::with_capacity(11);
for r in 0..11 {
let mut rk = [0u8; 16];
for j in 0..4 {
let bytes = w[4 * r + j].to_be_bytes();
rk[4 * j..4 * j + 4].copy_from_slice(&bytes);
}
round_keys.push(rk);
}
round_keys
}
fn sub_word(w: u32) -> u32 {
let b = w.to_be_bytes();
u32::from_be_bytes([
SBOX[b[0] as usize],
SBOX[b[1] as usize],
SBOX[b[2] as usize],
SBOX[b[3] as usize],
])
}
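// The AES S-box (FIPS 197), used by `sub_word` during key expansion.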
#[rustfmt::skip]
const SBOX: [u8; 256] = [
0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76,
0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0,
0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15,
0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75,
0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84,
0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf,
0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8,
0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2,
0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73,
0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb,
0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79,
0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08,
0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a,
0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e,
0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf,
0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16,
];
}