// Hand-optimized TASM baseline: std.crypto.ecdsa
//
// First principles rewrite. Key decisions:
// - read_u256/divine_u256: batch I/O + split chain = 19 insns each
// - read/divine_signature: call subroutines, 3 insns each
// - write_u256: reverse + batch write = 13 insns
// - write_signature: store s to mem, write r, reload s, write s
// - is_zero_u256: OR chain (fewer insns than eq+and chain)
// - lt256_sub: store b at addr 0, borrow chain, 8 limbs
// - valid_range: store all 3 U256s, run 4 checks
// - Memory: contiguous blocks, write_mem 5 + write_mem 3 for batch store
//
// U256 on stack: [l0 l1 l2 l3 l4 l5 l6 l7] l7 on top, l0 deepest.
// Signature: [r.l0..r.l7 s.l0..s.l7] s.l7 on top.
// ===================================================================
// SUBROUTINES
// ===================================================================
// __is_zero (is_zero_u256): [l0..l7] -> [flag]
// Consumes the eight limbs on top of the stack and leaves a single flag.
// The seven `or`s fold all limbs into one word; that word is 0 only when
// every limb was 0, so the final `push 0 / eq` yields 1 for an all-zero
// value and 0 otherwise.
// NOTE(review): `or` does not appear in the published Triton VM instruction
// set (which offers `and` and `xor`) -- confirm that the assembler used for
// this file provides it.
__is_zero:
or
or
or
or
or
or
or
// Compare the folded word with 0 to produce the flag.
push 0
eq
return
// __not (not_flag): [b] -> [!b]
// Logical negation implemented as "b == 0": an input of 0 becomes 1 and
// any other input becomes 0.
__not:
push 0
eq
return
// __store_u256: [l0..l7 addr] -> []
// Writes the eight limbs of a U256 to mem[addr..addr+7] and consumes all
// nine words (addr is on top, l7 directly beneath it, l0 deepest).
//
// write_mem takes its pointer from st0 and its data from st1.., so addr has
// to stay on top of the limbs. An earlier revision executed `swap 8` first,
// which exchanged addr with l0: the limb was then used as the pointer and
// addr was written out as data.
//
// Resulting layout: mem[addr+i] = l(7-i), i.e. l7 at addr and l0 at addr+7.
// This is the layout __load_u256 expects when it reads downward from addr+7.
__store_u256:
// l7..l3 -> mem[addr..addr+4]; pointer advances to addr+5.
write_mem 5
// l2..l0 -> mem[addr+5..addr+7]; pointer advances to addr+8.
write_mem 3
// Drop the advanced pointer.
pop 1
return
// __load_u256: [addr] -> [l0..l7]
// Reads the eight words stored at mem[addr..addr+7] onto the stack and
// consumes addr.
// read_mem walks downward from its pointer: read_mem 5 reads
// [p-4..p] and leaves p-5. To cover addr..addr+7 the pointer is therefore
// first moved to addr+7.
// NOTE(review): assuming Triton VM read_mem ordering, the word at addr+7 is
// pushed first (deepest) and the word at addr ends up on top -- confirm.
__load_u256:
// Point at the last word of the block.
push 7
add
// Words at addr+7..addr+3, pointer becomes addr+2.
read_mem 5
// Words at addr+2..addr, pointer becomes addr-1.
read_mem 3
// Drop the leftover pointer.
pop 1
return
// __lt256 (lt256_sub): [a.l0..a.l7 b.l0..b.l7] -> [flag]
// Unsigned 256-bit comparison: flag = 1 when a < b, otherwise 0.
//
// Method: subtract b from a limb by limb, least-significant limb (l0) first,
// carrying a borrow. For each limb
//     d = 2^32 + a_k - b_k - borrow_in          (0 <= d < 2^33)
// `split` leaves [hi lo] with lo on top; `pop 1` drops lo, and
//     borrow_out = 1 - hi
// The borrow out of limb 7 is the result.
//
// Scratch memory: b is spilled to mem[0..7]. The spill is done inline with
// addr 0 on top of the limbs, so write_mem stores b.l7 at mem[0] and b.l0 at
// mem[7]; limb k of b is therefore fetched from mem[7-k]. (Previously limb k
// was read from mem[k], pairing a.lk with b.l(7-k).)
//
// Limbs are assumed to be valid U32 values.
__lt256:
// Spill b to mem[0..7]: mem[7-k] = b.lk.
push 0
write_mem 5
write_mem 3
pop 1
// Stack: a.l0..a.l7 (l7 on top).
// Reverse a so that a.l0 is on top (least-significant limb first).
swap 7
swap 1
swap 6
swap 1
swap 2
swap 5
swap 2
swap 3
swap 4
swap 3
// Stack: a7 a6 a5 a4 a3 a2 a1 a0 (a0 on top).
// Limb 0 (no incoming borrow): fetch b0 from mem[7].
push 7
read_mem 1
pop 1
// d = 2^32 + a0 - b0
push -1
mul
push 4294967296
add
add
// borrow = 1 - hi(d)
split
pop 1
push -1
mul
push 1
add
// Limb 1: stack is [.. a1 borrow]; bring a1 up, fetch b1 from mem[6].
swap 1
push 6
read_mem 1
pop 1
push -1
mul
push 4294967296
add
add
// Subtract the incoming borrow, then derive the outgoing one.
swap 1
push -1
mul
add
split
pop 1
push -1
mul
push 1
add
// Limb 2: b2 from mem[5].
swap 1
push 5
read_mem 1
pop 1
push -1
mul
push 4294967296
add
add
swap 1
push -1
mul
add
split
pop 1
push -1
mul
push 1
add
// Limb 3: b3 from mem[4].
swap 1
push 4
read_mem 1
pop 1
push -1
mul
push 4294967296
add
add
swap 1
push -1
mul
add
split
pop 1
push -1
mul
push 1
add
// Limb 4: b4 from mem[3].
swap 1
push 3
read_mem 1
pop 1
push -1
mul
push 4294967296
add
add
swap 1
push -1
mul
add
split
pop 1
push -1
mul
push 1
add
// Limb 5: b5 from mem[2].
swap 1
push 2
read_mem 1
pop 1
push -1
mul
push 4294967296
add
add
swap 1
push -1
mul
add
split
pop 1
push -1
mul
push 1
add
// Limb 6: b6 from mem[1].
swap 1
push 1
read_mem 1
pop 1
push -1
mul
push 4294967296
add
add
swap 1
push -1
mul
add
split
pop 1
push -1
mul
push 1
add
// Limb 7 (most significant): b7 from mem[0]. Its borrow is the result.
swap 1
push 0
read_mem 1
pop 1
push -1
mul
push 4294967296
add
add
swap 1
push -1
mul
add
split
pop 1
push -1
mul
push 1
add
return
// __rev8 (reverse_8): [x0..x7] -> [x7..x0]
// Reverses the top eight stack words in place. The reversal is four
// independent exchanges: st3<->st4, st2<->st5, st1<->st6 and st0<->st7.
// An exchange of st_i with st_j (both below the top) is written as
// `swap i / swap j / swap i`, which leaves st0 untouched; the outermost
// pair needs only a single `swap 7`.
__rev8:
// st3 <-> st4
swap 3
swap 4
swap 3
// st2 <-> st5
swap 2
swap 5
swap 2
// st1 <-> st6
swap 1
swap 6
swap 1
// st0 <-> st7
swap 7
return
// __split8 (split_8): [f0..f7] -> [u0..u7]   (f7 / u7 on top)
// Range-checks the top eight words as U32 limbs and leaves them in their
// original order. Execution halts (failed `assert`) if any word is
// 2^32 or larger; for valid input u_k == f_k.
//
// Each limb is brought to the top with `swap k`, checked, and returned to
// its slot with the same `swap k`, so the stack order is never disturbed.
//
// Fixes relative to the earlier version:
//   * `split / pop 1` removed the LOW word and kept the high word, so every
//     valid U32 limb was replaced by 0;
//   * the one-swap-per-limb schedule followed by a full reversal left the
//     limbs as [x0 x7 x6 .. x1] instead of [x0 .. x7].
__split8:
// f7 is already on top.
call __assert_u32
// f6
swap 1
call __assert_u32
swap 1
// f5
swap 2
call __assert_u32
swap 2
// f4
swap 3
call __assert_u32
swap 3
// f3
swap 4
call __assert_u32
swap 4
// f2
swap 5
call __assert_u32
swap 5
// f1
swap 6
call __assert_u32
swap 6
// f0
swap 7
call __assert_u32
swap 7
return
// __assert_u32: [x] -> [x]
// Private helper for __split8. Splits x into [hi lo] (lo on top), asserts
// hi == 0 and keeps lo, which then equals x.
__assert_u32:
split
swap 1
push 0
eq
assert
return
// ===================================================================
// PUBLIC API
// ===================================================================
// read_u256: [] -> [l0..l7]
// Reads eight words from public input in two batches (5 + 3) and hands
// them to __split8, the U32 split / range-check helper, before returning.
std_crypto_ecdsa__read_u256:
read_io 5
read_io 3
call __split8
return
// divine_u256: [] -> [l0..l7]
// Same shape as read_u256, but the eight words come from `divine`
// (5 + 3) instead of public input. They are passed through __split8,
// the U32 split / range-check helper, before returning.
std_crypto_ecdsa__divine_u256:
divine 5
divine 3
call __split8
return
// read_signature: [] -> [r.l0..r.l7 s.l0..s.l7]
// Reads two U256 values from public input by calling read_u256 twice.
// The first value read (r) ends up beneath the second (s).
std_crypto_ecdsa__read_signature:
call std_crypto_ecdsa__read_u256
call std_crypto_ecdsa__read_u256
return
// divine_signature: [] -> [r.l0..r.l7 s.l0..s.l7]
// Obtains two U256 values via divine_u256, called twice.
// The first value obtained (r) ends up beneath the second (s).
std_crypto_ecdsa__divine_signature:
call std_crypto_ecdsa__divine_u256
call std_crypto_ecdsa__divine_u256
return
// valid_range: [r.l0..r.l7 s.l0..s.l7 order.l0..order.l7] -> [flag]
// flag = !is_zero(r) AND !is_zero(s) AND r < order AND s < order
//
// All three operands are first spilled to fixed scratch regions so that
// each check can reload exactly the values it needs:
//     order -> mem[100..107], s -> mem[110..117], r -> mem[120..127]
// __lt256 additionally uses mem[0..7] as its own scratch area.
// The scratch addresses are hard-coded, so any caller data kept in those
// cells is overwritten.
std_crypto_ecdsa__valid_range:
// Store order at mem[100..107] (order is on top of the stack).
push 100
call __store_u256
// Store s at mem[110..117]
push 110
call __store_u256
// Store r at mem[120..127]
push 120
call __store_u256
// The three operands are now consumed; only flags are kept on the stack.
// Check 1: r != 0
push 120
call __load_u256
call __is_zero
call __not
// Check 2: s != 0, combined with check 1
push 110
call __load_u256
call __is_zero
call __not
and
// Check 3: r < order (r loaded first so it is the left operand of __lt256)
push 120
call __load_u256
push 100
call __load_u256
call __lt256
and
// Check 4: s < order
push 110
call __load_u256
push 100
call __load_u256
call __lt256
and
return
// is_low_s: [r.l0..r.l7 s.l0..s.l7 order.l0..order.l7] -> [flag]
// flag = s < order; r is discarded without being inspected.
// Scratch memory: order -> mem[100..107], s -> mem[110..117]; __lt256 also
// uses mem[0..7].
// NOTE(review): the name suggests a "low-S" test (s not above half the
// group order), but the code only compares s against the third operand as
// given -- presumably callers pass the bound they want; confirm.
std_crypto_ecdsa__is_low_s:
// Store order at mem[100..107]
push 100
call __store_u256
// s is now on top. Store at mem[110..117]
push 110
call __store_u256
// Discard r (8 limbs, popped as 5 + 3)
pop 5
pop 3
// Reload s, then order, so that __lt256 computes s < order
push 110
call __load_u256
push 100
call __load_u256
call __lt256
return
// write_u256: [l0..l7] -> []  (writes the eight limbs to public output)
// The limbs are reversed first so that l0 sits on top of the stack before
// the two batched writes (5 + 3) consume all eight words.
std_crypto_ecdsa__write_u256:
call __rev8
write_io 5
write_io 3
return
// write_signature: [r.l0..r.l7 s.l0..s.l7] -> []  (writes r, then s)
// s lies on top of r, but r has to be emitted first. s is therefore parked
// in scratch memory at mem[130..137], r is written, and s is reloaded and
// written afterwards. Any caller data in mem[130..137] is overwritten.
std_crypto_ecdsa__write_signature:
// Park s at mem[130..137]; r is now on top of the stack.
push 130
call __store_u256
// Write r
call std_crypto_ecdsa__write_u256
// Reload and write s
push 130
call __load_u256
call std_crypto_ecdsa__write_u256
return