// Hand-optimized TASM baseline: std.crypto.merkle
// Implements: verify1-4, verify, authenticate_leaf3, authenticate_leaf
//
// Calling convention: arguments pushed left-to-right (first arg deepest).
// Digest = 5 field elements (d0 deepest, d4 on top of the group).
//
// Stack entry for verifyN(leaf, root, leaf_idx):
// st0=leaf_idx st1..5=root(d4..d0) st6..10=leaf(d4..d0)
//
// merkle_step needs: st0=idx, st1..5=digest.
// Root sits between idx and leaf, requiring a block swap.
//
// Block swap pattern (15 swaps) to exchange st1..5 with st6..10:
// For each i in 1..5: swap i / swap (i+5) / swap i
// This exchanges root[i] <-> leaf[i] while preserving idx at st0.
// ---------------------------------------------------------------
// verify1(leaf: Digest, root: Digest, leaf_idx: U32)
// 20 instructions
//
// Swap root/leaf blocks, 1 merkle_step, pop parent_idx,
// assert_vector (computed vs expected root), clean up.
// ---------------------------------------------------------------
__verify1:
swap 1
swap 6
swap 1
swap 2
swap 7
swap 2
swap 3
swap 8
swap 3
swap 4
swap 9
swap 4
swap 5
swap 10
swap 5
merkle_step
pop 1
assert_vector
pop 5
return
// ---------------------------------------------------------------
// verify2(leaf: Digest, root: Digest, leaf_idx: U32)
// 21 instructions
// ---------------------------------------------------------------
__verify2:
swap 1
swap 6
swap 1
swap 2
swap 7
swap 2
swap 3
swap 8
swap 3
swap 4
swap 9
swap 4
swap 5
swap 10
swap 5
merkle_step
merkle_step
pop 1
assert_vector
pop 5
return
// ---------------------------------------------------------------
// verify3(leaf: Digest, root: Digest, leaf_idx: U32)
// 22 instructions
// ---------------------------------------------------------------
__verify3:
swap 1
swap 6
swap 1
swap 2
swap 7
swap 2
swap 3
swap 8
swap 3
swap 4
swap 9
swap 4
swap 5
swap 10
swap 5
merkle_step
merkle_step
merkle_step
pop 1
assert_vector
pop 5
return
// ---------------------------------------------------------------
// verify4(leaf: Digest, root: Digest, leaf_idx: U32)
// 23 instructions
// ---------------------------------------------------------------
__verify4:
swap 1
swap 6
swap 1
swap 2
swap 7
swap 2
swap 3
swap 8
swap 3
swap 4
swap 9
swap 4
swap 5
swap 10
swap 5
merkle_step
merkle_step
merkle_step
merkle_step
pop 1
assert_vector
pop 5
return
// ---------------------------------------------------------------
// verify(leaf: Digest, root: Digest, leaf_idx: U32, depth: Field)
// 34 instructions (20 in __verify + 11 in __verify_loop,
// plus call = 1, pop 2 + assert + pop 5 + return = 5)
//
// Stack entry: st0=depth st1=idx st2..6=root st7..11=leaf
//
// Strategy:
// 1. Swap root(st2..6) with leaf(st7..11) using offset block swap
// Pattern: for i in 2..6: swap i / swap (i+5) / swap i
// 2. Loop: call __verify_loop (depth times merkle_step)
// 3. After loop: pop counter+idx, assert, clean up
// ---------------------------------------------------------------
__verify:
// block swap root and leaf, offset by 2 for depth+idx prefix
swap 2
swap 7
swap 2
swap 3
swap 8
swap 3
swap 4
swap 9
swap 4
swap 5
swap 10
swap 5
swap 6
swap 11
swap 6
// Stack: depth idx leaf4..0 root4..0
call __verify_loop
// Stack: 0 final_idx comp4..0 root4..0
pop 2
assert_vector
pop 5
return
// Loop body: decrement depth, do merkle_step, repeat.
// Swaps depth and idx around each merkle_step since
// merkle_step needs idx at st0 but we keep depth at st0 for the loop.
// 11 instructions (dup, push, eq, skiz, return, push, add, swap, merkle_step, swap, recurse)
__verify_loop:
dup 0
push 0
eq
skiz
return
push -1
add
swap 1
merkle_step
swap 1
recurse
// ---------------------------------------------------------------
// authenticate_leaf3(root: Digest, leaf_idx: U32) -> Digest
// 30 instructions
//
// Stack entry: st0=idx st1..5=root
// Algorithm:
// 1. divine 5 (leaf appears on top of stack)
// 2. Save leaf to RAM[0..4] for return value
// 3. Read leaf back + reverse for correct order
// 4. Rotate idx to top (5 swaps)
// 5. 3x merkle_step + assert against root
// 6. Restore leaf from RAM, reverse, return
// ---------------------------------------------------------------
__authenticate_leaf3:
divine 5
// Stack: l4 l3 l2 l1 l0 idx r4 r3 r2 r1 r0
// Save leaf to RAM[0..4] (l4->0, l3->1, l2->2, l1->3, l0->4)
push 0
write_mem 5
pop 1
// Stack: idx r4 r3 r2 r1 r0
// Read leaf back onto stack above idx+root
push 4
read_mem 5
pop 1
// Stack: l0 l1 l2 l3 l4 idx r4 r3 r2 r1 r0 (reversed)
// Reverse top 5 to restore correct order
swap 4
swap 1
swap 3
swap 1
// Stack: l4 l3 l2 l1 l0 idx r4 r3 r2 r1 r0
// Rotate idx from st5 to st0 (left-rotate positions 0..5)
swap 1
swap 2
swap 3
swap 4
swap 5
// Stack: idx l4 l3 l2 l1 l0 r4 r3 r2 r1 r0
merkle_step
merkle_step
merkle_step
// Stack: pidx c4 c3 c2 c1 c0 r4 r3 r2 r1 r0
pop 1
assert_vector
pop 5
// Stack: (clean)
// Restore leaf from RAM for return
push 4
read_mem 5
pop 1
// Stack: l0 l1 l2 l3 l4 (reversed)
swap 4
swap 1
swap 3
swap 1
// Stack: l4 l3 l2 l1 l0 (correct order)
return
// ---------------------------------------------------------------
// authenticate_leaf(root: Digest, leaf_idx: U32, depth: Field) -> Digest
// 35 instructions (excluding __verify_loop which is shared)
//
// Stack entry: st0=depth st1=idx st2..6=root
// Algorithm:
// 1. divine 5 (leaf on top)
// 2. Save leaf to RAM[0..4]
// 3. Read leaf back + reverse
// 4. Rotate depth+idx from below leaf to above leaf (7 swaps)
// 5. Block swap root and leaf (15 swaps via __verify inline)
// -- actually, restructure to minimize total swaps:
// After save+read+reverse: l4..l0 depth idx r4..r0
// We need: depth idx l4..l0 r4..r0 for the loop
// Rotate top 7 to bring depth(st5) and idx(st6) to positions 0,1.
// 6. call __verify_loop, assert, clean up
// 7. Restore leaf from RAM
// ---------------------------------------------------------------
__authenticate_leaf:
divine 5
// Stack: l4 l3 l2 l1 l0 depth idx r4 r3 r2 r1 r0
// Save leaf to RAM[0..4]
push 0
write_mem 5
pop 1
// Stack: depth idx r4 r3 r2 r1 r0
// Read leaf back
push 4
read_mem 5
pop 1
// Stack: l0 l1 l2 l3 l4 depth idx r4 r3 r2 r1 r0 (reversed)
// Reverse top 5
swap 4
swap 1
swap 3
swap 1
// Stack: l4 l3 l2 l1 l0 depth idx r4 r3 r2 r1 r0
// Need: depth idx l4 l3 l2 l1 l0 r4 r3 r2 r1 r0
// Rotate st5(depth) to st0 -- shifts leaf right by 1:
swap 1
swap 2
swap 3
swap 4
swap 5
// Stack: depth l4 l3 l2 l1 l0 idx r4 r3 r2 r1 r0
// Rotate st6(idx) to st1:
swap 1
swap 6
swap 1
// Stack: depth idx l3 l2 l1 l0 l4 r4 r3 r2 r1 r0
// Fix: l4 is at st6, needs to be at st2. Rotate it:
swap 2
swap 6
swap 2
// Stack: depth idx l4 l2 l1 l0 l3 r4 r3 r2 r1 r0
// Fix: l3 is at st6, needs to be at st3.
swap 3
swap 6
swap 3
// Stack: depth idx l4 l3 l1 l0 l2 r4 r3 r2 r1 r0
// Fix: l2 is at st6, needs to be at st4.
swap 4
swap 6
swap 4
// Stack: depth idx l4 l3 l2 l0 l1 r4 r3 r2 r1 r0
// Fix: l1 is at st6, needs to be at st5.
swap 5
swap 6
swap 5
// Stack: depth idx l4 l3 l2 l1 l0 r4 r3 r2 r1 r0
call __verify_loop
// Stack: 0 final_idx comp4..0 r4..r0
pop 2
assert_vector
pop 5
// Restore leaf from RAM
push 4
read_mem 5
pop 1
swap 4
swap 1
swap 3
swap 1
return