module std.compiler.lexer
// Self-hosted Trident lexer.
//
// Tokenizes source bytes stored in RAM into a token array in RAM.
// When compiled and proven on Triton VM, produces a STARK proof
// that the tokenization was performed correctly.
//
// Memory layout (all addresses passed as parameters):
// src_base .. src_base + src_len Source bytes (one Field per byte, [0-255])
// tok_base .. tok_base + n*4 Token output (kind, start, end, int_val)
// err_base .. err_base + n*3 Error output (code, start, end)
// state_base .. state_base + 8 Lexer state
//
// State layout:
// +0 = pos Current byte offset
// +1 = tok_count Tokens emitted
// +2 = err_count Errors recorded
// +3 = src_len Source length (immutable)
// +4 = src_base Source base address (immutable)
// +5 = tok_base Token base address (immutable)
// +6 = err_base Error base address (immutable)
// +7 = scratch Done flag / mode / accumulator
use vm.core.field
use vm.core.convert
use vm.io.mem
// =========================================================================
// Token kind constants (1-56)
// =========================================================================
// Keywords (1-22)
// Keyword token kinds. Each constant is returned by classify_keyword when
// the scanned identifier spells the keyword noted beside it.
pub fn TK_PROGRAM() -> Field { 1 } // `program`
pub fn TK_MODULE() -> Field { 2 } // `module`
pub fn TK_USE() -> Field { 3 } // `use`
pub fn TK_FN() -> Field { 4 } // `fn`
pub fn TK_PUB() -> Field { 5 } // `pub`
pub fn TK_SEC() -> Field { 6 } // `sec`
pub fn TK_LET() -> Field { 7 } // `let`
pub fn TK_MUT() -> Field { 8 } // `mut`
pub fn TK_CONST() -> Field { 9 } // `const`
pub fn TK_STRUCT() -> Field { 10 } // `struct`
pub fn TK_IF() -> Field { 11 } // `if`
pub fn TK_ELSE() -> Field { 12 } // `else`
pub fn TK_FOR() -> Field { 13 } // `for`
pub fn TK_IN() -> Field { 14 } // `in`
pub fn TK_BOUNDED() -> Field { 15 } // `bounded`
pub fn TK_RETURN() -> Field { 16 } // `return`
pub fn TK_TRUE() -> Field { 17 } // `true`
pub fn TK_FALSE() -> Field { 18 } // `false`
pub fn TK_EVENT() -> Field { 19 } // `event`
pub fn TK_REVEAL() -> Field { 20 } // `reveal`
pub fn TK_SEAL() -> Field { 21 } // `seal`
pub fn TK_MATCH() -> Field { 22 } // `match`
// Type keywords (23-27)
// Type-name token kinds, recognized by classify_keyword (case-sensitive).
pub fn TK_FIELD_TY() -> Field { 23 } // `Field`
pub fn TK_XFIELD_TY() -> Field { 24 } // `XField`
pub fn TK_BOOL_TY() -> Field { 25 } // `Bool`
pub fn TK_U32_TY() -> Field { 26 } // `U32`
pub fn TK_DIGEST_TY() -> Field { 27 } // `Digest`
// Symbols (28-52)
// Punctuation and operator token kinds. All are emitted by scan_symbol,
// except TK_UNDERSCORE, which classify_keyword returns for a lone `_`.
pub fn TK_LPAREN() -> Field { 28 } // `(`
pub fn TK_RPAREN() -> Field { 29 } // `)`
pub fn TK_LBRACE() -> Field { 30 } // `{`
pub fn TK_RBRACE() -> Field { 31 } // `}`
pub fn TK_LBRACKET() -> Field { 32 } // `[`
pub fn TK_RBRACKET() -> Field { 33 } // `]`
pub fn TK_COMMA() -> Field { 34 } // `,`
pub fn TK_COLON() -> Field { 35 } // `:`
pub fn TK_SEMICOLON() -> Field { 36 } // `;`
pub fn TK_DOT() -> Field { 37 } // `.`
pub fn TK_DOTDOT() -> Field { 38 } // `..`
pub fn TK_ARROW() -> Field { 39 } // `->`
pub fn TK_EQ() -> Field { 40 } // `=`
pub fn TK_FAT_ARROW() -> Field { 41 } // `=>`
pub fn TK_EQEQ() -> Field { 42 } // `==`
pub fn TK_PLUS() -> Field { 43 } // `+`
pub fn TK_STAR() -> Field { 44 } // `*`
pub fn TK_STARDOT() -> Field { 45 } // `*.`
pub fn TK_LT() -> Field { 46 } // `<`
pub fn TK_GT() -> Field { 47 } // `>`
pub fn TK_AMP() -> Field { 48 } // `&`
pub fn TK_CARET() -> Field { 49 } // `^`
pub fn TK_SLASH_PERCENT() -> Field { 50 } // `/%`
pub fn TK_HASH() -> Field { 51 } // `#`
pub fn TK_UNDERSCORE() -> Field { 52 } // `_` on its own
// Literals and special (53-56)
// Literal and special token kinds.
pub fn TK_INTEGER() -> Field { 53 } // decimal digit run; int_val carries the parsed value
pub fn TK_IDENT() -> Field { 54 } // identifier that is not a keyword
pub fn TK_ASM_BLOCK() -> Field { 55 } // `asm` block; span is the body, int_val the stack effect
pub fn TK_EOF() -> Field { 56 } // end of input; emitted once by lex
// Error codes (100+)
// Error codes written as the first field of each error record.
fn ERR_UNEXPECTED_CHAR() -> Field { 100 } // byte that starts no known token
fn ERR_NO_SUBTRACTION() -> Field { 101 } // `-` not followed by `>`
fn ERR_NO_DIVISION() -> Field { 102 } // `/` not followed by `%`
fn ERR_INTEGER_TOO_LARGE() -> Field { 103 } // presumably an oversized integer literal (per name)
fn ERR_UNTERMINATED_ASM() -> Field { 104 } // end of input before the asm body's closing `}`
fn ERR_ASM_MISSING_PAREN() -> Field { 105 } // asm annotation not closed by `)`
fn ERR_ASM_MISSING_BRACE() -> Field { 106 } // no `{` where the asm body should start
// =========================================================================
// Byte classification helpers
// =========================================================================
// True when `ch` is one of the four ASCII whitespace bytes the lexer skips:
// tab (9), line feed (10), carriage return (13) or space (32).
fn is_whitespace(ch: Field) -> Bool {
    if ch == 9 {
        true
    } else if ch == 10 {
        true
    } else if ch == 13 {
        true
    } else {
        ch == 32
    }
}
// True when `ch` is an ASCII letter: 'A'..'Z' (65..90) or 'a'..'z' (97..122).
// Only `<` is used on U32, so each range is tested as "not below the lower
// bound, below the exclusive upper bound", walking the boundaries in
// ascending order.
fn is_alpha(ch: Field) -> Bool {
    let code: U32 = convert.as_u32(ch)
    let upper_lo: U32 = convert.as_u32(65)
    let upper_hi: U32 = convert.as_u32(91)
    let lower_lo: U32 = convert.as_u32(97)
    let lower_hi: U32 = convert.as_u32(123)
    if code < upper_lo {
        // Below 'A'.
        false
    } else if code < upper_hi {
        // 'A'..'Z'.
        true
    } else if code < lower_lo {
        // Between 'Z' and 'a'.
        false
    } else {
        // 'a'..'z' when below 123, otherwise above 'z'.
        code < lower_hi
    }
}
// True when `ch` is an ASCII decimal digit '0'..'9' (48..57).
fn is_digit(ch: Field) -> Bool {
    let code: U32 = convert.as_u32(ch)
    let zero: U32 = convert.as_u32(48)
    let past_nine: U32 = convert.as_u32(58)
    if code < zero {
        false
    } else {
        code < past_nine
    }
}
// True when `ch` may begin an identifier: '_' (95) or an ASCII letter.
fn is_ident_start(ch: Field) -> Bool {
    if ch == 95 {
        true
    } else {
        is_alpha(ch)
    }
}
// True when `ch` may appear after the first byte of an identifier:
// '_' (95), an ASCII digit, or an ASCII letter.
fn is_ident_continue(ch: Field) -> Bool {
    if ch == 95 {
        true
    } else if is_digit(ch) {
        true
    } else {
        is_alpha(ch)
    }
}
// =========================================================================
// State accessors
// =========================================================================
// Reads the current byte offset (state slot +0) of the lexer at `sb`.
fn get_pos(sb: Field) -> Field {
    let pos: Field = mem.read(sb)
    pos
}
// Stores `p` as the current byte offset (state slot +0).
fn set_pos(sb: Field, p: Field) {
    // The position lives at the state base itself.
    let pos_addr: Field = sb
    mem.write(pos_addr, p)
}
// Reads the source byte at `offset`, relative to the source base address
// kept in state slot +4. No bounds check is performed here.
fn src_byte(sb: Field, offset: Field) -> Field {
    let src_start: Field = mem.read(sb + 4)
    let addr: Field = src_start + offset
    mem.read(addr)
}
// Reads the source length (state slot +3).
fn src_len(sb: Field) -> Field {
    let total: Field = mem.read(sb + 3)
    total
}
// True while the current position is strictly before the end of the source,
// i.e. there is still a byte to read at `pos`.
fn pos_lt_len(sb: Field) -> Bool {
    let cursor: U32 = convert.as_u32(mem.read(sb))
    let limit: U32 = convert.as_u32(mem.read(sb + 3))
    cursor < limit
}
// Reads the scratch cell (state slot +7), which the scanners use as a
// loop flag, mode, or accumulator.
fn get_scratch(sb: Field) -> Field {
    let value: Field = mem.read(sb + 7)
    value
}
// Stores `v` in the scratch cell (state slot +7).
fn set_scratch(sb: Field, v: Field) {
    let scratch_addr: Field = sb + 7
    mem.write(scratch_addr, v)
}
// =========================================================================
// Token and error output
// =========================================================================
// Appends one 4-field token record (kind, start, end, int_val) to the token
// array and increments the token counter in state slot +1.
// The record index is the current counter value; the array base comes from
// state slot +5. No capacity check is performed.
fn emit_token(sb: Field, kind: Field, start: Field, end: Field, int_val: Field) {
    let emitted: Field = mem.read(sb + 1)
    let tokens: Field = mem.read(sb + 5)
    // Address of the first field of the new record.
    let slot: Field = tokens + emitted * 4
    mem.write(slot, kind)
    mem.write(slot + 1, start)
    mem.write(slot + 2, end)
    mem.write(slot + 3, int_val)
    mem.write(sb + 1, emitted + 1)
}
// Appends one 3-field error record (code, start, end) to the error array and
// increments the error counter in state slot +2.
// The array base comes from state slot +6. No capacity check is performed.
fn emit_error(sb: Field, code: Field, start: Field, end: Field) {
    let recorded: Field = mem.read(sb + 2)
    let errors: Field = mem.read(sb + 6)
    // Address of the first field of the new record.
    let slot: Field = errors + recorded * 3
    mem.write(slot, code)
    mem.write(slot + 1, start)
    mem.write(slot + 2, end)
    mem.write(sb + 2, recorded + 1)
}
// =========================================================================
// Integer parsing: accumulate val = val * 10 + (byte - '0')
// =========================================================================
// Converts the decimal digits in source range [start, end) to a Field value.
// The running total is kept in the scratch cell, which is left at 0 on
// return. The loop is bounded at 20 iterations. Bytes are not validated:
// each contributes (byte - 48), so callers must pass a digit-only range.
fn parse_integer(sb: Field, start: Field, end: Field) -> Field {
    let digit_count: Field = end + field.neg(start)
    set_scratch(sb, 0)
    for k in 0..digit_count bounded 20 {
        let at: Field = start + convert.as_field(k)
        let ch: Field = src_byte(sb, at)
        let digit: Field = ch + field.neg(48)
        let total: Field = get_scratch(sb)
        set_scratch(sb, total * 10 + digit)
    }
    let value: Field = get_scratch(sb)
    set_scratch(sb, 0)
    value
}
// =========================================================================
// Keyword recognition: length-first trie
// =========================================================================
// Decides whether the `len` source bytes starting at offset `start` spell a
// keyword, a built-in type name, or a lone underscore.
//
// Parameters:
//   sb    - lexer state base (used to reach the source bytes)
//   start - offset of the first byte of the identifier
//   len   - number of bytes in the identifier
// Returns the matching TK_* constant, or 0 when the bytes are an ordinary
// identifier. Lengths above 7 always return 0.
//
// Matching is case-sensitive and compares raw ASCII codes. The byte at
// `start` is read unconditionally, so callers must pass len >= 1.
fn classify_keyword(sb: Field, start: Field, len: Field) -> Field {
// Returns TK_* constant if keyword, 0 if identifier.
// Branch on length, then first byte, then remaining bytes.
let b0: Field = src_byte(sb, start)
if len == 1 {
// 95 = '_'
if b0 == 95 { TK_UNDERSCORE() } else { 0 }
} else if len == 2 {
let b1: Field = src_byte(sb, start + 1)
if b0 == 102 {
// f -> fn
if b1 == 110 { TK_FN() } else { 0 }
} else if b0 == 105 {
// i -> if / in
if b1 == 102 { TK_IF() }
else if b1 == 110 { TK_IN() }
else { 0 }
} else { 0 }
} else if len == 3 {
let b1: Field = src_byte(sb, start + 1)
let b2: Field = src_byte(sb, start + 2)
if b0 == 102 {
// f -> for
if b1 == 111 { if b2 == 114 { TK_FOR() } else { 0 } } else { 0 }
} else if b0 == 108 {
// l -> let
if b1 == 101 { if b2 == 116 { TK_LET() } else { 0 } } else { 0 }
} else if b0 == 109 {
// m -> mut
if b1 == 117 { if b2 == 116 { TK_MUT() } else { 0 } } else { 0 }
} else if b0 == 112 {
// p -> pub
if b1 == 117 { if b2 == 98 { TK_PUB() } else { 0 } } else { 0 }
} else if b0 == 115 {
// s -> sec
if b1 == 101 { if b2 == 99 { TK_SEC() } else { 0 } } else { 0 }
} else if b0 == 117 {
// u -> use
if b1 == 115 { if b2 == 101 { TK_USE() } else { 0 } } else { 0 }
} else if b0 == 85 {
// U -> U32
if b1 == 51 { if b2 == 50 { TK_U32_TY() } else { 0 } } else { 0 }
} else { 0 }
} else if len == 4 {
let b1: Field = src_byte(sb, start + 1)
let b2: Field = src_byte(sb, start + 2)
let b3: Field = src_byte(sb, start + 3)
if b0 == 101 {
// e -> else: 101,108,115,101
if b1 == 108 { if b2 == 115 { if b3 == 101 { TK_ELSE() } else { 0 } } else { 0 } } else { 0 }
} else if b0 == 116 {
// t -> true: 116,114,117,101
if b1 == 114 { if b2 == 117 { if b3 == 101 { TK_TRUE() } else { 0 } } else { 0 } } else { 0 }
} else if b0 == 115 {
// s -> seal: 115,101,97,108
if b1 == 101 { if b2 == 97 { if b3 == 108 { TK_SEAL() } else { 0 } } else { 0 } } else { 0 }
} else if b0 == 66 {
// B -> Bool: 66,111,111,108
if b1 == 111 { if b2 == 111 { if b3 == 108 { TK_BOOL_TY() } else { 0 } } else { 0 } } else { 0 }
} else { 0 }
} else if len == 5 {
let b1: Field = src_byte(sb, start + 1)
let b2: Field = src_byte(sb, start + 2)
let b3: Field = src_byte(sb, start + 3)
let b4: Field = src_byte(sb, start + 4)
if b0 == 99 {
// c -> const: 99,111,110,115,116
if b1 == 111 { if b2 == 110 { if b3 == 115 { if b4 == 116 { TK_CONST() } else { 0 } } else { 0 } } else { 0 } } else { 0 }
} else if b0 == 101 {
// e -> event: 101,118,101,110,116
if b1 == 118 { if b2 == 101 { if b3 == 110 { if b4 == 116 { TK_EVENT() } else { 0 } } else { 0 } } else { 0 } } else { 0 }
} else if b0 == 102 {
// f -> false: 102,97,108,115,101
if b1 == 97 { if b2 == 108 { if b3 == 115 { if b4 == 101 { TK_FALSE() } else { 0 } } else { 0 } } else { 0 } } else { 0 }
} else if b0 == 109 {
// m -> match: 109,97,116,99,104
if b1 == 97 { if b2 == 116 { if b3 == 99 { if b4 == 104 { TK_MATCH() } else { 0 } } else { 0 } } else { 0 } } else { 0 }
} else if b0 == 70 {
// F -> Field: 70,105,101,108,100
if b1 == 105 { if b2 == 101 { if b3 == 108 { if b4 == 100 { TK_FIELD_TY() } else { 0 } } else { 0 } } else { 0 } } else { 0 }
} else { 0 }
} else if len == 6 {
let b1: Field = src_byte(sb, start + 1)
let b2: Field = src_byte(sb, start + 2)
let b3: Field = src_byte(sb, start + 3)
let b4: Field = src_byte(sb, start + 4)
let b5: Field = src_byte(sb, start + 5)
if b0 == 109 {
// m -> module: 109,111,100,117,108,101
if b1 == 111 { if b2 == 100 { if b3 == 117 { if b4 == 108 { if b5 == 101 { TK_MODULE() } else { 0 } } else { 0 } } else { 0 } } else { 0 } } else { 0 }
} else if b0 == 114 {
// r -> return or reveal: both start r,e
if b1 == 101 {
if b2 == 116 {
// ret -> return: 114,101,116,117,114,110
if b3 == 117 { if b4 == 114 { if b5 == 110 { TK_RETURN() } else { 0 } } else { 0 } } else { 0 }
} else if b2 == 118 {
// rev -> reveal: 114,101,118,101,97,108
if b3 == 101 { if b4 == 97 { if b5 == 108 { TK_REVEAL() } else { 0 } } else { 0 } } else { 0 }
} else { 0 }
} else { 0 }
} else if b0 == 115 {
// s -> struct: 115,116,114,117,99,116
if b1 == 116 { if b2 == 114 { if b3 == 117 { if b4 == 99 { if b5 == 116 { TK_STRUCT() } else { 0 } } else { 0 } } else { 0 } } else { 0 } } else { 0 }
} else if b0 == 88 {
// X -> XField: 88,70,105,101,108,100
if b1 == 70 { if b2 == 105 { if b3 == 101 { if b4 == 108 { if b5 == 100 { TK_XFIELD_TY() } else { 0 } } else { 0 } } else { 0 } } else { 0 } } else { 0 }
} else if b0 == 68 {
// D -> Digest: 68,105,103,101,115,116
if b1 == 105 { if b2 == 103 { if b3 == 101 { if b4 == 115 { if b5 == 116 { TK_DIGEST_TY() } else { 0 } } else { 0 } } else { 0 } } else { 0 } } else { 0 }
} else { 0 }
} else if len == 7 {
let b1: Field = src_byte(sb, start + 1)
let b2: Field = src_byte(sb, start + 2)
let b3: Field = src_byte(sb, start + 3)
let b4: Field = src_byte(sb, start + 4)
let b5: Field = src_byte(sb, start + 5)
let b6: Field = src_byte(sb, start + 6)
if b0 == 112 {
// p -> program: 112,114,111,103,114,97,109
if b1 == 114 { if b2 == 111 { if b3 == 103 { if b4 == 114 { if b5 == 97 { if b6 == 109 { TK_PROGRAM() } else { 0 } } else { 0 } } else { 0 } } else { 0 } } else { 0 } } else { 0 }
} else if b0 == 98 {
// b -> bounded: 98,111,117,110,100,101,100
if b1 == 111 { if b2 == 117 { if b3 == 110 { if b4 == 100 { if b5 == 101 { if b6 == 100 { TK_BOUNDED() } else { 0 } } else { 0 } } else { 0 } } else { 0 } } else { 0 } } else { 0 }
} else { 0 }
// No keyword is longer than 7 bytes.
} else { 0 }
}
// =========================================================================
// Whitespace and comment skipper (single-loop 3-state machine)
// Mode: 0 = scanning, 1 = inside comment, 2 = done
// =========================================================================
// Advances the position past any run of whitespace and `//` line comments.
//
// The scratch cell holds the machine state while the loop runs:
//   0 = between tokens, 1 = inside a line comment, 2 = finished.
// Each iteration handles at most one byte, and the loop is bounded at 8192
// iterations. Scratch is reset to 0 before returning. A '/' that is not
// followed by a second '/' is left unconsumed for the symbol scanner.
fn skip_ws_and_comments(sb: Field) {
    set_scratch(sb, 0)
    for _step in 0..8192 bounded 8192 {
        let state: Field = get_scratch(sb)
        if state == 0 {
            // peek_byte yields 256 at end of input, which is neither
            // whitespace nor '/', so end of input falls through to "finished".
            let byte: Field = peek_byte(sb)
            if is_whitespace(byte) {
                set_pos(sb, get_pos(sb) + 1)
            } else if byte == 47 {
                // '/': a comment only if another '/' follows in range.
                let at: Field = get_pos(sb)
                let next: Field = at + 1
                let next_u: U32 = convert.as_u32(next)
                let limit: U32 = convert.as_u32(src_len(sb))
                if next_u < limit {
                    if src_byte(sb, next) == 47 {
                        set_pos(sb, at + 2)
                        set_scratch(sb, 1)
                    } else {
                        set_scratch(sb, 2)
                    }
                } else {
                    set_scratch(sb, 2)
                }
            } else {
                set_scratch(sb, 2)
            }
        } else if state == 1 {
            // Inside a comment: consume bytes up to and including the newline.
            if pos_lt_len(sb) {
                let at: Field = get_pos(sb)
                let byte: Field = src_byte(sb, at)
                set_pos(sb, at + 1)
                if byte == 10 {
                    set_scratch(sb, 0)
                }
            } else {
                set_scratch(sb, 2)
            }
        }
        // state == 2: finished, remaining iterations do nothing.
    }
    set_scratch(sb, 0)
}
// =========================================================================
// Identifier / keyword scanner
// =========================================================================
// Scans an identifier starting at the current position and emits one token.
//
// The run of identifier bytes is consumed first (loop bounded at 8192, with
// the scratch cell as the done flag). Then:
//   - the exact word "asm" hands over to scan_asm_block, which emits the token;
//   - a keyword, type name, or lone '_' emits its TK_* kind;
//   - anything else emits TK_IDENT.
// Scratch is 0 when the classification step starts.
fn scan_ident_or_keyword(sb: Field) {
    let first: Field = get_pos(sb)
    set_scratch(sb, 0)
    for _step in 0..8192 bounded 8192 {
        if get_scratch(sb) == 0 {
            // peek_byte yields 256 at end of input, which is not an
            // identifier byte, so the scan stops there too.
            if is_ident_continue(peek_byte(sb)) {
                set_pos(sb, get_pos(sb) + 1)
            } else {
                set_scratch(sb, 1)
            }
        }
    }
    set_scratch(sb, 0)
    let past: Field = get_pos(sb)
    let width: Field = past + field.neg(first)
    // "asm" is 97,115,109.
    if width == 3 {
        if src_byte(sb, first) == 97 {
            if src_byte(sb, first + 1) == 115 {
                if src_byte(sb, first + 2) == 109 {
                    scan_asm_block(sb, first)
                    return
                }
            }
        }
    }
    let kind: Field = classify_keyword(sb, first, width)
    if kind == 0 {
        emit_token(sb, TK_IDENT(), first, past, 0)
    } else {
        emit_token(sb, kind, first, past, 0)
    }
}
// =========================================================================
// Number scanner
// =========================================================================
// Scans a decimal integer literal starting at the current position and
// emits exactly one TK_INTEGER token covering the whole digit run.
//
// Up to 20 digits are accepted and converted by parse_integer. If the run
// is longer than 20 digits, the remaining digits are consumed as well (so
// the literal is not split into several integer tokens), an
// ERR_INTEGER_TOO_LARGE error is recorded for the full span, and the token
// is emitted with int_val 0.
//
// The scratch cell is used as the done flag and is 0 on return.
// NOTE(review): a literal of 20 digits or fewer whose value exceeds the
// Field range is not detected here; confirm whether a range check is needed.
fn scan_number(sb: Field) {
    let start: Field = get_pos(sb)
    // Advance over at most 20 digits.
    set_scratch(sb, 0)
    for _i in 0..20 bounded 20 {
        let done: Field = get_scratch(sb)
        if done == 0 {
            if pos_lt_len(sb) {
                let p: Field = get_pos(sb)
                let ch: Field = src_byte(sb, p)
                if is_digit(ch) {
                    set_pos(sb, p + 1)
                } else {
                    set_scratch(sb, 1)
                }
            } else {
                set_scratch(sb, 1)
            }
        }
    }
    set_scratch(sb, 0)
    // peek_byte yields 256 at end of input, which is not a digit.
    if is_digit(peek_byte(sb)) {
        // A 21st digit follows: the literal is too long. Swallow the rest
        // of the digit run so it is reported once, as a single token.
        for _j in 0..8192 bounded 8192 {
            let done: Field = get_scratch(sb)
            if done == 0 {
                if is_digit(peek_byte(sb)) {
                    set_pos(sb, get_pos(sb) + 1)
                } else {
                    set_scratch(sb, 1)
                }
            }
        }
        set_scratch(sb, 0)
        let end: Field = get_pos(sb)
        emit_error(sb, ERR_INTEGER_TOO_LARGE(), start, end)
        // parse_integer is bounded at 20 digits, so it is not called here.
        emit_token(sb, TK_INTEGER(), start, end, 0)
    } else {
        let end: Field = get_pos(sb)
        let val: Field = parse_integer(sb, start, end)
        emit_token(sb, TK_INTEGER(), start, end, val)
    }
}
// =========================================================================
// Symbol scanner — single and two-char operators
// =========================================================================
// Emits the token (or error) for a punctuation byte.
//
// Parameters:
//   sb    - lexer state base
//   ch    - the byte at `start`, already consumed by the caller
//           (the position is `start + 1` on entry)
//   start - offset of `ch` in the source
//
// Two-byte operators look at the next byte with peek_byte and, on a match,
// move the position to `start + 2`. A '-' not followed by '>' records
// ERR_NO_SUBTRACTION; a '/' not followed by '%' records ERR_NO_DIVISION;
// any other unknown byte records ERR_UNEXPECTED_CHAR. Error paths emit no
// token.
fn scan_symbol(sb: Field, ch: Field, start: Field) {
    let one: Field = start + 1
    let two: Field = start + 2
    // Operators that may extend to two bytes.
    if ch == 46 {
        // '.' or '..'
        if peek_byte(sb) == 46 {
            set_pos(sb, two)
            emit_token(sb, TK_DOTDOT(), start, two, 0)
        } else {
            emit_token(sb, TK_DOT(), start, one, 0)
        }
    } else if ch == 45 {
        // '-' is only valid as the first byte of '->'
        if peek_byte(sb) == 62 {
            set_pos(sb, two)
            emit_token(sb, TK_ARROW(), start, two, 0)
        } else {
            emit_error(sb, ERR_NO_SUBTRACTION(), start, one)
        }
    } else if ch == 61 {
        // '=', '==' or '=>'
        let follow: Field = peek_byte(sb)
        if follow == 61 {
            set_pos(sb, two)
            emit_token(sb, TK_EQEQ(), start, two, 0)
        } else if follow == 62 {
            set_pos(sb, two)
            emit_token(sb, TK_FAT_ARROW(), start, two, 0)
        } else {
            emit_token(sb, TK_EQ(), start, one, 0)
        }
    } else if ch == 42 {
        // '*' or '*.'
        if peek_byte(sb) == 46 {
            set_pos(sb, two)
            emit_token(sb, TK_STARDOT(), start, two, 0)
        } else {
            emit_token(sb, TK_STAR(), start, one, 0)
        }
    } else if ch == 47 {
        // '/' is only valid as the first byte of '/%'
        if peek_byte(sb) == 37 {
            set_pos(sb, two)
            emit_token(sb, TK_SLASH_PERCENT(), start, two, 0)
        } else {
            emit_error(sb, ERR_NO_DIVISION(), start, one)
        }
    }
    // Single-byte symbols.
    else if ch == 40 { emit_token(sb, TK_LPAREN(), start, one, 0) }
    else if ch == 41 { emit_token(sb, TK_RPAREN(), start, one, 0) }
    else if ch == 123 { emit_token(sb, TK_LBRACE(), start, one, 0) }
    else if ch == 125 { emit_token(sb, TK_RBRACE(), start, one, 0) }
    else if ch == 91 { emit_token(sb, TK_LBRACKET(), start, one, 0) }
    else if ch == 93 { emit_token(sb, TK_RBRACKET(), start, one, 0) }
    else if ch == 44 { emit_token(sb, TK_COMMA(), start, one, 0) }
    else if ch == 58 { emit_token(sb, TK_COLON(), start, one, 0) }
    else if ch == 59 { emit_token(sb, TK_SEMICOLON(), start, one, 0) }
    else if ch == 43 { emit_token(sb, TK_PLUS(), start, one, 0) }
    else if ch == 60 { emit_token(sb, TK_LT(), start, one, 0) }
    else if ch == 62 { emit_token(sb, TK_GT(), start, one, 0) }
    else if ch == 38 { emit_token(sb, TK_AMP(), start, one, 0) }
    else if ch == 94 { emit_token(sb, TK_CARET(), start, one, 0) }
    else if ch == 35 { emit_token(sb, TK_HASH(), start, one, 0) }
    else {
        // No token starts with this byte.
        emit_error(sb, ERR_UNEXPECTED_CHAR(), start, one)
    }
}
// Returns the byte at the current position without consuming it, or 256
// (a value no source byte can take) when the position is at or past the end
// of the source.
fn peek_byte(sb: Field) -> Field {
    if pos_lt_len(sb) {
        src_byte(sb, get_pos(sb))
    } else {
        256
    }
}
// =========================================================================
// ASM block scanner
// =========================================================================
// Skips a run of whitespace bytes at the current position (comments are not
// skipped). The loop is bounded at 256 iterations. The scratch cell is
// overwritten as the done flag and is 0 on return, so any value the caller
// kept there is lost.
fn skip_ws_inline(sb: Field) {
    set_scratch(sb, 0)
    for _step in 0..256 bounded 256 {
        if get_scratch(sb) == 0 {
            // peek_byte yields 256 at end of input, which is not whitespace.
            if is_whitespace(peek_byte(sb)) {
                set_pos(sb, get_pos(sb) + 1)
            } else {
                set_scratch(sb, 1)
            }
        }
    }
    set_scratch(sb, 0)
}
// Parses a stack-effect number of the form +N, -N or N at the current
// position and returns it as a Field; a negative effect is returned as
// field.neg(N). At most 10 digits are consumed. With no digits present the
// result is 0. The scratch cell is used as the done flag and is 0 on return.
fn scan_effect_number(sb: Field) -> Field {
    let mut negative: Bool = false
    // Optional sign. peek_byte yields 256 at end of input, matching neither.
    let sign: Field = peek_byte(sb)
    if sign == 45 {
        negative = true
        set_pos(sb, get_pos(sb) + 1)
    } else if sign == 43 {
        set_pos(sb, get_pos(sb) + 1)
    }
    // Digit run.
    let digits_from: Field = get_pos(sb)
    set_scratch(sb, 0)
    for _step in 0..10 bounded 10 {
        if get_scratch(sb) == 0 {
            if is_digit(peek_byte(sb)) {
                set_pos(sb, get_pos(sb) + 1)
            } else {
                set_scratch(sb, 1)
            }
        }
    }
    set_scratch(sb, 0)
    let digits_to: Field = get_pos(sb)
    let magnitude: Field = parse_integer(sb, digits_from, digits_to)
    if negative { field.neg(magnitude) } else { magnitude }
}
// Scans the remainder of an `asm` block and emits one TK_ASM_BLOCK token.
//
// Parameters:
//   sb       - lexer state base; the position is just past the "asm" word
//   kw_start - offset of the "asm" word, used as the start of error spans
//
// Accepted shape:  asm [ "(" [tag] ["," effect] | effect ")" ] "{" body "}"
//   - A tag is an identifier starting with a letter; its span is skipped
//     and not recorded in the token.
//   - The effect is parsed by scan_effect_number and stored in the token's
//     int_val (0 when absent).
//   - The token span is the body between the outer braces, excluding them.
//     Nested braces are tracked by depth, kept in the scratch cell.
//
// Errors recorded:
//   ERR_ASM_MISSING_PAREN - annotation not followed by ')'
//   ERR_ASM_MISSING_BRACE - no '{' where the body should start
//   ERR_UNTERMINATED_ASM  - end of input before the matching '}'
// A TK_ASM_BLOCK token is emitted even on the error paths.
//
// When the body is unterminated, the position is left at the end of the
// source rather than one past it, so the following TK_EOF span stays in
// range. The scratch cell is 0 on return.
fn scan_asm_block(sb: Field, kw_start: Field) {
    // pos is right after the "asm" identifier
    skip_ws_inline(sb)
    let mut effect: Field = 0
    // Check for '('
    if pos_lt_len(sb) {
        let p: Field = get_pos(sb)
        let ch: Field = src_byte(sb, p)
        if ch == 40 {
            // '(' found: parse the annotation
            set_pos(sb, p + 1)
            skip_ws_inline(sb)
            if pos_lt_len(sb) {
                let ap: Field = get_pos(sb)
                let ach: Field = src_byte(sb, ap)
                if is_alpha(ach) {
                    // Target tag: scan identifier
                    set_scratch(sb, 0)
                    for _i in 0..256 bounded 256 {
                        let done: Field = get_scratch(sb)
                        if done == 0 {
                            if pos_lt_len(sb) {
                                let tp: Field = get_pos(sb)
                                let tc: Field = src_byte(sb, tp)
                                if is_ident_continue(tc) {
                                    set_pos(sb, tp + 1)
                                } else {
                                    set_scratch(sb, 1)
                                }
                            } else {
                                set_scratch(sb, 1)
                            }
                        }
                    }
                    set_scratch(sb, 0)
                    // The tag occupies (ap .. get_pos(sb)); it is not stored.
                    skip_ws_inline(sb)
                    // Optional comma + effect
                    if pos_lt_len(sb) {
                        let cp: Field = get_pos(sb)
                        let cc: Field = src_byte(sb, cp)
                        if cc == 44 {
                            set_pos(sb, cp + 1)
                            skip_ws_inline(sb)
                            effect = scan_effect_number(sb)
                        }
                    }
                } else {
                    // Stack effect directly: +N or -N
                    effect = scan_effect_number(sb)
                }
            }
            // Expect ')'
            skip_ws_inline(sb)
            if pos_lt_len(sb) {
                let rp: Field = get_pos(sb)
                let rc: Field = src_byte(sb, rp)
                if rc == 41 {
                    set_pos(sb, rp + 1)
                } else {
                    emit_error(sb, ERR_ASM_MISSING_PAREN(), rp, rp)
                }
            }
            skip_ws_inline(sb)
        }
    }
    // Expect '{'
    if pos_lt_len(sb) {
        let bp: Field = get_pos(sb)
        let bc: Field = src_byte(sb, bp)
        if bc == 123 {
            set_pos(sb, bp + 1)
            let body_start: Field = bp + 1
            // Track brace depth in scratch, find the matching '}'.
            // Depth 0 doubles as the "stop scanning" signal.
            set_scratch(sb, 1)
            for _i in 0..8192 bounded 8192 {
                let depth: Field = get_scratch(sb)
                if depth == 0 {
                    // done
                } else {
                    if pos_lt_len(sb) {
                        let dp: Field = get_pos(sb)
                        let dc: Field = src_byte(sb, dp)
                        if dc == 123 {
                            set_scratch(sb, depth + 1)
                            set_pos(sb, dp + 1)
                        } else if dc == 125 {
                            let new_depth: Field = depth + field.neg(1)
                            set_scratch(sb, new_depth)
                            if new_depth == 0 {
                                // Found the matching '}', body_end = dp.
                                // The position stays on '}' until after the loop.
                            } else {
                                set_pos(sb, dp + 1)
                            }
                        } else {
                            set_pos(sb, dp + 1)
                        }
                    } else {
                        // EOF without closing brace
                        emit_error(sb, ERR_UNTERMINATED_ASM(), kw_start, get_pos(sb))
                        set_scratch(sb, 0)
                    }
                }
            }
            let body_end: Field = get_pos(sb)
            // Step over the byte at body_end (the closing '}') only when one
            // exists. On an unterminated body the position is already at the
            // end of the source and must not move past it.
            if pos_lt_len(sb) {
                set_pos(sb, body_end + 1)
            }
            set_scratch(sb, 0)
            emit_token(sb, TK_ASM_BLOCK(), body_start, body_end, effect)
        } else {
            emit_error(sb, ERR_ASM_MISSING_BRACE(), kw_start, bp)
            emit_token(sb, TK_ASM_BLOCK(), bp, bp, effect)
        }
    } else {
        emit_error(sb, ERR_ASM_MISSING_BRACE(), kw_start, get_pos(sb))
        emit_token(sb, TK_ASM_BLOCK(), get_pos(sb), get_pos(sb), effect)
    }
}
// =========================================================================
// Main entry point
// =========================================================================
// Tokenizes the source bytes in RAM into the token and error arrays.
//
// Parameters:
//   src_base    - address of the first source byte (one Field per byte)
//   src_len_val - number of source bytes
//   tok_base    - address where 4-field token records are written
//   err_base    - address where 3-field error records are written
//   state_base  - address of the 8-cell lexer state block (see the layout
//                 at the top of this file); fully initialized here
//
// On return, state_base + 1 holds the token count and state_base + 2 the
// error count. The last token is TK_EOF, provided the source needs at most
// 8192 non-EOF iterations.
//
// Every non-EOF iteration consumes at least one source byte, so a source of
// up to 8192 bytes needs at most 8192 iterations plus one more to emit
// TK_EOF. The loop therefore runs 8193 times; with only 8192 iterations a
// source made of 8192 one-byte tokens would end without a TK_EOF.
pub fn lex(
    src_base: Field,
    src_len_val: Field,
    tok_base: Field,
    err_base: Field,
    state_base: Field
) {
    // Initialize state
    mem.write(state_base, 0)
    mem.write(state_base + 1, 0)
    mem.write(state_base + 2, 0)
    mem.write(state_base + 3, src_len_val)
    mem.write(state_base + 4, src_base)
    mem.write(state_base + 5, tok_base)
    mem.write(state_base + 6, err_base)
    mem.write(state_base + 7, 0)
    // Main loop. The scratch cell (state_base + 7) doubles as the EOF flag:
    // every scanner leaves it at 0, and it is set to 1 only once TK_EOF has
    // been emitted.
    for _i in 0..8193 bounded 8193 {
        let eof_done: Field = mem.read(state_base + 7)
        if eof_done == 0 {
            skip_ws_and_comments(state_base)
            if pos_lt_len(state_base) {
                let p: Field = get_pos(state_base)
                let ch: Field = src_byte(state_base, p)
                if is_ident_start(ch) {
                    scan_ident_or_keyword(state_base)
                } else if is_digit(ch) {
                    scan_number(state_base)
                } else {
                    // Consume the byte and try as symbol
                    set_pos(state_base, p + 1)
                    scan_symbol(state_base, ch, p)
                }
            } else {
                // EOF
                emit_token(state_base, TK_EOF(), get_pos(state_base), get_pos(state_base), 0)
                mem.write(state_base + 7, 1)
            }
        }
    }
}
// trident/std/compiler/lexer.tri