//! Incremental lexing: re-lex only the dirty byte region after an edit,
//! then splice the new tokens into the cached token list.
use crate::syntax::lexeme::Lexeme;
use crate::syntax::lexer::{Comment, Lexer};
use crate::syntax::span::{Span, Spanned};
/// Result of an incremental lex operation.
pub(super) struct IncrementalLexResult {
pub tokens: Vec<Spanned<Lexeme>>,
pub comments: Vec<Comment>,
}
/// Re-lex only the changed region and splice into the old token list.
///
/// - `source`: the NEW source text (edit already applied)
/// - `old_tokens`: previous token list (sorted by span.start)
/// - `old_comments`: previous comment list
/// - `edit_start`: byte offset where the edit begins (in the OLD source)
/// - `old_end`: byte offset where the edit ends (in the OLD source)
/// - `new_end`: byte offset where the edit ends (in the NEW source)
pub(super) fn incremental_lex(
source: &str,
old_tokens: &[Spanned<Lexeme>],
old_comments: &[Comment],
edit_start: usize,
old_end: usize,
new_end: usize,
) -> IncrementalLexResult {
let delta: i64 = new_end as i64 - old_end as i64;
// 1. Find first affected token: first whose span.end > edit_start
let first_dirty = old_tokens.partition_point(|t| (t.span.end as usize) <= edit_start);
// 2. Find resync search start: first token whose span.start >= old_end
let resync_search_start = old_tokens.partition_point(|t| (t.span.start as usize) < old_end);
// 3. Start re-lexing from the earlier of first_dirty's start or edit_start
let relex_start = if first_dirty < old_tokens.len() {
(old_tokens[first_dirty].span.start as usize).min(edit_start)
} else {
edit_start
};
// 4. Lex from relex_start in the new source
let mut lexer = Lexer::new_from_offset(source, 0, relex_start);
let mut new_tokens_mid: Vec<Spanned<Lexeme>> = Vec::new();
let mut resync_idx: Option<usize> = None;
loop {
let tok = lexer.next_spanned();
let is_eof = tok.node == Lexeme::Eof;
if !is_eof {
// Try to resynchronize against old tokens past the edit region
if let Some(idx) = find_resync(&tok, old_tokens, resync_search_start, delta) {
resync_idx = Some(idx);
// Don't push the resync token โ the suffix starts at idx
// (the shifted old token is identical to this new one)
break;
}
}
new_tokens_mid.push(tok);
if is_eof {
break;
}
}
let mid_comments = lexer.take_comments();
// 5. Assemble: prefix + middle + suffix
let mut result_tokens = Vec::with_capacity(old_tokens.len());
let mut result_comments = Vec::new();
// Prefix: tokens entirely before the edit (exclude Eof)
for t in &old_tokens[..first_dirty] {
if t.node != Lexeme::Eof {
result_tokens.push(t.clone());
}
}
// Prefix comments: before edit_start
for c in old_comments {
if (c.span.end as usize) <= edit_start {
result_comments.push(c.clone());
}
}
// Middle: newly lexed tokens + comments
result_tokens.extend(new_tokens_mid);
result_comments.extend(mid_comments);
if let Some(ri) = resync_idx {
// Suffix: shift remaining old tokens by delta
for t in &old_tokens[ri..] {
result_tokens.push(shift_token(t, delta));
}
// Suffix comments: after old_end, shifted
for c in old_comments {
if (c.span.start as usize) >= old_end {
result_comments.push(shift_comment(c, delta));
}
}
}
// If no resync, the middle already includes Eof โ no suffix needed.
// Sort comments by position (prefix + middle + suffix may interleave)
result_comments.sort_by_key(|c| c.span.start);
IncrementalLexResult {
tokens: result_tokens,
comments: result_comments,
}
}
/// Maximum tokens to scan past the expected resync point.
const RESYNC_WINDOW: usize = 8;
/// Check if `new_tok` matches an old token (after offset shift) past the edit.
fn find_resync(
new_tok: &Spanned<Lexeme>,
old_tokens: &[Spanned<Lexeme>],
search_start: usize,
delta: i64,
) -> Option<usize> {
let limit = (search_start + RESYNC_WINDOW).min(old_tokens.len());
for i in search_start..limit {
let old = &old_tokens[i];
let shifted_start = (old.span.start as i64 + delta) as u32;
let shifted_end = (old.span.end as i64 + delta) as u32;
if new_tok.span.start == shifted_start
&& new_tok.span.end == shifted_end
&& new_tok.node == old.node
{
return Some(i);
}
}
None
}
fn shift_token(tok: &Spanned<Lexeme>, delta: i64) -> Spanned<Lexeme> {
let new_start = tok.span.start as i64 + delta;
let new_end = tok.span.end as i64 + delta;
debug_assert!(new_start >= 0, "shift_token: start went negative");
debug_assert!(new_end >= 0, "shift_token: end went negative");
Spanned::new(
tok.node.clone(),
Span::new(tok.span.file_id, new_start as u32, new_end as u32),
)
}
fn shift_comment(comment: &Comment, delta: i64) -> Comment {
let new_start = comment.span.start as i64 + delta;
let new_end = comment.span.end as i64 + delta;
debug_assert!(new_start >= 0, "shift_comment: start went negative");
debug_assert!(new_end >= 0, "shift_comment: end went negative");
Comment {
text: comment.text.clone(),
span: Span::new(comment.span.file_id, new_start as u32, new_end as u32),
trailing: comment.trailing,
}
}
/// Result of classifying whether an edit is contained within a single
/// top-level item (function, struct, event, const) or crosses boundaries.
#[derive(Debug, PartialEq, Eq)]
#[allow(dead_code)] // infrastructure for Phase 2 incremental parsing
pub(super) enum EditScope {
/// Edit falls entirely within a single top-level item body.
/// The pair gives the token indices `[start, end)` of that item.
SingleItem(usize, usize),
/// Edit crosses item boundaries or is in the file header.
Full,
}
/// Classify an edit by scanning tokens for top-level brace pairs.
///
/// Walks the token list at brace depth 0 to find item boundaries. If the
/// edit region is entirely inside one item's brace pair, returns
/// `SingleItem`; otherwise returns `Full`.
#[allow(dead_code)] // infrastructure for Phase 2 incremental parsing
pub(super) fn classify_edit_scope(
tokens: &[Spanned<Lexeme>],
edit_start: usize,
edit_new_end: usize,
) -> EditScope {
// Find top-level items by tracking brace depth.
// An item starts at the token before a depth-0 LBrace and ends at
// the matching RBrace (when depth returns to 0).
let mut depth: u32 = 0;
let mut item_start: Option<usize> = None;
let mut brace_open_byte: usize = 0;
// Walk backwards from the first LBrace to find the item's leading keyword.
// For simplicity, track the token index where depth goes from 0โ1.
for (i, tok) in tokens.iter().enumerate() {
match &tok.node {
Lexeme::LBrace => {
if depth == 0 {
// Find the item start: scan backwards past identifiers,
// keywords, parens, types, attributes to find the leading
// keyword (fn, struct, event).
let mut start_idx = i;
while start_idx > 0 {
let prev = &tokens[start_idx - 1].node;
match prev {
Lexeme::Fn
| Lexeme::Struct
| Lexeme::Event
| Lexeme::Pub
| Lexeme::Sec
| Lexeme::Const
| Lexeme::Mut
| Lexeme::Hash
| Lexeme::Ident(_)
| Lexeme::LParen
| Lexeme::RParen
| Lexeme::Comma
| Lexeme::Colon
| Lexeme::Arrow
| Lexeme::FieldTy
| Lexeme::XFieldTy
| Lexeme::BoolTy
| Lexeme::U32Ty
| Lexeme::DigestTy
| Lexeme::LBracket
| Lexeme::RBracket
| Lexeme::Semicolon
| Lexeme::Integer(_) => {
start_idx -= 1;
}
_ => break,
}
}
item_start = Some(start_idx);
brace_open_byte = tok.span.start as usize;
}
depth += 1;
}
Lexeme::RBrace => {
if depth > 0 {
depth -= 1;
}
if depth == 0 {
if let Some(si) = item_start {
let brace_close_byte = tok.span.end as usize;
// Check if edit is entirely within this item's braces
if edit_start > brace_open_byte && edit_new_end < brace_close_byte {
return EditScope::SingleItem(si, i + 1);
}
}
item_start = None;
}
}
_ => {}
}
}
EditScope::Full
}
#[cfg(test)]
mod tests {
use super::*;
fn full_lex(source: &str) -> (Vec<Spanned<Lexeme>>, Vec<Comment>) {
let (tokens, comments, _) = Lexer::new(source, 0).tokenize();
(tokens, comments)
}
fn token_kinds(tokens: &[Spanned<Lexeme>]) -> Vec<&Lexeme> {
tokens.iter().map(|t| &t.node).collect()
}
#[test]
fn insert_single_char() {
let old_source = "fn main() {}";
let new_source = "fn main() {}"; // inserted a space
let (old_tokens, old_comments) = full_lex(old_source);
let (expected_tokens, _) = full_lex(new_source);
// Edit: insert ' ' at position 2 (after "fn")
let result = incremental_lex(new_source, &old_tokens, &old_comments, 2, 2, 3);
assert_eq!(token_kinds(&result.tokens), token_kinds(&expected_tokens));
// Verify spans match
for (got, want) in result.tokens.iter().zip(expected_tokens.iter()) {
assert_eq!(got.span.start, want.span.start);
assert_eq!(got.span.end, want.span.end);
}
}
#[test]
fn delete_token() {
let old_source = "fn main() { let x: Field = 42 }";
let new_source = "fn main() { let x: Field = }";
let (old_tokens, old_comments) = full_lex(old_source);
let (expected_tokens, _) = full_lex(new_source);
// Edit: delete "42 " at positions 27..30 โ becomes 27..27
let result = incremental_lex(new_source, &old_tokens, &old_comments, 27, 30, 27);
assert_eq!(token_kinds(&result.tokens), token_kinds(&expected_tokens));
}
#[test]
fn edit_at_start() {
let old_source = "fn main() {}";
let new_source = "pub fn main() {}";
let (old_tokens, old_comments) = full_lex(old_source);
let (expected_tokens, _) = full_lex(new_source);
// Edit: insert "pub " at position 0
let result = incremental_lex(new_source, &old_tokens, &old_comments, 0, 0, 4);
assert_eq!(token_kinds(&result.tokens), token_kinds(&expected_tokens));
}
#[test]
fn edit_at_end() {
let old_source = "fn main() {}";
let new_source = "fn main() {}\n";
let (old_tokens, old_comments) = full_lex(old_source);
let (expected_tokens, _) = full_lex(new_source);
// Edit: insert newline at position 12
let result = incremental_lex(new_source, &old_tokens, &old_comments, 12, 12, 13);
assert_eq!(token_kinds(&result.tokens), token_kinds(&expected_tokens));
}
#[test]
fn multiline_edit() {
let old_source = "fn main() {\n let x: Field = 42\n let y: Field = 7\n}";
let new_source = "fn main() {\n let z: Field = 99\n}";
let (old_tokens, old_comments) = full_lex(old_source);
let (expected_tokens, _) = full_lex(new_source);
// Edit: replace "let x: Field = 42\n let y: Field = 7" with "let z: Field = 99"
let result = incremental_lex(new_source, &old_tokens, &old_comments, 14, 49, 32);
assert_eq!(token_kinds(&result.tokens), token_kinds(&expected_tokens));
}
#[test]
fn comment_preserved() {
let old_source = "// header\nfn main() {}";
let new_source = "// header\nfn main() {}"; // insert space
let (old_tokens, old_comments) = full_lex(old_source);
let result = incremental_lex(new_source, &old_tokens, &old_comments, 12, 12, 13);
assert_eq!(result.comments.len(), 1);
assert_eq!(result.comments[0].text, "// header");
}
#[test]
fn edit_scope_within_function_body() {
let source = "program test\nfn main() {\n let x: Field = 42\n}\n";
let (tokens, _) = full_lex(source);
// Edit inside the function body (e.g., changing "42" to "99")
let scope = classify_edit_scope(&tokens, 40, 42);
assert!(
matches!(scope, EditScope::SingleItem(_, _)),
"edit inside fn body should be SingleItem, got {:?}",
scope
);
}
#[test]
fn edit_scope_in_file_header() {
let source = "program test\nuse vm.io\nfn main() {}\n";
let (tokens, _) = full_lex(source);
// Edit in the program declaration (changing "test")
let scope = classify_edit_scope(&tokens, 8, 12);
assert_eq!(scope, EditScope::Full);
}
#[test]
fn edit_scope_crossing_functions() {
let source = "program test\nfn foo() {\n let x: Field = 1\n}\nfn bar() {\n let y: Field = 2\n}\n";
let (tokens, _) = full_lex(source);
// Edit spanning from inside foo to inside bar
let scope = classify_edit_scope(&tokens, 30, 70);
assert_eq!(scope, EditScope::Full);
}
}
trident/src/lsp/incremental.rs
ฯ 0.0%
//! Incremental lexing: re-lex only the dirty byte region after an edit,
//! then splice the new tokens into the cached token list.
use crateLexeme;
use crate;
use crate;
/// Result of an incremental lex operation.
pub
/// Re-lex only the changed region and splice into the old token list.
///
/// - `source`: the NEW source text (edit already applied)
/// - `old_tokens`: previous token list (sorted by span.start)
/// - `old_comments`: previous comment list
/// - `edit_start`: byte offset where the edit begins (in the OLD source)
/// - `old_end`: byte offset where the edit ends (in the OLD source)
/// - `new_end`: byte offset where the edit ends (in the NEW source)
pub
/// Maximum tokens to scan past the expected resync point.
const RESYNC_WINDOW: usize = 8;
/// Check if `new_tok` matches an old token (after offset shift) past the edit.
/// Result of classifying whether an edit is contained within a single
/// top-level item (function, struct, event, const) or crosses boundaries.
// infrastructure for Phase 2 incremental parsing
pub
/// Classify an edit by scanning tokens for top-level brace pairs.
///
/// Walks the token list at brace depth 0 to find item boundaries. If the
/// edit region is entirely inside one item's brace pair, returns
/// `SingleItem`; otherwise returns `Full`.
// infrastructure for Phase 2 incremental parsing
pub