use crate::graph::PageStore;
use crate::parser::slugify_page_name;
use comrak::{
    arena_tree::Node,
    nodes::{Ast, AstNode, NodeValue},
    Arena, Options,
};
use regex::Regex;

use std::cell::RefCell;

/// Build the comrak configuration used for every render.
///
/// Turns on wikilinks (title after the pipe), the GFM-style extensions this
/// file relies on, relaxed autolinks, and raw-HTML passthrough.
fn setup_comrak_options() -> Options<'static> {
    let mut opts = Options::default();

    {
        let ext = &mut opts.extension;
        // WikiLink parsing: `[[target|title]]`
        ext.wikilinks_title_after_pipe = true;
        // GFM extensions
        ext.strikethrough = true;
        ext.table = true;
        ext.tasklist = true;
        ext.footnotes = true;
        ext.description_lists = true;
    }

    // Parse options
    opts.parse.relaxed_autolinks = true;

    // Render options — we control input, allow raw HTML
    opts.render.unsafe_ = true;

    opts
}

use super::toc::{self, TocEntry};

/// Result of rendering markdown, including TOC data.
pub struct RenderResult {
    /// Rendered HTML, with any extracted math blocks restored into it.
    pub html: String,
    /// Table-of-contents entries returned by `toc::extract_toc` for the parsed document.
    pub toc: Vec<TocEntry>,
}

/// Extract math blocks (`$..$` and `$$..$$`) from markdown, replacing them with placeholders.
///
/// Returns the processed markdown and the list of extracted math strings. Each extracted
/// string keeps its `$`/`$$` delimiters; block `n` is replaced by `MATH_PLACEHOLDER_n`
/// (display math is additionally surrounded by blank lines).
///
/// Rules:
/// - Text between `[[` and the next `]]` is copied verbatim, so `$` inside a wiki-link is
///   never treated as math. A `[[` with no closing `]]` is ordinary text and does not
///   suppress math extraction for the rest of the document.
/// - `$$` opens display math, which runs to the next `$$` and may span lines. If there is
///   no closing `$$`, the remainder of the input is emitted unchanged and scanning stops.
/// - A single `$` opens inline math unless it is preceded by `\`, is followed by a space,
///   or is the last character. Inline math ends at the next `$` not preceded by `\` and
///   never crosses a newline; if no closer is found on the line the text is emitted as-is.
fn extract_math_blocks(markdown: &str) -> (String, Vec<String>) {
    // Every delimiter inspected below is ASCII. An ASCII byte never occurs inside a
    // multi-byte UTF-8 sequence, so byte offsets found this way are always char boundaries.
    let bytes = markdown.as_bytes();
    let len = bytes.len();

    let mut math_blocks: Vec<String> = Vec::new();
    let mut result = String::with_capacity(len);
    let mut i = 0;
    // Once one search for `]]` fails, every later search (over a shorter suffix) fails too.
    let mut may_close_wikilink = true;

    while i < len {
        let rest = &markdown[i..];

        // Copy the run of ordinary text up to the next character that can start a construct.
        let plain = rest
            .find(|c: char| c == '$' || c == '[')
            .unwrap_or(rest.len());
        if plain > 0 {
            result.push_str(&rest[..plain]);
            i += plain;
            continue;
        }

        // Wiki-link: copy `[[ ... ]]` verbatim so `$` inside it is not treated as math.
        if rest.starts_with("[[") {
            let close = if may_close_wikilink {
                rest[2..].find("]]")
            } else {
                None
            };
            match close {
                Some(offset) => {
                    let end = i + 2 + offset + 2;
                    result.push_str(&markdown[i..end]);
                    i = end;
                }
                None => {
                    // Unterminated `[[`: plain text, keep looking for math after it.
                    may_close_wikilink = false;
                    result.push_str("[[");
                    i += 2;
                }
            }
            continue;
        }

        // A lone `[`.
        if bytes[i] == b'[' {
            result.push('[');
            i += 1;
            continue;
        }

        // From here on bytes[i] == b'$'.

        // Check for $$ (display math) first.
        if rest.starts_with("$$") {
            match rest[2..].find("$$") {
                Some(offset) => {
                    let end = i + 2 + offset + 2;
                    result.push_str("\n\nMATH_PLACEHOLDER_");
                    result.push_str(&math_blocks.len().to_string());
                    result.push_str("\n\n");
                    math_blocks.push(markdown[i..end].to_string());
                    i = end;
                    continue;
                }
                None => {
                    // No closing $$, output the remainder as-is.
                    result.push_str(rest);
                    break;
                }
            }
        }

        // Single `$`: literal when escaped with `\`, followed by a space, or at end of input.
        let escaped = i > 0 && bytes[i - 1] == b'\\';
        let next = bytes.get(i + 1).copied();
        if escaped || next.is_none() || next == Some(b' ') {
            result.push('$');
            i += 1;
            continue;
        }

        // Inline math: find the closing unescaped `$` on the same line.
        let mut j = i + 1;
        let mut closing = None;
        while j < len {
            match bytes[j] {
                b'$' if bytes[j - 1] != b'\\' => {
                    closing = Some(j);
                    break;
                }
                // Don't cross newlines for inline math.
                b'\n' => break,
                _ => j += 1,
            }
        }

        match closing {
            Some(end) => {
                result.push_str("MATH_PLACEHOLDER_");
                result.push_str(&math_blocks.len().to_string());
                math_blocks.push(markdown[i..=end].to_string());
                i = end + 1;
            }
            None => {
                // No closer on this line: emit what was scanned and resume at the newline.
                result.push_str(&markdown[i..j]);
                i = j;
            }
        }
    }

    (result, math_blocks)
}

/// Ensure underscores inside `\text{}` blocks are escaped as `\_` for KaTeX.
///
/// KaTeX requires `\_` for literal underscores in `\text{}` (bare `_` triggers subscript).
/// Handles both already-escaped `\_` and bare `_` by normalizing first. Text outside
/// `\text{...}` is returned unchanged, and a `\text{` with no closing `}` is left as-is.
///
/// Each `\text{` body runs to the first `}` after it (no brace nesting), the same span the
/// pattern `\\text\{([^}]*)\}` would match. Plain string searching is used so no regex has
/// to be compiled on each call.
fn fix_text_underscores(math: &str) -> String {
    const TEXT_OPEN: &str = "\\text{";

    let mut out = String::with_capacity(math.len());
    let mut rest = math;

    while let Some(start) = rest.find(TEXT_OPEN) {
        let body_start = start + TEXT_OPEN.len();
        let body_len = match rest[body_start..].find('}') {
            Some(n) => n,
            // No closing brace here means none exists for any later `\text{` either.
            None => break,
        };
        let body_end = body_start + body_len;

        // Everything up to and including `\text{` is copied verbatim.
        out.push_str(&rest[..body_start]);

        // Normalize: strip \_ to _, then re-escape all _ to \_
        let normalized = rest[body_start..body_end].replace("\\_", "_");
        out.push_str(&normalized.replace('_', "\\_"));
        out.push('}');

        rest = &rest[body_end + 1..];
    }

    out.push_str(rest);
    out
}

/// Restore math blocks from placeholders in the rendered HTML.
fn restore_math_blocks(html: &str, math_blocks: &[String]) -> String {
    let re = Regex::new(r"MATH_PLACEHOLDER_(\d+)").unwrap();
    re.replace_all(html, |caps: ®ex::Captures| {
        let idx: usize = caps[1].parse().unwrap_or(0);
        if idx < math_blocks.len() {
            fix_text_underscores(&math_blocks[idx])
        } else {
            caps[0].to_string()
        }
    })
    .to_string()
}

/// Render markdown to HTML and collect its table of contents.
///
/// Pipeline, in order:
/// 1. expand page embeds (`resolve_embeds_and_refs`) and query blocks
///    (`crate::query::resolve_queries`) in the markdown source;
/// 2. swap math for placeholders so comrak never sees it;
/// 3. parse with comrak and extract the TOC from the untouched AST;
/// 4. rewrite wikilinks and external links, then add heading anchors;
/// 5. format to HTML and put the math back.
///
/// NOTE(review): `inject_heading_ids` runs after the link transforms, and
/// `get_node_text_content` only collects `Text` nodes, so link text that was replaced by
/// inline HTML no longer contributes to a heading's id. Confirm this matches the ids
/// `toc::extract_toc` produces.
pub fn render_markdown(markdown: &str, store: &PageStore) -> RenderResult {
    // Source-level pre-processing: embeds first, then queries, then math protection.
    let with_embeds = resolve_embeds_and_refs(markdown, store, 0);
    let with_queries = crate::query::resolve_queries(&with_embeds, store);
    let (source, math_blocks) = extract_math_blocks(&with_queries);

    let arena = Arena::new();
    let options = setup_comrak_options();
    let root = comrak::parse_document(&arena, &source, &options);

    // The TOC is read before any node is rewritten.
    let toc = toc::extract_toc(root);

    // AST rewrites: internal links, external links, heading anchors.
    transform_wikilinks(root, store, &arena);
    transform_external_links(root, &arena);
    inject_heading_ids(root, &arena);

    let mut buffer = Vec::new();
    comrak::format_html(root, &options, &mut buffer).unwrap();
    let rendered = String::from_utf8(buffer).unwrap_or_default();

    // Placeholders only exist when something was extracted.
    let html = if math_blocks.is_empty() {
        rendered
    } else {
        restore_math_blocks(&rendered, &math_blocks)
    };

    RenderResult { html, toc }
}

/// Inject id attributes into headings by prepending an inline-HTML anchor to each one.
///
/// The id is `slug::slugify` of the heading's text content; headings with no text are
/// skipped. Headings are collected in a first pass and modified in a second, so the tree is
/// not changed while `descendants()` is being iterated.
///
/// NOTE(review): the anchor template below reached this file as an empty raw string
/// (`r#""#`), which looks like HTML stripped during extraction. `format!` rejects it because
/// `id` is unused. Restore the original markup from version control; it is not guessed here.
fn inject_heading_ids<'a>(root: &'a AstNode<'a>, arena: &'a Arena<AstNode<'a>>) {
    let mut headings: Vec<(&'a AstNode<'a>, String)> = Vec::new();

    for node in root.descendants() {
        let data = node.data.borrow();
        if let NodeValue::Heading(_) = data.value {
            let text = get_node_text_content(node);
            if !text.is_empty() {
                let id = slug::slugify(&text);
                headings.push((node, id));
            }
        }
    }

    for (node, id) in headings {
        // Prepend an anchor before the heading content
        let anchor_html = format!(r#""#, id);
        let anchor_node = arena.alloc(Node::new(RefCell::new(Ast::new(
            NodeValue::HtmlInline(anchor_html),
            node.data.borrow().sourcepos.start,
        ))));
        node.prepend(anchor_node);
    }
}

lazy_static::lazy_static! {
    /// Matches `

` page embeds
    static ref EMBED_PAGE_RE: Regex = Regex::new(
        r"\{\{embed\s+\[\[([^\]]+)\]\]\s*\}\}"
    ).unwrap();
}

/// Resolve 

 in markdown.
/// `depth` prevents infinite recursion for circular embeds.
fn resolve_embeds_and_refs(markdown: &str, store: &PageStore, depth: usize) -> String {
    if depth > 3 {
        return markdown.to_string();
    }

    // Fast path: if no embed patterns exist, skip regex processing
    if !markdown.contains("{{embed") {
        return markdown.to_string();
    }

    // Resolve 

 โ†’ inline the page's content
    EMBED_PAGE_RE
        .replace_all(markdown, |caps: ®ex::Captures| {
            let page_name = &caps[1];
            let slug = slugify_page_name(page_name);
            if let Some(page) = store.pages.get(&slug) {
                let content = resolve_embeds_and_refs(&page.content_md, store, depth + 1);
                format!(
                    "\n
\n\n\n{}\n\n
\n", slug, slug, page.meta.title, content ) } else { format!( "
Embed: page \"{}\" not found
", page_name ) } }) .to_string() } fn transform_wikilinks<'a>( root: &'a AstNode<'a>, store: &PageStore, arena: &'a Arena>, ) { // Collect nodes that need transformation first to avoid borrow issues let mut nodes_to_transform: Vec<(&'a AstNode<'a>, String)> = Vec::new(); for node in root.descendants() { let data = node.data.borrow(); if let NodeValue::WikiLink(ref wl) = data.value { let url = wl.url.clone(); nodes_to_transform.push((node, url)); } } for (node, url) in nodes_to_transform { let slug = slugify_page_name(&url); // Resolve alias to canonical page ID let resolved_slug = if store.pages.contains_key(&slug) { slug.clone() } else if let Some(canonical_id) = store.alias_map.get(&slug) { canonical_id.clone() } else { slug.clone() }; let class = if store.stub_pages.contains(&resolved_slug) { "internal-link stub-link" } else if store.pages.contains_key(&resolved_slug) { "internal-link" } else { "internal-link stub-link" }; // Get display text from children or use URL let display = get_node_text_content(node); let display = if display.trim().is_empty() { url.clone() } else { display }; let html = format!( r#"{display}"#, ); // Replace WikiLink node with inline HTML let new_node = arena.alloc(Node::new(RefCell::new(Ast::new( NodeValue::HtmlInline(html), node.data.borrow().sourcepos.start, )))); node.insert_before(new_node); node.detach(); } } /// Transform external links (http/https) to open in new tab. 
fn transform_external_links<'a>(root: &'a AstNode<'a>, arena: &'a Arena>) { let mut nodes_to_transform: Vec<(&'a AstNode<'a>, String, String)> = Vec::new(); for node in root.descendants() { let data = node.data.borrow(); if let NodeValue::Link(ref link) = data.value { if link.url.starts_with("http://") || link.url.starts_with("https://") { let url = link.url.clone(); let title = link.title.clone(); nodes_to_transform.push((node, url, title)); } } } for (node, url, title) in nodes_to_transform { let display = get_node_text_content(node); let title_attr = if title.is_empty() { String::new() } else { format!(r#" title="{}""#, html_escape(&title)) }; let html = format!( r#"{}"#, html_escape(&url), title_attr, html_escape(&display), ); let new_node = arena.alloc(Node::new(RefCell::new(Ast::new( NodeValue::HtmlInline(html), node.data.borrow().sourcepos.start, )))); node.insert_before(new_node); node.detach(); } } fn html_escape(s: &str) -> String { s.replace('&', "&") .replace('<', "<") .replace('>', ">") .replace('"', """) } fn get_node_text_content<'a>(node: &'a AstNode<'a>) -> String { let mut text = String::new(); for child in node.children() { let data = child.data.borrow(); match &data.value { NodeValue::Text(t) => text.push_str(t), _ => { text.push_str(&get_node_text_content(child)); } } } text } #[cfg(test)] mod tests { use super::*; use crate::graph::build_graph; use crate::parser::{PageKind, PageMeta, ParsedPage}; use std::collections::HashMap; use std::path::PathBuf; fn empty_store() -> PageStore { build_graph(vec![]).unwrap() } fn store_with_page(name: &str) -> PageStore { let page = ParsedPage { id: slugify_page_name(name), meta: PageMeta { title: name.to_string(), properties: HashMap::new(), tags: vec![], public: Some(true), aliases: vec![], date: None, icon: None, menu_order: None, stake: None, }, kind: PageKind::Page, source_path: PathBuf::new(), namespace: None, content_md: String::new(), outgoing_links: vec![], }; build_graph(vec![page]).unwrap() } 
#[test] fn test_basic_markdown() { let store = empty_store(); let result = render_markdown("# Hello\n\nWorld", &store); assert!(result.html.contains("

")); assert!(result.html.contains("Hello")); assert!(result.html.contains("

World

")); } #[test] fn test_wikilink_resolved() { let store = store_with_page("My Page"); let result = render_markdown("Link to [[My Page]]", &store); assert!(result.html.contains("class=\"internal-link\"")); assert!(result.html.contains("href=\"/my-page\"")); } #[test] fn test_wikilink_stub() { let store = empty_store(); let result = render_markdown("Link to [[Missing Page]]", &store); assert!(result.html.contains("stub-link")); } #[test] fn test_gfm_features() { let store = empty_store(); let result = render_markdown("| A | B |\n|---|---|\n| 1 | 2 |", &store); assert!(result.html.contains("")); } #[test] fn test_math_block_protection() { let store = empty_store(); // Inline math with backslash-brace should be preserved let result = render_markdown( "The set $\\left\\{x \\in \\mathbb{R}\\right\\}$ is open.", &store, ); assert!( result.html.contains("\\left\\{"), "backslash-brace should be preserved in inline math" ); // Display math let result = render_markdown("$$ \\left\\{x > 0\\right\\}$$", &store); assert!( result.html.contains("\\left\\{"), "backslash-brace should be preserved in display math" ); assert!( result.html.contains("x > 0"), "greater-than should be preserved in display math" ); } #[test] fn test_text_underscore_fix() { // Already-escaped \_ should be preserved for KaTeX let store = empty_store(); let result = render_markdown( "$\\text{type\\_tag}(a)$", &store, ); assert!( result.html.contains("\\text{type\\_tag}"), "escaped underscore should be preserved in \\text{{}}: {}", result.html ); // Bare underscores in \text{} should be escaped to \_ let result = render_markdown( "$\\text{staking_share}$", &store, ); assert!( result.html.contains("\\text{staking\\_share}"), "bare underscore should be escaped in \\text{{}}: {}", result.html ); // Multiple \text{} blocks let result = render_markdown( "$\\text{BBG\\_root} = H(\\text{by\\_neuron.commit})$", &store, ); assert!(result.html.contains("\\text{BBG\\_root}")); 
assert!(result.html.contains("\\text{by\\_neuron.commit}")); } #[test] fn test_toc_generation() { let store = empty_store(); let result = render_markdown("# First\n\n## Second\n\n### Third\n\nContent", &store); assert_eq!(result.toc.len(), 3); assert_eq!(result.toc[0].text, "First"); assert_eq!(result.toc[0].level, 1); assert_eq!(result.toc[1].text, "Second"); assert_eq!(result.toc[1].level, 2); } }

// Local Graph
//
// 1667 words · 9 min read