use regex::Regex;
lazy_static::lazy_static! {
static ref BULLET_RE: Regex = Regex::new(r"^(\s*)- (.*)$").unwrap();
static ref HEADING_RE: Regex = Regex::new(r"^(#{1,6})\s+(.*)$").unwrap();
}
/// Normalize Logseq outliner markdown into standard markdown.
///
/// Rules:
/// 1. Top-level `- ` bullets with no sub-bullets โ paragraphs (strip the `- `)
/// 2. Top-level `- ` bullets with sub-bullets โ keep as list structure
/// 3. `- ## Heading` โ promote to actual heading (strip `- `)
/// 4. Indentation depth tracked for block hierarchy
/// 5. Continuation lines (non-bullet lines at matching indent) merged with parent bullet
/// 6. Pipe tables auto-get GFM separator rows if missing
pub fn normalize(content: &str) -> String {
let lines: Vec<&str> = content.lines().collect();
if lines.is_empty() {
return String::new();
}
let parsed: Vec<BulletLine> = lines.iter().map(|l| parse_bullet_line(l)).collect();
let mut output = Vec::new();
let mut i = 0;
while i < parsed.len() {
let line = &parsed[i];
match line.kind {
LineKind::Empty => {
output.push(String::new());
i += 1;
}
LineKind::NonBullet => {
output.push(line.raw.to_string());
i += 1;
}
LineKind::Bullet { indent_level } => {
// Collect continuation lines for this bullet
let (full_content, next_i) = collect_bullet_content(&parsed, i);
i = next_i;
if indent_level == 0 {
let content_text = &full_content;
// Check if it's a heading: `- ## Heading`
if HEADING_RE.is_match(content_text.lines().next().unwrap_or("")) {
output.push(String::new());
output.push(full_content);
output.push(String::new());
continue;
}
// Check if this starts a pipe table sequence
if looks_like_table_row(content_text) {
let mut table_lines: Vec<String> =
full_content.lines().map(String::from).collect();
// Collect subsequent top-level bullets that are also table rows
while i < parsed.len() {
if let LineKind::Bullet { indent_level: 0 } = parsed[i].kind {
let (next_content, peek_i) = collect_bullet_content(&parsed, i);
if looks_like_table_row(&next_content) {
table_lines.extend(next_content.lines().map(String::from));
i = peek_i;
} else {
break;
}
} else {
break;
}
}
let table = table_lines.join("\n");
output.push(String::new());
output.push(ensure_table_separator(&table));
output.push(String::new());
continue;
}
// Check if this top-level bullet has sub-bullets
let has_children = i < parsed.len()
&& matches!(
parsed[i].kind,
LineKind::Bullet { indent_level } if indent_level > 0
);
if has_children {
// Keep as paragraph, then sub-bullets become a list
output.push(String::new());
output.push(full_content);
output.push(String::new());
// Collect all sub-bullets
while i < parsed.len() {
match &parsed[i].kind {
LineKind::Bullet { indent_level } if *indent_level > 0 => {
let (sub_content, next_i) = collect_bullet_content(&parsed, i);
i = next_i;
let sub_indent = indent_level - 1;
// Check if this sub-bullet is a table
if is_table_block(&sub_content) {
output.push(String::new());
output.push(ensure_table_separator(&sub_content));
output.push(String::new());
} else {
let prefix = " ".repeat(sub_indent);
emit_list_item(&mut output, &prefix, &sub_content);
}
}
LineKind::Empty => {
output.push(String::new());
i += 1;
}
_ => break,
}
}
} else {
// Single top-level bullet โ paragraph
output.push(String::new());
output.push(full_content);
output.push(String::new());
}
} else {
// Sub-bullet without a parent context
let sub_indent = indent_level.saturating_sub(1);
if is_table_block(&full_content) {
output.push(String::new());
output.push(ensure_table_separator(&full_content));
output.push(String::new());
} else {
let prefix = " ".repeat(sub_indent);
emit_list_item(&mut output, &prefix, &full_content);
}
}
}
}
}
// Clean up: remove leading/trailing empty lines, collapse multiple blanks
let result = output.join("\n");
collapse_blank_lines(&result)
}
/// Emit a markdown list item, properly indenting continuation lines.
fn emit_list_item(output: &mut Vec<String>, prefix: &str, content: &str) {
let lines: Vec<&str> = content.lines().collect();
if lines.len() <= 1 {
output.push(format!("{}- {}", prefix, content));
} else {
output.push(format!("{}- {}", prefix, lines[0]));
for l in &lines[1..] {
output.push(format!("{} {}", prefix, l));
}
}
}
/// Collect a bullet's content including continuation lines.
/// Continuation lines are non-bullet lines whose raw text starts with
/// the bullet's indent + 2 spaces (matching where "- " was).
/// Empty lines between continuations are bridged (skipped) so that
/// table rows separated by blank lines still merge correctly.
fn collect_bullet_content(parsed: &[BulletLine], start: usize) -> (String, usize) {
let bullet = &parsed[start];
let mut content = bullet.content.clone();
let raw_indent = bullet.raw_indent;
// Continuation prefix: same indent as bullet + 2 spaces (replacing "- ")
let continuation_prefix = format!("{} ", raw_indent);
let mut i = start + 1;
while i < parsed.len() {
match &parsed[i].kind {
LineKind::NonBullet => {
let raw = parsed[i].raw;
if raw.starts_with(&continuation_prefix) && !raw.trim().is_empty() {
let cont = &raw[continuation_prefix.len()..];
content.push('\n');
content.push_str(cont);
i += 1;
} else {
break;
}
}
LineKind::Empty => {
// Peek past empty lines: if followed by a valid continuation, bridge the gap
let mut peek = i + 1;
while peek < parsed.len() && matches!(parsed[peek].kind, LineKind::Empty) {
peek += 1;
}
if peek < parsed.len() {
if let LineKind::NonBullet = &parsed[peek].kind {
if parsed[peek].raw.starts_with(&continuation_prefix)
&& !parsed[peek].raw.trim().is_empty()
{
// Bridge: for table rows skip blank lines, otherwise preserve them
let cont_text = &parsed[peek].raw[continuation_prefix.len()..];
if !cont_text.trim_start().starts_with('|') {
content.push('\n');
}
i = peek;
continue;
}
}
}
break;
}
_ => break,
}
}
(content, i)
}
/// Check if content looks like a table row (first line starts with |).
fn looks_like_table_row(content: &str) -> bool {
content
.lines()
.next()
.map(|l| l.trim_start().starts_with('|'))
.unwrap_or(false)
}
/// Check if content is a multi-line table block (all lines start with |, >= 2 lines).
fn is_table_block(content: &str) -> bool {
let lines: Vec<&str> = content.lines().collect();
lines.len() >= 2 && lines.iter().all(|l| l.trim_start().starts_with('|'))
}
/// Ensure a pipe table has a GFM separator row after the header.
/// If the second line is NOT a separator (|---|---|), insert one.
fn ensure_table_separator(content: &str) -> String {
let lines: Vec<&str> = content.lines().collect();
if lines.len() < 2 {
return content.to_string();
}
// Check if second line is already a separator
let second = lines[1].trim();
if is_separator_row(second) {
return content.to_string();
}
// Count columns from first line
let header = lines[0].trim();
let col_count = count_table_columns(header);
if col_count == 0 {
return content.to_string();
}
let separator = format!("|{}|", vec!["---"; col_count].join("|"));
let mut result = Vec::new();
result.push(lines[0].to_string());
result.push(separator);
for line in &lines[1..] {
result.push(line.to_string());
}
result.join("\n")
}
/// Check if a line is a GFM table separator row (e.g., |---|---|).
fn is_separator_row(line: &str) -> bool {
if !line.starts_with('|') {
return false;
}
let inner = line.trim_start_matches('|').trim_end_matches('|');
inner.split('|').all(|cell| {
let trimmed = cell.trim();
!trimmed.is_empty() && trimmed.chars().all(|c| c == '-' || c == ':' || c == ' ')
})
}
/// Count the number of columns in a pipe table row.
fn count_table_columns(row: &str) -> usize {
let trimmed = row.trim();
if !trimmed.starts_with('|') || !trimmed.ends_with('|') {
// Count pipes and subtract 1 for leading pipe
let pipes = trimmed.matches('|').count();
if pipes >= 2 {
return pipes - 1;
}
return 0;
}
let inner = trimmed.trim_start_matches('|').trim_end_matches('|');
inner.split('|').count()
}
fn collapse_blank_lines(s: &str) -> String {
let mut result = Vec::new();
let mut prev_blank = false;
for line in s.lines() {
let is_blank = line.trim().is_empty();
if is_blank {
if !prev_blank {
result.push("");
}
prev_blank = true;
} else {
result.push(line);
prev_blank = false;
}
}
// Trim leading and trailing blank lines
let s = result.join("\n");
s.trim().to_string()
}
#[derive(Debug)]
struct BulletLine<'a> {
raw: &'a str,
raw_indent: &'a str,
kind: LineKind,
content: String,
}
#[derive(Debug)]
enum LineKind {
Empty,
NonBullet,
Bullet { indent_level: usize },
}
fn parse_bullet_line<'a>(line: &'a str) -> BulletLine<'a> {
if line.trim().is_empty() {
return BulletLine {
raw: line,
raw_indent: "",
kind: LineKind::Empty,
content: String::new(),
};
}
if let Some(caps) = BULLET_RE.captures(line) {
let indent = caps.get(1).map(|m| m.as_str()).unwrap_or("");
// Each indent level is 2 spaces or 1 tab
let indent_level = if indent.contains('\t') {
indent.matches('\t').count()
} else {
indent.len() / 2
};
let content = caps.get(2).map(|m| m.as_str()).unwrap_or("").to_string();
BulletLine {
raw: line,
raw_indent: indent,
kind: LineKind::Bullet { indent_level },
content,
}
} else {
BulletLine {
raw: line,
raw_indent: "",
kind: LineKind::NonBullet,
content: line.to_string(),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_simple_paragraphs() {
let input = "- First paragraph.\n- Second paragraph.";
let output = normalize(input);
assert!(output.contains("First paragraph."));
assert!(output.contains("Second paragraph."));
assert!(!output.contains("- "));
// Paragraphs must be separated by a blank line for comrak
assert!(output.contains("First paragraph.\n\nSecond paragraph."));
}
#[test]
fn test_heading_promotion() {
let input = "- ## My Heading\n- Some content";
let output = normalize(input);
assert!(output.contains("## My Heading"));
assert!(!output.starts_with("- ## "));
}
#[test]
fn test_nested_bullets_become_list() {
let input = "- Parent item\n - Child one\n - Child two";
let output = normalize(input);
assert!(output.contains("Parent item"));
assert!(output.contains("- Child one"));
assert!(output.contains("- Child two"));
}
#[test]
fn test_deeply_nested() {
let input = "- Top\n - Level 1\n - Level 2";
let output = normalize(input);
assert!(output.contains("- Level 1"));
assert!(output.contains(" - Level 2"));
}
#[test]
fn test_full_logseq_example() {
let input = "\
- This is the introduction to the theorem.
- The core principle states that:
- Consensus emergence follows predictable patterns
- These patterns can be modeled mathematically
- Using graph theory and information theory
- ## Applications
- [[Bostrom]] network uses this for GPU consensus
- Biological systems exhibit similar behavior";
let output = normalize(input);
assert!(output.contains("This is the introduction"));
assert!(output.contains("## Applications"));
assert!(output.contains("- [[Bostrom]]"));
}
#[test]
fn test_empty_input() {
assert_eq!(normalize(""), "");
}
#[test]
fn test_continuation_lines_table() {
// Logseq table with continuation lines in a sub-bullet
let input = "- Overview\n\t- | Header 1 | Header 2 |\n\t | Cell A | Cell B |\n\t | Cell C | Cell D |";
let output = normalize(input);
assert!(
output.contains("<table>") || output.contains("|---"),
"Table should have separator: {}",
output
);
assert!(output.contains("| Header 1 | Header 2 |"));
assert!(output.contains("| Cell A | Cell B |"));
assert!(output.contains("| Cell C | Cell D |"));
// Should NOT have "- |" as a list item
assert!(
!output.contains("- |"),
"Table rows should not be list items: {}",
output
);
}
#[test]
fn test_multi_bullet_table() {
// Each row is a separate top-level bullet
let input = "- | Name | Value |\n- | foo | 1 |\n- | bar | 2 |";
let output = normalize(input);
assert!(output.contains("| Name | Value |"));
assert!(output.contains("|---|---|"));
assert!(output.contains("| foo | 1 |"));
assert!(output.contains("| bar | 2 |"));
}
#[test]
fn test_table_with_existing_separator() {
let input = "- | A | B |\n- |---|---|\n- | 1 | 2 |";
let output = normalize(input);
// Should not duplicate the separator
let sep_count = output.matches("|---|---|").count();
assert_eq!(
sep_count, 1,
"Should have exactly one separator: {}",
output
);
}
#[test]
fn test_continuation_lines_non_table() {
// Multi-line content in a bullet (like code or continuation text)
let input = "- Start of block\n continues here\n and here";
let output = normalize(input);
assert!(output.contains("Start of block"));
assert!(output.contains("continues here"));
assert!(output.contains("and here"));
}
#[test]
fn test_table_with_empty_lines_between_rows() {
// Logseq sometimes has empty lines between table rows in continuation
let input = "- Parent\n\t- | block height | neuron |\n\t \n\t | 42 | bostrom1d8 |\n\t \n\t | 43 | bostrom1d8 |";
let output = normalize(input);
assert!(
output.contains("| block height | neuron |"),
"Header missing: {}",
output
);
assert!(
output.contains("| 42 | bostrom1d8 |"),
"Row 1 missing: {}",
output
);
assert!(
output.contains("| 43 | bostrom1d8 |"),
"Row 2 missing: {}",
output
);
assert!(
output.contains("|---|---|"),
"Separator missing: {}",
output
);
assert!(
!output.contains("- |"),
"Table rows should not be list items: {}",
output
);
}
#[test]
fn test_ensure_table_separator() {
let table = "| A | B |\n| 1 | 2 |";
let result = ensure_table_separator(table);
assert!(result.contains("|---|---|"));
let table_with_sep = "| A | B |\n|---|---|\n| 1 | 2 |";
let result = ensure_table_separator(table_with_sep);
assert_eq!(result.matches("|---|---|").count(), 1);
}
}
render/src/parser/outliner.rs
ฯ 0.0%