use crate::parser::{PageId, ParsedPage};
use crate::scanner::{DiscoveredFile, FileKind};
use anyhow::Result;
use globset::{Glob, GlobSetBuilder};
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
/// Declaration of an external repository to include as a subgraph.
#[derive(Debug, Clone)]
pub struct SubgraphDecl {
pub name: String,
pub repo_path: PathBuf,
pub exclude_patterns: Vec<String>,
pub declaring_page_id: PageId,
}
/// Default exclude patterns applied to all subgraphs.
const DEFAULT_EXCLUDES: &[&str] = &[
".git/**",
"target/**",
"**/target/**",
"node_modules/**",
"**/node_modules/**",
"build/**",
"**/build/**",
".claude/**",
"**/.DS_Store",
"Cargo.lock",
"**/Cargo.lock",
];
/// Discover subgraph declarations from parsed root graph pages.
/// Looks for pages with `subgraph: true` in frontmatter properties.
pub fn discover_subgraphs(pages: &[ParsedPage], input_dir: &Path) -> Vec<SubgraphDecl> {
let mut decls = Vec::new();
for page in pages {
let props = &page.meta.properties;
// Check for subgraph: true
let is_subgraph = props
.get("subgraph")
.map(|v| v.trim().eq_ignore_ascii_case("true"))
.unwrap_or(false);
if !is_subgraph {
continue;
}
// Parse repo path (required)
let repo_raw = match props.get("repo") {
Some(v) => v.trim().to_string(),
None => {
eprintln!(
"Warning: subgraph page '{}' has subgraph: true but no repo: path",
page.id
);
continue;
}
};
// Resolve repo path relative to input_dir
let repo_path = input_dir.join(&repo_raw);
let repo_path = repo_path
.canonicalize()
.unwrap_or_else(|_| repo_path.clone());
// Parse exclude patterns (optional, comma-separated)
let custom_excludes: Vec<String> = props
.get("exclude")
.map(|v| {
v.split(',')
.map(|s| s.trim().trim_matches('"').to_string())
.filter(|s| !s.is_empty())
.collect()
})
.unwrap_or_default();
// Merge default + custom excludes
let mut exclude_patterns: Vec<String> =
DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();
exclude_patterns.extend(custom_excludes);
// Use the declaring page's title (original name before slugification)
// as the subgraph name to preserve namespace nesting.
// e.g., title "trident" โ name "trident"
// e.g., title "cyber/context" โ name "cyber/context"
let name = page.meta.title.to_lowercase();
decls.push(SubgraphDecl {
name,
repo_path,
exclude_patterns,
declaring_page_id: page.id.clone(),
});
}
decls
}
/// Scan an external repository and return discovered files under the subgraph namespace.
/// All files are collected; markdown files become Pages, everything else becomes Files.
pub fn scan_subgraph(decl: &SubgraphDecl) -> Result<Vec<DiscoveredFile>> {
if !decl.repo_path.exists() {
eprintln!(
"Warning: subgraph '{}' repo path does not exist: {} โ skipping",
decl.name,
decl.repo_path.display()
);
return Ok(vec![]);
}
// Build exclude glob set
let mut builder = GlobSetBuilder::new();
for pattern in &decl.exclude_patterns {
if let Ok(glob) = Glob::new(pattern) {
builder.add(glob);
}
}
let exclude_set = builder.build()?;
// Directories to skip entirely โ prevents WalkDir from descending into
// .git/objects, target/, node_modules/ etc. which can contain thousands of files.
let skip_dirs: std::collections::HashSet<&str> =
[".git", "target", "node_modules", "build", ".claude"].into();
let mut files = Vec::new();
for entry in WalkDir::new(&decl.repo_path)
.into_iter()
.filter_entry(|e| {
if e.file_type().is_dir() {
let name = e.file_name().to_string_lossy();
!skip_dirs.contains(name.as_ref())
} else {
true
}
})
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
{
let path = entry.path().to_path_buf();
// Get path relative to repo root for exclusion matching
let relative = path
.strip_prefix(&decl.repo_path)
.unwrap_or(&path)
.to_string_lossy();
if exclude_set.is_match(relative.as_ref()) {
continue;
}
let is_md = path
.extension()
.map(|ext| ext == "md" || ext == "markdown")
.unwrap_or(false);
if is_md {
let name = subgraph_page_name(&path, &decl.repo_path, &decl.name);
files.push(DiscoveredFile {
path,
kind: FileKind::Page,
name,
subgraph: Some(decl.name.clone()),
});
} else {
let name = subgraph_file_name(&path, &decl.repo_path, &decl.name);
files.push(DiscoveredFile {
path,
kind: FileKind::File,
name,
subgraph: Some(decl.name.clone()),
});
}
}
Ok(files)
}
/// Derive page name for a markdown file in a subgraph.
/// README.md at any level becomes the directory's page.
/// e.g., ~/git/trident/README.md โ "trident"
/// e.g., ~/git/trident/docs/README.md โ "trident/docs"
/// e.g., ~/git/trident/src/README.md โ "trident/src"
/// e.g., ~/git/trident/docs/explanation/vision.md โ "trident/docs/explanation/vision"
fn subgraph_page_name(path: &Path, repo_root: &Path, subgraph_name: &str) -> String {
let relative = path.strip_prefix(repo_root).unwrap_or(path);
let stem = relative.with_extension("");
let name = stem.to_string_lossy();
// README at any level becomes the parent directory's page
if name.eq_ignore_ascii_case("README") {
return subgraph_name.to_string();
}
if let Some(parent) = name.strip_suffix("/README").or_else(|| name.strip_suffix("/readme")) {
return format!("{}/{}", subgraph_name, parent);
}
// Case-insensitive check for README as last component
let last = name.rsplit('/').next().unwrap_or(&name);
if last.eq_ignore_ascii_case("README") {
let parent = &name[..name.len() - last.len() - 1];
return format!("{}/{}", subgraph_name, parent);
}
format!("{}/{}", subgraph_name, name)
}
/// Derive file name for a non-markdown file in a subgraph (preserves extension).
/// e.g., ~/git/trident/src/main.rs โ "trident/src/main.rs"
fn subgraph_file_name(path: &Path, repo_root: &Path, subgraph_name: &str) -> String {
let relative = path.strip_prefix(repo_root).unwrap_or(path);
let name = relative.to_string_lossy().to_string();
format!("{}/{}", subgraph_name, name)
}
/// Enforce namespace monopoly: remove root pages whose namespace conflicts
/// with a claimed subgraph namespace.
/// Returns list of (evicted_page_id, reason) for reporting.
pub fn enforce_namespace_monopoly(
root_pages: &mut Vec<ParsedPage>,
subgraph_namespaces: &[String],
) -> Vec<(PageId, String)> {
let mut evicted = Vec::new();
root_pages.retain(|page| {
if let Some(ref ns) = page.namespace {
for sg_ns in subgraph_namespaces {
if ns == sg_ns || ns.starts_with(&format!("{}/", sg_ns)) {
evicted.push((
page.id.clone(),
format!(
"namespace '{}' claimed by subgraph '{}'",
ns, sg_ns
),
));
return false;
}
}
}
true
});
evicted
}
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
#[test]
fn test_subgraph_page_name() {
let repo = PathBuf::from("/git/trident");
// Repo-root README maps to just the subgraph name
assert_eq!(
subgraph_page_name(&PathBuf::from("/git/trident/README.md"), &repo, "trident"),
"trident"
);
// Nested files keep full path
assert_eq!(
subgraph_page_name(
&PathBuf::from("/git/trident/docs/explanation/vision.md"),
&repo,
"trident"
),
"trident/docs/explanation/vision"
);
// Directory README becomes the directory page
assert_eq!(
subgraph_page_name(
&PathBuf::from("/git/trident/src/README.md"),
&repo,
"trident"
),
"trident/src"
);
assert_eq!(
subgraph_page_name(
&PathBuf::from("/git/trident/docs/README.md"),
&repo,
"trident"
),
"trident/docs"
);
}
#[test]
fn test_subgraph_file_name() {
let repo = PathBuf::from("/git/trident");
assert_eq!(
subgraph_file_name(&PathBuf::from("/git/trident/src/main.rs"), &repo, "trident"),
"trident/src/main.rs"
);
assert_eq!(
subgraph_file_name(&PathBuf::from("/git/trident/Cargo.toml"), &repo, "trident"),
"trident/Cargo.toml"
);
}
#[test]
fn test_namespace_monopoly_evicts_matching() {
use crate::parser::{PageKind, PageMeta};
use std::collections::HashMap;
let make = |id: &str, ns: Option<&str>| ParsedPage {
id: id.to_string(),
meta: PageMeta {
title: id.to_string(),
properties: HashMap::new(),
tags: vec![],
public: Some(true),
aliases: vec![],
date: None,
icon: None,
menu_order: None,
stake: None,
},
kind: PageKind::Page,
source_path: PathBuf::new(),
namespace: ns.map(|s| s.to_string()),
subgraph: None,
content_md: String::new(),
outgoing_links: vec![],
};
let mut pages = vec![
make("root-page", None),
make("trident-thesis", None), // root level, no namespace โ should NOT be evicted
make("trident-sub-thing", Some("trident")), // namespace = trident โ EVICTED
make("other-ns-page", Some("cyber")),
];
let evicted = enforce_namespace_monopoly(&mut pages, &["trident".to_string()]);
assert_eq!(pages.len(), 3);
assert_eq!(evicted.len(), 1);
assert_eq!(evicted[0].0, "trident-sub-thing");
// root-level pages with no namespace stay
assert!(pages.iter().any(|p| p.id == "trident-thesis"));
}
}
optica/src/scanner/subgraph.rs
ฯ 0.0%
use crate;
use crate;
use Result;
use ;
use ;
use WalkDir;
/// Declaration of an external repository to include as a subgraph.
/// Default exclude patterns applied to all subgraphs.
const DEFAULT_EXCLUDES: & = &;
/// Discover subgraph declarations from parsed root graph pages.
/// Looks for pages with `subgraph: true` in frontmatter properties.
/// Scan an external repository and return discovered files under the subgraph namespace.
/// All files are collected; markdown files become Pages, everything else becomes Files.
/// Derive page name for a markdown file in a subgraph.
/// README.md at any level becomes the directory's page.
/// e.g., ~/git/trident/README.md โ "trident"
/// e.g., ~/git/trident/docs/README.md โ "trident/docs"
/// e.g., ~/git/trident/src/README.md โ "trident/src"
/// e.g., ~/git/trident/docs/explanation/vision.md โ "trident/docs/explanation/vision"
/// Derive file name for a non-markdown file in a subgraph (preserves extension).
/// e.g., ~/git/trident/src/main.rs โ "trident/src/main.rs"
/// Enforce namespace monopoly: remove root pages whose namespace conflicts
/// with a claimed subgraph namespace.
/// Returns list of (evicted_page_id, reason) for reporting.