# context.nu — smart context packer for LLM consumption
# Scores pages by graph gravity (inbound links) and density,
# then greedily packs the most valuable content into a token budget.
#
# Usage:
# nu analizer/context.nu ~/git/cyber # graph only, default 900K tokens
# nu analizer/context.nu ~/git/cyber --subgraphs # include subgraph repos
# nu analizer/context.nu ~/git/cyber --subgraphs --budget 500 # 500K token budget
# nu analizer/context.nu ~/git/cyber --subgraphs --stats # print score table, don't write
# Score every page of a knowledge graph by link gravity + density, then
# greedily pack the highest-scoring content into a token-budgeted markdown
# file for LLM consumption. With --stats it only prints the ranking table.
def main [
graph_path: string,
--output (-o): string = "",
--subgraphs (-s),
--budget (-b): int = 900, # token budget in thousands
--stats, # print ranking table only
--soul: string = "", # path to preamble file (prepended before pages)
] {
let token_budget = $budget * 1000
# ~3.5 chars per token for mixed markdown+math content
let char_budget = ($token_budget * 3.5 | into int)
print $"Scanning pages..."
# --- collect all markdown files ---
mut all_files = (glob $"($graph_path)/root/**/*.md" | sort)
# blog and scripts
let blog = (glob $"($graph_path)/blog/*.md" | sort)
let scripts = (glob $"($graph_path)/analizer/*.nu" | sort)
let configs = (glob $"($graph_path)/*.md" | append (glob $"($graph_path)/*.toml") | sort)
$all_files = ($all_files | append $blog | append $scripts | append $configs)
# subgraph repos (sibling directories)
mut subgraph_files = []
if $subgraphs {
let git_root = ($graph_path | path dirname)
# known sibling repos; each may keep pages under root/, graph/ or pages/
let repos = [hemera zheng nebu nox bbg cybernode mudra trident optica]
for repo in $repos {
let repo_path = $"($git_root)/($repo)"
if ($repo_path | path exists) {
let md = if ($"($repo_path)/root" | path exists) {
glob $"($repo_path)/root/**/*.md"
} else if ($"($repo_path)/graph" | path exists) {
glob $"($repo_path)/graph/**/*.md"
} else if ($"($repo_path)/pages" | path exists) {
glob $"($repo_path)/pages/**/*.md"
} else {
# unknown layout: take every .md but filter out VCS/build noise
glob $"($repo_path)/**/*.md"
| where {|f| not ($f | str contains "/.git/")}
| where {|f| not ($f | str contains "/build/")}
| where {|f| not ($f | str contains "/target/")}
| where {|f| not ($f | str contains "/node_modules/")}
}
$subgraph_files = ($subgraph_files | append $md)
}
}
$all_files = ($all_files | append ($subgraph_files | sort))
}
print $"Total files: ($all_files | length)"
# --- build page name → file path index and alias map ---
let git_root = ($graph_path | path dirname)
mut page_index = {} # lowercase_name → file_path
mut alias_map = {} # lowercase_alias → lowercase_canonical_name
for f in $all_files {
# path relative to the graph repo (or to git root, for subgraph files)
let rel = if ($f | str starts-with $graph_path) {
$f | str replace $"($graph_path)/" ""
} else {
$f | str replace $"($git_root)/" ""
}
# derive page name from path: root/cyber/focus.md → cyber/focus
let page_name = if ($rel | str starts-with "root/") {
$rel | str replace "root/" "" | str replace ".md" ""
} else {
$rel | str replace ".md" "" | str replace ".nu" "" | str replace ".toml" ""
}
let name_lower = ($page_name | str downcase)
$page_index = ($page_index | merge {$name_lower: $f})
# extract aliases from frontmatter
let raw = (open --raw $f)
if ($raw | str starts-with "---") {
let lines = ($raw | lines)
# locate the closing "---" fence of the YAML frontmatter block
let fm_matches = ($lines | skip 1 | enumerate | where {|x| $x.item == "---"})
# 999 is a sentinel for "no closing fence found"
# NOTE(review): assumes frontmatter ends within 999 lines — confirm
let fm_end = if ($fm_matches | length) > 0 { $fm_matches | first | get index } else { 999 }
if $fm_end < 999 {
let fm_lines = ($lines | skip 1 | first $fm_end)
let alias_lines = ($fm_lines | where {|l| $l | str starts-with "alias:"})
if ($alias_lines | length) > 0 {
let alias_line = ($alias_lines | first)
# "alias: a, b" → [a b], trimmed, lowercased, empties dropped
let aliases = ($alias_line | str replace "alias:" "" | split row "," | each {|a| $a | str trim | str downcase} | where {|a| ($a | str length) > 0})
for a in $aliases {
$alias_map = ($alias_map | merge {$a: $name_lower})
}
}
}
}
}
print $"Page index: ($page_index | columns | length) pages, ($alias_map | columns | length) aliases"
# freeze mutable maps for closure capture
let alias_map = $alias_map
let page_index = $page_index
# --- extract links and compute scores ---
print $"Computing gravity and density..."
mut inbound_counts = {} # page_name → count of inbound links
mut page_data = [] # {file, name, size, outbound, links_raw}
for f in $all_files {
let raw = (open --raw $f)
let size = ($raw | str length)
let rel = if ($f | str starts-with $graph_path) {
$f | str replace $"($graph_path)/" ""
} else {
$f | str replace $"($git_root)/" ""
}
let page_name = if ($rel | str starts-with "root/") {
$rel | str replace "root/" "" | str replace ".md" ""
} else {
$rel | str replace ".md" "" | str replace ".nu" "" | str replace ".toml" ""
}
# extract wiki-links [[target]], deduplicated per page
let links = ($raw | parse --regex '\[\[([^\]]+)\]\]' | get capture0 | uniq)
let links_lower = ($links | each {|l| $l | str downcase})
# resolve each link and count inbound
for link in $links_lower {
# resolve alias → canonical
let target = if ($alias_map | get -o $link) != null {
$alias_map | get $link
} else {
$link
}
# increment inbound count
let current = ($inbound_counts | get -o $target | default 0)
$inbound_counts = ($inbound_counts | merge {$target: ($current + 1)})
}
# resolve targets for reflected gravity
let resolved = ($links_lower | each {|l|
if ($alias_map | get -o $l) != null { $alias_map | get $l } else { $l }
})
$page_data = ($page_data | append {
file: $f
name: ($page_name | str downcase)
rel: $rel
size: $size
outbound: ($links_lower | length)
targets: $resolved
})
}
# --- reflected gravity ---
# pages that link TO high-gravity pages inherit a fraction of that gravity.
# this is one step of diffusion: if you reference neuron (gravity 435),
# you get 435 * alpha added to your effective gravity.
# critical for subgraph pages that reference core concepts but have zero inbound.
let alpha = 0.05 # reflection coefficient
mut reflected = {}
for page in $page_data {
mut ref_sum = 0.0
for target in $page.targets {
let target_gravity = ($inbound_counts | get -o $target | default 0)
$ref_sum = $ref_sum + ($target_gravity | into float)
}
$reflected = ($reflected | merge {$page.name: ($ref_sum * $alpha)})
}
# --- score each page ---
# gravity: inbound links + reflected gravity from outbound targets
# density: outbound links per KB (how connected is this page relative to its size)
# substance: raw content size (longer = more knowledge, but with diminishing returns)
#
# score = effective_gravity² × (1 + density) × log2(substance)
mut scored = []
for page in $page_data {
let raw_gravity = ($inbound_counts | get -o $page.name | default 0)
let ref_gravity = ($reflected | get -o $page.name | default 0.0)
let gravity = (($raw_gravity | into float) + $ref_gravity)
let density = if $page.size > 0 { ($page.outbound / ($page.size / 1024.0)) } else { 0.0 }
# log2 dampens raw size; tiny pages (<= 100 chars) get a flat 1.0
let substance = if $page.size > 100 { ($page.size | math log 2) } else { 1.0 }
# bonus for pages with high stake (frontmatter field)
# NOTE(review): matches "stake:" anywhere in the file, not just frontmatter
let raw = (open --raw $page.file)
let has_stake = ($raw | str contains "stake:")
let gravity_sq = ($gravity * $gravity)
let score = $gravity_sq * (1.0 + $density) * $substance * (if $has_stake { 1.5 } else { 1.0 })
$scored = ($scored | append {
file: $page.file
rel: $page.rel
name: $page.name
size: $page.size
gravity: ($gravity | math round -p 1)
raw_gravity: $raw_gravity
outbound: $page.outbound
density: ($density | math round -p 2)
score: ($score | math round -p 1)
})
}
# sort by score descending
let ranked = ($scored | sort-by score -r)
# --stats mode: report the ranking and exit without writing any output
if $stats {
print "\n=== TOP 50 pages by score ==="
let display = ($ranked | first 50 | each {|r| {
rel: $r.rel
gravity: $r.gravity
out: $r.outbound
score: $r.score
kb: ($r.size / 1024 | math round -p 1)
}})
print ($display | table)
let zero_gravity = ($ranked | where gravity == 0 | length)
let total = ($ranked | length)
print $"\nTotal: ($total) pages, ($zero_gravity) with zero inbound links"
# gravity distribution
let g_dist = ($ranked | where gravity > 0 | get gravity | describe)
print $"Gravity distribution: ($g_dist)"
return
}
# --- greedy knapsack: pack by score until budget ---
let budget_msg = $"Packing into ($budget)K token budget [($char_budget) chars]..."
print $budget_msg
mut packed = []
mut total_chars = 0
mut packed_count = 0
# prepend soul (personality preamble) if provided
if $soul != "" and ($soul | path exists) {
let soul_content = (open --raw $soul | str trim)
$packed = ($packed | append $soul_content)
$total_chars = $total_chars + ($soul_content | str length)
print $"Soul: ($soul_content | str length) chars prepended"
}
# always include top-level config first (packed regardless of budget)
let config_files = ($ranked | where {|r| ($r.rel | str starts-with "CLAUDE") or ($r.rel | str starts-with "README") or ($r.rel | str ends-with ".toml")})
for cf in $config_files {
let content = (open --raw $cf.file | str trim)
let entry = $"--- ($cf.rel) ---\n($content)\n"
$total_chars = $total_chars + ($entry | str length)
$packed = ($packed | append $entry)
$packed_count = $packed_count + 1
}
# pack by score
for page in $ranked {
if $total_chars >= $char_budget { break }
# skip configs already packed
if ($page.rel | str starts-with "CLAUDE") or ($page.rel | str starts-with "README") or ($page.rel | str ends-with ".toml") {
continue
}
let content = (open --raw $page.file | str trim)
let entry = $"--- ($page.rel) ---\n($content)\n"
let entry_size = ($entry | str length)
if ($total_chars + $entry_size) > $char_budget {
# skip this page if it would exceed budget → try smaller ones
continue
}
$packed = ($packed | append $entry)
$total_chars = $total_chars + $entry_size
$packed_count = $packed_count + 1
}
let total_pages = ($ranked | length)
let coverage_pct = ($packed_count * 100 / $total_pages | math round -p 1)
# invert the chars-per-token estimate used for char_budget above
let est_tokens = ($total_chars / 3.5 | math round -p 0 | into int)
let gen_date = (date now | format date '%Y-%m-%d')
let graph_name = ($graph_path | path basename)
# provenance header prepended to the packed output
let header = [
$"# Knowledge Graph Context: ($graph_name)"
$"# Packed: ($packed_count) / ($total_pages) pages [($coverage_pct)%]"
$"# Estimated tokens: ($est_tokens) / ($token_budget) budget"
"# Method: gravity^2 * (1 + density) * log2(substance) β greedy knapsack"
$"# Generated: ($gen_date)"
""
] | str join "\n"
let result = ([$header] | append $packed | str join "\n")
if $output == "" {
# NOTE(review): default path hardcodes "cyber" even though graph_name
# is computed above — presumably intentional for this repo; confirm
# before generalizing to $"/tmp/($graph_name)-context-($budget)k.md"
let out_path = $"/tmp/cyber-context-($budget)k.md"
$result | save -f $out_path
let size_kb = ($total_chars / 1024 | math round -p 0)
print $"Saved ($packed_count)/($total_pages) pages to ($out_path) β ($size_kb) KB, ~($est_tokens) tokens [($coverage_pct)% coverage]"
} else {
$result | save -f $output
let size_kb = ($total_chars / 1024 | math round -p 0)
print $"Saved ($packed_count)/($total_pages) pages to ($output) β ($size_kb) KB, ~($est_tokens) tokens [($coverage_pct)% coverage]"
}
}
analizer/context.nu
Ο 0.0%