# Crystal ontology classifier for graph pages
# Usage: nu nu/classify.nu ~/git/cyber
# Outputs JSON classification to /tmp/crystal_classification.json
# Classifies every `*.md` page directly under `<graph_path>/pages`:
#   - size class from the byte length of the file,
#   - crystal type (Q/M/S/R/P/E) from the tags on the page's first line,
#   - crystal domain from the first matching entry in the domain rule table.
# Writes all records as JSON to /tmp/crystal_classification.json and prints
# summary counts plus type/domain distributions for high-confidence pages.
def main [
    graph_path: string  # Graph root directory; pages are read from its "pages" subdirectory
] {
    let pages_dir = ($graph_path | path join "pages")

    # Type tag mappings: tag -> crystal type
    let entity_tags = [
        "species" "genus" "family" "class" "animals" "birds" "fungi" "plant"
        "compound" "compound-"
        "person"
        "building" "building type" "camp" "district" "pond"
        "ticker"
        "module"
        "page"
        "aip"
        "hero" "worker"
        "service" "product"
        "segment"
    ]
    let process_tags = [
        "operation" "critical"
        "kitchen/menu" "recipe" "breakfast" "mains"
    ]
    let property_tags = ["property"]
    let measure_tags = ["measure"]
    let pattern_tags = ["pattern"]
    let relation_tags = ["relation"]

    # Domain tag mappings: tag -> crystal domain
    # Order matters: first match wins (more specific first)
    let domain_rules = [
        { tags: ["property"] domain: "physics" }
        { tags: ["measure" "physics"] domain: "physics" }
        { tags: ["measure"] domain: "physics" }
        { tags: ["pattern"] domain: "mathematics" }
        { tags: ["relation"] domain: "mathematics" }
        { tags: ["species" "genus" "family" "class" "animals" "birds" "fungi" "shroom" "plant" "biology"] domain: "biology" }
        { tags: ["compound" "compound-"] domain: "chemistry" }
        { tags: ["physics" "force" "wave" "field"] domain: "physics" }
        { tags: ["mathematics" "algebra" "geometry" "topology" "statistics" "information theory" "game theory"] domain: "mathematics" }
        { tags: ["computer science" "cryptography" "machine learning" "cryptographic proofs"] domain: "computer science" }
        { tags: ["superhuman" "muscle" "disease" "longevity"] domain: "body" }
        { tags: ["cybernomics" "ticker" "token" "value" "delegation"] domain: "economics" }
        { tags: ["governance" "sovereignty" "law"] domain: "governance" }
        { tags: ["geography" "biome" "continent" "earth"] domain: "geography" }
        { tags: ["time" "history" "epoch" "year"] domain: "history" }
        { tags: ["culture" "language" "philosophy" "music"] domain: "culture" }
        { tags: ["color" "emotion" "spectrum"] domain: "culture" }
        { tags: ["food" "kitchen/menu" "recipe" "breakfast" "mains"] domain: "agriculture" }
        { tags: ["tech" "technology" "material"] domain: "materials" }
        { tags: ["energy"] domain: "energy" }
        { tags: ["person"] domain: "meta" }
        { tags: ["annotation" "research" "term" "uhash"] domain: "meta" }
        { tags: ["cyberia" "cv.land" "building" "building type" "camp" "district" "operation" "critical" "block" "team" "worker" "pond" "wc" "segment" "rack"] domain: "cyberia" }
        { tags: ["cyber" "cyb" "cip" "module" "aip" "prism" "bostrom" "page" "aos" "hero" "state" "param" "service" "product"] domain: "cyber" }
    ]

    let results = (glob ($pages_dir | path join "*.md") | each {|f|
        # `default ""` is a guard in case `open --raw` yields nothing for an
        # empty file (NOTE(review): behaviour differs between nu versions).
        let content = (open $f --raw | default "")
        let size = ($content | str length)

        # Take the file stem rather than deleting the first ".md" substring,
        # so a name such as "notes.md.draft.md" keeps its inner ".md".
        let name = ($f | path parse | get stem)

        # An empty page has no lines; `first` would raise on the empty list
        # and abort the whole run, so fall back to an empty first line.
        let lines = ($content | lines)
        let first_line = if ($lines | is-empty) { "" } else { $lines | first }

        # Tags live on the first line as `tags:: a, b, c`. Drop the verified
        # 6-character prefix by position, then split on the bare comma and
        # trim each piece so `a,b` and `a, b` parse alike.
        let tags = if ($first_line | str starts-with "tags::") {
            (
                $first_line
                | str substring 6..
                | split row ","
                | each {|t| $t | str trim }
                | where {|t| ($t | str length) > 0 }
            )
        } else {
            []
        }

        # Size class
        let size_class = if $size <= 256 {
            "atom"
        } else if $size <= 512 {
            "enzyme"
        } else if $size <= 1024 {
            "bridge"
        } else if $size <= 4096 {
            "article"
        } else {
            "deep"
        }

        # Determine type
        let has_entity = ($tags | any {|t| $t in $entity_tags })
        let has_process = ($tags | any {|t| $t in $process_tags })
        let has_property = ($tags | any {|t| $t in $property_tags })
        let has_measure = ($tags | any {|t| $t in $measure_tags })
        let has_pattern = ($tags | any {|t| $t in $pattern_tags })
        let has_relation = ($tags | any {|t| $t in $relation_tags })
        # Pages whose name starts with "$" are treated as tickers even when untagged.
        let is_ticker = ($name | str starts-with "$")

        # Precedence: property > measure > pattern > relation > process > entity.
        let crystal_type = if $has_property {
            "Q"
        } else if $has_measure {
            "M"
        } else if $has_pattern {
            "S"
        } else if $has_relation {
            "R"
        } else if $has_process {
            "P"
        } else if $has_entity {
            "E"
        } else if $is_ticker {
            "E"
        } else {
            "unknown"
        }

        # Determine domain: ticker pages are always "economics"; otherwise the
        # first rule (in table order) sharing a tag with the page wins.
        let crystal_domain = if ($tags | is-empty) and (not $is_ticker) {
            "unknown"
        } else if $is_ticker {
            "economics"
        } else {
            let matched = ($domain_rules | where {|rule|
                $rule.tags | any {|rt| $rt in $tags }
            })
            if ($matched | is-empty) { "unknown" } else { $matched | first | get domain }
        }

        # High confidence requires both the type and the domain to be resolved.
        let confidence = if $crystal_type == "unknown" or $crystal_domain == "unknown" { "low" } else { "high" }

        {
            name: $name
            size: $size
            size_class: $size_class
            tags: $tags
            crystal_type: $crystal_type
            crystal_domain: $crystal_domain
            confidence: $confidence
        }
    })

    $results | to json | save -f /tmp/crystal_classification.json

    let total = ($results | length)
    let high = ($results | where confidence == "high" | length)
    let low = ($results | where confidence == "low" | length)
    print $"Total pages: ($total)"
    print $"High confidence: ($high)"
    print $"Low confidence: ($low)"
    print ""
    print "=== Type distribution ==="
    print ($results | where confidence == "high" | group-by crystal_type | items {|k, v| {type: $k, count: ($v | length)}} | sort-by count -r | table)
    print ""
    print "=== Domain distribution ==="
    print ($results | where confidence == "high" | group-by crystal_domain | items {|k, v| {domain: $k, count: ($v | length)}} | sort-by count -r | table)
}
analizer/classify.nu
ฯ 0.0%