diff --git a/src/graph.rs b/src/graph.rs index 67dcf5a..cba9b27 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -73,6 +73,19 @@ impl Graph { &self.communities } + /// Hub degree threshold: top 5% by degree + pub fn hub_threshold(&self) -> usize { + let mut degrees: Vec<usize> = self.keys.iter() + .map(|k| self.degree(k)) + .collect(); + degrees.sort_unstable(); + if degrees.len() >= 20 { + degrees[degrees.len() * 95 / 100] + } else { + usize::MAX + } + } + /// Local clustering coefficient: fraction of a node's neighbors /// that are also neighbors of each other. /// cc(v) = 2E / (deg * (deg - 1)) @@ -187,7 +200,7 @@ impl Graph { let n = degrees.len(); if n < 2 { return 0.0; } - degrees.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); + degrees.sort_by(|a, b| a.total_cmp(b)); let mean = degrees.iter().sum::<f64>() / n as f64; if mean < 1e-10 { return 0.0; } @@ -255,17 +268,7 @@ impl Graph { pub fn link_impact(&self, source: &str, target: &str) -> LinkImpact { let source_deg = self.degree(source); let target_deg = self.degree(target); - - // Hub threshold: top 5% by degree - let mut all_degrees: Vec<usize> = self.keys.iter() - .map(|k| self.degree(k)) - .collect(); - all_degrees.sort_unstable(); - let hub_threshold = if all_degrees.len() >= 20 { - all_degrees[all_degrees.len() * 95 / 100] - } else { - usize::MAX // can't define hubs with <20 nodes - }; + let hub_threshold = self.hub_threshold(); let is_hub_link = source_deg >= hub_threshold || target_deg >= hub_threshold; // Community check @@ -469,7 +472,7 @@ fn label_propagation( // Adopt the label with most votes if let Some((&best_label, _)) = votes.iter() - .max_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal)) + .max_by(|a, b| a.1.total_cmp(b.1)) { let current = labels[key]; if best_label != current { diff --git a/src/main.rs b/src/main.rs index 96f5de0..d175071 100644 --- a/src/main.rs +++ b/src/main.rs @@ -318,16 +318,9 @@ fn cmd_status() -> Result<(), String> { fn cmd_graph() -> 
Result<(), String> { let store = capnp_store::Store::load()?; let g = store.build_graph(); - println!("Top nodes by degree:"); - let results = query::execute_query( - &store, &g, "* | sort degree | limit 10")?; - for r in &results { - let deg = g.degree(&r.key); - let cc = g.clustering_coefficient(&r.key); - println!(" {:40} deg={:3} cc={:.3}", r.key, deg, cc); - } - Ok(()) + query::run_query(&store, &g, + "* | sort degree | limit 10 | select degree,clustering_coefficient") } fn cmd_used(args: &[String]) -> Result<(), String> { @@ -486,14 +479,9 @@ fn cmd_link(args: &[String]) -> Result<(), String> { let store = capnp_store::Store::load()?; let resolved = store.resolve_key(&key)?; let g = store.build_graph(); - println!("Neighbors of '{}':", resolved); - let neighbors = g.neighbors(&resolved); - for (i, (n, strength)) in neighbors.iter().enumerate() { - let cc = g.clustering_coefficient(n); - println!(" {:2}. [{:.2}] {} (cc={:.3})", i + 1, strength, n, cc); - } - Ok(()) + query::run_query(&store, &g, + &format!("neighbors('{}') | select strength,clustering_coefficient", resolved)) } fn cmd_replay_queue(args: &[String]) -> Result<(), String> { @@ -843,29 +831,7 @@ fn cmd_experience_mine(args: &[String]) -> Result<(), String> { let jsonl_path = if let Some(path) = args.first() { path.clone() } else { - // Find the most recent JSONL transcript - let projects_dir = std::path::Path::new(&std::env::var("HOME").unwrap_or_default()) - .join(".claude/projects"); - let mut entries: Vec<(std::time::SystemTime, std::path::PathBuf)> = Vec::new(); - if let Ok(dirs) = std::fs::read_dir(&projects_dir) { - for dir in dirs.flatten() { - if let Ok(files) = std::fs::read_dir(dir.path()) { - for file in files.flatten() { - let path = file.path(); - if path.extension().map_or(false, |ext| ext == "jsonl") { - if let Ok(meta) = file.metadata() { - if let Ok(mtime) = meta.modified() { - entries.push((mtime, path)); - } - } - } - } - } - } - } - entries.sort_by(|a, b| b.0.cmp(&a.0)); - 
entries.first() - .map(|(_, p)| p.to_string_lossy().to_string()) + find_current_transcript() .ok_or("no JSONL transcripts found")? }; @@ -1222,11 +1188,7 @@ fn cmd_spectral_suggest(args: &[String]) -> Result<(), String> { fn cmd_list_keys() -> Result<(), String> { let store = capnp_store::Store::load()?; let g = store.build_graph(); - let results = query::execute_query(&store, &g, "* | sort key asc")?; - for r in &results { - println!("{}", r.key); - } - Ok(()) + query::run_query(&store, &g, "* | sort key asc") } fn cmd_list_edges() -> Result<(), String> { @@ -1637,44 +1599,5 @@ Pipe stages:\n \ let query_str = args.join(" "); let store = capnp_store::Store::load()?; let graph = store.build_graph(); - - let stages = query::output_stages(&query_str)?; - let results = query::execute_query(&store, &graph, &query_str)?; - - // Check for count stage - if stages.iter().any(|s| matches!(s, query::Stage::Count)) { - println!("{}", results.len()); - return Ok(()); - } - - if results.is_empty() { - eprintln!("No results"); - return Ok(()); - } - - // Check for select stage - let fields: Option<&Vec<String>> = stages.iter().find_map(|s| match s { - query::Stage::Select(f) => Some(f), - _ => None, - }); - - if let Some(fields) = fields { - let mut header = vec!["key".to_string()]; - header.extend(fields.iter().cloned()); - println!("{}", header.join("\t")); - - for r in &results { - let mut row = vec![r.key.clone()]; - for f in fields { - row.push(query::format_field(f, &r.key, &store, &graph)); - } - println!("{}", row.join("\t")); - } - } else { - for r in &results { - println!("{}", r.key); - } - } - - Ok(()) + query::run_query(&store, &graph, &query_str) } diff --git a/src/migrate.rs b/src/migrate.rs index d20c16d..ea4bfec 100644 --- a/src/migrate.rs +++ b/src/migrate.rs @@ -23,19 +23,10 @@ use std::collections::HashMap; use std::env; use std::fs; use std::path::{Path, PathBuf}; -use std::time::{SystemTime, UNIX_EPOCH}; - fn home() -> PathBuf {
PathBuf::from(env::var("HOME").expect("HOME not set")) } -fn now_epoch() -> f64 { - SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs_f64() -} - // Old system data structures (just enough for deserialization) #[derive(Deserialize)] @@ -206,7 +197,7 @@ pub fn migrate() -> Result<(), String> { let node = Node { uuid, version: 1, - timestamp: now_epoch(), + timestamp: capnp_store::now_epoch(), node_type: if key.contains("journal") { NodeType::EpisodicSession } else { @@ -246,7 +237,7 @@ pub fn migrate() -> Result<(), String> { let node = Node { uuid, version: 1, - timestamp: now_epoch(), + timestamp: capnp_store::now_epoch(), node_type: if key.contains("journal") { NodeType::EpisodicSession } else { diff --git a/src/neuro.rs b/src/neuro.rs index b29419e..c631fa7 100644 --- a/src/neuro.rs +++ b/src/neuro.rs @@ -10,14 +10,7 @@ use crate::similarity; use crate::spectral::{self, SpectralEmbedding, SpectralPosition}; use std::collections::HashMap; -use std::time::{SystemTime, UNIX_EPOCH}; - -fn now_epoch() -> f64 { - SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs_f64() -} +use crate::capnp_store::now_epoch; const SECS_PER_DAY: f64 = 86400.0; @@ -137,7 +130,7 @@ pub fn replay_queue_with_graph( }) .collect(); - items.sort_by(|a, b| b.priority.partial_cmp(&a.priority).unwrap_or(std::cmp::Ordering::Equal)); + items.sort_by(|a, b| b.priority.total_cmp(&a.priority)); items.truncate(count); items } @@ -228,7 +221,7 @@ fn format_topology_header(graph: &Graph) -> String { let e = graph.edge_count(); // Identify saturated hubs — nodes with degree well above threshold - let threshold = hub_threshold(graph); + let threshold = graph.hub_threshold(); let mut hubs: Vec<_> = graph.nodes().iter() .map(|k| (k.clone(), graph.degree(k))) .filter(|(_, d)| *d >= threshold) @@ -262,22 +255,10 @@ fn format_topology_header(graph: &Graph) -> String { n, e, graph.community_count(), sigma, alpha, gini, avg_cc, hub_list) } -/// Compute the hub degree 
threshold (top 5% by degree) -fn hub_threshold(graph: &Graph) -> usize { - let mut degrees: Vec = graph.nodes().iter() - .map(|k| graph.degree(k)) - .collect(); - degrees.sort_unstable(); - if degrees.len() >= 20 { - degrees[degrees.len() * 95 / 100] - } else { - usize::MAX - } -} /// Format node data section for prompt templates fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> String { - let hub_thresh = hub_threshold(graph); + let hub_thresh = graph.hub_threshold(); let mut out = String::new(); for item in items { let node = match store.nodes.get(&item.key) { @@ -363,7 +344,7 @@ fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> S }) .filter(|(_, sim)| *sim > 0.1) .collect(); - candidates.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); + candidates.sort_by(|a, b| b.1.total_cmp(&a.1)); candidates.truncate(8); if !candidates.is_empty() { @@ -568,7 +549,7 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result = episodes.iter().map(|(k, _)| k.clone()).collect(); @@ -959,7 +940,7 @@ pub fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph) } } - moves.sort_by(|a, b| b.similarity.partial_cmp(&a.similarity).unwrap_or(std::cmp::Ordering::Equal)); + moves.sort_by(|a, b| b.similarity.total_cmp(&a.similarity)); Some(moves) } @@ -1017,7 +998,7 @@ pub fn apply_differentiation( /// Find all file-level hubs that have section children to split into. 
pub fn find_differentiable_hubs(store: &Store) -> Vec<(String, usize, usize)> { let graph = store.build_graph(); - let threshold = hub_threshold(&graph); + let threshold = graph.hub_threshold(); let mut hubs = Vec::new(); for key in graph.nodes() { @@ -1093,7 +1074,7 @@ pub fn triangle_close( } } - pair_scores.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal)); + pair_scores.sort_by(|a, b| b.2.total_cmp(&a.2)); let to_add = pair_scores.len().min(max_links_per_hub); if to_add > 0 { @@ -1168,7 +1149,7 @@ pub fn link_orphans( .filter(|(_, s)| *s >= sim_threshold) .collect(); - scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); + scores.sort_by(|a, b| b.1.total_cmp(&a.1)); let to_link = scores.len().min(links_per_orphan); if to_link == 0 { continue; } diff --git a/src/query.rs b/src/query.rs index 22a502c..21b4beb 100644 --- a/src/query.rs +++ b/src/query.rs @@ -24,6 +24,7 @@ use crate::capnp_store::{NodeType, Provenance, RelationType, Store}; use crate::graph::Graph; use regex::Regex; +use std::collections::BTreeMap; // -- AST types -- @@ -240,40 +241,26 @@ fn as_str(v: &Value) -> String { } fn compare(lhs: &Value, op: CmpOp, rhs: &Value) -> bool { + if let CmpOp::Match = op { + return Regex::new(&as_str(rhs)) + .map(|re| re.is_match(&as_str(lhs))) + .unwrap_or(false); + } + + // Numeric comparison if both parse, otherwise string + let ord = match (as_num(lhs), as_num(rhs)) { + (Some(a), Some(b)) => a.total_cmp(&b), + _ => as_str(lhs).cmp(&as_str(rhs)), + }; + match op { - CmpOp::Match => { - let text = as_str(lhs); - let pat = as_str(rhs); - match Regex::new(&pat) { - Ok(re) => re.is_match(&text), - Err(_) => false, - } - } - CmpOp::Eq => { - if let (Some(a), Some(b)) = (as_num(lhs), as_num(rhs)) { - a == b - } else { - as_str(lhs) == as_str(rhs) - } - } - CmpOp::Ne => { - if let (Some(a), Some(b)) = (as_num(lhs), as_num(rhs)) { - a != b - } else { - as_str(lhs) != as_str(rhs) - } - } - CmpOp::Gt | CmpOp::Lt | 
CmpOp::Ge | CmpOp::Le => { - let a = as_num(lhs).unwrap_or(f64::NAN); - let b = as_num(rhs).unwrap_or(f64::NAN); - match op { - CmpOp::Gt => a > b, - CmpOp::Lt => a < b, - CmpOp::Ge => a >= b, - CmpOp::Le => a <= b, - _ => unreachable!(), - } - } + CmpOp::Eq => ord.is_eq(), + CmpOp::Ne => !ord.is_eq(), + CmpOp::Gt => ord.is_gt(), + CmpOp::Lt => ord.is_lt(), + CmpOp::Ge => !ord.is_lt(), + CmpOp::Le => !ord.is_gt(), + CmpOp::Match => unreachable!(), } } @@ -301,60 +288,27 @@ fn resolve_value(v: &Value, store: &Store, graph: &Graph) -> Value { } } -fn eval_node(expr: &Expr, key: &str, store: &Store, graph: &Graph) -> bool { - match expr { - Expr::All => true, - Expr::Comparison { field, op, value } => { - let lhs = match resolve_field(field, key, store, graph) { - Some(v) => v, - None => return false, - }; - let rhs = resolve_value(value, store, graph); - compare(&lhs, *op, &rhs) - } - Expr::And(a, b) => { - eval_node(a, key, store, graph) && eval_node(b, key, store, graph) - } - Expr::Or(a, b) => { - eval_node(a, key, store, graph) || eval_node(b, key, store, graph) - } - Expr::Not(e) => !eval_node(e, key, store, graph), - Expr::Neighbors { .. } => false, - } -} - -fn eval_edge( +/// Evaluate an expression against a field resolver. +/// The resolver returns field values — different for nodes vs edges. 
+fn eval( expr: &Expr, - _source: &str, - target: &str, - strength: f32, - rel_type: RelationType, + resolve: &dyn Fn(&str) -> Option<Value>, store: &Store, graph: &Graph, ) -> bool { match expr { Expr::All => true, Expr::Comparison { field, op, value } => { - let lhs = match field.as_str() { - "strength" => Value::Num(strength as f64), - "rel_type" => Value::Str(rel_type_label(rel_type).to_string()), - _ => match resolve_field(field, target, store, graph) { - Some(v) => v, - None => return false, - }, + let lhs = match resolve(field) { + Some(v) => v, + None => return false, }; let rhs = resolve_value(value, store, graph); compare(&lhs, *op, &rhs) } - Expr::And(a, b) => { - eval_edge(a, _source, target, strength, rel_type, store, graph) - && eval_edge(b, _source, target, strength, rel_type, store, graph) - } - Expr::Or(a, b) => { - eval_edge(a, _source, target, strength, rel_type, store, graph) - || eval_edge(b, _source, target, strength, rel_type, store, graph) - } - Expr::Not(e) => !eval_edge(e, _source, target, strength, rel_type, store, graph), + Expr::And(a, b) => eval(a, resolve, store, graph) && eval(b, resolve, store, graph), + Expr::Or(a, b) => eval(a, resolve, store, graph) || eval(b, resolve, store, graph), + Expr::Not(e) => !eval(e, resolve, store, graph), Expr::Neighbors { .. 
} => false, } } @@ -363,6 +317,7 @@ fn eval_edge( pub struct QueryResult { pub key: String, + pub fields: BTreeMap<String, Value>, } // -- Query executor -- @@ -374,7 +329,14 @@ pub fn execute_query( ) -> Result<Vec<QueryResult>, String> { let q = query_parser::query(query_str) .map_err(|e| format!("Parse error: {}", e))?; + execute_parsed(store, graph, &q) +} +fn execute_parsed( + store: &Store, + graph: &Graph, + q: &Query, +) -> Result<Vec<QueryResult>, String> { let mut results = match &q.expr { Expr::Neighbors { key, filter } => { let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone()); @@ -382,14 +344,24 @@ let mut out = Vec::new(); for edge in edges { let include = match filter { - Some(f) => eval_edge( - f, &resolved, &edge.target, - edge.strength, edge.rel_type, store, graph, - ), + Some(f) => { + let strength = edge.strength; + let rt = edge.rel_type; + let target = &edge.target; + eval(f, &|field| match field { + "strength" => Some(Value::Num(strength as f64)), + "rel_type" => Some(Value::Str(rel_type_label(rt).to_string())), + _ => resolve_field(field, target, store, graph), + }, store, graph) + } None => true, }; if include { - out.push(QueryResult { key: edge.target.clone() }); + let mut fields = BTreeMap::new(); + fields.insert("strength".into(), Value::Num(edge.strength as f64)); + fields.insert("rel_type".into(), + Value::Str(rel_type_label(edge.rel_type).to_string())); + out.push(QueryResult { key: edge.target.clone(), fields }); } } out @@ -398,14 +370,43 @@ let mut out = Vec::new(); for key in store.nodes.keys() { if store.nodes[key].deleted { continue; } - if eval_node(&q.expr, key, store, graph) { - out.push(QueryResult { key: key.clone() }); + if eval(&q.expr, &|f| resolve_field(f, key, store, graph), store, graph) { + out.push(QueryResult { key: key.clone(), fields: BTreeMap::new() }); } } out } }; + // Collect fields needed by select/sort stages and resolve them once + let needed: Vec<String> = { + let mut set = Vec::new(); + for stage in 
&q.stages { + match stage { + Stage::Select(fields) => { + for f in fields { + if !set.contains(f) { set.push(f.clone()); } + } + } + Stage::Sort { field, .. } => { + if !set.contains(field) { set.push(field.clone()); } + } + _ => {} + } + } + set + }; + + for r in &mut results { + for f in &needed { + if !r.fields.contains_key(f) { + if let Some(v) = resolve_field(f, &r.key, store, graph) { + r.fields.insert(f.clone(), v); + } + } + } + } + // Apply pipeline stages let mut has_sort = false; for stage in &q.stages { @@ -414,25 +415,17 @@ has_sort = true; let asc = *ascending; results.sort_by(|a, b| { - let va = resolve_field(field, &a.key, store, graph) - .and_then(|v| as_num(&v)); - let vb = resolve_field(field, &b.key, store, graph) - .and_then(|v| as_num(&v)); - match (va, vb) { - (Some(a), Some(b)) => if asc { - a.partial_cmp(&b).unwrap_or(std::cmp::Ordering::Equal) - } else { - b.partial_cmp(&a).unwrap_or(std::cmp::Ordering::Equal) - }, - // String fallback for non-numeric fields + let va = a.fields.get(field).and_then(|v| as_num(v)); + let vb = b.fields.get(field).and_then(|v| as_num(v)); + let ord = match (va, vb) { + (Some(a), Some(b)) => a.total_cmp(&b), _ => { - let sa = resolve_field(field, &a.key, store, graph) - .map(|v| as_str(&v)).unwrap_or_default(); - let sb = resolve_field(field, &b.key, store, graph) - .map(|v| as_str(&v)).unwrap_or_default(); - if asc { sa.cmp(&sb) } else { sb.cmp(&sa) } + let sa = a.fields.get(field).map(|v| as_str(v)).unwrap_or_default(); + let sb = b.fields.get(field).map(|v| as_str(v)).unwrap_or_default(); + sa.cmp(&sb) } - } + }; + if asc { ord } else { ord.reverse() } }); } Stage::Limit(n) => { @@ -454,33 +447,66 @@ Ok(results) } -/// Extract the output stages from a parsed query (for cmd_query formatting) -pub fn output_stages(query_str: &str) -> Result<Vec<Stage>, String> { - let q = query_parser::query(query_str) - .map_err(|e| format!("Parse error: {}", e))?; - Ok(q.stages) -} - -/// 
Format a field value for display -pub fn format_field(field: &str, key: &str, store: &Store, graph: &Graph) -> String { - match resolve_field(field, key, store, graph) { - Some(Value::Num(n)) => { - if n == n.floor() && n.abs() < 1e15 { - format!("{}", n as i64) +/// Format a Value for display +pub fn format_value(v: &Value) -> String { + match v { + Value::Num(n) => { + if *n == n.floor() && n.abs() < 1e15 { + format!("{}", *n as i64) } else { format!("{:.3}", n) } } - Some(Value::Str(s)) => { - if field == "content" { - let truncated: String = s.chars().take(80).collect(); - if s.len() > 80 { format!("{}...", truncated) } else { truncated } - } else { - s - } - } - Some(Value::Ident(s)) => s, - Some(Value::FnCall(_)) => "?".to_string(), - None => "-".to_string(), + Value::Str(s) => s.clone(), + Value::Ident(s) => s.clone(), + Value::FnCall(_) => "?".to_string(), } } + +/// Execute query and print formatted output. +pub fn run_query(store: &Store, graph: &Graph, query_str: &str) -> Result<(), String> { + let q = query_parser::query(query_str) + .map_err(|e| format!("Parse error: {}", e))?; + + let results = execute_parsed(store, graph, &q)?; + + // Count stage + if q.stages.iter().any(|s| matches!(s, Stage::Count)) { + println!("{}", results.len()); + return Ok(()); + } + + if results.is_empty() { + eprintln!("No results"); + return Ok(()); + } + + // Select stage + let fields: Option<&Vec<String>> = q.stages.iter().find_map(|s| match s { + Stage::Select(f) => Some(f), + _ => None, + }); + + if let Some(fields) = fields { + let mut header = vec!["key".to_string()]; + header.extend(fields.iter().cloned()); + println!("{}", header.join("\t")); + + for r in &results { + let mut row = vec![r.key.clone()]; + for f in fields { + row.push(match r.fields.get(f) { + Some(v) => format_value(v), + None => "-".to_string(), + }); + } + println!("{}", row.join("\t")); + } + } else { + for r in &results { + println!("{}", r.key); + } + } + + Ok(()) +} diff --git a/src/search.rs 
b/src/search.rs index e0a59df..ba70d1c 100644 --- a/src/search.rs +++ b/src/search.rs @@ -7,7 +7,6 @@ use crate::capnp_store::StoreView; use crate::graph::Graph; -use std::cmp::Ordering; use std::collections::{HashMap, HashSet, VecDeque}; pub struct SearchResult { @@ -57,7 +56,7 @@ fn spreading_activation( } let mut results: Vec<_> = activation.into_iter().collect(); - results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); + results.sort_by(|a, b| b.1.total_cmp(&a.1)); results } diff --git a/src/similarity.rs b/src/similarity.rs index 38a399c..01281a1 100644 --- a/src/similarity.rs +++ b/src/similarity.rs @@ -100,7 +100,7 @@ pub fn pairwise_similar( } } - results.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal)); + results.sort_by(|a, b| b.2.total_cmp(&a.2)); results }