diff --git a/poc-memory/src/main.rs b/poc-memory/src/main.rs index d3a8773..afb0882 100644 --- a/poc-memory/src/main.rs +++ b/poc-memory/src/main.rs @@ -143,6 +143,7 @@ PIPE STAGES: | limit N cap results | select F,F,... output fields as TSV | count just show count + | connectivity show graph structure between results FUNCTIONS: community('key') community id of a node @@ -151,11 +152,12 @@ FUNCTIONS: EXAMPLES: key ~ 'inner-life' substring match on keys content ~ 'made love' full-text search + content ~ 'made love' | connectivity find clusters among results + (content ~ 'A' OR content ~ 'B') | connectivity degree > 15 | sort degree | limit 10 high-degree nodes key ~ 'journal' AND degree > 10 | count count matching nodes neighbors('identity') WHERE strength > 0.5 | sort strength * | sort weight asc | limit 20 lowest-weight nodes - node_type = semantic | sort degree all semantic nodes by degree ")] Query { /// Query expression (e.g. "key ~ 'inner-life'") diff --git a/poc-memory/src/query.rs b/poc-memory/src/query.rs index dcb130f..9ff2a89 100644 --- a/poc-memory/src/query.rs +++ b/poc-memory/src/query.rs @@ -63,6 +63,7 @@ pub enum Stage { Limit(usize), Select(Vec), Count, + Connectivity, } #[derive(Debug, Clone)] @@ -88,6 +89,7 @@ peg::parser! 
{ / "limit" _ n:integer() { Stage::Limit(n) } / "select" _ f:field_list() { Stage::Select(f) } / "count" { Stage::Count } + / "connectivity" { Stage::Connectivity } rule asc_desc() -> bool = "asc" { true } @@ -420,6 +422,7 @@ fn execute_parsed( Stage::Limit(n) => { results.truncate(*n); } + Stage::Connectivity => {} // handled in output Stage::Select(_) | Stage::Count => {} // handled in output } } @@ -470,6 +473,12 @@ pub fn run_query(store: &Store, graph: &Graph, query_str: &str) -> Result<(), St return Ok(()); } + // Connectivity stage + if q.stages.iter().any(|s| matches!(s, Stage::Connectivity)) { + print_connectivity(&results, graph); + return Ok(()); + } + // Select stage let fields: Option<&Vec> = q.stages.iter().find_map(|s| match s { Stage::Select(f) => Some(f), @@ -499,3 +508,100 @@ pub fn run_query(store: &Store, graph: &Graph, query_str: &str) -> Result<(), St Ok(()) } + +// -- Connectivity analysis -- + +/// BFS shortest path between two nodes, max_hops limit. +fn bfs_path(graph: &Graph, from: &str, to: &str, max_hops: usize) -> Option<Vec<String>> { + use std::collections::{VecDeque, HashMap}; + + if from == to { return Some(vec![from.to_string()]); } + + let mut parent: HashMap<String, String> = HashMap::new(); + parent.insert(from.to_string(), String::new()); + let mut queue: VecDeque<(String, usize)> = VecDeque::new(); + queue.push_back((from.to_string(), 0)); + + while let Some((current, depth)) = queue.pop_front() { + if depth >= max_hops { continue; } + for (neighbor, _) in graph.neighbors(&current) { + if parent.contains_key(neighbor.as_str()) { continue; } + parent.insert(neighbor.clone(), current.clone()); + if neighbor == to { + let mut path = vec![to.to_string()]; + let mut node = to.to_string(); + while let Some(p) = parent.get(&node) { + if p.is_empty() { break; } + path.push(p.clone()); + node = p.clone(); + } + path.reverse(); + return Some(path); + } + queue.push_back((neighbor.clone(), depth + 1)); + } + } + None +} + +/// Find connected components among result nodes 
via BFS through the full graph. +fn find_components(keys: &[&str], graph: &Graph, max_hops: usize) -> Vec<Vec<String>> { + use std::collections::HashSet; + + let mut assigned: HashSet<&str> = HashSet::new(); + let mut components: Vec<Vec<String>> = Vec::new(); + + for &start in keys { + if assigned.contains(start) { continue; } + let mut component = vec![start.to_string()]; + assigned.insert(start); + + for &other in keys { + if assigned.contains(other) { continue; } + if bfs_path(graph, start, other, max_hops).is_some() { + component.push(other.to_string()); + assigned.insert(other); + } + } + components.push(component); + } + components +} + +/// Print connectivity report for query results. +fn print_connectivity(results: &[QueryResult], graph: &Graph) { + let max_hops = 4; + let keys: Vec<&str> = results.iter().map(|r| r.key.as_str()).collect(); + let components = find_components(&keys, graph, max_hops); + + println!("Connectivity: {} nodes, {} components (max {} hops)\n", + results.len(), components.len(), max_hops); + + let result_set: std::collections::HashSet<&str> = keys.iter().copied().collect(); + + for (i, component) in components.iter().enumerate() { + if component.len() == 1 { + println!(" island: {}", component[0]); + } else { + println!(" cluster {} ({} nodes):", i + 1, component.len()); + for node in component { + println!(" {} (degree {})", node, graph.degree(node)); + } + // Show a sample path between first two nodes + if component.len() >= 2 { + if let Some(path) = bfs_path(graph, &component[0], &component[1], max_hops) { + print!(" path: "); + for (j, step) in path.iter().enumerate() { + if j > 0 { print!(" → "); } + if result_set.contains(step.as_str()) { + print!("{}", step); + } else { + print!("[{}]", step); + } + } + println!(); + } + } + } + } +}