consciousness/poc-memory/src/query/parser.rs
Kent Overstreet 601a072cfd render: deduplicate footer links against inline references
Render now detects neighbor keys that already appear in the node's
content and omits them from the footer link list. Inline references
serve as the node's own navigation structure; the footer catches
only neighbors not mentioned in prose.

Also fixes PEG query parser to accept hyphens in field names
(content-len was rejected).

memory-instructions-core updated to v12: documents canonical inline
link format (→ `key`), adds note about normalizing references when
updating nodes, and guidance on splitting oversized nodes.

Content is never modified for display — render is round-trippable.
Agents can read rendered output and write it back without artifacts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 13:37:29 -04:00

640 lines
20 KiB
Rust

// query.rs — peg-based query language for the memory graph
//
// Grammar-driven: the peg definition IS the language spec.
// Evaluates against node properties, graph metrics, and edge attributes.
// Designed for ad-hoc exploration without memorizing 35+ subcommands.
//
// Syntax:
// expr | stage | stage ...
//
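// Stages (piped):
//   sort FIELD          sort descending (default for exploration)
//   sort FIELD asc      sort ascending
//   sort FIELD desc     sort descending, stated explicitly
//   limit N             cap results
//   select F,F,...      output specific fields as TSV
//   count               just show count
//   connectivity        print a connectivity report for the results
//   dominating-set      keep only the results chosen by the dominating-set transform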
//
// Examples:
// degree > 15 | sort degree | limit 10
// category = core | select degree,weight
// neighbors('identity') WHERE strength > 0.5 | sort strength
// key ~ 'journal.*' AND degree > 10 | count
// * | sort weight asc | limit 20
use crate::store::{NodeType, RelationType, Store};
use crate::graph::Graph;
use regex::Regex;
use std::collections::BTreeMap;
// -- AST types --
/// Filter expression produced by the `expr` rule of the query grammar.
#[derive(Clone, Debug)]
pub enum Expr {
    /// `*` — matches everything.
    All,
    /// `FIELD OP VALUE`, e.g. `degree > 15`.
    Comparison {
        field: String,
        op: CmpOp,
        value: Value,
    },
    /// `lhs AND rhs`.
    And(Box<Expr>, Box<Expr>),
    /// `lhs OR rhs`.
    Or(Box<Expr>, Box<Expr>),
    /// `NOT inner`.
    Not(Box<Expr>),
    /// `neighbors('key')`, optionally followed by `WHERE <expr>`.
    Neighbors {
        key: String,
        filter: Option<Box<Expr>>,
    },
}
/// A value on the right-hand side of a comparison, or a resolved field value.
#[derive(Clone, Debug)]
pub enum Value {
    /// Numeric literal or numeric field value.
    Num(f64),
    /// Single-quoted string literal or textual field value.
    Str(String),
    /// Bare (unquoted) word.
    Ident(String),
    /// Function call that still has to be evaluated against the graph.
    FnCall(FnCall),
}
/// Functions usable as comparison values; each takes a quoted node key.
#[derive(Clone, Debug)]
pub enum FnCall {
    /// `community('key')`
    Community(String),
    /// `degree('key')`
    Degree(String),
}
/// Comparison operators accepted by the grammar.
#[derive(Clone, Copy, Debug)]
pub enum CmpOp {
    /// `>`
    Gt,
    /// `<`
    Lt,
    /// `>=`
    Ge,
    /// `<=`
    Le,
    /// `=`
    Eq,
    /// `!=`
    Ne,
    /// `~` (regex match)
    Match,
}
/// One pipeline stage following the filter expression (`expr | stage | ...`).
#[derive(Clone, Debug)]
pub enum Stage {
    /// `sort FIELD [asc|desc]`
    Sort { field: String, ascending: bool },
    /// `limit N`
    Limit(usize),
    /// `select F,F,...`
    Select(Vec<String>),
    /// `count`
    Count,
    /// `connectivity`
    Connectivity,
    /// `dominating-set`
    DominatingSet,
}
/// A fully parsed query: the filter expression plus its piped stages, in
/// the order they were written.
#[derive(Clone, Debug)]
pub struct Query {
    /// Filter that selects the initial result set.
    pub expr: Expr,
    /// Stages applied after filtering.
    pub stages: Vec<Stage>,
}
// -- PEG grammar --
peg::parser! {
    pub grammar query_parser() for str {
        // Optional horizontal whitespace (spaces and tabs only).
        rule _() = [' ' | '\t']*

        // Entry point: a filter expression followed by zero or more piped
        // stages. Whitespace before and after the whole query is tolerated.
        pub rule query() -> Query
            = _ e:expr() s:stages() _ { Query { expr: e, stages: s } }

        rule stages() -> Vec<Stage>
            = s:(_ "|" _ s:stage() { s })* { s }

        rule stage() -> Stage
            = "sort" _ f:field() _ a:asc_desc() { Stage::Sort { field: f, ascending: a } }
            / "limit" _ n:integer() { Stage::Limit(n) }
            / "select" _ f:field_list() { Stage::Select(f) }
            / "count" { Stage::Count }
            / "connectivity" { Stage::Connectivity }
            / "dominating-set" { Stage::DominatingSet }

        // Yields `true` for ascending order.
        rule asc_desc() -> bool
            = "asc" { true }
            / "desc" { false }
            / { false } // default: descending

        rule field_list() -> Vec<String>
            = f:field() fs:(_ "," _ f:field() { f })* {
                let mut v = vec![f];
                v.extend(fs);
                v
            }

        // A digit run can exceed usize; report that as a parse failure
        // instead of panicking on user input.
        rule integer() -> usize
            = n:$(['0'..='9']+) {? n.parse::<usize>().map_err(|_| "integer in usize range") }

        // Precedence climbs from loosest (OR) to tightest (atoms).
        pub rule expr() -> Expr = precedence! {
            a:(@) _ "OR" _ b:@ { Expr::Or(Box::new(a), Box::new(b)) }
            --
            a:(@) _ "AND" _ b:@ { Expr::And(Box::new(a), Box::new(b)) }
            --
            "NOT" _ e:@ { Expr::Not(Box::new(e)) }
            --
            "neighbors" _ "(" _ k:string() _ ")" _ w:where_clause()? {
                Expr::Neighbors { key: k, filter: w.map(Box::new) }
            }
            f:field() _ op:cmp_op() _ v:value() {
                Expr::Comparison { field: f, op, value: v }
            }
            "*" { Expr::All }
            "(" _ e:expr() _ ")" { e }
        }

        rule where_clause() -> Expr
            = "WHERE" _ e:expr() { e }

        // Field names: a letter or underscore, then letters, digits,
        // underscores or hyphens (e.g. `content-len`).
        rule field() -> String
            = s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-']*) {
                s.to_string()
            }

        // Two-character operators are listed first so ">=" is not read as ">".
        rule cmp_op() -> CmpOp
            = ">=" { CmpOp::Ge }
            / "<=" { CmpOp::Le }
            / "!=" { CmpOp::Ne }
            / ">" { CmpOp::Gt }
            / "<" { CmpOp::Lt }
            / "=" { CmpOp::Eq }
            / "~" { CmpOp::Match }

        rule value() -> Value
            = f:fn_call() { Value::FnCall(f) }
            / n:number() { Value::Num(n) }
            / s:string() { Value::Str(s) }
            / i:ident() { Value::Ident(i) }

        rule fn_call() -> FnCall
            = "community" _ "(" _ k:string() _ ")" { FnCall::Community(k) }
            / "degree" _ "(" _ k:string() _ ")" { FnCall::Degree(k) }

        // Unsigned decimal with an optional fraction.
        rule number() -> f64
            = n:$(['0'..='9']+ ("." ['0'..='9']+)?) {? n.parse::<f64>().map_err(|_| "number") }

        // Single-quoted string; there is no escape syntax for an embedded quote.
        rule string() -> String
            = "'" s:$([^ '\'']*) "'" { s.to_string() }

        // Bare word: like a field name, but dots are also allowed after the
        // first character.
        rule ident() -> String
            = s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '.']*) {
                s.to_string()
            }
    }
}
// -- Field resolution --
/// Look up `field` for the node stored under `key`.
///
/// Returns `None` when the node is not in the store, when the field name is
/// not recognised, or when the field has no value for this node. `category`
/// is still accepted for query compatibility but never yields a value.
fn resolve_field(field: &str, key: &str, store: &Store, graph: &Graph) -> Option<Value> {
    let node = store.nodes.get(key)?;

    // Small wrappers so each arm reads as "field => kind(value)".
    let num = |n: f64| Some(Value::Num(n));
    let text = |s: String| Some(Value::Str(s));

    match field {
        // Textual fields.
        "key" => text(key.to_string()),
        "node_type" => text(node_type_label(node.node_type).to_string()),
        "provenance" => text(node.provenance.clone()),
        "content" => text(node.content.clone()),

        // Numeric fields stored on the node.
        "weight" => num(node.weight as f64),
        "emotion" => num(node.emotion as f64),
        "retrievals" => num(node.retrievals as f64),
        "uses" => num(node.uses as f64),
        "wrongs" => num(node.wrongs as f64),
        "created" => num(node.created_at as f64),
        "timestamp" => num(node.timestamp as f64),

        // Graph-derived metrics.
        "degree" => num(graph.degree(key) as f64),
        "community_id" => graph.communities().get(key).map(|&c| Value::Num(c as f64)),
        "clustering_coefficient" | "schema_fit" | "cc" => {
            num(graph.clustering_coefficient(key) as f64)
        }

        // Vestigial, kept for query compat.
        "category" => None,
        _ => None,
    }
}
/// Query-language spelling of a node type, as exposed through the
/// `node_type` field.
fn node_type_label(nt: NodeType) -> &'static str {
    let label = match nt {
        NodeType::Semantic => "semantic",
        NodeType::EpisodicSession => "episodic_session",
        NodeType::EpisodicDaily => "episodic_daily",
        NodeType::EpisodicWeekly => "episodic_weekly",
        NodeType::EpisodicMonthly => "episodic_monthly",
    };
    label
}
/// Query-language spelling of a relation type, as exposed through the
/// `rel_type` edge field.
fn rel_type_label(r: RelationType) -> &'static str {
    let label = match r {
        RelationType::Auto => "auto",
        RelationType::Causal => "causal",
        RelationType::Link => "link",
    };
    label
}
// -- Comparison logic --
/// Numeric view of a value: numbers as-is, strings and bare words if they
/// parse as `f64`, and nothing for unevaluated function calls.
fn as_num(v: &Value) -> Option<f64> {
    match v {
        Value::Num(n) => Some(*n),
        Value::Str(text) | Value::Ident(text) => text.parse().ok(),
        Value::FnCall(_) => None,
    }
}
/// Textual view of a value. Numbers use their default `Display` form;
/// unevaluated function calls become the empty string.
fn as_str(v: &Value) -> String {
    match v {
        Value::Num(n) => n.to_string(),
        Value::Str(text) => text.clone(),
        Value::Ident(text) => text.clone(),
        Value::FnCall(_) => String::new(),
    }
}
/// Apply `op` to two values.
///
/// `~` treats the right-hand side as a regex and tests the left-hand side's
/// text against it; a pattern that fails to compile matches nothing. All
/// other operators compare numerically when both sides have a numeric view
/// and fall back to comparing their text otherwise.
fn compare(lhs: &Value, op: CmpOp, rhs: &Value) -> bool {
    if matches!(op, CmpOp::Match) {
        return match Regex::new(&as_str(rhs)) {
            Ok(pattern) => pattern.is_match(&as_str(lhs)),
            Err(_) => false,
        };
    }

    let ordering = if let (Some(left), Some(right)) = (as_num(lhs), as_num(rhs)) {
        left.total_cmp(&right)
    } else {
        as_str(lhs).cmp(&as_str(rhs))
    };

    match op {
        CmpOp::Eq => ordering.is_eq(),
        CmpOp::Ne => ordering.is_ne(),
        CmpOp::Gt => ordering.is_gt(),
        CmpOp::Lt => ordering.is_lt(),
        CmpOp::Ge => ordering.is_ge(),
        CmpOp::Le => ordering.is_le(),
        // Handled by the early return above.
        CmpOp::Match => unreachable!(),
    }
}
// -- Evaluator --
/// Evaluate a query function against the store and graph.
///
/// The key argument is first passed through `store.resolve_key`; if that
/// fails the literal text is used. `community(...)` yields NaN when the key
/// has no community entry.
fn resolve_fn(f: &FnCall, store: &Store, graph: &Graph) -> Value {
    let canonical = |raw: &String| store.resolve_key(raw).unwrap_or_else(|_| raw.clone());

    match f {
        FnCall::Degree(key) => {
            let node_key = canonical(key);
            Value::Num(graph.degree(&node_key) as f64)
        }
        FnCall::Community(key) => {
            let node_key = canonical(key);
            match graph.communities().get(&node_key) {
                Some(&id) => Value::Num(id as f64),
                None => Value::Num(f64::NAN),
            }
        }
    }
}
/// Turn a parsed value into a concrete one: function calls are evaluated,
/// everything else is cloned unchanged.
fn resolve_value(v: &Value, store: &Store, graph: &Graph) -> Value {
    if let Value::FnCall(call) = v {
        resolve_fn(call, store, graph)
    } else {
        v.clone()
    }
}
/// Decide whether `expr` holds for one candidate.
///
/// `resolve` supplies field values for the candidate, so the same evaluator
/// serves both node queries and edge filters. A comparison on a field the
/// resolver cannot supply is false. A `neighbors(...)` expression nested
/// inside another expression always evaluates to false here.
fn eval(
    expr: &Expr,
    resolve: &dyn Fn(&str) -> Option<Value>,
    store: &Store,
    graph: &Graph,
) -> bool {
    match expr {
        Expr::All => true,
        Expr::Neighbors { .. } => false,
        Expr::Not(inner) => !eval(inner, resolve, store, graph),
        Expr::And(left, right) => {
            if !eval(left, resolve, store, graph) {
                return false;
            }
            eval(right, resolve, store, graph)
        }
        Expr::Or(left, right) => {
            if eval(left, resolve, store, graph) {
                return true;
            }
            eval(right, resolve, store, graph)
        }
        Expr::Comparison { field, op, value } => match resolve(field) {
            None => false,
            Some(lhs) => {
                let rhs = resolve_value(value, store, graph);
                compare(&lhs, *op, &rhs)
            }
        },
    }
}
// -- Query result --
/// One row of query output.
pub struct QueryResult {
    /// Key of the matched node (for neighbor queries, the edge's target).
    pub key: String,
    /// Field values attached to this row: the fields named by `select` and
    /// `sort` stages, plus `strength` and `rel_type` for neighbor queries.
    pub fields: BTreeMap<String, Value>,
}
// -- Query executor --
/// Parse `query_str` and run it against the store and graph.
///
/// # Errors
/// Returns `"Parse error: ..."` if the text is not a valid query, or
/// whatever error execution reports.
pub fn execute_query(
    store: &Store,
    graph: &Graph,
    query_str: &str,
) -> Result<Vec<QueryResult>, String> {
    query_parser::query(query_str)
        .map_err(|e| format!("Parse error: {}", e))
        .and_then(|parsed| execute_parsed(store, graph, &parsed))
}
/// Run an already-parsed query and return its rows.
///
/// Steps:
/// 1. Build the initial result set: either the edges of one node
///    (`neighbors(...)`, optionally filtered) or every non-deleted node that
///    satisfies the expression.
/// 2. Resolve the fields that `select`/`sort` stages refer to, once per row.
/// 3. Apply the stages in the order written.
///
/// When the query has no `sort` stage, rows are ordered by degree,
/// descending. That default ordering is applied *before* the first `limit`,
/// so `... | limit N` keeps the N highest-degree rows rather than an
/// arbitrary N that are sorted afterwards.
fn execute_parsed(
    store: &Store,
    graph: &Graph,
    q: &Query,
) -> Result<Vec<QueryResult>, String> {
    let mut results = match &q.expr {
        Expr::Neighbors { key, filter } => {
            let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone());
            let edges = graph.edges_of(&resolved);
            let mut out = Vec::new();
            for edge in edges {
                let include = match filter {
                    Some(f) => {
                        let strength = edge.strength;
                        let rt = edge.rel_type;
                        let target = &edge.target;
                        // Edge attributes take priority; any other field is
                        // resolved against the edge's target node.
                        eval(f, &|field| match field {
                            "strength" => Some(Value::Num(strength as f64)),
                            "rel_type" => Some(Value::Str(rel_type_label(rt).to_string())),
                            _ => resolve_field(field, target, store, graph),
                        }, store, graph)
                    }
                    None => true,
                };
                if include {
                    let mut fields = BTreeMap::new();
                    fields.insert("strength".into(), Value::Num(edge.strength as f64));
                    fields.insert("rel_type".into(),
                        Value::Str(rel_type_label(edge.rel_type).to_string()));
                    out.push(QueryResult { key: edge.target.clone(), fields });
                }
            }
            out
        }
        _ => {
            let mut out = Vec::new();
            for key in store.nodes.keys() {
                if store.nodes[key].deleted { continue; }
                if eval(&q.expr, &|f| resolve_field(f, key, store, graph), store, graph) {
                    out.push(QueryResult { key: key.clone(), fields: BTreeMap::new() });
                }
            }
            out
        }
    };

    // Collect fields needed by select/sort stages and resolve them once.
    let needed: Vec<String> = {
        let mut set = Vec::new();
        for stage in &q.stages {
            match stage {
                Stage::Select(fields) => {
                    for f in fields {
                        if !set.contains(f) { set.push(f.clone()); }
                    }
                }
                Stage::Sort { field, .. } => {
                    if !set.contains(field) { set.push(field.clone()); }
                }
                _ => {}
            }
        }
        set
    };
    for r in &mut results {
        for f in &needed {
            // Neighbor rows already carry strength/rel_type; don't overwrite.
            if !r.fields.contains_key(f) {
                if let Some(v) = resolve_field(f, &r.key, store, graph) {
                    r.fields.insert(f.clone(), v);
                }
            }
        }
    }

    // Default ordering: degree, highest first (stable sort).
    let sort_by_degree_desc = |rows: &mut Vec<QueryResult>| {
        rows.sort_by(|a, b| {
            let da = graph.degree(&a.key);
            let db = graph.degree(&b.key);
            db.cmp(&da)
        });
    };
    let has_sort = q.stages.iter().any(|s| matches!(s, Stage::Sort { .. }));
    let mut default_sorted = false;

    // Apply pipeline stages.
    for stage in &q.stages {
        match stage {
            Stage::Sort { field, ascending } => {
                let asc = *ascending;
                results.sort_by(|a, b| {
                    let va = a.fields.get(field).and_then(as_num);
                    let vb = b.fields.get(field).and_then(as_num);
                    // Numeric when both sides are numeric, textual otherwise;
                    // a missing field sorts as the empty string.
                    let ord = match (va, vb) {
                        (Some(a), Some(b)) => a.total_cmp(&b),
                        _ => {
                            let sa = a.fields.get(field).map(as_str).unwrap_or_default();
                            let sb = b.fields.get(field).map(as_str).unwrap_or_default();
                            sa.cmp(&sb)
                        }
                    };
                    if asc { ord } else { ord.reverse() }
                });
            }
            Stage::Limit(n) => {
                // With no explicit sort anywhere in the pipeline, order by
                // degree first so the cut keeps the top rows.
                if !has_sort && !default_sorted {
                    sort_by_degree_desc(&mut results);
                    default_sorted = true;
                }
                results.truncate(*n);
            }
            Stage::Connectivity => {} // handled in output
            Stage::Select(_) | Stage::Count => {} // handled in output
            Stage::DominatingSet => {
                let mut items: Vec<(String, f64)> = results.iter()
                    .map(|r| (r.key.clone(), graph.degree(&r.key) as f64))
                    .collect();
                let xform = super::engine::Transform::DominatingSet;
                items = super::engine::run_transform(&xform, items, store, &graph);
                let keep: std::collections::HashSet<String> = items.into_iter().map(|(k, _)| k).collect();
                results.retain(|r| keep.contains(&r.key));
            }
        }
    }

    // Default sort by degree desc if no explicit sort and not already applied.
    if !has_sort && !default_sorted {
        sort_by_degree_desc(&mut results);
    }
    Ok(results)
}
/// Render a value for tabular output.
///
/// Whole numbers below 1e15 in magnitude print without a fractional part;
/// other numbers print with three decimals. Unevaluated function calls
/// print as `?`.
pub fn format_value(v: &Value) -> String {
    match v {
        Value::Str(text) | Value::Ident(text) => text.clone(),
        Value::FnCall(_) => "?".to_string(),
        Value::Num(n) => {
            let is_whole = *n == n.floor() && n.abs() < 1e15;
            if is_whole {
                format!("{}", *n as i64)
            } else {
                format!("{:.3}", n)
            }
        }
    }
}
/// Execute query and print formatted output.
pub fn run_query(store: &Store, graph: &Graph, query_str: &str) -> Result<(), String> {
let q = query_parser::query(query_str)
.map_err(|e| format!("Parse error: {}", e))?;
let results = execute_parsed(store, graph, &q)?;
// Count stage
if q.stages.iter().any(|s| matches!(s, Stage::Count)) {
println!("{}", results.len());
return Ok(());
}
if results.is_empty() {
eprintln!("No results");
return Ok(());
}
// Connectivity stage
if q.stages.iter().any(|s| matches!(s, Stage::Connectivity)) {
print_connectivity(&results, graph);
return Ok(());
}
// Select stage
let fields: Option<&Vec<String>> = q.stages.iter().find_map(|s| match s {
Stage::Select(f) => Some(f),
_ => None,
});
if let Some(fields) = fields {
let mut header = vec!["key".to_string()];
header.extend(fields.iter().cloned());
println!("{}", header.join("\t"));
for r in &results {
let mut row = vec![r.key.clone()];
for f in fields {
row.push(match r.fields.get(f) {
Some(v) => format_value(v),
None => "-".to_string(),
});
}
println!("{}", row.join("\t"));
}
} else {
for r in &results {
println!("{}", r.key);
}
}
Ok(())
}
// -- Connectivity analysis --
/// Breadth-first search for a path from `from` to `to` using at most
/// `max_hops` edges.
///
/// Returns the node keys along the path, endpoints included, or `None` if
/// `to` is not reached within the hop limit. `from == to` yields a
/// single-element path.
fn bfs_path(graph: &Graph, from: &str, to: &str, max_hops: usize) -> Option<Vec<String>> {
    use std::collections::{HashMap, VecDeque};

    if from == to {
        return Some(vec![from.to_string()]);
    }

    // Predecessor map; also serves as the visited set. The start node maps
    // to the empty string, which marks the end of the walk back.
    let mut came_from: HashMap<String, String> = HashMap::new();
    came_from.insert(from.to_string(), String::new());

    let mut frontier: VecDeque<(String, usize)> = VecDeque::new();
    frontier.push_back((from.to_string(), 0));

    while let Some((here, hops)) = frontier.pop_front() {
        // Nodes already at the hop limit are not expanded further.
        if hops >= max_hops {
            continue;
        }
        for (next, _) in graph.neighbors(&here) {
            if came_from.contains_key(next.as_str()) {
                continue;
            }
            came_from.insert(next.clone(), here.clone());

            if next != to {
                frontier.push_back((next.clone(), hops + 1));
                continue;
            }

            // Target reached: follow predecessors back to the start.
            let mut path = vec![to.to_string()];
            let mut cursor = to.to_string();
            while let Some(prev) = came_from.get(&cursor) {
                if prev.is_empty() {
                    break;
                }
                path.push(prev.clone());
                cursor = prev.clone();
            }
            path.reverse();
            return Some(path);
        }
    }
    None
}
/// Group result keys by reachability through the full graph.
///
/// Greedy: each not-yet-grouped key in turn becomes a seed, and every other
/// ungrouped key that `bfs_path` can reach from that seed within `max_hops`
/// joins its group. Groups are returned in seed order, seed first.
fn find_components(keys: &[&str], graph: &Graph, max_hops: usize) -> Vec<Vec<String>> {
    use std::collections::HashSet;

    let mut claimed: HashSet<&str> = HashSet::new();
    let mut groups: Vec<Vec<String>> = Vec::new();

    for &seed in keys {
        // `insert` returns false when the key already belongs to a group.
        if !claimed.insert(seed) {
            continue;
        }
        let mut members = vec![seed.to_string()];
        for &candidate in keys {
            if !claimed.contains(candidate)
                && bfs_path(graph, seed, candidate, max_hops).is_some()
            {
                members.push(candidate.to_string());
                claimed.insert(candidate);
            }
        }
        groups.push(members);
    }
    groups
}
/// Print a connectivity report for query results.
///
/// Results are grouped with `find_components` using a 4-hop limit. Groups of
/// one are reported as islands; larger groups are listed with each member's
/// degree and a sample path between their first two members. Path steps
/// that are not part of the result set are shown in `[brackets]`, and steps
/// are separated by " → ".
///
/// If there are islands and at least one multi-node group, a `link-add`
/// command is suggested for each island, targeting the highest-degree node
/// of the largest group.
fn print_connectivity(results: &[QueryResult], graph: &Graph) {
    let max_hops = 4;
    let keys: Vec<&str> = results.iter().map(|r| r.key.as_str()).collect();
    let components = find_components(&keys, graph, max_hops);
    println!("Connectivity: {} nodes, {} components (max {} hops)\n",
        results.len(), components.len(), max_hops);

    let result_set: std::collections::HashSet<&str> = keys.iter().copied().collect();

    // Find the largest cluster to use as link-add target for islands;
    // pick its highest-degree node as the hub.
    let largest_cluster = components.iter()
        .max_by_key(|c| c.len())
        .filter(|c| c.len() > 1)
        .and_then(|c| c.iter().max_by_key(|k| graph.degree(k)).cloned());

    let mut islands: Vec<&str> = Vec::new();
    for (i, component) in components.iter().enumerate() {
        match component.as_slice() {
            [only] => {
                println!(" island: {}", only);
                islands.push(only.as_str());
            }
            members => {
                println!(" cluster {} ({} nodes):", i + 1, members.len());
                for node in members {
                    println!(" {} (degree {})", node, graph.degree(node));
                }
                // Show a sample path between the first two nodes.
                if let [first, second, ..] = members {
                    if let Some(path) = bfs_path(graph, first, second, max_hops) {
                        let steps: Vec<String> = path.iter()
                            .map(|step| {
                                if result_set.contains(step.as_str()) {
                                    step.clone()
                                } else {
                                    // Intermediate node outside the result set.
                                    format!("[{}]", step)
                                }
                            })
                            .collect();
                        println!(" path: {}", steps.join(" → "));
                    }
                }
            }
        }
    }

    // Suggest link-add commands for islands.
    if !islands.is_empty() {
        if let Some(ref hub) = largest_cluster {
            println!("\nFix islands:");
            for island in &islands {
                println!(" poc-memory graph link-add {} {}", island, hub);
            }
        }
    }
}