From a36449032ce7317ea0a7e09285c5a7cfe9ee728d Mon Sep 17 00:00:00 2001 From: ProofOfConcept Date: Tue, 3 Mar 2026 10:55:30 -0500 Subject: [PATCH] query: peg-based query language for ad-hoc graph exploration poc-memory query "degree > 15" poc-memory query "key ~ 'journal.*' AND degree > 10" poc-memory query "neighbors('identity.md') WHERE strength > 0.5" poc-memory query "community_id = community('identity.md')" --fields degree,category Grammar-driven: the peg definition IS the language spec. Supports boolean logic (AND/OR/NOT), numeric and string comparison, regex match (~), graph traversal (neighbors() with WHERE), and function calls (community(), degree()). Output flags: --fields, --sort, --limit, --count. New dependency: peg 0.8 (~68KB, 2 tiny deps). --- Cargo.lock | 29 ++++ Cargo.toml | 2 + src/graph.rs | 7 + src/main.rs | 83 +++++++++- src/query.rs | 424 +++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 544 insertions(+), 1 deletion(-) create mode 100644 src/query.rs diff --git a/Cargo.lock b/Cargo.lock index daabe75..76f950b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -932,6 +932,33 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "peg" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9928cfca101b36ec5163e70049ee5368a8a1c3c6efc9ca9c5f9cc2f816152477" +dependencies = [ + "peg-macros", + "peg-runtime", +] + +[[package]] +name = "peg-macros" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6298ab04c202fa5b5d52ba03269fb7b74550b150323038878fe6c372d8280f71" +dependencies = [ + "peg-runtime", + "proc-macro2", + "quote", +] + +[[package]] +name = "peg-runtime" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "132dca9b868d927b35b5dd728167b2dee150eb1ad686008fc71ccb298b776fca" + [[package]] name = "pest" version = "2.8.6" @@ -991,6 +1018,8 @@ dependencies = [ "faer", "libc", "memmap2", + "peg", + "rayon", "regex", "rkyv", "serde", diff --git a/Cargo.toml b/Cargo.toml index b3e51f3..968c02d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,8 @@ libc = "0.2" faer = "0.24.0" rkyv = { version = "0.7", features = ["validation", "std"] } memmap2 = "0.9" +rayon = "1" +peg = "0.8" [build-dependencies] capnpc = "0.20" diff --git a/src/graph.rs b/src/graph.rs index e9447f2..67dcf5a 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -43,6 +43,13 @@ impl Graph { self.adj.values().map(|e| e.len()).sum::() / 2 } + /// All edges for a node (full Edge data including rel_type) + pub fn edges_of(&self, key: &str) -> &[Edge] { + self.adj.get(key) + .map(|v| v.as_slice()) + .unwrap_or(&[]) + } + /// All neighbor keys with strengths pub fn neighbors(&self, key: &str) -> Vec<(&String, f32)> { self.adj.get(key) diff --git a/src/main.rs b/src/main.rs index 348747d..90b4c25 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,6 +20,7 @@ mod search; mod similarity; mod migrate; mod neuro; +mod query; mod spectral; pub mod memory_capnp { @@ -118,6 +119,7 @@ fn main() { "export" => cmd_export(&args[2..]), "journal-write" => cmd_journal_write(&args[2..]), "journal-tail" => cmd_journal_tail(&args[2..]), + "query" => cmd_query(&args[2..]), _ => { eprintln!("Unknown command: {}", args[1]); usage(); @@ -192,7 +194,11 @@ Commands: import FILE [FILE...] Import markdown file(s) into the store export [FILE|--all] Export store nodes to markdown file(s) journal-write TEXT Write a journal entry to the store - journal-tail [N] [--full] Show last N journal entries (default 20, --full for content)"); + journal-tail [N] [--full] Show last N journal entries (default 20, --full for content) + query EXPR [--fields F] [--sort F] [--limit N] [--count] + Query the memory graph with expressions + Examples: \"degree > 15\", \"key ~ 'journal.*'\", + \"neighbors('identity.md') WHERE strength > 0.5\""); } fn cmd_search(args: &[String]) -> Result<(), String> { @@ -1615,3 +1621,78 @@ fn cmd_interference(args: &[String]) -> Result<(), String> { } Ok(()) } + +fn cmd_query(args: &[String]) -> Result<(), String> { + if args.is_empty() { + return Err("Usage: poc-memory query EXPR [--fields F,F,...] [--sort F] [--limit N] [--count]".into()); + } + + // Parse flags — query string is the first non-flag arg + let mut opts = query::QueryOpts::default(); + let mut query_str = None; + let mut i = 0; + while i < args.len() { + match args[i].as_str() { + "--fields" if i + 1 < args.len() => { + opts.fields = args[i + 1].split(',').map(|s| s.trim().to_string()).collect(); + i += 2; + } + "--sort" if i + 1 < args.len() => { + opts.sort_field = Some(args[i + 1].clone()); + i += 2; + } + "--limit" if i + 1 < args.len() => { + opts.limit = Some(args[i + 1].parse().map_err(|_| "invalid --limit")?); + i += 2; + } + "--count" => { + opts.count_only = true; + i += 1; + } + _ if query_str.is_none() => { + query_str = Some(args[i].clone()); + i += 1; + } + _ => { + return Err(format!("unexpected argument: {}", args[i])); + } + } + } + + let query_str = query_str.ok_or("missing query expression")?; + let store = capnp_store::Store::load()?; + let graph = store.build_graph(); + + let results = query::execute_query(&store, &graph, &query_str, &opts)?; + + if opts.count_only { + println!("{}", results.len()); + return Ok(()); + } + + if results.is_empty() { + eprintln!("No results"); + return Ok(()); + } + + // If --fields specified, show as TSV with header + if !opts.fields.is_empty() { + let mut header = vec!["key".to_string()]; + header.extend(opts.fields.iter().cloned()); + println!("{}", header.join("\t")); + + for r in &results { + let mut row = vec![r.key.clone()]; + for f in &opts.fields { + row.push(query::format_field(f, &r.key, &store, &graph)); + } + println!("{}", row.join("\t")); + } + } else { + for r in &results { + println!("{}", r.key); + } + } + + Ok(()) +} diff --git a/src/query.rs b/src/query.rs new file mode 100644 index 0000000..c3c170c --- /dev/null +++ b/src/query.rs @@ -0,0 +1,424 @@ +// query.rs — peg-based query language for the memory graph +// +// Grammar-driven: the peg definition IS the language spec. +// Evaluates against node properties, graph metrics, and edge attributes. +// Designed for ad-hoc exploration without memorizing 35+ subcommands. + +use crate::capnp_store::{NodeType, Provenance, RelationType, Store}; +use crate::graph::Graph; +use regex::Regex; + +// -- AST types -- + +#[derive(Debug, Clone)] +pub enum Expr { + Comparison { field: String, op: CmpOp, value: Value }, + And(Box, Box), + Or(Box, Box), + Not(Box), + Neighbors { key: String, filter: Option> }, +} + +#[derive(Debug, Clone)] +pub enum Value { + Num(f64), + Str(String), + Ident(String), + FnCall(FnCall), +} + +#[derive(Debug, Clone)] +pub enum FnCall { + Community(String), + Degree(String), +} + +#[derive(Debug, Clone, Copy)] +pub enum CmpOp { + Gt, Lt, Ge, Le, Eq, Ne, Match, +} + +// -- PEG grammar -- + +peg::parser! { + pub grammar query_parser() for str { + rule _() = [' ' | '\t']* + + pub rule expr() -> Expr = precedence! { + a:(@) _ "OR" _ b:@ { Expr::Or(Box::new(a), Box::new(b)) } + -- + a:(@) _ "AND" _ b:@ { Expr::And(Box::new(a), Box::new(b)) } + -- + "NOT" _ e:@ { Expr::Not(Box::new(e)) } + -- + "neighbors" _ "(" _ k:string() _ ")" _ w:where_clause()? { + Expr::Neighbors { key: k, filter: w.map(Box::new) } + } + f:field() _ op:cmp_op() _ v:value() { + Expr::Comparison { field: f, op, value: v } + } + "(" _ e:expr() _ ")" { e } + } + + rule where_clause() -> Expr + = "WHERE" _ e:expr() { e } + + rule field() -> String + = s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_']*) { + s.to_string() + } + + rule cmp_op() -> CmpOp + = ">=" { CmpOp::Ge } + / "<=" { CmpOp::Le } + / "!=" { CmpOp::Ne } + / ">" { CmpOp::Gt } + / "<" { CmpOp::Lt } + / "=" { CmpOp::Eq } + / "~" { CmpOp::Match } + + rule value() -> Value + = f:fn_call() { Value::FnCall(f) } + / n:number() { Value::Num(n) } + / s:string() { Value::Str(s) } + / i:ident() { Value::Ident(i) } + + rule fn_call() -> FnCall + = "community" _ "(" _ k:string() _ ")" { FnCall::Community(k) } + / "degree" _ "(" _ k:string() _ ")" { FnCall::Degree(k) } + + rule number() -> f64 + = n:$(['0'..='9']+ ("." ['0'..='9']+)?) { + n.parse().unwrap() + } + + rule string() -> String + = "'" s:$([^ '\'']*) "'" { s.to_string() } + + rule ident() -> String + = s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '.']*) { + s.to_string() + } + } +} + +// -- Field resolution -- + +/// Resolve a field value from a node + graph context, returning a comparable Value. +fn resolve_field(field: &str, key: &str, store: &Store, graph: &Graph) -> Option { + let node = store.nodes.get(key)?; + match field { + "key" => Some(Value::Str(key.to_string())), + "weight" => Some(Value::Num(node.weight as f64)), + "category" => Some(Value::Str(node.category.label().to_string())), + "node_type" => Some(Value::Str(node_type_label(node.node_type).to_string())), + "provenance" => Some(Value::Str(provenance_label(node.provenance).to_string())), + "emotion" => Some(Value::Num(node.emotion as f64)), + "retrievals" => Some(Value::Num(node.retrievals as f64)), + "uses" => Some(Value::Num(node.uses as f64)), + "wrongs" => Some(Value::Num(node.wrongs as f64)), + "created" => Some(Value::Str(node.created.clone())), + "content" => Some(Value::Str(node.content.clone())), + "degree" => Some(Value::Num(graph.degree(key) as f64)), + "community_id" => { + graph.communities().get(key).map(|&c| Value::Num(c as f64)) + } + "clustering_coefficient" => { + Some(Value::Num(graph.clustering_coefficient(key) as f64)) + } + "schema_fit" => { + node.schema_fit.map(|f| Value::Num(f as f64)) + } + _ => None, + } +} + +fn node_type_label(nt: NodeType) -> &'static str { + match nt { + NodeType::EpisodicSession => "episodic_session", + NodeType::EpisodicDaily => "episodic_daily", + NodeType::EpisodicWeekly => "episodic_weekly", + NodeType::Semantic => "semantic", + } +} + +fn provenance_label(p: Provenance) -> &'static str { + match p { + Provenance::Manual => "manual", + Provenance::Journal => "journal", + Provenance::Agent => "agent", + Provenance::Dream => "dream", + Provenance::Derived => "derived", + } +} + +fn rel_type_label(r: RelationType) -> &'static str { + match r { + RelationType::Link => "link", + RelationType::Causal => "causal", + RelationType::Auto => "auto", + } +} + +// -- Comparison logic -- + +/// Extract numeric value for comparison +fn as_num(v: &Value) -> Option { + match v { + Value::Num(n) => Some(*n), + Value::Str(s) => s.parse().ok(), + Value::Ident(s) => s.parse().ok(), + Value::FnCall(_) => None, + } +} + +/// Extract string value for comparison +fn as_str(v: &Value) -> String { + match v { + Value::Str(s) | Value::Ident(s) => s.clone(), + Value::Num(n) => format!("{}", n), + Value::FnCall(_) => String::new(), + } +} + +/// Compare two values with the given operator +fn compare(lhs: &Value, op: CmpOp, rhs: &Value) -> bool { + match op { + CmpOp::Match => { + let text = as_str(lhs); + let pat = as_str(rhs); + match Regex::new(&pat) { + Ok(re) => re.is_match(&text), + Err(_) => false, + } + } + CmpOp::Eq => { + // Try numeric first, fall back to string + if let (Some(a), Some(b)) = (as_num(lhs), as_num(rhs)) { + a == b + } else { + as_str(lhs) == as_str(rhs) + } + } + CmpOp::Ne => { + if let (Some(a), Some(b)) = (as_num(lhs), as_num(rhs)) { + a != b + } else { + as_str(lhs) != as_str(rhs) + } + } + CmpOp::Gt | CmpOp::Lt | CmpOp::Ge | CmpOp::Le => { + let a = as_num(lhs).unwrap_or(f64::NAN); + let b = as_num(rhs).unwrap_or(f64::NAN); + match op { + CmpOp::Gt => a > b, + CmpOp::Lt => a < b, + CmpOp::Ge => a >= b, + CmpOp::Le => a <= b, + _ => unreachable!(), + } + } + } +} + +// -- Evaluator -- + +/// Resolve function calls that return values (community ID, degree of specific node) +fn resolve_fn(f: &FnCall, store: &Store, graph: &Graph) -> Value { + match f { + FnCall::Community(key) => { + let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone()); + graph.communities().get(&resolved) + .map(|&c| Value::Num(c as f64)) + .unwrap_or(Value::Num(f64::NAN)) + } + FnCall::Degree(key) => { + let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone()); + Value::Num(graph.degree(&resolved) as f64) + } + } +} + +/// Resolve a Value, evaluating function calls +fn resolve_value(v: &Value, store: &Store, graph: &Graph) -> Value { + match v { + Value::FnCall(f) => resolve_fn(f, store, graph), + other => other.clone(), + } +} + +/// Evaluate an expression against a node +fn eval_node(expr: &Expr, key: &str, store: &Store, graph: &Graph) -> bool { + match expr { + Expr::Comparison { field, op, value } => { + let lhs = match resolve_field(field, key, store, graph) { + Some(v) => v, + None => return false, + }; + let rhs = resolve_value(value, store, graph); + compare(&lhs, *op, &rhs) + } + Expr::And(a, b) => { + eval_node(a, key, store, graph) && eval_node(b, key, store, graph) + } + Expr::Or(a, b) => { + eval_node(a, key, store, graph) || eval_node(b, key, store, graph) + } + Expr::Not(e) => !eval_node(e, key, store, graph), + Expr::Neighbors { .. } => false, // neighbors() is a top-level expression, not a predicate + } +} + +/// Evaluate a WHERE clause against an edge +fn eval_edge( + expr: &Expr, + _source: &str, + target: &str, + strength: f32, + rel_type: RelationType, + store: &Store, + graph: &Graph, +) -> bool { + match expr { + Expr::Comparison { field, op, value } => { + // Edge-context fields + let lhs = match field.as_str() { + "strength" => Value::Num(strength as f64), + "rel_type" => Value::Str(rel_type_label(rel_type).to_string()), + // Fall through to node fields on the target + _ => match resolve_field(field, target, store, graph) { + Some(v) => v, + None => return false, + }, + }; + let rhs = resolve_value(value, store, graph); + compare(&lhs, *op, &rhs) + } + Expr::And(a, b) => { + eval_edge(a, _source, target, strength, rel_type, store, graph) + && eval_edge(b, _source, target, strength, rel_type, store, graph) + } + Expr::Or(a, b) => { + eval_edge(a, _source, target, strength, rel_type, store, graph) + || eval_edge(b, _source, target, strength, rel_type, store, graph) + } + Expr::Not(e) => !eval_edge(e, _source, target, strength, rel_type, store, graph), + Expr::Neighbors { .. } => false, + } +} + +// -- Query result -- + +pub struct QueryResult { + pub key: String, +} + +// -- Query options -- + +pub struct QueryOpts { + pub fields: Vec, + pub sort_field: Option, + pub limit: Option, + pub count_only: bool, +} + +impl Default for QueryOpts { + fn default() -> Self { + QueryOpts { + fields: Vec::new(), + sort_field: None, + limit: None, + count_only: false, + } + } +} + +// -- Query executor -- + +/// Parse and execute a query, returning matching node keys. +pub fn execute_query( + store: &Store, + graph: &Graph, + query_str: &str, + opts: &QueryOpts, +) -> Result, String> { + let expr = query_parser::expr(query_str) + .map_err(|e| format!("Parse error: {}", e))?; + + let mut results = match &expr { + // neighbors() is a set-returning expression + Expr::Neighbors { key, filter } => { + let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone()); + let edges = graph.edges_of(&resolved); + let mut out = Vec::new(); + for edge in edges { + let include = match filter { + Some(f) => eval_edge( + f, &resolved, &edge.target, + edge.strength, edge.rel_type, store, graph, + ), + None => true, + }; + if include { + out.push(QueryResult { key: edge.target.clone() }); + } + } + out + } + // Everything else: scan all nodes + _ => { + let mut out = Vec::new(); + for key in store.nodes.keys() { + if store.nodes[key].deleted { continue; } + if eval_node(&expr, key, store, graph) { + out.push(QueryResult { key: key.clone() }); + } + } + out + } + }; + + // Sort + let sort_field = opts.sort_field.as_deref().unwrap_or("degree"); + results.sort_by(|a, b| { + let va = resolve_field(sort_field, &a.key, store, graph) + .and_then(|v| as_num(&v)) + .unwrap_or(0.0); + let vb = resolve_field(sort_field, &b.key, store, graph) + .and_then(|v| as_num(&v)) + .unwrap_or(0.0); + vb.partial_cmp(&va).unwrap_or(std::cmp::Ordering::Equal) + }); + + // Limit + if let Some(limit) = opts.limit { + results.truncate(limit); + } + + Ok(results) +} + +/// Format a field value for display +pub fn format_field(field: &str, key: &str, store: &Store, graph: &Graph) -> String { + match resolve_field(field, key, store, graph) { + Some(Value::Num(n)) => { + if n == n.floor() && n.abs() < 1e15 { + format!("{}", n as i64) + } else { + format!("{:.3}", n) + } + } + Some(Value::Str(s)) => { + // Truncate content for display + if field == "content" { + let truncated: String = s.chars().take(80).collect(); + if s.len() > 80 { format!("{}...", truncated) } else { truncated } + } else { + s + } + } + Some(Value::Ident(s)) => s, + Some(Value::FnCall(_)) => "?".to_string(), + None => "-".to_string(), + } +}