query: peg-based query language for ad-hoc graph exploration

poc-memory query "degree > 15"
poc-memory query "key ~ 'journal.*' AND degree > 10"
poc-memory query "neighbors('identity.md') WHERE strength > 0.5"
poc-memory query "community_id = community('identity.md')" --fields degree,category

Grammar-driven: the peg definition IS the language spec. Supports
boolean logic (AND/OR/NOT), numeric and string comparison, regex
match (~), graph traversal (neighbors() with WHERE), and function
calls (community(), degree()). Output flags: --fields, --sort,
--limit, --count.

New dependency: peg 0.8 (~68KB, 2 tiny deps).
This commit is contained in:
ProofOfConcept 2026-03-03 10:55:30 -05:00
parent 71e6f15d82
commit a36449032c
5 changed files with 544 additions and 1 deletions

29
Cargo.lock generated
View file

@ -932,6 +932,33 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
[[package]]
name = "peg"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9928cfca101b36ec5163e70049ee5368a8a1c3c6efc9ca9c5f9cc2f816152477"
dependencies = [
"peg-macros",
"peg-runtime",
]
[[package]]
name = "peg-macros"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6298ab04c202fa5b5d52ba03269fb7b74550b150323038878fe6c372d8280f71"
dependencies = [
"peg-runtime",
"proc-macro2",
"quote",
]
[[package]]
name = "peg-runtime"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "132dca9b868d927b35b5dd728167b2dee150eb1ad686008fc71ccb298b776fca"
[[package]] [[package]]
name = "pest" name = "pest"
version = "2.8.6" version = "2.8.6"
@ -991,6 +1018,8 @@ dependencies = [
"faer", "faer",
"libc", "libc",
"memmap2", "memmap2",
"peg",
"rayon",
"regex", "regex",
"rkyv", "rkyv",
"serde", "serde",

View file

@ -14,6 +14,8 @@ libc = "0.2"
faer = "0.24.0" faer = "0.24.0"
rkyv = { version = "0.7", features = ["validation", "std"] } rkyv = { version = "0.7", features = ["validation", "std"] }
memmap2 = "0.9" memmap2 = "0.9"
rayon = "1"
peg = "0.8"
[build-dependencies] [build-dependencies]
capnpc = "0.20" capnpc = "0.20"

View file

@ -43,6 +43,13 @@ impl Graph {
self.adj.values().map(|e| e.len()).sum::<usize>() / 2 self.adj.values().map(|e| e.len()).sum::<usize>() / 2
} }
/// Returns every edge recorded for `key` as full `Edge` values
/// (including `rel_type`), rather than just neighbor keys and strengths.
///
/// A key with no adjacency entry yields an empty slice.
pub fn edges_of(&self, key: &str) -> &[Edge] {
    match self.adj.get(key) {
        Some(edges) => edges.as_slice(),
        None => &[],
    }
}
/// All neighbor keys with strengths /// All neighbor keys with strengths
pub fn neighbors(&self, key: &str) -> Vec<(&String, f32)> { pub fn neighbors(&self, key: &str) -> Vec<(&String, f32)> {
self.adj.get(key) self.adj.get(key)

View file

@ -20,6 +20,7 @@ mod search;
mod similarity; mod similarity;
mod migrate; mod migrate;
mod neuro; mod neuro;
mod query;
mod spectral; mod spectral;
pub mod memory_capnp { pub mod memory_capnp {
@ -118,6 +119,7 @@ fn main() {
"export" => cmd_export(&args[2..]), "export" => cmd_export(&args[2..]),
"journal-write" => cmd_journal_write(&args[2..]), "journal-write" => cmd_journal_write(&args[2..]),
"journal-tail" => cmd_journal_tail(&args[2..]), "journal-tail" => cmd_journal_tail(&args[2..]),
"query" => cmd_query(&args[2..]),
_ => { _ => {
eprintln!("Unknown command: {}", args[1]); eprintln!("Unknown command: {}", args[1]);
usage(); usage();
@ -192,7 +194,11 @@ Commands:
import FILE [FILE...] Import markdown file(s) into the store import FILE [FILE...] Import markdown file(s) into the store
export [FILE|--all] Export store nodes to markdown file(s) export [FILE|--all] Export store nodes to markdown file(s)
journal-write TEXT Write a journal entry to the store journal-write TEXT Write a journal entry to the store
journal-tail [N] [--full] Show last N journal entries (default 20, --full for content)"); journal-tail [N] [--full] Show last N journal entries (default 20, --full for content)
query EXPR [--fields F] [--sort F] [--limit N] [--count]
Query the memory graph with expressions
Examples: \"degree > 15\", \"key ~ 'journal.*'\",
\"neighbors('identity.md') WHERE strength > 0.5\"");
} }
fn cmd_search(args: &[String]) -> Result<(), String> { fn cmd_search(args: &[String]) -> Result<(), String> {
@ -1615,3 +1621,78 @@ fn cmd_interference(args: &[String]) -> Result<(), String> {
} }
Ok(()) Ok(())
} }
/// `query` subcommand: parse the command line, run the query, print matches.
///
/// Arguments:
/// * the first non-flag argument is the query expression;
/// * `--fields F,F,...` prints a TSV table (`key` plus the listed fields);
/// * `--sort F` / `--limit N` are passed through to `query::execute_query`;
/// * `--count` prints only the number of matches.
///
/// Without `--fields`, one matching key is printed per line. An empty result
/// set prints "No results" to stderr and still returns `Ok(())`.
///
/// # Errors
/// Returns an error string when no arguments are given, when a value-taking
/// flag has no value, when `--limit` is not a valid number, when more than one
/// query expression is supplied, when the expression is missing, or when
/// loading the store / executing the query fails.
fn cmd_query(args: &[String]) -> Result<(), String> {
    if args.is_empty() {
        return Err("Usage: poc-memory query EXPR [--fields F,F,...] [--sort F] [--limit N] [--count]".into());
    }

    // Parse flags — query string is the first non-flag arg
    let mut opts = query::QueryOpts::default();
    let mut query_str: Option<String> = None;
    let mut rest = args.iter();
    while let Some(arg) = rest.next() {
        match arg.as_str() {
            flag @ ("--fields" | "--sort" | "--limit") => {
                // A value-taking flag at the end of the command line used to
                // fall through and be treated as the query expression; report
                // the missing value explicitly instead.
                let value = rest
                    .next()
                    .ok_or_else(|| format!("{} requires a value", flag))?;
                match flag {
                    "--fields" => {
                        opts.fields = value.split(',').map(|s| s.trim().to_string()).collect();
                    }
                    "--sort" => opts.sort_field = Some(value.clone()),
                    // Only "--limit" remains (see the outer pattern).
                    _ => opts.limit = Some(value.parse().map_err(|_| "invalid --limit")?),
                }
            }
            "--count" => opts.count_only = true,
            _ if query_str.is_none() => query_str = Some(arg.clone()),
            _ => return Err(format!("unexpected argument: {}", arg)),
        }
    }
    let query_str = query_str.ok_or("missing query expression")?;

    let store = capnp_store::Store::load()?;
    let graph = store.build_graph();
    let results = query::execute_query(&store, &graph, &query_str, &opts)?;

    if opts.count_only {
        println!("{}", results.len());
        return Ok(());
    }
    if results.is_empty() {
        eprintln!("No results");
        return Ok(());
    }

    if opts.fields.is_empty() {
        for r in &results {
            println!("{}", r.key);
        }
        return Ok(());
    }

    // If --fields specified, show as TSV with header
    let mut header = vec!["key".to_string()];
    header.extend(opts.fields.iter().cloned());
    println!("{}", header.join("\t"));
    for r in &results {
        let mut row = vec![r.key.clone()];
        row.extend(
            opts.fields
                .iter()
                .map(|f| query::format_field(f, &r.key, &store, &graph)),
        );
        println!("{}", row.join("\t"));
    }
    Ok(())
}

424
src/query.rs Normal file
View file

@ -0,0 +1,424 @@
// query.rs — peg-based query language for the memory graph
//
// Grammar-driven: the peg definition IS the language spec.
// Evaluates against node properties, graph metrics, and edge attributes.
// Designed for ad-hoc exploration without memorizing 35+ subcommands.
use crate::capnp_store::{NodeType, Provenance, RelationType, Store};
use crate::graph::Graph;
use regex::Regex;
// -- AST types --
/// Parsed query expression: the AST produced by `query_parser::expr`.
#[derive(Debug, Clone)]
pub enum Expr {
/// `field op value` test, e.g. `degree > 15` or `key ~ 'journal.*'`.
Comparison { field: String, op: CmpOp, value: Value },
/// Both sub-expressions must hold (`a AND b`).
And(Box<Expr>, Box<Expr>),
/// At least one sub-expression must hold (`a OR b`).
Or(Box<Expr>, Box<Expr>),
/// Negation of the sub-expression (`NOT a`).
Not(Box<Expr>),
/// `neighbors('key')` with an optional `WHERE` filter. The executor treats
/// this as a set-returning form only at the top level of a query; the
/// node/edge evaluators return `false` for it when it appears nested.
Neighbors { key: String, filter: Option<Box<Expr>> },
}
/// Right-hand side of a comparison, and also the type used for resolved
/// field values.
#[derive(Debug, Clone)]
pub enum Value {
/// Numeric literal or numeric field value.
Num(f64),
/// Single-quoted string literal or string field value.
Str(String),
/// Bare (unquoted) identifier as written in the query.
Ident(String),
/// Function call, evaluated to a concrete value before comparison.
FnCall(FnCall),
}
/// Value-returning functions usable on the right-hand side of a comparison.
/// Each variant carries the quoted string argument given in the query.
#[derive(Debug, Clone)]
pub enum FnCall {
/// `community('key')` — community id of the named node.
Community(String),
/// `degree('key')` — degree of the named node.
Degree(String),
}
/// Comparison operators accepted by the grammar:
/// `Gt` is `>`, `Lt` is `<`, `Ge` is `>=`, `Le` is `<=`, `Eq` is `=`,
/// `Ne` is `!=`, and `Match` is `~` (regular-expression match).
#[derive(Debug, Clone, Copy)]
pub enum CmpOp {
Gt, Lt, Ge, Le, Eq, Ne, Match,
}
// -- PEG grammar --
// The grammar below is the complete definition of the query language.
// Keywords (`OR`, `AND`, `NOT`, `WHERE`) are matched as exact uppercase
// literals.
// NOTE(review): no rule appears to consume leading whitespace, or trailing
// whitespace after a comparison — confirm whether callers need to trim input.
peg::parser! {
pub grammar query_parser() for str {
// Optional run of spaces/tabs between tokens (newlines are not included).
rule _() = [' ' | '\t']*
// Entry point. Levels are separated by `--` and listed from loosest to
// tightest binding: OR, then AND, then prefix NOT, then the atoms.
pub rule expr() -> Expr = precedence! {
a:(@) _ "OR" _ b:@ { Expr::Or(Box::new(a), Box::new(b)) }
--
a:(@) _ "AND" _ b:@ { Expr::And(Box::new(a), Box::new(b)) }
--
"NOT" _ e:@ { Expr::Not(Box::new(e)) }
--
// Atom: neighbors('key') with an optional WHERE filter.
"neighbors" _ "(" _ k:string() _ ")" _ w:where_clause()? {
Expr::Neighbors { key: k, filter: w.map(Box::new) }
}
// Atom: field <op> value.
f:field() _ op:cmp_op() _ v:value() {
Expr::Comparison { field: f, op, value: v }
}
// Atom: parenthesised sub-expression.
"(" _ e:expr() _ ")" { e }
}
// Filter following neighbors(...); its body is a full expression.
rule where_clause() -> Expr
= "WHERE" _ e:expr() { e }
// Field name: a letter or underscore, then letters, digits or underscores.
rule field() -> String
= s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_']*) {
s.to_string()
}
// Two-character operators are listed first so ">=" is not read as ">".
rule cmp_op() -> CmpOp
= ">=" { CmpOp::Ge }
/ "<=" { CmpOp::Le }
/ "!=" { CmpOp::Ne }
/ ">" { CmpOp::Gt }
/ "<" { CmpOp::Lt }
/ "=" { CmpOp::Eq }
/ "~" { CmpOp::Match }
// Alternatives are tried in order, so a function call is recognised before
// the bare-identifier fallback.
rule value() -> Value
= f:fn_call() { Value::FnCall(f) }
/ n:number() { Value::Num(n) }
/ s:string() { Value::Str(s) }
/ i:ident() { Value::Ident(i) }
// Built-in functions; each takes one quoted string argument.
rule fn_call() -> FnCall
= "community" _ "(" _ k:string() _ ")" { FnCall::Community(k) }
/ "degree" _ "(" _ k:string() _ ")" { FnCall::Degree(k) }
// Unsigned decimal with an optional fractional part (no sign, no exponent).
// The captured text is only digits and at most one '.', so the unwrap is
// expected never to fail.
rule number() -> f64
= n:$(['0'..='9']+ ("." ['0'..='9']+)?) {
n.parse().unwrap()
}
// Single-quoted string; there is no escape syntax, so the contents cannot
// contain a single quote.
rule string() -> String
= "'" s:$([^ '\'']*) "'" { s.to_string() }
// Bare identifier: like `field`, but '-' and '.' are also allowed after the
// first character.
rule ident() -> String
= s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '.']*) {
s.to_string()
}
}
}
// -- Field resolution --
/// Look up `field` for the node stored under `key` and wrap it as a `Value`.
///
/// Returns `None` when the node is not in the store, when the field name is
/// not recognised, or when the field has no value for this node
/// (`community_id` without a community entry, `schema_fit` unset).
fn resolve_field(field: &str, key: &str, store: &Store, graph: &Graph) -> Option<Value> {
    let node = store.nodes.get(key)?;

    let resolved = match field {
        // Fields that may legitimately be absent return directly.
        "community_id" => {
            return graph.communities().get(key).map(|&c| Value::Num(c as f64));
        }
        "schema_fit" => return node.schema_fit.map(|f| Value::Num(f as f64)),

        // String-valued node properties.
        "key" => Value::Str(key.to_string()),
        "category" => Value::Str(node.category.label().to_string()),
        "node_type" => Value::Str(node_type_label(node.node_type).to_string()),
        "provenance" => Value::Str(provenance_label(node.provenance).to_string()),
        "created" => Value::Str(node.created.clone()),
        "content" => Value::Str(node.content.clone()),

        // Numeric node properties.
        "weight" => Value::Num(node.weight as f64),
        "emotion" => Value::Num(node.emotion as f64),
        "retrievals" => Value::Num(node.retrievals as f64),
        "uses" => Value::Num(node.uses as f64),
        "wrongs" => Value::Num(node.wrongs as f64),

        // Graph metrics.
        "degree" => Value::Num(graph.degree(key) as f64),
        "clustering_coefficient" => Value::Num(graph.clustering_coefficient(key) as f64),

        _ => return None,
    };
    Some(resolved)
}
/// Query-language spelling of a node type (the string compared against the
/// `node_type` field).
fn node_type_label(nt: NodeType) -> &'static str {
    match nt {
        NodeType::EpisodicSession => "episodic_session",
        NodeType::EpisodicDaily => "episodic_daily",
        NodeType::EpisodicWeekly => "episodic_weekly",
        NodeType::Semantic => "semantic",
    }
}
/// Query-language spelling of a provenance (the string compared against the
/// `provenance` field).
fn provenance_label(p: Provenance) -> &'static str {
    match p {
        Provenance::Manual => "manual",
        Provenance::Journal => "journal",
        Provenance::Agent => "agent",
        Provenance::Dream => "dream",
        Provenance::Derived => "derived",
    }
}
/// Query-language spelling of an edge relation type (the string compared
/// against the `rel_type` field in a `WHERE` clause).
fn rel_type_label(r: RelationType) -> &'static str {
    match r {
        RelationType::Link => "link",
        RelationType::Causal => "causal",
        RelationType::Auto => "auto",
    }
}
// -- Comparison logic --
/// Numeric view of a value for comparisons.
///
/// Numbers are returned as-is; strings and identifiers are parsed as `f64`
/// and yield `None` when they do not parse. Unresolved function calls have
/// no numeric value.
fn as_num(v: &Value) -> Option<f64> {
    match v {
        Value::Num(n) => Some(*n),
        Value::Str(text) | Value::Ident(text) => text.parse::<f64>().ok(),
        Value::FnCall(_) => None,
    }
}
/// Textual view of a value for comparisons.
///
/// Strings and identifiers are copied, numbers use their `Display` form, and
/// unresolved function calls become the empty string.
fn as_str(v: &Value) -> String {
    match v {
        Value::Num(n) => n.to_string(),
        Value::Str(text) | Value::Ident(text) => text.clone(),
        Value::FnCall(_) => String::new(),
    }
}
/// Apply `op` to `lhs` and `rhs`.
///
/// * `Match` treats `rhs` as a regular expression searched for in `lhs`;
///   a pattern that fails to compile never matches.
/// * `Eq` / `Ne` compare numerically when both sides have a numeric view,
///   otherwise they compare the string views.
/// * Ordering operators compare numerically and are `false` whenever either
///   side has no numeric view.
fn compare(lhs: &Value, op: CmpOp, rhs: &Value) -> bool {
    // Both operands as numbers, or `None` if either side is not numeric.
    let numeric = || as_num(lhs).zip(as_num(rhs));

    match op {
        CmpOp::Match => {
            let haystack = as_str(lhs);
            let pattern = as_str(rhs);
            // NOTE(review): the pattern is compiled on every call.
            Regex::new(&pattern).map_or(false, |re| re.is_match(&haystack))
        }
        CmpOp::Eq => match numeric() {
            Some((a, b)) => a == b,
            None => as_str(lhs) == as_str(rhs),
        },
        CmpOp::Ne => match numeric() {
            Some((a, b)) => a != b,
            None => as_str(lhs) != as_str(rhs),
        },
        CmpOp::Gt => numeric().map_or(false, |(a, b)| a > b),
        CmpOp::Lt => numeric().map_or(false, |(a, b)| a < b),
        CmpOp::Ge => numeric().map_or(false, |(a, b)| a >= b),
        CmpOp::Le => numeric().map_or(false, |(a, b)| a <= b),
    }
}
// -- Evaluator --
/// Evaluate a query function to a numeric `Value`.
///
/// The argument is first passed through `store.resolve_key`; if that fails
/// the argument is used unchanged. `community(...)` yields NaN when the node
/// has no community entry.
fn resolve_fn(f: &FnCall, store: &Store, graph: &Graph) -> Value {
    let canonical = |raw: &String| store.resolve_key(raw).unwrap_or_else(|_| raw.clone());

    let number = match f {
        FnCall::Community(key) => {
            let node_key = canonical(key);
            match graph.communities().get(&node_key) {
                Some(&id) => id as f64,
                None => f64::NAN,
            }
        }
        FnCall::Degree(key) => {
            let node_key = canonical(key);
            graph.degree(&node_key) as f64
        }
    };
    Value::Num(number)
}
/// Turn a parsed value into a concrete one: function calls are evaluated,
/// every other value is returned as a copy.
fn resolve_value(v: &Value, store: &Store, graph: &Graph) -> Value {
    if let Value::FnCall(call) = v {
        resolve_fn(call, store, graph)
    } else {
        v.clone()
    }
}
/// Decide whether the node stored under `key` satisfies `expr`.
///
/// A comparison on a field that cannot be resolved for this node is `false`.
/// `neighbors()` is a top-level, set-returning form and is always `false`
/// when evaluated as a predicate.
fn eval_node(expr: &Expr, key: &str, store: &Store, graph: &Graph) -> bool {
    match expr {
        Expr::Neighbors { .. } => false,
        Expr::Not(inner) => !eval_node(inner, key, store, graph),
        Expr::Or(left, right) => {
            eval_node(left, key, store, graph) || eval_node(right, key, store, graph)
        }
        Expr::And(left, right) => {
            eval_node(left, key, store, graph) && eval_node(right, key, store, graph)
        }
        Expr::Comparison { field, op, value } => {
            // The right-hand side is only resolved once the field exists.
            resolve_field(field, key, store, graph).map_or(false, |lhs| {
                let rhs = resolve_value(value, store, graph);
                compare(&lhs, *op, &rhs)
            })
        }
    }
}
/// Decide whether one edge satisfies a `WHERE` expression.
///
/// `strength` and `rel_type` are read from the edge itself; any other field
/// is resolved on the edge's `target` node. A field that cannot be resolved
/// makes the comparison `false`, as does a nested `neighbors()`.
/// `_source` is only threaded through the recursion.
fn eval_edge(
    expr: &Expr,
    _source: &str,
    target: &str,
    strength: f32,
    rel_type: RelationType,
    store: &Store,
    graph: &Graph,
) -> bool {
    // Re-evaluate a sub-expression against the same edge.
    let recurse = |sub: &Expr| eval_edge(sub, _source, target, strength, rel_type, store, graph);

    match expr {
        Expr::Neighbors { .. } => false,
        Expr::Not(inner) => !recurse(inner),
        Expr::Or(left, right) => recurse(left) || recurse(right),
        Expr::And(left, right) => recurse(left) && recurse(right),
        Expr::Comparison { field, op, value } => {
            let lhs = match field.as_str() {
                // Edge-context fields
                "strength" => Some(Value::Num(strength as f64)),
                "rel_type" => Some(Value::Str(rel_type_label(rel_type).to_string())),
                // Everything else is a node field on the target
                _ => resolve_field(field, target, store, graph),
            };
            lhs.map_or(false, |lhs| {
                let rhs = resolve_value(value, store, graph);
                compare(&lhs, *op, &rhs)
            })
        }
    }
}
// -- Query result --
/// One query match, identified by the key of the matching node.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct QueryResult {
    /// Key of the matched node (for `neighbors()` queries, the edge target).
    pub key: String,
}
// -- Query options --
/// Output and post-processing options for a query.
///
/// The default is: no extra fields, no explicit sort field, no limit, and
/// full output rather than a count.
#[derive(Debug, Clone, Default)]
pub struct QueryOpts {
    /// Extra fields the CLI prints next to each key (`--fields`).
    pub fields: Vec<String>,
    /// Field to sort results by (`--sort`); `execute_query` falls back to
    /// `degree` when this is `None`.
    pub sort_field: Option<String>,
    /// Maximum number of results to keep (`--limit`).
    pub limit: Option<usize>,
    /// When set, the CLI prints only the number of matches (`--count`).
    pub count_only: bool,
}
// -- Query executor --
/// Parse and execute a query, returning matching node keys.
pub fn execute_query(
store: &Store,
graph: &Graph,
query_str: &str,
opts: &QueryOpts,
) -> Result<Vec<QueryResult>, String> {
let expr = query_parser::expr(query_str)
.map_err(|e| format!("Parse error: {}", e))?;
let mut results = match &expr {
// neighbors() is a set-returning expression
Expr::Neighbors { key, filter } => {
let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone());
let edges = graph.edges_of(&resolved);
let mut out = Vec::new();
for edge in edges {
let include = match filter {
Some(f) => eval_edge(
f, &resolved, &edge.target,
edge.strength, edge.rel_type, store, graph,
),
None => true,
};
if include {
out.push(QueryResult { key: edge.target.clone() });
}
}
out
}
// Everything else: scan all nodes
_ => {
let mut out = Vec::new();
for key in store.nodes.keys() {
if store.nodes[key].deleted { continue; }
if eval_node(&expr, key, store, graph) {
out.push(QueryResult { key: key.clone() });
}
}
out
}
};
// Sort
let sort_field = opts.sort_field.as_deref().unwrap_or("degree");
results.sort_by(|a, b| {
let va = resolve_field(sort_field, &a.key, store, graph)
.and_then(|v| as_num(&v))
.unwrap_or(0.0);
let vb = resolve_field(sort_field, &b.key, store, graph)
.and_then(|v| as_num(&v))
.unwrap_or(0.0);
vb.partial_cmp(&va).unwrap_or(std::cmp::Ordering::Equal)
});
// Limit
if let Some(limit) = opts.limit {
results.truncate(limit);
}
Ok(results)
}
/// Render one field of the node stored under `key` for tabular output.
///
/// * Numbers that are whole and smaller than 1e15 in magnitude print without
///   a fractional part; other numbers print with three decimals.
/// * `content` is cut to its first 80 characters, with `...` appended only
///   when something was actually cut off. The cut is counted in characters,
///   so multibyte text is not marked as truncated by mistake.
/// * Other strings print unchanged; a field that cannot be resolved prints `-`.
pub fn format_field(field: &str, key: &str, store: &Store, graph: &Graph) -> String {
    const CONTENT_PREVIEW_CHARS: usize = 80;

    match resolve_field(field, key, store, graph) {
        Some(Value::Num(n)) => {
            if n == n.floor() && n.abs() < 1e15 {
                format!("{}", n as i64)
            } else {
                format!("{:.3}", n)
            }
        }
        Some(Value::Str(s)) => {
            if field != "content" {
                return s;
            }
            // Byte offset of the first character past the preview, if any.
            let cut = s
                .char_indices()
                .nth(CONTENT_PREVIEW_CHARS)
                .map(|(offset, _)| offset);
            match cut {
                Some(offset) => format!("{}...", &s[..offset]),
                None => s,
            }
        }
        Some(Value::Ident(s)) => s,
        Some(Value::FnCall(_)) => "?".to_string(),
        None => "-".to_string(),
    }
}