consciousness/poc-memory/src/query.rs

502 lines
15 KiB
Rust
Raw Normal View History

// query.rs — peg-based query language for the memory graph
//
// Grammar-driven: the peg definition IS the language spec.
// Evaluates against node properties, graph metrics, and edge attributes.
// Designed for ad-hoc exploration without memorizing 35+ subcommands.
//
// Syntax:
// expr | stage | stage ...
//
// Stages (piped):
//   sort FIELD        — sort descending (default for exploration)
//   sort FIELD asc    — sort ascending
//   limit N           — cap results
//   select F,F,...    — output specific fields as TSV
//   count             — just show count
//
// Examples:
// degree > 15 | sort degree | limit 10
// category = core | select degree,weight
// neighbors('identity') WHERE strength > 0.5 | sort strength
// key ~ 'journal.*' AND degree > 10 | count
// * | sort weight asc | limit 20
use crate::store::{NodeType, RelationType, Store};
use crate::graph::Graph;
use regex::Regex;
use std::collections::BTreeMap;
// -- AST types --
/// Filter expression produced by the `expr()` grammar rule.
#[derive(Debug, Clone)]
pub enum Expr {
    /// `*` — matches everything.
    All,
    /// `FIELD OP VALUE`, e.g. `degree > 15`.
    Comparison {
        field: String,
        op: CmpOp,
        value: Value,
    },
    /// `a AND b`.
    And(Box<Expr>, Box<Expr>),
    /// `a OR b`.
    Or(Box<Expr>, Box<Expr>),
    /// `NOT e`.
    Not(Box<Expr>),
    /// `neighbors('key')` with an optional `WHERE` filter.
    Neighbors {
        key: String,
        filter: Option<Box<Expr>>,
    },
}
/// A value on either side of a comparison, or a resolved field of a result row.
#[derive(Debug, Clone)]
pub enum Value {
    /// Numeric literal or numeric field value.
    Num(f64),
    /// Single-quoted string literal or textual field value.
    Str(String),
    /// Bare (unquoted) identifier used as a value.
    Ident(String),
    /// Function call that still has to be resolved against the graph.
    FnCall(FnCall),
}
/// Functions usable as the right-hand value of a comparison.
/// Each carries the node key given as its single string argument.
#[derive(Debug, Clone)]
pub enum FnCall {
    /// `community('key')`.
    Community(String),
    /// `degree('key')`.
    Degree(String),
}
/// Comparison operators accepted by the `cmp_op()` grammar rule.
#[derive(Debug, Clone, Copy)]
pub enum CmpOp {
    /// `>`
    Gt,
    /// `<`
    Lt,
    /// `>=`
    Ge,
    /// `<=`
    Le,
    /// `=`
    Eq,
    /// `!=`
    Ne,
    /// `~` — regular-expression match.
    Match,
}
/// One `| stage` element of the pipeline that follows the filter expression.
#[derive(Debug, Clone)]
pub enum Stage {
    /// `sort FIELD [asc|desc]`; the grammar defaults to descending.
    Sort {
        field: String,
        ascending: bool,
    },
    /// `limit N`.
    Limit(usize),
    /// `select F,F,...`.
    Select(Vec<String>),
    /// `count`.
    Count,
}
/// A fully parsed query: the filter expression plus its piped stages,
/// in the order they were written.
#[derive(Debug, Clone)]
pub struct Query {
    /// Filter expression that selects the rows.
    pub expr: Expr,
    /// Pipeline stages following the expression.
    pub stages: Vec<Stage>,
}
// -- PEG grammar --
peg::parser! {
    pub grammar query_parser() for str {
        // Optional inline whitespace: spaces and tabs only.
        rule _() = [' ' | '\t']*

        // Entry point: a filter expression followed by zero or more piped
        // stages. Leading and trailing whitespace is accepted so that padded
        // input (e.g. a shell argument with a trailing space) still parses.
        pub rule query() -> Query
            = _ e:expr() s:stages() _ { Query { expr: e, stages: s } }

        // Zero or more `| stage` elements.
        rule stages() -> Vec<Stage>
            = s:(_ "|" _ s:stage() { s })* { s }

        rule stage() -> Stage
            = "sort" _ f:field() _ a:asc_desc() { Stage::Sort { field: f, ascending: a } }
            / "limit" _ n:integer() { Stage::Limit(n) }
            / "select" _ f:field_list() { Stage::Select(f) }
            / "count" { Stage::Count }

        // Yields `true` for ascending order.
        rule asc_desc() -> bool
            = "asc" { true }
            / "desc" { false }
            / { false } // default: descending

        // Comma-separated list of one or more field names.
        rule field_list() -> Vec<String>
            = f:field() fs:(_ "," _ f:field() { f })* {
                let mut v = vec![f];
                v.extend(fs);
                v
            }

        // A digit string can exceed usize; report that as a parse failure
        // instead of panicking on user input.
        rule integer() -> usize
            = n:$(['0'..='9']+) {? n.parse::<usize>().or(Err("integer that fits in usize")) }

        // Loosest-binding operators first: OR, then AND, then NOT, then atoms.
        pub rule expr() -> Expr = precedence! {
            a:(@) _ "OR" _ b:@ { Expr::Or(Box::new(a), Box::new(b)) }
            --
            a:(@) _ "AND" _ b:@ { Expr::And(Box::new(a), Box::new(b)) }
            --
            "NOT" _ e:@ { Expr::Not(Box::new(e)) }
            --
            "neighbors" _ "(" _ k:string() _ ")" _ w:where_clause()? {
                Expr::Neighbors { key: k, filter: w.map(Box::new) }
            }
            f:field() _ op:cmp_op() _ v:value() {
                Expr::Comparison { field: f, op, value: v }
            }
            "*" { Expr::All }
            "(" _ e:expr() _ ")" { e }
        }

        rule where_clause() -> Expr
            = "WHERE" _ e:expr() { e }

        // Field name: letter or underscore, then letters, digits, underscores.
        rule field() -> String
            = s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_']*) {
                s.to_string()
            }

        // Two-character operators are listed first so `>=` is not read as `>`.
        rule cmp_op() -> CmpOp
            = ">=" { CmpOp::Ge }
            / "<=" { CmpOp::Le }
            / "!=" { CmpOp::Ne }
            / ">" { CmpOp::Gt }
            / "<" { CmpOp::Lt }
            / "=" { CmpOp::Eq }
            / "~" { CmpOp::Match }

        // Function calls are tried before bare identifiers so `degree('k')`
        // is not consumed as the identifier `degree`.
        rule value() -> Value
            = f:fn_call() { Value::FnCall(f) }
            / n:number() { Value::Num(n) }
            / s:string() { Value::Str(s) }
            / i:ident() { Value::Ident(i) }

        rule fn_call() -> FnCall
            = "community" _ "(" _ k:string() _ ")" { FnCall::Community(k) }
            / "degree" _ "(" _ k:string() _ ")" { FnCall::Degree(k) }

        // Unsigned decimal number with an optional fractional part.
        rule number() -> f64
            = n:$(['0'..='9']+ ("." ['0'..='9']+)?) {? n.parse::<f64>().or(Err("number")) }

        // Single-quoted string; there is no escape syntax, so the content
        // cannot itself contain a single quote.
        rule string() -> String
            = "'" s:$([^ '\'']*) "'" { s.to_string() }

        // Bare identifier value; unlike `field()` it may also contain `-` and `.`.
        rule ident() -> String
            = s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '.']*) {
                s.to_string()
            }
    }
}
// -- Field resolution --
/// Look up one named field for the node stored under `key`.
///
/// Node properties are read from `store.nodes`; `degree`, `community_id` and
/// the clustering coefficient (also reachable as `schema_fit` or `cc`) come
/// from `graph`.
///
/// Returns `None` when the node is not in `store.nodes`, when the field name
/// is not recognised, for `category`, and for `community_id` when the graph
/// has no community entry for `key`.
fn resolve_field(field: &str, key: &str, store: &Store, graph: &Graph) -> Option<Value> {
    let node = store.nodes.get(key)?;
    let value = match field {
        // Textual node properties.
        "key" => Value::Str(key.to_string()),
        "node_type" => Value::Str(node_type_label(node.node_type).to_string()),
        "provenance" => Value::Str(node.provenance.label().to_string()),
        "created" => Value::Str(node.created.clone()),
        "content" => Value::Str(node.content.clone()),

        // Numeric node properties.
        "weight" => Value::Num(node.weight as f64),
        "emotion" => Value::Num(node.emotion as f64),
        "retrievals" => Value::Num(node.retrievals as f64),
        "uses" => Value::Num(node.uses as f64),
        "wrongs" => Value::Num(node.wrongs as f64),

        // Graph metrics.
        "degree" => Value::Num(graph.degree(key) as f64),
        "community_id" => graph.communities().get(key).map(|&c| Value::Num(c as f64))?,
        "clustering_coefficient" | "schema_fit" | "cc" => {
            Value::Num(graph.clustering_coefficient(key) as f64)
        }

        // Vestigial: still accepted as a field name for query compatibility,
        // but it never has a value.
        "category" => return None,
        _ => return None,
    };
    Some(value)
}
fn node_type_label(nt: NodeType) -> &'static str {
match nt {
NodeType::EpisodicSession => "episodic_session",
NodeType::EpisodicDaily => "episodic_daily",
NodeType::EpisodicWeekly => "episodic_weekly",
NodeType::EpisodicMonthly => "episodic_monthly",
NodeType::Semantic => "semantic",
}
}
fn rel_type_label(r: RelationType) -> &'static str {
match r {
RelationType::Link => "link",
RelationType::Causal => "causal",
RelationType::Auto => "auto",
}
}
// -- Comparison logic --
/// Numeric view of a value, if it has one.
///
/// Numbers are returned as-is; strings and identifiers are returned only when
/// their text parses as an `f64`; unresolved function calls have no numeric view.
fn as_num(v: &Value) -> Option<f64> {
    let text = match v {
        Value::Num(n) => return Some(*n),
        Value::FnCall(_) => return None,
        Value::Str(s) | Value::Ident(s) => s,
    };
    text.parse().ok()
}
/// Textual view of a value.
///
/// Strings and identifiers yield their text, numbers their `Display`
/// rendering, and unresolved function calls the empty string.
fn as_str(v: &Value) -> String {
    match v {
        Value::Num(n) => n.to_string(),
        Value::FnCall(_) => String::new(),
        Value::Str(text) | Value::Ident(text) => text.clone(),
    }
}
/// Apply a comparison operator to two values.
///
/// `Match` treats the textual form of `rhs` as a regular expression and tests
/// it against the textual form of `lhs`; a pattern that fails to compile
/// matches nothing. The pattern is compiled on every call.
///
/// All other operators compare numerically when both sides have a numeric
/// view (see `as_num`), and otherwise compare the textual forms as strings.
fn compare(lhs: &Value, op: CmpOp, rhs: &Value) -> bool {
    // Computed lazily so the regex path never pays for it.
    let ordering = || match (as_num(lhs), as_num(rhs)) {
        (Some(left), Some(right)) => left.total_cmp(&right),
        _ => as_str(lhs).cmp(&as_str(rhs)),
    };

    match op {
        CmpOp::Match => match Regex::new(&as_str(rhs)) {
            Ok(pattern) => pattern.is_match(&as_str(lhs)),
            Err(_) => false,
        },
        CmpOp::Eq => ordering().is_eq(),
        CmpOp::Ne => ordering().is_ne(),
        CmpOp::Gt => ordering().is_gt(),
        CmpOp::Lt => ordering().is_lt(),
        CmpOp::Ge => ordering().is_ge(),
        CmpOp::Le => ordering().is_le(),
    }
}
// -- Evaluator --
/// Evaluate a query function call to a concrete numeric value.
///
/// The key argument is first passed through `store.resolve_key`; if that
/// fails the key is used as written. `community(..)` yields the community id
/// of that node, or NaN when the graph has no community entry for it;
/// `degree(..)` yields the node's degree.
fn resolve_fn(f: &FnCall, store: &Store, graph: &Graph) -> Value {
    // Both variants carry a node key, so resolve it once up front.
    let (FnCall::Community(raw) | FnCall::Degree(raw)) = f;
    let node_key = match store.resolve_key(raw) {
        Ok(found) => found,
        Err(_) => raw.clone(),
    };

    match f {
        FnCall::Community(_) => match graph.communities().get(&node_key) {
            Some(&id) => Value::Num(id as f64),
            None => Value::Num(f64::NAN),
        },
        FnCall::Degree(_) => Value::Num(graph.degree(&node_key) as f64),
    }
}
/// Turn a parsed value into one that can be compared: function calls are
/// evaluated against the store and graph, everything else is cloned unchanged.
fn resolve_value(v: &Value, store: &Store, graph: &Graph) -> Value {
    if let Value::FnCall(call) = v {
        resolve_fn(call, store, graph)
    } else {
        v.clone()
    }
}
/// Decide whether an expression holds for one candidate row.
///
/// `resolve` supplies the value of a named field for the row being tested, so
/// the same evaluator serves both nodes and edges. A comparison on a field
/// that `resolve` cannot supply is false. `AND` and `OR` short-circuit.
/// A `neighbors(..)` expression always evaluates to false here.
fn eval(
    expr: &Expr,
    resolve: &dyn Fn(&str) -> Option<Value>,
    store: &Store,
    graph: &Graph,
) -> bool {
    match expr {
        Expr::All => true,
        Expr::Neighbors { .. } => false,
        Expr::Not(inner) => !eval(inner, resolve, store, graph),
        Expr::And(left, right) => {
            eval(left, resolve, store, graph) && eval(right, resolve, store, graph)
        }
        Expr::Or(left, right) => {
            eval(left, resolve, store, graph) || eval(right, resolve, store, graph)
        }
        Expr::Comparison { field, op, value } => resolve(field).map_or(false, |actual| {
            // The right-hand side is only resolved once the field is known to exist.
            let expected = resolve_value(value, store, graph);
            compare(&actual, *op, &expected)
        }),
    }
}
// -- Query result --
/// One row of query output: a node key plus the field values resolved for it.
///
/// `fields` only holds values that were actually resolved; a field with no
/// value for this row is simply absent from the map.
#[derive(Debug, Clone)]
pub struct QueryResult {
    /// Key of the node this row refers to.
    pub key: String,
    /// Resolved field values, keyed by field name.
    pub fields: BTreeMap<String, Value>,
}
// -- Query executor --
/// Parse `query_str` and execute it against the store and graph.
///
/// # Errors
///
/// Returns `"Parse error: ..."` when the query text does not match the
/// grammar, and otherwise whatever `execute_parsed` returns.
pub fn execute_query(
    store: &Store,
    graph: &Graph,
    query_str: &str,
) -> Result<Vec<QueryResult>, String> {
    match query_parser::query(query_str) {
        Ok(parsed) => execute_parsed(store, graph, &parsed),
        Err(err) => Err(format!("Parse error: {}", err)),
    }
}
fn execute_parsed(
store: &Store,
graph: &Graph,
q: &Query,
) -> Result<Vec<QueryResult>, String> {
let mut results = match &q.expr {
Expr::Neighbors { key, filter } => {
let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone());
let edges = graph.edges_of(&resolved);
let mut out = Vec::new();
for edge in edges {
let include = match filter {
Some(f) => {
let strength = edge.strength;
let rt = edge.rel_type;
let target = &edge.target;
eval(f, &|field| match field {
"strength" => Some(Value::Num(strength as f64)),
"rel_type" => Some(Value::Str(rel_type_label(rt).to_string())),
_ => resolve_field(field, target, store, graph),
}, store, graph)
}
None => true,
};
if include {
let mut fields = BTreeMap::new();
fields.insert("strength".into(), Value::Num(edge.strength as f64));
fields.insert("rel_type".into(),
Value::Str(rel_type_label(edge.rel_type).to_string()));
out.push(QueryResult { key: edge.target.clone(), fields });
}
}
out
}
_ => {
let mut out = Vec::new();
for key in store.nodes.keys() {
if store.nodes[key].deleted { continue; }
if eval(&q.expr, &|f| resolve_field(f, key, store, graph), store, graph) {
out.push(QueryResult { key: key.clone(), fields: BTreeMap::new() });
}
}
out
}
};
// Collect fields needed by select/sort stages and resolve them once
let needed: Vec<String> = {
let mut set = Vec::new();
for stage in &q.stages {
match stage {
Stage::Select(fields) => {
for f in fields {
if !set.contains(f) { set.push(f.clone()); }
}
}
Stage::Sort { field, .. } => {
if !set.contains(field) { set.push(field.clone()); }
}
_ => {}
}
}
set
};
for r in &mut results {
for f in &needed {
if !r.fields.contains_key(f) {
if let Some(v) = resolve_field(f, &r.key, store, graph) {
r.fields.insert(f.clone(), v);
}
}
}
}
// Apply pipeline stages
let mut has_sort = false;
for stage in &q.stages {
match stage {
Stage::Sort { field, ascending } => {
has_sort = true;
let asc = *ascending;
results.sort_by(|a, b| {
let va = a.fields.get(field).and_then(as_num);
let vb = b.fields.get(field).and_then(as_num);
let ord = match (va, vb) {
(Some(a), Some(b)) => a.total_cmp(&b),
_ => {
let sa = a.fields.get(field).map(as_str).unwrap_or_default();
let sb = b.fields.get(field).map(as_str).unwrap_or_default();
sa.cmp(&sb)
}
};
if asc { ord } else { ord.reverse() }
});
}
Stage::Limit(n) => {
results.truncate(*n);
}
Stage::Select(_) | Stage::Count => {} // handled in output
}
}
// Default sort by degree desc if no explicit sort
if !has_sort {
results.sort_by(|a, b| {
let da = graph.degree(&a.key);
let db = graph.degree(&b.key);
db.cmp(&da)
});
}
Ok(results)
}
/// Render a value for display.
///
/// Whole numbers with magnitude below 1e15 print without a decimal point;
/// every other number prints with three decimal places. Strings and
/// identifiers print as-is, and an unresolved function call prints as `?`.
pub fn format_value(v: &Value) -> String {
    match v {
        Value::Num(n) if *n == n.floor() && n.abs() < 1e15 => (*n as i64).to_string(),
        Value::Num(n) => format!("{:.3}", n),
        Value::Str(text) | Value::Ident(text) => text.clone(),
        Value::FnCall(_) => String::from("?"),
    }
}
/// Parse and execute a query, printing the outcome.
///
/// Output on stdout:
/// - with a `count` stage: just the number of rows;
/// - with a `select` stage: a tab-separated header (`key` plus the selected
///   fields) followed by one tab-separated line per row, with `-` for fields
///   that have no value (only the first `select` stage is used);
/// - otherwise: one key per line.
///
/// When there are no rows (and no `count` stage) `No results` goes to stderr.
///
/// # Errors
///
/// Returns `"Parse error: ..."` when the query text does not match the
/// grammar, or the error from `execute_parsed`.
pub fn run_query(store: &Store, graph: &Graph, query_str: &str) -> Result<(), String> {
    let parsed = match query_parser::query(query_str) {
        Ok(p) => p,
        Err(e) => return Err(format!("Parse error: {}", e)),
    };
    let rows = execute_parsed(store, graph, &parsed)?;

    // Count stage
    let wants_count = parsed.stages.iter().any(|s| matches!(s, Stage::Count));
    if wants_count {
        println!("{}", rows.len());
        return Ok(());
    }

    if rows.is_empty() {
        eprintln!("No results");
        return Ok(());
    }

    // Select stage
    let selected: Option<&Vec<String>> = parsed.stages.iter().find_map(|s| match s {
        Stage::Select(cols) => Some(cols),
        _ => None,
    });

    match selected {
        None => {
            for row in &rows {
                println!("{}", row.key);
            }
        }
        Some(columns) => {
            let mut header = String::from("key");
            for col in columns {
                header.push('\t');
                header.push_str(col);
            }
            println!("{}", header);

            for row in &rows {
                let mut line = row.key.clone();
                for col in columns {
                    line.push('\t');
                    match row.fields.get(col) {
                        Some(v) => line.push_str(&format_value(v)),
                        None => line.push('-'),
                    }
                }
                println!("{}", line);
            }
        }
    }
    Ok(())
}