forked from kent/consciousness
query: unify PEG and engine parsers
PEG parser now handles both expression syntax (degree > 5 | sort degree) and pipeline syntax (all | type:episodic | sort:timestamp). Deleted Stage::parse() and helpers from engine.rs — it's now pure execution. All callers use parse_stages() from parser.rs as the single entry point. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
bc991c3521
commit
aad227e487
8 changed files with 562 additions and 253 deletions
|
|
@ -260,7 +260,7 @@ async fn query(args: &serde_json::Value) -> Result<String> {
|
|||
let store = arc.lock().await;
|
||||
let graph = store.build_graph();
|
||||
|
||||
let stages = crate::search::Stage::parse_pipeline(query_str)
|
||||
let stages = crate::query_parser::parse_stages(query_str)
|
||||
.map_err(|e| anyhow::anyhow!("{}", e))?;
|
||||
let results = crate::search::run_query(&stages, vec![], &graph, &store, false, 100);
|
||||
let keys: Vec<String> = results.into_iter().map(|(k, _)| k).collect();
|
||||
|
|
@ -272,12 +272,61 @@ async fn query(args: &serde_json::Value) -> Result<String> {
|
|||
Ok(crate::subconscious::prompts::format_nodes_section(&store, &items, &graph))
|
||||
}
|
||||
_ => {
|
||||
crate::query_parser::query_to_string(&store, &graph, query_str)
|
||||
.map_err(|e| anyhow::anyhow!("{}", e))
|
||||
// Compact output: check for count/select stages, else just list keys
|
||||
use crate::search::{Stage, Transform};
|
||||
let has_count = stages.iter().any(|s| matches!(s, Stage::Transform(Transform::Count)));
|
||||
if has_count {
|
||||
return Ok(keys.len().to_string());
|
||||
}
|
||||
if keys.is_empty() {
|
||||
return Ok("no results".to_string());
|
||||
}
|
||||
let select_fields: Option<&Vec<String>> = stages.iter().find_map(|s| match s {
|
||||
Stage::Transform(Transform::Select(f)) => Some(f),
|
||||
_ => None,
|
||||
});
|
||||
if let Some(fields) = select_fields {
|
||||
let mut out = String::from("key\t");
|
||||
out.push_str(&fields.join("\t"));
|
||||
out.push('\n');
|
||||
for key in &keys {
|
||||
out.push_str(key);
|
||||
for f in fields {
|
||||
out.push('\t');
|
||||
out.push_str(&resolve_field_str(&store, &graph, key, f));
|
||||
}
|
||||
out.push('\n');
|
||||
}
|
||||
Ok(out)
|
||||
} else {
|
||||
Ok(keys.join("\n"))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Render one node field as a display string for tabular (select) output.
///
/// Unknown keys and unrecognized field names both render as "-", so the
/// caller can emit fixed-shape rows without any error handling.
fn resolve_field_str(store: &crate::store::Store, graph: &crate::graph::Graph, key: &str, field: &str) -> String {
    let Some(node) = store.nodes.get(key) else {
        return "-".to_string();
    };
    match field {
        "key" => key.to_string(),
        // Weight is the only float field; keep three decimals for alignment.
        "weight" => format!("{:.3}", node.weight),
        "node_type" => format!("{:?}", node.node_type),
        "provenance" => node.provenance.clone(),
        "emotion" => node.emotion.to_string(),
        "retrievals" => node.retrievals.to_string(),
        "uses" => node.uses.to_string(),
        "wrongs" => node.wrongs.to_string(),
        "created" => node.created_at.to_string(),
        "timestamp" => node.timestamp.to_string(),
        // Degree comes from the graph, not the node record.
        "degree" => graph.degree(key).to_string(),
        "content_len" => node.content.len().to_string(),
        _ => "-".to_string(),
    }
}
|
||||
|
||||
// ── Journal tools ──────────────────────────────────────────────
|
||||
|
||||
async fn journal_tail(args: &serde_json::Value) -> Result<String> {
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ pub fn cmd_run_agent(agent: &str, count: usize, target: &[String], query: Option
|
|||
target.to_vec()
|
||||
} else if let Some(q) = query {
|
||||
let graph = store.build_graph();
|
||||
let stages = crate::search::Stage::parse_pipeline(q)?;
|
||||
let stages = crate::query_parser::parse_stages(q)?;
|
||||
let results = crate::search::run_query(&stages, vec![], &graph, &store, false, count);
|
||||
if results.is_empty() {
|
||||
return Err(format!("query returned no results: {}", q));
|
||||
|
|
|
|||
|
|
@ -3,25 +3,26 @@
|
|||
|
||||
pub fn cmd_search(terms: &[String], pipeline_args: &[String], expand: bool, full: bool, debug: bool, fuzzy: bool, content: bool) -> Result<(), String> {
|
||||
use std::collections::BTreeMap;
|
||||
use crate::search::{Stage, Algorithm, AlgoStage};
|
||||
|
||||
// When running inside an agent session, exclude already-surfaced nodes
|
||||
let seen = crate::session::HookSession::from_env()
|
||||
.map(|s| s.seen())
|
||||
.unwrap_or_default();
|
||||
|
||||
// Parse pipeline stages (unified: algorithms, filters, transforms, generators)
|
||||
let stages: Vec<crate::search::Stage> = if pipeline_args.is_empty() {
|
||||
vec![crate::search::Stage::Algorithm(crate::search::AlgoStage::parse("spread").unwrap())]
|
||||
// Build pipeline: if args provided, parse them; otherwise default to spread
|
||||
let stages: Vec<Stage> = if pipeline_args.is_empty() {
|
||||
vec![Stage::Algorithm(AlgoStage { algo: Algorithm::Spread, params: std::collections::HashMap::new() })]
|
||||
} else {
|
||||
pipeline_args.iter()
|
||||
.map(|a| crate::search::Stage::parse(a))
|
||||
.collect::<Result<Vec<_>, _>>()?
|
||||
// Join args with | and parse as unified query
|
||||
let pipeline_str = format!("all | {}", pipeline_args.join(" | "));
|
||||
crate::query_parser::parse_stages(&pipeline_str)?
|
||||
};
|
||||
|
||||
// Check if pipeline needs full Store (has filters/transforms/generators)
|
||||
let needs_store = stages.iter().any(|s| !matches!(s, crate::search::Stage::Algorithm(_)));
|
||||
let needs_store = stages.iter().any(|s| !matches!(s, Stage::Algorithm(_)));
|
||||
// Check if pipeline starts with a generator (doesn't need seed terms)
|
||||
let has_generator = stages.first().map(|s| matches!(s, crate::search::Stage::Generator(_))).unwrap_or(false);
|
||||
let has_generator = stages.first().map(|s| matches!(s, Stage::Generator(_))).unwrap_or(false);
|
||||
|
||||
if terms.is_empty() && !has_generator {
|
||||
return Err("search requires terms or a generator stage (e.g. 'all')".into());
|
||||
|
|
|
|||
|
|
@ -157,6 +157,9 @@ pub enum Filter {
|
|||
pub enum Transform {
|
||||
Sort(SortField),
|
||||
Limit(usize),
|
||||
Select(Vec<String>),
|
||||
Count,
|
||||
Connectivity,
|
||||
DominatingSet,
|
||||
}
|
||||
|
||||
|
|
@ -168,6 +171,8 @@ pub enum SortField {
|
|||
Degree,
|
||||
Weight,
|
||||
Isolation,
|
||||
Key,
|
||||
Named(String, bool), // (field_name, ascending)
|
||||
Composite(Vec<(ScoreField, f64)>),
|
||||
}
|
||||
|
||||
|
|
@ -206,79 +211,6 @@ impl Cmp {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse a comparison like ">0.5", ">=60", "<7d" (durations converted to seconds).
|
||||
fn parse_cmp(s: &str) -> Result<Cmp, String> {
|
||||
let (op_len, ctor): (usize, fn(f64) -> Cmp) = if s.starts_with(">=") {
|
||||
(2, Cmp::Gte)
|
||||
} else if s.starts_with("<=") {
|
||||
(2, Cmp::Lte)
|
||||
} else if s.starts_with('>') {
|
||||
(1, Cmp::Gt)
|
||||
} else if s.starts_with('<') {
|
||||
(1, Cmp::Lt)
|
||||
} else if s.starts_with('=') {
|
||||
(1, Cmp::Eq)
|
||||
} else {
|
||||
return Err(format!("expected comparison operator in '{}'", s));
|
||||
};
|
||||
|
||||
let val_str = &s[op_len..];
|
||||
let val = parse_duration_or_number(val_str)?;
|
||||
Ok(ctor(val))
|
||||
}
|
||||
|
||||
/// Parse "7d", "24h", "30m" as seconds, or plain numbers.
fn parse_duration_or_number(s: &str) -> Result<f64, String> {
    // (suffix, seconds-per-unit) pairs, checked in order; first match wins.
    const UNITS: [(char, f64); 3] = [('d', 86400.0), ('h', 3600.0), ('m', 60.0)];
    for (suffix, mult) in UNITS {
        if let Some(num) = s.strip_suffix(suffix) {
            let v: f64 = num.parse().map_err(|_| format!("bad number: {}", num))?;
            return Ok(v * mult);
        }
    }
    // No recognized unit suffix: interpret the whole string as a bare number.
    s.parse().map_err(|_| format!("bad number: {}", s))
}
|
||||
|
||||
/// Parse composite sort: "isolation*0.7+recency(linker)*0.3"
|
||||
/// Each term is field or field(arg), optionally *weight (default 1.0).
|
||||
fn parse_composite_sort(s: &str) -> Result<Vec<(ScoreField, f64)>, String> {
|
||||
let mut terms = Vec::new();
|
||||
for term in s.split('+') {
|
||||
let term = term.trim();
|
||||
let (field_part, weight) = if let Some((f, w)) = term.rsplit_once('*') {
|
||||
(f, w.parse::<f64>().map_err(|_| format!("bad weight: {}", w))?)
|
||||
} else {
|
||||
(term, 1.0)
|
||||
};
|
||||
|
||||
// Parse field, possibly with (arg)
|
||||
let field = if let Some((name, arg)) = field_part.split_once('(') {
|
||||
let arg = arg.strip_suffix(')').ok_or("missing ) in sort field")?;
|
||||
match name {
|
||||
"recency" => ScoreField::Recency(arg.to_string()),
|
||||
_ => return Err(format!("unknown parameterized sort field: {}", name)),
|
||||
}
|
||||
} else {
|
||||
match field_part {
|
||||
"isolation" => ScoreField::Isolation,
|
||||
"degree" => ScoreField::Degree,
|
||||
"weight" => ScoreField::Weight,
|
||||
"content-len" => ScoreField::ContentLen,
|
||||
"priority" => ScoreField::Priority,
|
||||
_ => return Err(format!("unknown sort field: {}", field_part)),
|
||||
}
|
||||
};
|
||||
terms.push((field, weight));
|
||||
}
|
||||
if terms.is_empty() {
|
||||
return Err("empty composite sort".into());
|
||||
}
|
||||
Ok(terms)
|
||||
}
|
||||
|
||||
/// Compute a 0-1 score for a node on a single dimension.
|
||||
fn score_field(
|
||||
|
|
@ -348,129 +280,6 @@ impl CompositeCache {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse a NodeType from a label.
|
||||
fn parse_node_type(s: &str) -> Result<NodeType, String> {
|
||||
match s {
|
||||
"episodic" | "session" => Ok(NodeType::EpisodicSession),
|
||||
"daily" => Ok(NodeType::EpisodicDaily),
|
||||
"weekly" => Ok(NodeType::EpisodicWeekly),
|
||||
"monthly" => Ok(NodeType::EpisodicMonthly),
|
||||
"semantic" => Ok(NodeType::Semantic),
|
||||
_ => Err(format!("unknown node type: {} (use: episodic, semantic, daily, weekly, monthly)", s)),
|
||||
}
|
||||
}
|
||||
|
||||
impl Stage {
    /// Parse a single stage from a string.
    ///
    /// Algorithm names are tried first (bare words), then predicate syntax
    /// (contains ':'). No ambiguity since algorithms are bare words.
    ///
    /// Recognized forms, tried in order:
    ///   - "all"                      → generator
    ///   - "dominating-set"           → transform
    ///   - bare word (no ':')         → algorithm, e.g. "spread"
    ///   - "name,params" (no ':')     → algorithm with params
    ///   - "prefix:value"             → filter / transform / generator
    /// A leading '!' negates the stage, which is only valid for filters.
    pub fn parse(s: &str) -> Result<Self, String> {
        let s = s.trim();
        // Peel off an optional '!' negation marker; applied at the end.
        let (negated, s) = if let Some(rest) = s.strip_prefix('!') {
            (true, rest)
        } else {
            (false, s)
        };

        // Generator: "all"
        if s == "all" {
            return Ok(Stage::Generator(Generator::All));
        }

        // Transform: "dominating-set"
        if s == "dominating-set" {
            return Ok(Stage::Transform(Transform::DominatingSet));
        }

        // Try algorithm parse first (bare words, no colon)
        if !s.contains(':')
            && let Ok(algo) = AlgoStage::parse(s) {
            return Ok(Stage::Algorithm(algo));
        }

        // Algorithm with params: "spread,max_hops=4" (contains comma but no colon)
        if s.contains(',') && !s.contains(':') {
            return AlgoStage::parse(s).map(Stage::Algorithm);
        }

        // Predicate/transform syntax: "key:value"
        let (prefix, value) = s.split_once(':')
            .ok_or_else(|| format!("unknown stage: {}", s))?;

        let filter_or_transform = match prefix {
            // Filters — narrow the working set.
            "type" => Stage::Filter(Filter::Type(parse_node_type(value)?)),
            "key" => Stage::Filter(Filter::KeyGlob(value.to_string())),
            "weight" => Stage::Filter(Filter::Weight(parse_cmp(value)?)),
            "age" => Stage::Filter(Filter::Age(parse_cmp(value)?)),
            "content-len" => Stage::Filter(Filter::ContentLen(parse_cmp(value)?)),
            "provenance" => {
                Stage::Filter(Filter::Provenance(value.to_string()))
            }
            // "not-visited:AGENT,DURATION" — duration accepts "7d"/"24h"/"30m"
            // or bare seconds (see parse_duration_or_number).
            "not-visited" => {
                let (agent, dur) = value.split_once(',')
                    .ok_or("not-visited:AGENT,DURATION")?;
                let secs = parse_duration_or_number(dur)?;
                Stage::Filter(Filter::NotVisited {
                    agent: agent.to_string(),
                    duration: secs as i64,
                })
            }
            "visited" => Stage::Filter(Filter::Visited {
                agent: value.to_string(),
            }),
            "sort" => {
                // Check for composite sort: field*weight+field*weight+...
                let field = if value.contains('+') || value.contains('*') {
                    SortField::Composite(parse_composite_sort(value)?)
                } else {
                    match value {
                        "priority" => SortField::Priority,
                        "timestamp" => SortField::Timestamp,
                        "content-len" => SortField::ContentLen,
                        "degree" => SortField::Degree,
                        "weight" => SortField::Weight,
                        "isolation" => SortField::Isolation,
                        _ => return Err(format!("unknown sort field: {}", value)),
                    }
                };
                Stage::Transform(Transform::Sort(field))
            }
            "limit" => {
                let n: usize = value.parse()
                    .map_err(|_| format!("bad limit: {}", value))?;
                Stage::Transform(Transform::Limit(n))
            }
            // "match:a,b,c" — generator seeded from comma-separated terms.
            "match" => {
                let terms: Vec<String> = value.split(',')
                    .map(|t| t.to_string())
                    .collect();
                Stage::Generator(Generator::Match(terms))
            }
            // Algorithm with colon in params? Try fallback.
            _ => return AlgoStage::parse(s).map(Stage::Algorithm)
                .map_err(|_| format!("unknown stage: {}", s)),
        };

        // Apply negation to filters
        if negated {
            match filter_or_transform {
                Stage::Filter(f) => Ok(Stage::Filter(Filter::Negated(Box::new(f)))),
                _ => Err("! prefix only works on filter stages".to_string()),
            }
        } else {
            Ok(filter_or_transform)
        }
    }

    /// Parse a pipe-separated pipeline string.
    ///
    /// Splits on '|', trims each segment, and parses it with [`Stage::parse`];
    /// collecting into `Result` stops at the first invalid stage.
    pub fn parse_pipeline(s: &str) -> Result<Vec<Stage>, String> {
        s.split('|')
            .map(|part| Stage::parse(part.trim()))
            .collect()
    }
}
|
||||
|
||||
impl fmt::Display for Stage {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
|
|
@ -479,6 +288,9 @@ impl fmt::Display for Stage {
|
|||
Stage::Filter(filt) => write!(f, "{}", filt),
|
||||
Stage::Transform(Transform::Sort(field)) => write!(f, "sort:{:?}", field),
|
||||
Stage::Transform(Transform::Limit(n)) => write!(f, "limit:{}", n),
|
||||
Stage::Transform(Transform::Select(fields)) => write!(f, "select:{}", fields.join(",")),
|
||||
Stage::Transform(Transform::Count) => write!(f, "count"),
|
||||
Stage::Transform(Transform::Connectivity) => write!(f, "connectivity"),
|
||||
Stage::Transform(Transform::DominatingSet) => write!(f, "dominating-set"),
|
||||
Stage::Algorithm(a) => write!(f, "{}", a.algo),
|
||||
}
|
||||
|
|
@ -613,7 +425,7 @@ fn run_generator(g: &Generator, store: &Store) -> Vec<(String, f64)> {
|
|||
}
|
||||
}
|
||||
|
||||
fn eval_filter(filt: &Filter, key: &str, store: &Store, now: i64) -> bool {
|
||||
pub fn eval_filter(filt: &Filter, key: &str, store: &Store, now: i64) -> bool {
|
||||
let node = match store.nodes.get(key) {
|
||||
Some(n) => n,
|
||||
None => return false,
|
||||
|
|
@ -686,6 +498,39 @@ pub fn run_transform(
|
|||
sb.total_cmp(&sa) // most isolated first
|
||||
});
|
||||
}
|
||||
SortField::Key => {
|
||||
items.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
}
|
||||
SortField::Named(field, asc) => {
|
||||
// Resolve field from node properties
|
||||
let resolve = |key: &str| -> Option<f64> {
|
||||
let node = store.nodes.get(key)?;
|
||||
match field.as_str() {
|
||||
"weight" => Some(node.weight as f64),
|
||||
"emotion" => Some(node.emotion as f64),
|
||||
"retrievals" => Some(node.retrievals as f64),
|
||||
"uses" => Some(node.uses as f64),
|
||||
"wrongs" => Some(node.wrongs as f64),
|
||||
"created" => Some(node.created_at as f64),
|
||||
"timestamp" => Some(node.timestamp as f64),
|
||||
"degree" => Some(graph.degree(key) as f64),
|
||||
"content_len" => Some(node.content.len() as f64),
|
||||
_ => None,
|
||||
}
|
||||
};
|
||||
let asc = *asc;
|
||||
items.sort_by(|a, b| {
|
||||
let va = resolve(&a.0);
|
||||
let vb = resolve(&b.0);
|
||||
let ord = match (va, vb) {
|
||||
(Some(a), Some(b)) => a.total_cmp(&b),
|
||||
(Some(_), None) => std::cmp::Ordering::Less,
|
||||
(None, Some(_)) => std::cmp::Ordering::Greater,
|
||||
(None, None) => a.0.cmp(&b.0),
|
||||
};
|
||||
if asc { ord } else { ord.reverse() }
|
||||
});
|
||||
}
|
||||
SortField::Priority => {
|
||||
// Pre-compute priorities to avoid O(n log n) calls
|
||||
// inside the sort comparator.
|
||||
|
|
@ -725,6 +570,8 @@ pub fn run_transform(
|
|||
items.truncate(*n);
|
||||
items
|
||||
}
|
||||
// Output mode directives - don't modify result set, handled at output layer
|
||||
Transform::Select(_) | Transform::Count | Transform::Connectivity => items,
|
||||
Transform::DominatingSet => {
|
||||
// Greedy 3-covering dominating set: pick the node that covers
|
||||
// the most under-covered neighbors, repeat until every node
|
||||
|
|
|
|||
|
|
@ -26,6 +26,12 @@ use crate::graph::Graph;
|
|||
use regex::Regex;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
// Re-export engine types used by Query
|
||||
pub use super::engine::{
|
||||
Stage, Filter, Transform, Generator, SortField,
|
||||
Algorithm, AlgoStage, Cmp,
|
||||
};
|
||||
|
||||
// -- AST types --
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
|
|
@ -57,16 +63,6 @@ pub enum CmpOp {
|
|||
Gt, Lt, Ge, Le, Eq, Ne, Match,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
// Post-expression pipeline stage for the PEG query parser.
// NOTE(review): this commit removes this enum in favor of re-exporting the
// engine's `Stage` — this is the pre-unification definition.
pub enum Stage {
    // Sort by a named field; `ascending: false` sorts descending.
    Sort { field: String, ascending: bool },
    // Keep at most this many results.
    Limit(usize),
    // Output directive: emit the named fields per result.
    Select(Vec<String>),
    // Output directive: print only the result count.
    Count,
    // Output directive: print connectivity information instead of rows.
    Connectivity,
    // Reduce the result set to a dominating set over the graph.
    DominatingSet,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Query {
|
||||
pub expr: Expr,
|
||||
|
|
@ -86,18 +82,54 @@ peg::parser! {
|
|||
= s:(_ "|" _ s:stage() { s })* { s }
|
||||
|
||||
rule stage() -> Stage
|
||||
= "sort" _ f:field() _ a:asc_desc() { Stage::Sort { field: f, ascending: a } }
|
||||
/ "limit" _ n:integer() { Stage::Limit(n) }
|
||||
/ "select" _ f:field_list() { Stage::Select(f) }
|
||||
/ "count" { Stage::Count }
|
||||
/ "connectivity" { Stage::Connectivity }
|
||||
/ "dominating-set" { Stage::DominatingSet }
|
||||
// Original PEG syntax (space-separated)
|
||||
= "sort" _ f:field() _ a:asc_desc() {
|
||||
Stage::Transform(Transform::Sort(make_sort_field(&f, a)))
|
||||
}
|
||||
/ "limit" _ n:integer() { Stage::Transform(Transform::Limit(n)) }
|
||||
/ "select" _ f:field_list() { Stage::Transform(Transform::Select(f)) }
|
||||
/ "count" { Stage::Transform(Transform::Count) }
|
||||
/ "connectivity" { Stage::Transform(Transform::Connectivity) }
|
||||
/ "dominating-set" { Stage::Transform(Transform::DominatingSet) }
|
||||
// Pipeline syntax (colon-separated)
|
||||
/ "sort:" f:field() { Stage::Transform(Transform::Sort(make_sort_field(&f, false))) }
|
||||
/ "limit:" n:integer() { Stage::Transform(Transform::Limit(n)) }
|
||||
/ "select:" f:field_list_colon() { Stage::Transform(Transform::Select(f)) }
|
||||
/ "type:" t:ident() { make_type_filter(&t) }
|
||||
/ "age:" c:cmp_duration() { Stage::Filter(Filter::Age(c)) }
|
||||
/ "key:" g:ident() { Stage::Filter(Filter::KeyGlob(g)) }
|
||||
/ "provenance:" p:ident() { Stage::Filter(Filter::Provenance(p)) }
|
||||
/ "all" { Stage::Generator(Generator::All) }
|
||||
// Graph algorithms
|
||||
/ "spread" { Stage::Algorithm(AlgoStage { algo: Algorithm::Spread, params: std::collections::HashMap::new() }) }
|
||||
/ "spectral" { Stage::Algorithm(AlgoStage { algo: Algorithm::Spectral, params: std::collections::HashMap::new() }) }
|
||||
|
||||
rule asc_desc() -> bool
|
||||
= "asc" { true }
|
||||
/ "desc" { false }
|
||||
/ { false } // default: descending
|
||||
|
||||
rule field_list_colon() -> Vec<String>
|
||||
= f:field() fs:("," f:field() { f })* {
|
||||
let mut v = vec![f];
|
||||
v.extend(fs);
|
||||
v
|
||||
}
|
||||
|
||||
rule cmp_duration() -> Cmp
|
||||
= ">=" n:duration() { Cmp::Gte(n) }
|
||||
/ "<=" n:duration() { Cmp::Lte(n) }
|
||||
/ ">" n:duration() { Cmp::Gt(n) }
|
||||
/ "<" n:duration() { Cmp::Lt(n) }
|
||||
/ "=" n:duration() { Cmp::Eq(n) }
|
||||
|
||||
rule duration() -> f64
|
||||
= n:number() "d" { n * 86400.0 }
|
||||
/ n:number() "h" { n * 3600.0 }
|
||||
/ n:number() "m" { n * 60.0 }
|
||||
/ n:number() "s" { n }
|
||||
/ n:number() { n }
|
||||
|
||||
rule field_list() -> Vec<String>
|
||||
= f:field() fs:(_ "," _ f:field() { f })* {
|
||||
let mut v = vec![f];
|
||||
|
|
@ -122,6 +154,7 @@ peg::parser! {
|
|||
Expr::Comparison { field: f, op, value: v }
|
||||
}
|
||||
"*" { Expr::All }
|
||||
"all" { Expr::All }
|
||||
"(" _ e:expr() _ ")" { e }
|
||||
}
|
||||
|
||||
|
|
@ -167,6 +200,55 @@ peg::parser! {
|
|||
}
|
||||
}
|
||||
|
||||
// -- Helper functions for PEG grammar --
|
||||
|
||||
fn make_sort_field(field: &str, ascending: bool) -> SortField {
|
||||
match field {
|
||||
"priority" => SortField::Priority,
|
||||
"timestamp" => SortField::Timestamp,
|
||||
"content-len" | "content_len" => SortField::ContentLen,
|
||||
"degree" => SortField::Degree,
|
||||
"weight" => SortField::Weight,
|
||||
"isolation" => SortField::Isolation,
|
||||
"key" => SortField::Key,
|
||||
_ => SortField::Named(field.to_string(), ascending),
|
||||
}
|
||||
}
|
||||
|
||||
fn make_type_filter(type_name: &str) -> Stage {
|
||||
let node_type = match type_name {
|
||||
"episodic" | "session" => NodeType::EpisodicSession,
|
||||
"daily" => NodeType::EpisodicDaily,
|
||||
"weekly" => NodeType::EpisodicWeekly,
|
||||
"monthly" => NodeType::EpisodicMonthly,
|
||||
"semantic" => NodeType::Semantic,
|
||||
_ => NodeType::Semantic, // fallback
|
||||
};
|
||||
Stage::Filter(Filter::Type(node_type))
|
||||
}
|
||||
|
||||
/// Parse a query string into Vec<Stage> for pipeline execution.
|
||||
/// This is the unified entry point — replaces engine::Stage::parse_pipeline.
|
||||
pub fn parse_stages(s: &str) -> Result<Vec<Stage>, String> {
|
||||
let q = query_parser::query(s)
|
||||
.map_err(|e| format!("Parse error: {}", e))?;
|
||||
|
||||
let mut stages = Vec::new();
|
||||
|
||||
// Convert Expr to a Generator stage
|
||||
match &q.expr {
|
||||
Expr::All => stages.push(Stage::Generator(Generator::All)),
|
||||
_ => {
|
||||
// For complex expressions, we need the Query-based path
|
||||
// This shouldn't happen for pipeline queries
|
||||
return Err("Complex expressions not supported in pipeline mode; use CLI query".into());
|
||||
}
|
||||
}
|
||||
|
||||
stages.extend(q.stages);
|
||||
Ok(stages)
|
||||
}
|
||||
|
||||
// -- Field resolution --
|
||||
|
||||
/// Resolve a field value from a node + graph context, returning a comparable Value.
|
||||
|
|
@ -377,12 +459,12 @@ fn execute_parsed(
|
|||
let mut set = Vec::new();
|
||||
for stage in &q.stages {
|
||||
match stage {
|
||||
Stage::Select(fields) => {
|
||||
Stage::Transform(Transform::Select(fields)) => {
|
||||
for f in fields {
|
||||
if !set.contains(f) { set.push(f.clone()); }
|
||||
}
|
||||
}
|
||||
Stage::Sort { field, .. } => {
|
||||
Stage::Transform(Transform::Sort(SortField::Named(field, _))) => {
|
||||
if !set.contains(field) { set.push(field.clone()); }
|
||||
}
|
||||
_ => {}
|
||||
|
|
@ -404,37 +486,75 @@ fn execute_parsed(
|
|||
let mut has_sort = false;
|
||||
for stage in &q.stages {
|
||||
match stage {
|
||||
Stage::Sort { field, ascending } => {
|
||||
Stage::Transform(Transform::Sort(sort_field)) => {
|
||||
has_sort = true;
|
||||
let asc = *ascending;
|
||||
results.sort_by(|a, b| {
|
||||
let va = a.fields.get(field).and_then(as_num);
|
||||
let vb = b.fields.get(field).and_then(as_num);
|
||||
let ord = match (va, vb) {
|
||||
(Some(a), Some(b)) => a.total_cmp(&b),
|
||||
_ => {
|
||||
let sa = a.fields.get(field).map(as_str).unwrap_or_default();
|
||||
let sb = b.fields.get(field).map(as_str).unwrap_or_default();
|
||||
sa.cmp(&sb)
|
||||
}
|
||||
};
|
||||
if asc { ord } else { ord.reverse() }
|
||||
});
|
||||
match sort_field {
|
||||
SortField::Named(field, asc) => {
|
||||
let asc = *asc;
|
||||
let field = field.clone();
|
||||
results.sort_by(|a, b| {
|
||||
let va = a.fields.get(&field).and_then(as_num);
|
||||
let vb = b.fields.get(&field).and_then(as_num);
|
||||
let ord = match (va, vb) {
|
||||
(Some(a), Some(b)) => a.total_cmp(&b),
|
||||
_ => {
|
||||
let sa = a.fields.get(&field).map(as_str).unwrap_or_default();
|
||||
let sb = b.fields.get(&field).map(as_str).unwrap_or_default();
|
||||
sa.cmp(&sb)
|
||||
}
|
||||
};
|
||||
if asc { ord } else { ord.reverse() }
|
||||
});
|
||||
}
|
||||
SortField::Key => {
|
||||
results.sort_by(|a, b| a.key.cmp(&b.key));
|
||||
}
|
||||
SortField::Degree => {
|
||||
results.sort_by(|a, b| {
|
||||
let da = graph.degree(&a.key);
|
||||
let db = graph.degree(&b.key);
|
||||
db.cmp(&da)
|
||||
});
|
||||
}
|
||||
SortField::Weight => {
|
||||
results.sort_by(|a, b| {
|
||||
let wa = store.nodes.get(&a.key).map(|n| n.weight).unwrap_or(0.0);
|
||||
let wb = store.nodes.get(&b.key).map(|n| n.weight).unwrap_or(0.0);
|
||||
wb.total_cmp(&wa)
|
||||
});
|
||||
}
|
||||
SortField::Timestamp => {
|
||||
results.sort_by(|a, b| {
|
||||
let ta = store.nodes.get(&a.key).map(|n| n.timestamp).unwrap_or(0);
|
||||
let tb = store.nodes.get(&b.key).map(|n| n.timestamp).unwrap_or(0);
|
||||
tb.cmp(&ta)
|
||||
});
|
||||
}
|
||||
_ => {} // other sort fields handled by default degree sort
|
||||
}
|
||||
}
|
||||
Stage::Limit(n) => {
|
||||
Stage::Transform(Transform::Limit(n)) => {
|
||||
results.truncate(*n);
|
||||
}
|
||||
Stage::Connectivity => {} // handled in output
|
||||
Stage::Select(_) | Stage::Count => {} // handled in output
|
||||
Stage::DominatingSet => {
|
||||
Stage::Transform(Transform::Connectivity) => {} // handled in output
|
||||
Stage::Transform(Transform::Select(_) | Transform::Count) => {} // handled in output
|
||||
Stage::Transform(Transform::DominatingSet) => {
|
||||
let mut items: Vec<(String, f64)> = results.iter()
|
||||
.map(|r| (r.key.clone(), graph.degree(&r.key) as f64))
|
||||
.collect();
|
||||
let xform = super::engine::Transform::DominatingSet;
|
||||
let xform = Transform::DominatingSet;
|
||||
items = super::engine::run_transform(&xform, items, store, graph);
|
||||
let keep: std::collections::HashSet<String> = items.into_iter().map(|(k, _)| k).collect();
|
||||
results.retain(|r| keep.contains(&r.key));
|
||||
}
|
||||
Stage::Filter(filt) => {
|
||||
// Apply filter to narrow results
|
||||
let now = crate::store::now_epoch();
|
||||
results.retain(|r| super::engine::eval_filter(filt, &r.key, store, now));
|
||||
}
|
||||
Stage::Generator(_) | Stage::Algorithm(_) => {
|
||||
// Generators are handled by Expr, algorithms not applicable here
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -474,7 +594,7 @@ pub fn run_query(store: &Store, graph: &Graph, query_str: &str) -> Result<(), St
|
|||
let results = execute_parsed(store, graph, &q)?;
|
||||
|
||||
// Count stage
|
||||
if q.stages.iter().any(|s| matches!(s, Stage::Count)) {
|
||||
if q.stages.iter().any(|s| matches!(s, Stage::Transform(Transform::Count))) {
|
||||
println!("{}", results.len());
|
||||
return Ok(());
|
||||
}
|
||||
|
|
@ -485,14 +605,14 @@ pub fn run_query(store: &Store, graph: &Graph, query_str: &str) -> Result<(), St
|
|||
}
|
||||
|
||||
// Connectivity stage
|
||||
if q.stages.iter().any(|s| matches!(s, Stage::Connectivity)) {
|
||||
if q.stages.iter().any(|s| matches!(s, Stage::Transform(Transform::Connectivity))) {
|
||||
print_connectivity(&results, graph);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Select stage
|
||||
let fields: Option<&Vec<String>> = q.stages.iter().find_map(|s| match s {
|
||||
Stage::Select(f) => Some(f),
|
||||
Stage::Transform(Transform::Select(f)) => Some(f),
|
||||
_ => None,
|
||||
});
|
||||
|
||||
|
|
@ -527,7 +647,7 @@ pub fn query_to_string(store: &Store, graph: &Graph, query_str: &str) -> Result<
|
|||
|
||||
let results = execute_parsed(store, graph, &q)?;
|
||||
|
||||
if q.stages.iter().any(|s| matches!(s, Stage::Count)) {
|
||||
if q.stages.iter().any(|s| matches!(s, Stage::Transform(Transform::Count))) {
|
||||
return Ok(results.len().to_string());
|
||||
}
|
||||
if results.is_empty() {
|
||||
|
|
@ -535,7 +655,7 @@ pub fn query_to_string(store: &Store, graph: &Graph, query_str: &str) -> Result<
|
|||
}
|
||||
|
||||
let fields: Option<&Vec<String>> = q.stages.iter().find_map(|s| match s {
|
||||
Stage::Select(f) => Some(f),
|
||||
Stage::Transform(Transform::Select(f)) => Some(f),
|
||||
_ => None,
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -801,7 +801,7 @@ pub fn run_agent(
|
|||
|
||||
// Run the query if present
|
||||
let keys = if !def.query.is_empty() {
|
||||
let mut stages = search::Stage::parse_pipeline(&def.query)?;
|
||||
let mut stages = crate::query_parser::parse_stages(&def.query)?;
|
||||
let has_limit = stages.iter().any(|s|
|
||||
matches!(s, search::Stage::Transform(search::Transform::Limit(_))));
|
||||
if !has_limit {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue