search: unified query pipeline with filters, transforms, generators
Extend the pipeline with four stage types composing left-to-right:
Generators: all, match:TERM
Filters: type:, key:, weight:, age:, content-len:, provenance:,
not-visited:, visited: (plus ! negation)
Transforms: sort:(priority|timestamp|content-len|degree|weight), limit:N
Algorithms: spread, spectral, confluence, geodesic, manifold (unchanged)
Duration syntax (7d, 24h, 30m) and glob matching on keys.
CLI auto-detects filter/transform stages and loads full Store;
algorithm-only pipelines keep the fast MmapView path.
Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
2026-03-10 15:22:12 -04:00
poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
  schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
  link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00

// Memory search: composable query pipeline.
//
// The pipeline has four kinds of stages, all composing left-to-right:
//
// Generators — produce a result set from nothing:
//   all           every non-deleted node
//   match:TERM    text match (current seed extraction)
//
// Filters — narrow an existing result set on node metadata:
//   type:episodic          node_type == EpisodicSession
//   type:semantic          node_type == Semantic
//   type:daily             node_type == EpisodicDaily
//   type:weekly            node_type == EpisodicWeekly
//   type:monthly           node_type == EpisodicMonthly
//   key:GLOB               glob match on key
//   weight:>0.5            numeric comparison on weight
//   age:<7d                created/modified within duration
//   content-len:>1000      content size filter
//   provenance:manual      provenance match
//   not-visited:AGENT,DUR  not seen by agent in duration
//   visited:AGENT          has been seen by agent
//
// Transforms — reorder or reshape:
//   sort:priority     consolidation priority scoring
//   sort:timestamp    by timestamp (desc)
//   sort:content-len  by content size
//   sort:degree       by graph degree
//   sort:weight       by weight
//   limit:N           truncate to N results
//
// Algorithms — graph exploration (existing):
//   spread         spreading activation
//   spectral,k=20  spectral nearest neighbors
//   confluence     multi-source reachability
//   geodesic       straightest spectral paths
//   manifold       extrapolation along seed direction
//
// Stages are parsed from strings and composed via the -p flag or
// pipe-separated in agent definitions.
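As a concrete illustration of the stage syntax above, composition is pipe-splitting followed by per-stage parsing; the pipeline string below is a hypothetical example, not one taken from a shipped agent definition:

```rust
// Hypothetical -p pipeline: generate, filter, reorder, truncate,
// composing left-to-right on '|' boundaries.
fn main() {
    let pipeline = "all | type:semantic | weight:>0.5 | sort:priority | limit:20";
    let stages: Vec<&str> = pipeline.split('|').map(str::trim).collect();
    assert_eq!(stages.len(), 5);
    assert_eq!(stages[0], "all");
    assert_eq!(stages[4], "limit:20");
    println!("{:?}", stages);
}
```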

use crate::store::{Store, StoreView, NodeType};
use crate::graph::Graph;
use crate::spectral;
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
use std::fmt;

pub struct SearchResult {
    pub key: String,
    pub activation: f64,
    pub is_direct: bool,
    pub snippet: Option<String>,
}

/// A parsed algorithm stage with its parameters.
#[derive(Clone, Debug)]
pub struct AlgoStage {
    pub algo: Algorithm,
    pub params: HashMap<String, String>,
}

#[derive(Clone, Debug)]
pub enum Algorithm {
    Spread,
    Spectral,
    Manifold,
    Confluence,
    Geodesic,
}

impl fmt::Display for Algorithm {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Algorithm::Spread => write!(f, "spread"),
            Algorithm::Spectral => write!(f, "spectral"),
            Algorithm::Manifold => write!(f, "manifold"),
            Algorithm::Confluence => write!(f, "confluence"),
            Algorithm::Geodesic => write!(f, "geodesic"),
        }
    }
}

impl AlgoStage {
    /// Parse "spread,max_hops=4,edge_decay=0.5" into an AlgoStage.
    pub fn parse(s: &str) -> Result<Self, String> {
        let mut parts = s.split(',');
        let name = parts.next().unwrap_or("");
        let algo = match name {
            "spread" => Algorithm::Spread,
            "spectral" => Algorithm::Spectral,
            "manifold" => Algorithm::Manifold,
            "confluence" => Algorithm::Confluence,
            "geodesic" => Algorithm::Geodesic,
            _ => return Err(format!("unknown algorithm: {}", name)),
        };
        let mut params = HashMap::new();
        for part in parts {
            if let Some((k, v)) = part.split_once('=') {
                params.insert(k.to_string(), v.to_string());
            } else {
                return Err(format!("bad param (expected key=val): {}", part));
            }
        }
        Ok(AlgoStage { algo, params })
    }

    fn param_f64(&self, key: &str, default: f64) -> f64 {
        self.params.get(key)
            .and_then(|v| v.parse().ok())
            .unwrap_or(default)
    }

    fn param_u32(&self, key: &str, default: u32) -> u32 {
        self.params.get(key)
            .and_then(|v| v.parse().ok())
            .unwrap_or(default)
    }

    fn param_usize(&self, key: &str, default: usize) -> usize {
        self.params.get(key)
            .and_then(|v| v.parse().ok())
            .unwrap_or(default)
    }
}
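A self-contained sketch of the `name,key=val,...` grammar that `AlgoStage::parse` accepts, simplified to return the raw name and parameter map; the `parse_algo` helper is illustrative, not part of this module:

```rust
use std::collections::HashMap;

// Simplified stand-in for AlgoStage::parse: the first comma-separated
// field is the algorithm name, the rest must be key=val pairs.
fn parse_algo(s: &str) -> Result<(String, HashMap<String, String>), String> {
    let mut parts = s.split(',');
    let name = parts.next().unwrap_or("").to_string();
    let mut params = HashMap::new();
    for part in parts {
        match part.split_once('=') {
            Some((k, v)) => { params.insert(k.to_string(), v.to_string()); }
            None => return Err(format!("bad param (expected key=val): {}", part)),
        }
    }
    Ok((name, params))
}

fn main() {
    let (name, params) = parse_algo("spread,max_hops=4,edge_decay=0.5").unwrap();
    assert_eq!(name, "spread");
    assert_eq!(params.get("max_hops").map(String::as_str), Some("4"));
    // A bare token after the name is rejected, as in the real parser.
    assert!(parse_algo("spread,oops").is_err());
    println!("{} {:?}", name, params);
}
```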

// ── Unified query pipeline ──────────────────────────────────────────

/// A pipeline stage: generator, filter, transform, or graph algorithm.
#[derive(Clone, Debug)]
pub enum Stage {
    Generator(Generator),
    Filter(Filter),
    Transform(Transform),
    Algorithm(AlgoStage),
}

#[derive(Clone, Debug)]
pub enum Generator {
    All,                // every non-deleted node
    Match(Vec<String>), // text match seeds
}

#[derive(Clone, Debug)]
pub enum Filter {
    Type(NodeType),
    KeyGlob(String),
    Weight(Cmp),
    Age(Cmp), // vs now - timestamp (seconds)
    ContentLen(Cmp),
    Provenance(String),
    NotVisited { agent: String, duration: i64 }, // seconds
    Visited { agent: String },
    Negated(Box<Filter>),
}

#[derive(Clone, Debug)]
pub enum Transform {
    Sort(SortField),
    Limit(usize),
}

#[derive(Clone, Debug)]
pub enum SortField {
    Priority,
    Timestamp,
    ContentLen,
    Degree,
    Weight,
}

/// Numeric comparison operator.
#[derive(Clone, Debug)]
pub enum Cmp {
    Gt(f64),
    Gte(f64),
    Lt(f64),
    Lte(f64),
    Eq(f64),
}

impl Cmp {
    fn matches(&self, val: f64) -> bool {
        match self {
            Cmp::Gt(x) => val > *x,
            Cmp::Gte(x) => val >= *x,
            Cmp::Lt(x) => val < *x,
            Cmp::Lte(x) => val <= *x,
            Cmp::Eq(x) => (val - x).abs() < f64::EPSILON,
        }
    }
}

/// Parse a comparison like ">0.5", ">=60", "<7d" (durations converted to seconds).
fn parse_cmp(s: &str) -> Result<Cmp, String> {
    let (op_len, ctor): (usize, fn(f64) -> Cmp) = if s.starts_with(">=") {
        (2, Cmp::Gte)
    } else if s.starts_with("<=") {
        (2, Cmp::Lte)
    } else if s.starts_with('>') {
        (1, Cmp::Gt)
    } else if s.starts_with('<') {
        (1, Cmp::Lt)
    } else if s.starts_with('=') {
        (1, Cmp::Eq)
    } else {
        return Err(format!("expected comparison operator in '{}'", s));
    };

    let val_str = &s[op_len..];
    let val = parse_duration_or_number(val_str)?;
    Ok(ctor(val))
}

/// Parse "7d", "24h", "30m" as seconds, or plain numbers.
fn parse_duration_or_number(s: &str) -> Result<f64, String> {
    if let Some(n) = s.strip_suffix('d') {
        let v: f64 = n.parse().map_err(|_| format!("bad number: {}", n))?;
        Ok(v * 86400.0)
    } else if let Some(n) = s.strip_suffix('h') {
        let v: f64 = n.parse().map_err(|_| format!("bad number: {}", n))?;
        Ok(v * 3600.0)
    } else if let Some(n) = s.strip_suffix('m') {
        let v: f64 = n.parse().map_err(|_| format!("bad number: {}", n))?;
        Ok(v * 60.0)
    } else {
        s.parse().map_err(|_| format!("bad number: {}", s))
    }
}

/// Parse a NodeType from a label.
fn parse_node_type(s: &str) -> Result<NodeType, String> {
    match s {
        "episodic" | "session" => Ok(NodeType::EpisodicSession),
        "daily" => Ok(NodeType::EpisodicDaily),
        "weekly" => Ok(NodeType::EpisodicWeekly),
        "monthly" => Ok(NodeType::EpisodicMonthly),
        "semantic" => Ok(NodeType::Semantic),
        _ => Err(format!("unknown node type: {} (use: episodic, semantic, daily, weekly, monthly)", s)),
    }
}

impl Stage {
    /// Parse a single stage from a string.
    ///
    /// Algorithm names are tried first (bare words), then predicate syntax
    /// (contains ':'). No ambiguity since algorithms are bare words.
    pub fn parse(s: &str) -> Result<Self, String> {
        let s = s.trim();
        let (negated, s) = if let Some(rest) = s.strip_prefix('!') {
            (true, rest)
        } else {
            (false, s)
        };

        // Generator: "all"
        if s == "all" {
            return Ok(Stage::Generator(Generator::All));
        }

        // Try algorithm parse first (bare words, no colon)
        if !s.contains(':') {
            if let Ok(algo) = AlgoStage::parse(s) {
                return Ok(Stage::Algorithm(algo));
            }
        }

        // Algorithm with params: "spread,max_hops=4" (contains comma but no colon)
        if s.contains(',') && !s.contains(':') {
            return AlgoStage::parse(s).map(Stage::Algorithm);
        }

        // Predicate/transform syntax: "key:value"
        let (prefix, value) = s.split_once(':')
            .ok_or_else(|| format!("unknown stage: {}", s))?;

        let filter_or_transform = match prefix {
            "type" => Stage::Filter(Filter::Type(parse_node_type(value)?)),
            "key" => Stage::Filter(Filter::KeyGlob(value.to_string())),
            "weight" => Stage::Filter(Filter::Weight(parse_cmp(value)?)),
            "age" => Stage::Filter(Filter::Age(parse_cmp(value)?)),
            "content-len" => Stage::Filter(Filter::ContentLen(parse_cmp(value)?)),
            "provenance" => {
                Stage::Filter(Filter::Provenance(value.to_string()))
            }
            "not-visited" => {
                let (agent, dur) = value.split_once(',')
                    .ok_or("not-visited:AGENT,DURATION")?;
                let secs = parse_duration_or_number(dur)?;
                Stage::Filter(Filter::NotVisited {
                    agent: agent.to_string(),
                    duration: secs as i64,
                })
            }
            "visited" => Stage::Filter(Filter::Visited {
                agent: value.to_string(),
            }),
            "sort" => {
                let field = match value {
                    "priority" => SortField::Priority,
                    "timestamp" => SortField::Timestamp,
                    "content-len" => SortField::ContentLen,
                    "degree" => SortField::Degree,
                    "weight" => SortField::Weight,
                    _ => return Err(format!("unknown sort field: {}", value)),
                };
                Stage::Transform(Transform::Sort(field))
            }
            "limit" => {
                let n: usize = value.parse()
                    .map_err(|_| format!("bad limit: {}", value))?;
                Stage::Transform(Transform::Limit(n))
            }
            "match" => {
                let terms: Vec<String> = value.split(',')
                    .map(|t| t.to_string())
                    .collect();
                Stage::Generator(Generator::Match(terms))
            }
            // Algorithm with colon in params? Try fallback.
            _ => return AlgoStage::parse(s).map(Stage::Algorithm)
                .map_err(|_| format!("unknown stage: {}", s)),
        };

        // Apply negation to filters
        if negated {
            match filter_or_transform {
                Stage::Filter(f) => Ok(Stage::Filter(Filter::Negated(Box::new(f)))),
                _ => Err("! prefix only works on filter stages".to_string()),
            }
        } else {
            Ok(filter_or_transform)
        }
    }

    /// Parse a pipe-separated pipeline string.
    pub fn parse_pipeline(s: &str) -> Result<Vec<Stage>, String> {
        s.split('|')
            .map(|part| Stage::parse(part.trim()))
            .collect()
    }
}

impl fmt::Display for Stage {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Stage::Generator(Generator::All) => write!(f, "all"),
            Stage::Generator(Generator::Match(terms)) => write!(f, "match:{}", terms.join(",")),
            Stage::Filter(filt) => write!(f, "{}", filt),
            Stage::Transform(Transform::Sort(field)) => write!(f, "sort:{:?}", field),
            Stage::Transform(Transform::Limit(n)) => write!(f, "limit:{}", n),
            Stage::Algorithm(a) => write!(f, "{}", a.algo),
        }
    }
}

impl fmt::Display for Filter {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Filter::Type(t) => write!(f, "type:{:?}", t),
            Filter::KeyGlob(g) => write!(f, "key:{}", g),
            Filter::Weight(c) => write!(f, "weight:{}", c),
            Filter::Age(c) => write!(f, "age:{}", c),
            Filter::ContentLen(c) => write!(f, "content-len:{}", c),
            Filter::Provenance(p) => write!(f, "provenance:{}", p),
            Filter::NotVisited { agent, duration } => write!(f, "not-visited:{},{}s", agent, duration),
            Filter::Visited { agent } => write!(f, "visited:{}", agent),
            Filter::Negated(inner) => write!(f, "!{}", inner),
        }
    }
}

impl fmt::Display for Cmp {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Cmp::Gt(v) => write!(f, ">{}", v),
            Cmp::Gte(v) => write!(f, ">={}", v),
            Cmp::Lt(v) => write!(f, "<{}", v),
            Cmp::Lte(v) => write!(f, "<={}", v),
            Cmp::Eq(v) => write!(f, "={}", v),
        }
    }
}

/// Simple glob matching (supports * and ?).
fn glob_matches(pattern: &str, text: &str) -> bool {
    fn inner(pat: &[char], txt: &[char]) -> bool {
        if pat.is_empty() { return txt.is_empty(); }
        if pat[0] == '*' {
            // Try matching * against 0..n characters
            for skip in 0..=txt.len() {
                if inner(&pat[1..], &txt[skip..]) { return true; }
            }
            return false;
        }
        if txt.is_empty() { return false; }
        if pat[0] == '?' || pat[0] == txt[0] {
            return inner(&pat[1..], &txt[1..]);
        }
        false
    }

    let pat: Vec<char> = pattern.chars().collect();
    let txt: Vec<char> = text.chars().collect();
    inner(&pat, &txt)
}

/// Run a unified query pipeline. Requires &Store for filter/transform stages.
///
/// If the pipeline starts with no generator, the input `seeds` are used.
/// Generators produce a fresh result set (ignoring seeds). Filters narrow
/// the current set. Transforms reorder/truncate. Algorithms do graph
/// exploration.
pub fn run_query(
    stages: &[Stage],
    seeds: Vec<(String, f64)>,
    graph: &Graph,
    store: &Store,
    debug: bool,
    max_results: usize,
) -> Vec<(String, f64)> {
    let now = crate::store::now_epoch();
    let mut current = seeds;

    for stage in stages {
        if debug {
            println!("\n[query] === {} ({} items in) ===", stage, current.len());
        }

        current = match stage {
            Stage::Generator(gen) => run_generator(gen, store),

            Stage::Filter(filt) => {
                current.into_iter()
                    .filter(|(key, _)| eval_filter(filt, key, store, now))
                    .collect()
            }

            Stage::Transform(xform) => run_transform(xform, current, store, graph),

            Stage::Algorithm(algo_stage) => {
                match algo_stage.algo {
                    Algorithm::Spread => run_spread(&current, graph, store, algo_stage, debug),
                    Algorithm::Spectral => run_spectral(&current, graph, algo_stage, debug),
                    Algorithm::Manifold => run_manifold(&current, graph, algo_stage, debug),
                    Algorithm::Confluence => run_confluence(&current, graph, store, algo_stage, debug),
                    Algorithm::Geodesic => run_geodesic(&current, graph, algo_stage, debug),
                }
            }
        };

        if debug {
            println!("[query] → {} results", current.len());
            for (key, score) in current.iter().take(10) {
                println!("  [{:.4}] {}", score, key);
            }
            if current.len() > 10 {
                println!("  ... ({} more)", current.len() - 10);
            }
        }
    }

    current.truncate(max_results);
    current
}
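Stripped of `Store` and `Graph`, the control flow above is a left fold over the stage list; here closures stand in for the stage kinds (an illustrative sketch, not the real stage types):

```rust
// Each stage maps the current result set to a new one; composing
// left-to-right is a fold with `current = stage(current)`.
fn main() {
    type Set = Vec<(String, f64)>;
    let stages: Vec<Box<dyn Fn(Set) -> Set>> = vec![
        // generator-like: ignore input, produce a fresh set
        Box::new(|_| vec![("a".into(), 0.9), ("b".into(), 0.4), ("c".into(), 0.7)]),
        // filter-like: weight:>0.5
        Box::new(|cur| cur.into_iter().filter(|(_, w)| *w > 0.5).collect()),
        // transform-like: sort:weight (desc) then limit:1
        Box::new(|mut cur| { cur.sort_by(|a, b| b.1.total_cmp(&a.1)); cur.truncate(1); cur }),
    ];
    let mut current: Set = Vec::new();
    for stage in &stages {
        current = stage(current);
    }
    assert_eq!(current, vec![("a".to_string(), 0.9)]);
}
```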

fn run_generator(gen: &Generator, store: &Store) -> Vec<(String, f64)> {
    match gen {
        Generator::All => {
            store.nodes.iter()
                .filter(|(_, n)| !n.deleted)
                .map(|(key, n)| (key.clone(), n.weight as f64))
                .collect()
        }
        Generator::Match(terms) => {
            let weighted: BTreeMap<String, f64> = terms.iter()
                .map(|t| (t.to_lowercase(), 1.0))
                .collect();
            let (seeds, _) = match_seeds(&weighted, store);
            seeds
        }
    }
}

fn eval_filter(filt: &Filter, key: &str, store: &Store, now: i64) -> bool {
    let node = match store.nodes.get(key) {
        Some(n) => n,
        None => return false,
    };

    match filt {
        Filter::Type(t) => node.node_type == *t,
        Filter::KeyGlob(pattern) => glob_matches(pattern, key),
        Filter::Weight(cmp) => cmp.matches(node.weight as f64),
        Filter::Age(cmp) => {
            let age_secs = (now - node.timestamp) as f64;
            cmp.matches(age_secs)
        }
        Filter::ContentLen(cmp) => cmp.matches(node.content.len() as f64),
        Filter::Provenance(p) => node.provenance == *p,
        Filter::NotVisited { agent, duration } => {
            let last = store.last_visited(key, agent);
            last == 0 || (now - last) > *duration
        }
        Filter::Visited { agent } => {
            store.last_visited(key, agent) > 0
        }
        Filter::Negated(inner) => !eval_filter(inner, key, store, now),
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
fn run_transform(
    xform: &Transform,
    mut items: Vec<(String, f64)>,
    store: &Store,
    graph: &Graph,
) -> Vec<(String, f64)> {
    match xform {
        Transform::Sort(field) => {
            match field {
                SortField::Weight => {
                    items.sort_by(|a, b| b.1.total_cmp(&a.1));
                }
                SortField::Timestamp => {
                    items.sort_by(|a, b| {
                        let ta = store.nodes.get(&a.0).map(|n| n.timestamp).unwrap_or(0);
                        let tb = store.nodes.get(&b.0).map(|n| n.timestamp).unwrap_or(0);
                        tb.cmp(&ta) // desc
                    });
                }
                SortField::ContentLen => {
                    items.sort_by(|a, b| {
                        let la = store.nodes.get(&a.0).map(|n| n.content.len()).unwrap_or(0);
                        let lb = store.nodes.get(&b.0).map(|n| n.content.len()).unwrap_or(0);
                        lb.cmp(&la) // desc
                    });
                }
                SortField::Degree => {
                    items.sort_by(|a, b| {
                        let da = graph.degree(&a.0);
                        let db = graph.degree(&b.0);
                        db.cmp(&da) // desc
                    });
                }
                SortField::Priority => {
                    // Pre-compute priorities to avoid O(n log n) calls
                    // inside the sort comparator.
                    let priorities: HashMap<String, f64> = items.iter()
                        .map(|(key, _)| {
                            let p = crate::neuro::consolidation_priority(
                                store, key, graph, None);
                            (key.clone(), p)
                        })
                        .collect();
                    items.sort_by(|a, b| {
                        let pa = priorities.get(&a.0).copied().unwrap_or(0.0);
                        let pb = priorities.get(&b.0).copied().unwrap_or(0.0);
                        pb.total_cmp(&pa) // desc
                    });
                }
            }
            items
        }
        Transform::Limit(n) => {
            items.truncate(*n);
            items
        }
    }
}
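
The `SortField::Priority` arm above caches scores before sorting so the expensive call runs once per key instead of once per comparison. As a standalone sketch of that precompute-then-sort pattern (the names `sort_by_cached_score` and the toy scorer are illustrative, not part of this crate):

```rust
use std::collections::HashMap;

// Illustrative sketch: compute each key's (potentially expensive) score
// once, then compare cached values inside the comparator.
fn sort_by_cached_score(items: &mut Vec<(String, f64)>, score: impl Fn(&str) -> f64) {
    let cache: HashMap<String, f64> = items
        .iter()
        .map(|(k, _)| (k.clone(), score(k)))
        .collect();
    // Descending by cached score, matching the sort:priority convention.
    items.sort_by(|a, b| cache[&b.0].total_cmp(&cache[&a.0]));
}

fn main() {
    let mut items = vec![
        ("a".to_string(), 0.0),
        ("b".to_string(), 0.0),
        ("c".to_string(), 0.0),
    ];
    sort_by_cached_score(&mut items, |k| match k {
        "b" => 3.0,
        "c" => 2.0,
        _ => 1.0,
    });
    assert_eq!(items[0].0, "b");
    assert_eq!(items[2].0, "a");
    println!("{:?}", items);
}
```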
/// Extract seeds from weighted terms by matching against node keys and content.
///
/// Three matching strategies, in priority order:
/// 1. Exact key match: term matches a node key exactly → full weight
/// 2. Key component match: term matches a word in a hyphenated/underscored key → 0.5× weight
/// 3. Content match: term appears in node content → 0.2× weight (capped at 50 nodes)
///
/// Returns (seeds, direct_hits) where direct_hits tracks which keys
/// were matched directly (vs found by an algorithm stage).
pub fn match_seeds(
    terms: &BTreeMap<String, f64>,
    store: &impl StoreView,
) -> (Vec<(String, f64)>, HashSet<String>) {
    match_seeds_opts(terms, store, false, false)
}

pub fn match_seeds_opts(
    terms: &BTreeMap<String, f64>,
    store: &impl StoreView,
    component_match: bool,
    content_fallback: bool,
) -> (Vec<(String, f64)>, HashSet<String>) {
    let mut seed_map: HashMap<String, f64> = HashMap::new();
    let mut direct_hits: HashSet<String> = HashSet::new();

    // Build key lookup: lowercase key → (original key, weight)
    let mut key_map: HashMap<String, (String, f64)> = HashMap::new();
    // Build component index: word → vec of (original key, weight)
    let mut component_map: HashMap<String, Vec<(String, f64)>> = HashMap::new();

    store.for_each_node(|key, _content, weight| {
        let lkey = key.to_lowercase();
        key_map.insert(lkey.clone(), (key.to_owned(), weight as f64));

        // Split key on hyphens, underscores, dots, hashes for component matching
        for component in lkey.split(|c: char| c == '-' || c == '_' || c == '.' || c == '#') {
            if component.len() >= 3 {
                component_map.entry(component.to_owned())
                    .or_default()
                    .push((key.to_owned(), weight as f64));
            }
        }
    });

    for (term, &term_weight) in terms {
        // Strategy 1: exact key match
        if let Some((orig_key, node_weight)) = key_map.get(term) {
            let score = term_weight * node_weight;
            *seed_map.entry(orig_key.clone()).or_insert(0.0) += score;
            direct_hits.insert(orig_key.clone());
            continue;
        }

        // Strategy 2: key component match (0.5× weight) — only when explicitly requested
        if component_match {
            if let Some(matches) = component_map.get(term.as_str()) {
                for (orig_key, node_weight) in matches {
                    let score = term_weight * node_weight * 0.5;
                    *seed_map.entry(orig_key.clone()).or_insert(0.0) += score;
                    direct_hits.insert(orig_key.clone());
                }
                continue;
            }
        }

        // Strategy 3: content match (0.2× weight) — only when explicitly requested
        if content_fallback {
            let term_lower = term.to_lowercase();
            if term_lower.len() >= 3 {
                let mut content_hits = 0;
                store.for_each_node(|key, content, weight| {
                    if content_hits >= 50 { return; }
                    if content.to_lowercase().contains(&term_lower) {
                        let score = term_weight * weight as f64 * 0.2;
                        *seed_map.entry(key.to_owned()).or_insert(0.0) += score;
                        content_hits += 1;
                    }
                });
            }
        }
    }

    let seeds: Vec<(String, f64)> = seed_map.into_iter().collect();
    (seeds, direct_hits)
}
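
The three strategies above weight a hit at 1.0×, 0.5×, or 0.2× of term_weight × node_weight, and scores from different terms accumulate additively on the same node. A minimal standalone sketch of that tiering (the `tier_score` helper is hypothetical, not part of this module):

```rust
use std::collections::HashMap;

// Hypothetical helper mirroring the tier multipliers used above:
// exact key match = 1.0x, key component match = 0.5x, content match = 0.2x.
fn tier_score(term_weight: f64, node_weight: f64, tier: u8) -> f64 {
    let factor = match tier {
        1 => 1.0, // exact key match
        2 => 0.5, // key component match
        _ => 0.2, // content substring match
    };
    term_weight * node_weight * factor
}

fn main() {
    // Two different terms hitting the same node accumulate additively,
    // like the `*seed_map.entry(..).or_insert(0.0) += score` pattern.
    let mut seed_map: HashMap<String, f64> = HashMap::new();
    *seed_map.entry("rust-async".into()).or_insert(0.0) += tier_score(1.0, 2.0, 1);
    *seed_map.entry("rust-async".into()).or_insert(0.0) += tier_score(1.0, 2.0, 2);
    assert!((seed_map["rust-async"] - 3.0).abs() < 1e-9);
    println!("{:?}", seed_map);
}
```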
/// Run a pipeline of algorithm stages.
pub fn run_pipeline(
    stages: &[AlgoStage],
    seeds: Vec<(String, f64)>,
    graph: &Graph,
    store: &impl StoreView,
    debug: bool,
    max_results: usize,
) -> Vec<(String, f64)> {
    let mut current = seeds;

    for stage in stages {
        if debug {
            println!("\n[search] === {} ({} seeds in) ===", stage.algo, current.len());
        }

        current = match stage.algo {
            Algorithm::Spread => run_spread(&current, graph, store, stage, debug),
            Algorithm::Spectral => run_spectral(&current, graph, stage, debug),
            Algorithm::Manifold => run_manifold(&current, graph, stage, debug),
            Algorithm::Confluence => run_confluence(&current, graph, store, stage, debug),
            Algorithm::Geodesic => run_geodesic(&current, graph, stage, debug),
        };

        if debug {
            println!("[search] {} → {} results", stage.algo, current.len());
            for (i, (key, score)) in current.iter().enumerate().take(15) {
                let cutoff = if i + 1 == max_results { " <-- cutoff" } else { "" };
                println!("  [{:.4}] {}{}", score, key, cutoff);
            }
            if current.len() > 15 {
                println!("  ... ({} more)", current.len() - 15);
            }
        }
    }

    current.truncate(max_results);
    current
}
/// Spreading activation: propagate scores through graph edges.
///
/// Tunable params: max_hops (default from store), edge_decay (default from store),
/// min_activation (default from store).
fn run_spread(
    seeds: &[(String, f64)],
    graph: &Graph,
    store: &impl StoreView,
    stage: &AlgoStage,
    _debug: bool,
) -> Vec<(String, f64)> {
    let store_params = store.params();
    let max_hops = stage.param_u32("max_hops", store_params.max_hops);
    let edge_decay = stage.param_f64("edge_decay", store_params.edge_decay);
    let min_activation = stage.param_f64("min_activation", store_params.min_activation * 0.1);

    spreading_activation(seeds, graph, store, max_hops, edge_decay, min_activation)
}
/// Spectral projection: find nearest neighbors in spectral embedding space.
///
/// Tunable params: k (default 20, number of neighbors to find).
fn run_spectral(
    seeds: &[(String, f64)],
    graph: &Graph,
    stage: &AlgoStage,
    debug: bool,
) -> Vec<(String, f64)> {
    let k = stage.param_usize("k", 20);

    let emb = match spectral::load_embedding() {
        Ok(e) => e,
        Err(e) => {
            if debug { println!("  no spectral embedding: {}", e); }
            return seeds.to_vec();
        }
    };

    let weighted_seeds: Vec<(&str, f64)> = seeds.iter()
        .map(|(k, w)| (k.as_str(), *w))
        .collect();
    let projected = spectral::nearest_to_seeds_weighted(
        &emb, &weighted_seeds, Some(graph), k,
    );

    if debug {
        for (key, dist) in &projected {
            let score = 1.0 / (1.0 + dist);
            println!("  dist={:.6} score={:.4} {}", dist, score, key);
        }
    }

    // Merge: keep original seeds, add spectral results as new seeds
    let seed_set: HashSet<&str> = seeds.iter().map(|(k, _)| k.as_str()).collect();
    let mut result = seeds.to_vec();
    for (key, dist) in projected {
        if !seed_set.contains(key.as_str()) {
            let score = 1.0 / (1.0 + dist);
            result.push((key, score));
        }
    }
    result
}
/// Confluence: multi-source reachability scoring.
///
/// Unlike spreading activation (which takes max activation from any source),
/// confluence rewards nodes reachable from *multiple* seeds. For each candidate
/// node within k hops, score = sum of (seed_weight * edge_decay^distance) across
/// all seeds that can reach it. Nodes at the intersection of multiple seeds'
/// neighborhoods score highest.
///
/// This naturally handles mixed seeds: unrelated seeds activate disjoint
/// neighborhoods that don't overlap, so their results separate naturally.
///
/// Tunable params: max_hops (default 3), edge_decay (default 0.5),
/// min_sources (default 2, minimum number of distinct seeds that must reach a node).
fn run_confluence(
    seeds: &[(String, f64)],
    graph: &Graph,
    store: &impl StoreView,
    stage: &AlgoStage,
    debug: bool,
) -> Vec<(String, f64)> {
    let max_hops = stage.param_u32("max_hops", 3);
    let edge_decay = stage.param_f64("edge_decay", 0.5);
    let min_sources = stage.param_usize("min_sources", 2);

    // For each seed, BFS outward collecting (node → activation) at each distance.
    // Track which seeds contributed to each node's score.
    let mut node_scores: HashMap<String, f64> = HashMap::new();
    let mut node_sources: HashMap<String, HashSet<usize>> = HashMap::new();

    for (seed_idx, (seed_key, seed_weight)) in seeds.iter().enumerate() {
        let mut visited: HashMap<String, f64> = HashMap::new();
        let mut queue: VecDeque<(String, u32)> = VecDeque::new();

        visited.insert(seed_key.clone(), *seed_weight);
        queue.push_back((seed_key.clone(), 0));

        while let Some((key, depth)) = queue.pop_front() {
            if depth >= max_hops { continue; }

            let act = visited[&key];

            for (neighbor, strength) in graph.neighbors(&key) {
                let neighbor_weight = store.node_weight(neighbor.as_str());
                let propagated = act * edge_decay * neighbor_weight * strength as f64;
                if propagated < 0.001 { continue; }

                if !visited.contains_key(neighbor.as_str()) || visited[neighbor.as_str()] < propagated {
                    visited.insert(neighbor.clone(), propagated);
                    queue.push_back((neighbor.clone(), depth + 1));
                }
            }
        }

        // Accumulate into global scores (additive across seeds)
        for (key, act) in visited {
            *node_scores.entry(key.clone()).or_insert(0.0) += act;
            node_sources.entry(key).or_default().insert(seed_idx);
        }
    }

    // Filter to nodes reached by min_sources distinct seeds
    let mut results: Vec<(String, f64)> = node_scores.into_iter()
        .filter(|(key, _)| {
            node_sources.get(key).map(|s| s.len()).unwrap_or(0) >= min_sources
        })
        .collect();

    if debug {
        // Show source counts
        for (key, score) in results.iter().take(15) {
            let sources = node_sources.get(key).map(|s| s.len()).unwrap_or(0);
            println!("  [{:.4}] {} (from {} seeds)", score, key, sources);
        }
    }

    results.sort_by(|a, b| b.1.total_cmp(&a.1));
    results
}
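
The additive multi-source idea above can be isolated into a self-contained toy: BFS from each seed with geometric decay, sum activations across seeds, and keep only nodes reached by enough distinct seeds. This sketch uses a plain adjacency map with unweighted nodes and edges, not the production `Graph`/`StoreView` types:

```rust
use std::collections::{HashMap, HashSet, VecDeque};

// Toy confluence: nodes at the intersection of several seeds' neighborhoods
// accumulate activation from each and outscore single-source nodes.
fn confluence(
    adj: &HashMap<&str, Vec<&str>>,
    seeds: &[(&str, f64)],
    max_hops: u32,
    decay: f64,
    min_sources: usize,
) -> Vec<(String, f64)> {
    let mut scores: HashMap<String, f64> = HashMap::new();
    let mut sources: HashMap<String, HashSet<usize>> = HashMap::new();
    for (idx, &(seed, w)) in seeds.iter().enumerate() {
        // Per-seed BFS keeping the best activation seen for each node.
        let mut act: HashMap<&str, f64> = HashMap::new();
        let mut queue: VecDeque<(&str, u32)> = VecDeque::new();
        act.insert(seed, w);
        queue.push_back((seed, 0));
        while let Some((node, depth)) = queue.pop_front() {
            if depth >= max_hops { continue; }
            let a = act[node];
            for &nb in adj.get(node).into_iter().flatten() {
                let p = a * decay;
                if act.get(nb).copied().unwrap_or(0.0) < p {
                    act.insert(nb, p);
                    queue.push_back((nb, depth + 1));
                }
            }
        }
        // Additive accumulation across seeds, plus source bookkeeping.
        for (node, a) in act {
            *scores.entry(node.to_string()).or_insert(0.0) += a;
            sources.entry(node.to_string()).or_default().insert(idx);
        }
    }
    let mut out: Vec<(String, f64)> = scores
        .into_iter()
        .filter(|(k, _)| sources[k].len() >= min_sources)
        .collect();
    out.sort_by(|a, b| b.1.total_cmp(&a.1));
    out
}

fn main() {
    let mut adj: HashMap<&str, Vec<&str>> = HashMap::new();
    adj.insert("a", vec!["c", "x"]);
    adj.insert("b", vec!["c"]);
    // "c" is reachable from both seeds; "x" only from "a".
    let hits = confluence(&adj, &[("a", 1.0), ("b", 1.0)], 2, 0.5, 2);
    assert_eq!(hits, vec![("c".to_string(), 1.0)]);
    println!("{:?}", hits);
}
```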
/// Geodesic: straightest paths between seed pairs in spectral space.
///
/// For each pair of seeds, walk the graph from one to the other, at each
/// step choosing the neighbor whose spectral direction most aligns with
/// the target direction. Nodes along these geodesic paths score higher
/// the more paths pass through them and the straighter those paths are.
///
/// Tunable params: max_path (default 6), k (default 20 results).
fn run_geodesic(
    seeds: &[(String, f64)],
    graph: &Graph,
    stage: &AlgoStage,
    debug: bool,
) -> Vec<(String, f64)> {
    let max_path = stage.param_usize("max_path", 6);
    let k = stage.param_usize("k", 20);

    let emb = match spectral::load_embedding() {
        Ok(e) => e,
        Err(e) => {
            if debug { println!("  no spectral embedding: {}", e); }
            return seeds.to_vec();
        }
    };

    // Filter seeds to those with valid spectral coords
    let valid_seeds: Vec<(&str, f64, &Vec<f64>)> = seeds.iter()
        .filter_map(|(key, weight)| {
            emb.coords.get(key.as_str())
                .filter(|c| c.iter().any(|&v| v.abs() > 1e-12))
                .map(|c| (key.as_str(), *weight, c))
        })
        .collect();

    if valid_seeds.len() < 2 {
        if debug { println!("  need ≥2 seeds with spectral coords, have {}", valid_seeds.len()); }
        return seeds.to_vec();
    }

    // For each pair of seeds, find the geodesic path
    let mut path_counts: HashMap<String, f64> = HashMap::new();
    let seed_set: HashSet<&str> = seeds.iter().map(|(k, _)| k.as_str()).collect();

    for i in 0..valid_seeds.len() {
        for j in (i + 1)..valid_seeds.len() {
            let (key_a, weight_a, coords_a) = &valid_seeds[i];
            let (key_b, weight_b, coords_b) = &valid_seeds[j];
            let pair_weight = weight_a * weight_b;

            // Walk from A toward B
            let path_ab = geodesic_walk(
                key_a, coords_a, coords_b, graph, &emb, max_path,
            );
            // Walk from B toward A
            let path_ba = geodesic_walk(
                key_b, coords_b, coords_a, graph, &emb, max_path,
            );

            // Score nodes on both paths (nodes found from both directions score double)
            for (node, alignment) in path_ab.iter().chain(path_ba.iter()) {
                if !seed_set.contains(node.as_str()) {
                    *path_counts.entry(node.clone()).or_insert(0.0) += pair_weight * alignment;
                }
            }
        }
    }

    if debug && !path_counts.is_empty() {
        println!("  {} pairs examined, {} distinct nodes on paths",
            valid_seeds.len() * (valid_seeds.len() - 1) / 2,
            path_counts.len());
    }

    // Merge with original seeds
    let mut results = seeds.to_vec();
    let mut path_results: Vec<(String, f64)> = path_counts.into_iter().collect();
    path_results.sort_by(|a, b| b.1.total_cmp(&a.1));
    path_results.truncate(k);

    for (key, score) in path_results {
        if !seed_set.contains(key.as_str()) {
            results.push((key, score));
        }
    }

    results.sort_by(|a, b| b.1.total_cmp(&a.1));
    results
}
/// Walk from `start` toward `target_coords` in spectral space, choosing
/// the neighbor at each step whose direction most aligns with the target.
/// Returns (node_key, alignment_score) for each intermediate node.
fn geodesic_walk(
    start: &str,
    start_coords: &[f64],
    target_coords: &[f64],
    graph: &Graph,
    emb: &spectral::SpectralEmbedding,
    max_steps: usize,
) -> Vec<(String, f64)> {
    let mut path = Vec::new();
    let mut current = start.to_string();
    let mut current_coords = start_coords.to_vec();
    let mut visited: HashSet<String> = HashSet::new();
    visited.insert(current.clone());

    for _ in 0..max_steps {
        // Direction we want to travel: from current toward target
        let direction: Vec<f64> = target_coords.iter()
            .zip(current_coords.iter())
            .map(|(t, c)| t - c)
            .collect();

        let dir_norm = direction.iter().map(|d| d * d).sum::<f64>().sqrt();
        if dir_norm < 1e-12 { break; } // arrived

        // Among neighbors with spectral coords, find the one most aligned
        let mut best: Option<(String, Vec<f64>, f64)> = None;

        for (neighbor, _strength) in graph.neighbors(&current) {
            if visited.contains(neighbor.as_str()) { continue; }

            let neighbor_coords = match emb.coords.get(neighbor.as_str()) {
                Some(c) if c.iter().any(|&v| v.abs() > 1e-12) => c,
                _ => continue,
            };

            // Direction to this neighbor
            let step: Vec<f64> = neighbor_coords.iter()
                .zip(current_coords.iter())
                .map(|(n, c)| n - c)
                .collect();

            let step_norm = step.iter().map(|s| s * s).sum::<f64>().sqrt();
            if step_norm < 1e-12 { continue; }

            // Cosine similarity between desired direction and step direction
            let dot: f64 = direction.iter().zip(step.iter()).map(|(d, s)| d * s).sum();
            let alignment = dot / (dir_norm * step_norm);

            if alignment > 0.0 { // only consider forward-facing neighbors
                if best.as_ref().map(|(_, _, a)| alignment > *a).unwrap_or(true) {
                    best = Some((neighbor.clone(), neighbor_coords.clone(), alignment));
                }
            }
        }

        match best {
            Some((next_key, next_coords, alignment)) => {
                path.push((next_key.clone(), alignment));
                visited.insert(next_key.clone());
                current = next_key;
                current_coords = next_coords;
            }
            None => break, // no forward-facing neighbors
        }
    }

    path
}
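
The step-selection rule above reduces to: among candidate steps, take the one whose direction has the highest cosine similarity with the bearing toward the target, rejecting backward-facing steps. A standalone sketch (`cosine` and `best_step` are illustrative names, not crate APIs):

```rust
// Cosine similarity between two vectors; 0.0 when either is degenerate.
fn cosine(a: &[f64], b: &[f64]) -> f64 {
    let dot: f64 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na = a.iter().map(|x| x * x).sum::<f64>().sqrt();
    let nb = b.iter().map(|x| x * x).sum::<f64>().sqrt();
    if na < 1e-12 || nb < 1e-12 { 0.0 } else { dot / (na * nb) }
}

// Pick the candidate position whose step from `current` best aligns with
// the bearing toward `target`; steps with alignment <= 0 are rejected.
fn best_step<'a>(
    current: &[f64],
    target: &[f64],
    candidates: &'a [(&'a str, Vec<f64>)],
) -> Option<(&'a str, f64)> {
    let bearing: Vec<f64> = target.iter().zip(current).map(|(t, c)| t - c).collect();
    candidates
        .iter()
        .map(|(name, pos)| {
            let step: Vec<f64> = pos.iter().zip(current).map(|(p, c)| p - c).collect();
            (*name, cosine(&bearing, &step))
        })
        .filter(|&(_, align)| align > 0.0)
        .max_by(|a, b| a.1.total_cmp(&b.1))
}

fn main() {
    let candidates = vec![
        ("forward", vec![1.0, 0.1]),
        ("sideways", vec![0.0, 1.0]),
        ("backward", vec![-1.0, 0.0]),
    ];
    let pick = best_step(&[0.0, 0.0], &[1.0, 0.0], &candidates).unwrap();
    assert_eq!(pick.0, "forward");
    println!("{} (alignment {:.3})", pick.0, pick.1);
}
```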
/// Manifold: extrapolation along the direction defined by seeds.
///
/// Instead of finding what's *near* the seeds in spectral space (proximity),
/// find what's in the *direction* the seeds define. Given a weighted centroid
/// of seeds and the principal direction they span, find nodes that continue
/// along that direction.
///
/// Tunable params: k (default 20 results).
fn run_manifold(
    seeds: &[(String, f64)],
    graph: &Graph,
    stage: &AlgoStage,
    debug: bool,
) -> Vec<(String, f64)> {
    let k = stage.param_usize("k", 20);

    let emb = match spectral::load_embedding() {
        Ok(e) => e,
        Err(e) => {
            if debug { println!("  no spectral embedding: {}", e); }
            return seeds.to_vec();
        }
    };

    // Collect seeds with valid spectral coordinates
    let seed_data: Vec<(&str, f64, &Vec<f64>)> = seeds.iter()
        .filter_map(|(key, weight)| {
            emb.coords.get(key.as_str())
                .filter(|c| c.iter().any(|&v| v.abs() > 1e-12))
                .map(|c| (key.as_str(), *weight, c))
        })
        .collect();

    if seed_data.is_empty() {
        if debug { println!("  no seeds with spectral coords"); }
        return seeds.to_vec();
    }

    let dims = emb.dims;

    // Compute weighted centroid of seeds
    let mut centroid = vec![0.0f64; dims];
    let mut total_weight = 0.0;
    for (_, weight, coords) in &seed_data {
        for (i, &c) in coords.iter().enumerate() {
            centroid[i] += c * weight;
        }
        total_weight += weight;
    }
    if total_weight > 0.0 {
        for c in &mut centroid {
            *c /= total_weight;
        }
    }

    // Compute principal direction via power iteration on seed covariance.
    // Initialize with the two most separated seeds (largest spectral distance).
    let mut direction = vec![0.0f64; dims];
    if seed_data.len() >= 2 {
        // Find the two seeds furthest apart in spectral space
        let mut best_dist = 0.0f64;
        for i in 0..seed_data.len() {
            for j in (i + 1)..seed_data.len() {
                let dist: f64 = seed_data[i].2.iter().zip(seed_data[j].2.iter())
                    .map(|(a, b)| (a - b).powi(2)).sum::<f64>().sqrt();
                if dist > best_dist {
                    best_dist = dist;
                    for d in 0..dims {
                        direction[d] = seed_data[j].2[d] - seed_data[i].2[d];
                    }
                }
            }
        }

        // Power iteration: 3 rounds on the weighted covariance matrix
        for _ in 0..3 {
            let mut new_dir = vec![0.0f64; dims];
            for (_, weight, coords) in &seed_data {
                let dev: Vec<f64> = coords.iter().zip(centroid.iter()).map(|(c, m)| c - m).collect();
                let dot: f64 = dev.iter().zip(direction.iter()).map(|(d, v)| d * v).sum();
                for d in 0..dims {
                    new_dir[d] += weight * dot * dev[d];
                }
            }
            // Normalize
            let norm = new_dir.iter().map(|d| d * d).sum::<f64>().sqrt();
            if norm > 1e-12 {
                for d in &mut new_dir { *d /= norm; }
            }
            direction = new_dir;
        }
    }

    let dir_norm = direction.iter().map(|d| d * d).sum::<f64>().sqrt();

    let seed_set: HashSet<&str> = seeds.iter().map(|(k, _)| k.as_str()).collect();

    // Score each non-seed node by projection onto the direction from centroid
    let mut candidates: Vec<(String, f64)> = emb.coords.iter()
        .filter(|(key, coords)| {
            !seed_set.contains(key.as_str())
                && coords.iter().any(|&v| v.abs() > 1e-12)
        })
        .map(|(key, coords)| {
            let deviation: Vec<f64> = coords.iter().zip(centroid.iter())
                .map(|(c, m)| c - m)
                .collect();

            let score = if dir_norm > 1e-12 {
                // Project onto direction: how far along the principal axis
                let projection: f64 = deviation.iter().zip(direction.iter())
                    .map(|(d, v)| d * v)
                    .sum::<f64>() / dir_norm;

                // Distance from the axis (perpendicular component)
                let proj_vec: Vec<f64> = direction.iter()
                    .map(|&d| d * projection / dir_norm)
                    .collect();
                let perp_dist: f64 = deviation.iter().zip(proj_vec.iter())
                    .map(|(d, p)| (d - p).powi(2))
                    .sum::<f64>()
                    .sqrt();

                // Score: prefer nodes far along the direction but close to the axis.
                // Use absolute projection (both directions from centroid are interesting).
                let along = projection.abs();
                if perp_dist < 1e-12 {
                    along
                } else {
                    along / (1.0 + perp_dist)
                }
            } else {
                // No direction (single seed or all seeds coincide): use distance from centroid
                let dist: f64 = deviation.iter().map(|d| d * d).sum::<f64>().sqrt();
                1.0 / (1.0 + dist)
            };

            // Bonus for being connected to seeds in the graph
            let graph_bonus: f64 = graph.neighbors(key).iter()
                .filter(|(n, _)| seed_set.contains(n.as_str()))
                .map(|(_, s)| *s as f64 * 0.1)
                .sum();

            (key.clone(), score + graph_bonus)
        })
        .collect();

    candidates.sort_by(|a, b| b.1.total_cmp(&a.1));
    candidates.truncate(k);

    if debug {
        for (key, score) in candidates.iter().take(15) {
            println!("  [{:.4}] {}", score, key);
        }
    }

    // Merge with original seeds
    let mut results = seeds.to_vec();
    for (key, score) in candidates {
        results.push((key, score));
    }
    results.sort_by(|a, b| b.1.total_cmp(&a.1));
    results
}
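
The along-vs-off-axis trade-off above reduces to a small formula: project the deviation from the centroid onto a unit direction, then damp by the perpendicular distance. A standalone sketch (`manifold_score` is an illustrative name; it assumes `unit_dir` is already normalized):

```rust
// Score = |projection along unit_dir| / (1 + perpendicular distance).
// Far along the principal axis and close to it scores highest.
fn manifold_score(point: &[f64], centroid: &[f64], unit_dir: &[f64]) -> f64 {
    let dev: Vec<f64> = point.iter().zip(centroid).map(|(p, c)| p - c).collect();
    let along: f64 = dev.iter().zip(unit_dir).map(|(d, u)| d * u).sum();
    let proj: Vec<f64> = unit_dir.iter().map(|u| u * along).collect();
    let perp: f64 = dev
        .iter()
        .zip(&proj)
        .map(|(d, p)| (d - p).powi(2))
        .sum::<f64>()
        .sqrt();
    along.abs() / (1.0 + perp)
}

fn main() {
    let centroid = [0.0, 0.0];
    let dir = [1.0, 0.0]; // unit length
    // An on-axis point beats an equally-far-along but off-axis point.
    assert!((manifold_score(&[2.0, 0.0], &centroid, &dir) - 2.0).abs() < 1e-9);
    assert!((manifold_score(&[2.0, 1.0], &centroid, &dir) - 1.0).abs() < 1e-9);
    // Pure perpendicular deviation scores zero.
    assert!(manifold_score(&[0.0, 3.0], &centroid, &dir).abs() < 1e-9);
    println!("ok");
}
```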
/// Simultaneous wavefront spreading activation.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// All seeds emit at once. At each hop, activations from all sources
|
|
|
|
|
|
/// sum at each node, and the combined activation map propagates on
|
|
|
|
|
|
/// the next hop. This creates interference patterns — nodes where
|
|
|
|
|
|
/// multiple wavefronts overlap get reinforced and radiate stronger.
|
2026-03-09 01:19:04 -04:00
|
|
|
|
fn spreading_activation(
|
|
|
|
|
|
seeds: &[(String, f64)],
|
|
|
|
|
|
graph: &Graph,
|
|
|
|
|
|
store: &impl StoreView,
|
|
|
|
|
|
max_hops: u32,
|
|
|
|
|
|
edge_decay: f64,
|
|
|
|
|
|
min_activation: f64,
|
|
|
|
|
|
) -> Vec<(String, f64)> {
    let mut activation: HashMap<String, f64> = HashMap::new();

    // Initialize wavefront from all seeds
    let mut frontier: HashMap<String, f64> = HashMap::new();
    for (key, act) in seeds {
        *frontier.entry(key.clone()).or_insert(0.0) += act;
        *activation.entry(key.clone()).or_insert(0.0) += act;
    }

    // Propagate hop by hop — all sources simultaneously.
    // Node weight does NOT gate traversal — only edge_decay and edge strength.
    // Node weight is applied at the end for ranking.
    for _hop in 0..max_hops {
        let mut next_frontier: HashMap<String, f64> = HashMap::new();
        for (key, act) in &frontier {
            for (neighbor, strength) in graph.neighbors(key) {
                let propagated = act * edge_decay * strength as f64;
                if propagated < min_activation { continue; }
                *next_frontier.entry(neighbor.clone()).or_insert(0.0) += propagated;
            }
        }

        if next_frontier.is_empty() { break; }

        // Merge into total activation and advance frontier
        for (key, act) in &next_frontier {
            *activation.entry(key.clone()).or_insert(0.0) += act;
        }
        frontier = next_frontier;
    }

    // Apply node weight for ranking, not traversal
    let mut results: Vec<_> = activation.into_iter()
        .map(|(key, act)| {
            let weight = store.node_weight(&key);
            (key, act * weight)
        })
        .collect();
    results.sort_by(|a, b| b.1.total_cmp(&a.1));
    results
}
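
// A minimal, self-contained sketch of the same wavefront rule on a toy
// undirected graph (hypothetical `toy_spread`; no Graph/StoreView here,
// and node weight is omitted). Two seeds flanking a shared neighbor show
// the interference effect: their contributions sum at the middle node.

```rust
use std::collections::HashMap;

// Toy wavefront spread: every frontier node pushes act * decay * strength
// to each neighbor per hop; contributions from different sources sum.
fn toy_spread(
    edges: &[(&str, &str, f64)],
    seeds: &[(&str, f64)],
    hops: u32,
    decay: f64,
) -> HashMap<String, f64> {
    // Build an undirected adjacency list from the edge triples.
    let mut adj: HashMap<&str, Vec<(&str, f64)>> = HashMap::new();
    for &(a, b, s) in edges {
        adj.entry(a).or_default().push((b, s));
        adj.entry(b).or_default().push((a, s));
    }
    let mut activation: HashMap<String, f64> = HashMap::new();
    let mut frontier: HashMap<String, f64> = HashMap::new();
    for &(key, act) in seeds {
        *frontier.entry(key.to_string()).or_insert(0.0) += act;
        *activation.entry(key.to_string()).or_insert(0.0) += act;
    }
    for _ in 0..hops {
        let mut next: HashMap<String, f64> = HashMap::new();
        for (key, act) in &frontier {
            if let Some(neighbors) = adj.get(key.as_str()) {
                for &(n, s) in neighbors {
                    *next.entry(n.to_string()).or_insert(0.0) += act * decay * s;
                }
            }
        }
        if next.is_empty() { break; }
        for (key, act) in &next {
            *activation.entry(key.clone()).or_insert(0.0) += act;
        }
        frontier = next;
    }
    activation
}
```

// With edges a-b and c-b, seeds a and c (1.0 each), one hop, decay 0.5:
// "b" accumulates 0.5 + 0.5 = 1.0 from the two overlapping wavefronts.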

/// Search with weighted terms: exact key matching + spectral projection.
///
/// Terms are matched against node keys. Matching nodes become seeds,
/// scored by term_weight × node_weight. Seeds are then projected into
/// spectral space to find nearby nodes, with link weights modulating distance.
pub fn search_weighted(
    terms: &BTreeMap<String, f64>,
    store: &impl StoreView,
) -> Vec<SearchResult> {
    search_weighted_inner(terms, store, false, 5)
}
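
// A sketch of the seed-scoring rule described above (hypothetical
// `toy_match_seeds`, plain substring matching on keys; the real
// `match_seeds` may match differently): each term found in a node's key
// contributes term_weight * node_weight to that node's seed score.

```rust
use std::collections::BTreeMap;

// Score nodes against weighted query terms and return seeds, best first.
fn toy_match_seeds(
    terms: &BTreeMap<&str, f64>,
    nodes: &[(&str, f64)], // (key, node_weight)
) -> Vec<(String, f64)> {
    let mut seeds: Vec<(String, f64)> = Vec::new();
    for &(key, node_weight) in nodes {
        let score: f64 = terms
            .iter()
            .filter(|&(term, _)| key.contains(term))
            .map(|(_, w)| w * node_weight)
            .sum();
        if score > 0.0 {
            seeds.push((key.to_string(), score));
        }
    }
    seeds.sort_by(|a, b| b.1.total_cmp(&a.1));
    seeds
}
```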

/// Like search_weighted but with debug output and configurable result count.
pub fn search_weighted_debug(
    terms: &BTreeMap<String, f64>,
    store: &impl StoreView,
    max_results: usize,
) -> Vec<SearchResult> {
    search_weighted_inner(terms, store, true, max_results)
}

fn search_weighted_inner(
    terms: &BTreeMap<String, f64>,
    store: &impl StoreView,
    debug: bool,
    max_results: usize,
) -> Vec<SearchResult> {
    let graph = crate::graph::build_graph_fast(store);
    let (seeds, direct_hits) = match_seeds(terms, store);

    if seeds.is_empty() {
        return Vec::new();
    }

    if debug {
        println!("\n[search] === SEEDS ({}) ===", seeds.len());
        let mut sorted_seeds = seeds.clone();
        sorted_seeds.sort_by(|a, b| b.1.total_cmp(&a.1));
        for (key, score) in sorted_seeds.iter().take(20) {
            println!("  {:.4} {}", score, key);
        }
    }

    // Default pipeline: spectral → spread (legacy behavior)
    let pipeline = vec![
        AlgoStage { algo: Algorithm::Spectral, params: HashMap::new() },
        AlgoStage { algo: Algorithm::Spread, params: HashMap::new() },
    ];

    let raw_results = run_pipeline(&pipeline, seeds, &graph, store, debug, max_results);

    raw_results.into_iter()
        .take(max_results)
        .map(|(key, activation)| {
            let is_direct = direct_hits.contains(&key);
            SearchResult { key, activation, is_direct, snippet: None }
        })
        .collect()
}

/// Search with equal-weight terms (for interactive use).
pub fn search(query: &str, store: &impl StoreView) -> Vec<SearchResult> {
    let terms: BTreeMap<String, f64> = query.split_whitespace()
        .map(|t| (t.to_lowercase(), 1.0))
        .collect();
    search_weighted(&terms, store)
}

/// Extract meaningful search terms from natural language.
/// Strips common English stop words, returns up to max_terms words.
pub fn extract_query_terms(text: &str, max_terms: usize) -> String {
    const STOP_WORDS: &[&str] = &[
        "the", "a", "an", "is", "are", "was", "were", "do", "does", "did",
        "have", "has", "had", "will", "would", "could", "should", "can",
        "may", "might", "shall", "been", "being", "to", "of", "in", "for",
        "on", "with", "at", "by", "from", "as", "but", "or", "and", "not",
        "no", "if", "then", "than", "that", "this", "it", "its", "my",
        "your", "our", "we", "you", "i", "me", "he", "she", "they", "them",
        "what", "how", "why", "when", "where", "about", "just", "let",
        "want", "tell", "show", "think", "know", "see", "look", "make",
        "get", "go", "some", "any", "all", "very", "really", "also", "too",
        "so", "up", "out", "here", "there",
    ];

    // `len() > 2` already excludes empty strings, so no separate is_empty check.
    text.to_lowercase()
        .split(|c: char| !c.is_alphanumeric())
        .filter(|w| w.len() > 2 && !STOP_WORDS.contains(w))
        .take(max_terms)
        .collect::<Vec<_>>()
        .join(" ")
}
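
// The filter rule above (length > 2, not a stop word, at most max_terms)
// can be checked in isolation with a trimmed-down stop list (hypothetical
// `toy_extract`, returning a Vec instead of a joined string):

```rust
// Same shape as extract_query_terms, but with a caller-supplied stop list.
fn toy_extract(text: &str, stop: &[&str], max_terms: usize) -> Vec<String> {
    text.to_lowercase()
        .split(|c: char| !c.is_alphanumeric())
        .filter(|w| w.len() > 2 && !stop.contains(w))
        .map(String::from)
        .take(max_terms)
        .collect()
}
```

// "What is the Rust borrow checker?" with stop = ["what", "the"] keeps
// only "rust", "borrow", "checker": "is" is too short, the rest are stopped.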

/// Format search results as text lines (for hook consumption).
pub fn format_results(results: &[SearchResult]) -> String {
    let mut out = String::new();
    for (i, r) in results.iter().enumerate() {
        let marker = if r.is_direct { "→" } else { " " };
        out.push_str(&format!("{}{:2}. [{:.2}] {}",
            marker, i + 1, r.activation, r.key));
        out.push('\n');
        if let Some(ref snippet) = r.snippet {
            out.push_str(&format!("    {}\n", snippet));
        }
    }
    out
}