poc-memory v0.4.0: graph-structured memory with consolidation pipeline
Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
link-add, link-impact, decay, consolidate-session, etc.
Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-02-28 22:17:00 -05:00
// Graph algorithms: clustering coefficient, community detection (label
// propagation), schema fit scoring, small-world metrics, consolidation
// priority scoring.
//
// The Graph is built from the Store's nodes + relations. Edges are
// undirected for clustering/community (even causal edges count as
// connections), but relation type and direction are preserved for
// specific queries.

use crate::capnp_store::{Store, RelationType, StoreView};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque};

/// Weighted edge in the graph
#[derive(Clone, Debug)]
pub struct Edge {
    pub target: String,
    pub strength: f32,
    pub rel_type: RelationType,
}

/// The in-memory graph built from store nodes + relations
pub struct Graph {
    /// Adjacency list: node key → list of edges
    adj: HashMap<String, Vec<Edge>>,
    /// All node keys
    keys: HashSet<String>,
    /// Community labels (from label propagation)
    communities: HashMap<String, u32>,
}

impl Graph {
    pub fn nodes(&self) -> &HashSet<String> {
        &self.keys
    }

    pub fn degree(&self, key: &str) -> usize {
        self.adj.get(key).map(|e| e.len()).unwrap_or(0)
    }

    pub fn edge_count(&self) -> usize {
        self.adj.values().map(|e| e.len()).sum::<usize>() / 2
    }

    /// All edges for a node (full Edge data including rel_type)
    pub fn edges_of(&self, key: &str) -> &[Edge] {
        self.adj.get(key)
            .map(|v| v.as_slice())
            .unwrap_or(&[])
    }

    /// All neighbor keys with strengths
    pub fn neighbors(&self, key: &str) -> Vec<(&String, f32)> {
        self.adj.get(key)
            .map(|edges| edges.iter().map(|e| (&e.target, e.strength)).collect())
            .unwrap_or_default()
    }

    /// Just neighbor keys
    pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
        self.adj.get(key)
            .map(|edges| edges.iter().map(|e| e.target.as_str()).collect())
            .unwrap_or_default()
    }

    pub fn community_count(&self) -> usize {
        let labels: HashSet<_> = self.communities.values().collect();
        labels.len()
    }

    pub fn communities(&self) -> &HashMap<String, u32> {
        &self.communities
    }

    /// Local clustering coefficient: fraction of a node's neighbors
    /// that are also neighbors of each other.
    /// cc(v) = 2E / (deg * (deg - 1))
    pub fn clustering_coefficient(&self, key: &str) -> f32 {
        let neighbors = self.neighbor_keys(key);
        let deg = neighbors.len();
        if deg < 2 {
            return 0.0;
        }

        let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
        let mut triangles = 0u32;
        for i in 0..neighbor_vec.len() {
            let ni_neighbors = self.neighbor_keys(neighbor_vec[i]);
            for j in (i + 1)..neighbor_vec.len() {
                if ni_neighbors.contains(neighbor_vec[j]) {
                    triangles += 1;
                }
            }
        }

        (2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
    }

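The same cc(v) = 2E / (deg * (deg - 1)) formula can be checked standalone, without the Graph type. This sketch uses a plain adjacency map; `undirected` and `local_cc` are illustrative names, not crate API. On a triangle a-b-c with a pendant node d attached to a, only one of a's three neighbor pairs is connected.

```rust
use std::collections::{HashMap, HashSet};

// Build a symmetric adjacency map from an undirected edge list.
fn undirected(
    edges: &[(&'static str, &'static str)],
) -> HashMap<&'static str, HashSet<&'static str>> {
    let mut adj: HashMap<&'static str, HashSet<&'static str>> = HashMap::new();
    for &(a, b) in edges {
        adj.entry(a).or_default().insert(b);
        adj.entry(b).or_default().insert(a);
    }
    adj
}

// cc(v) = 2E / (deg * (deg - 1)), where E counts the edges that
// exist among v's neighbors.
fn local_cc(adj: &HashMap<&'static str, HashSet<&'static str>>, v: &str) -> f32 {
    let neighbors = match adj.get(v) {
        Some(n) => n,
        None => return 0.0,
    };
    let deg = neighbors.len();
    if deg < 2 {
        return 0.0;
    }
    let ns: Vec<&str> = neighbors.iter().copied().collect();
    let mut links = 0u32;
    for i in 0..ns.len() {
        for j in (i + 1)..ns.len() {
            if adj.get(ns[i]).map_or(false, |s| s.contains(ns[j])) {
                links += 1;
            }
        }
    }
    (2.0 * links as f32) / (deg as f32 * (deg as f32 - 1.0))
}
```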
    /// Average clustering coefficient across all nodes with deg >= 2
    pub fn avg_clustering_coefficient(&self) -> f32 {
        let mut sum = 0.0f32;
        let mut count = 0u32;
        for key in &self.keys {
            if self.degree(key) >= 2 {
                sum += self.clustering_coefficient(key);
                count += 1;
            }
        }
        if count == 0 { 0.0 } else { sum / count as f32 }
    }

    /// Average shortest path length (sampled BFS from up to 100 nodes)
    pub fn avg_path_length(&self) -> f32 {
        let sample: Vec<&String> = self.keys.iter().take(100).collect();
        if sample.is_empty() { return 0.0; }

        let mut total_dist = 0u64;
        let mut total_pairs = 0u64;

        for &start in &sample {
            let dists = self.bfs_distances(start);
            for d in dists.values() {
                if *d > 0 {
                    total_dist += *d as u64;
                    total_pairs += 1;
                }
            }
        }

        if total_pairs == 0 { 0.0 } else { total_dist as f32 / total_pairs as f32 }
    }

    fn bfs_distances(&self, start: &str) -> HashMap<String, u32> {
        let mut dist = HashMap::new();
        let mut queue = VecDeque::new();
        dist.insert(start.to_string(), 0u32);
        queue.push_back(start.to_string());

        while let Some(node) = queue.pop_front() {
            let d = dist[&node];
            for neighbor in self.neighbor_keys(&node) {
                if !dist.contains_key(neighbor) {
                    dist.insert(neighbor.to_string(), d + 1);
                    queue.push_back(neighbor.to_string());
                }
            }
        }
        dist
    }

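The BFS primitive can be exercised in isolation. This is a minimal sketch over a plain `Vec`-based adjacency map (illustrative shape, not the crate's types) that returns hop distances from a start node the same way.

```rust
use std::collections::{HashMap, VecDeque};

// Unweighted BFS: hop distance from `start` to every reachable node.
fn bfs_distances(
    adj: &HashMap<&'static str, Vec<&'static str>>,
    start: &str,
) -> HashMap<String, u32> {
    let mut dist = HashMap::new();
    let mut queue = VecDeque::new();
    dist.insert(start.to_string(), 0u32);
    queue.push_back(start.to_string());
    while let Some(node) = queue.pop_front() {
        let d = dist[&node];
        // Unvisited neighbors sit one hop further out.
        for &n in adj.get(node.as_str()).into_iter().flatten() {
            if !dist.contains_key(n) {
                dist.insert(n.to_string(), d + 1);
                queue.push_back(n.to_string());
            }
        }
    }
    dist
}
```

Unreached nodes simply never appear in the map, which is why `avg_path_length` above only averages over positive distances.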
    /// Power-law exponent α of the degree distribution.
    ///
    /// Estimated via MLE: α = 1 + n / Σ ln(k_i / (k_min - 0.5))
    /// α ≈ 2: extreme hub dominance (fragile)
    /// α ≈ 3: healthy scale-free
    /// α > 3: approaching random graph (egalitarian)
    pub fn degree_power_law_exponent(&self) -> f32 {
        let mut degrees: Vec<usize> = self.keys.iter()
            .map(|k| self.degree(k))
            .filter(|&d| d > 0) // exclude isolates
            .collect();
        if degrees.len() < 10 { return 0.0; } // not enough data

        degrees.sort_unstable();
        let k_min = degrees[0] as f64;
        if k_min < 1.0 { return 0.0; }

        let n = degrees.len() as f64;
        let sum_ln: f64 = degrees.iter()
            .map(|&k| (k as f64 / (k_min - 0.5)).ln())
            .sum();

        if sum_ln <= 0.0 { return 0.0; }
        (1.0 + n / sum_ln) as f32
    }

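As a sanity check on the MLE formula, here is a standalone sketch (`power_law_alpha` is an illustrative name): a flat degree sequence yields a large α at the random-graph end of the scale, while adding one dominant hub fattens the tail and pulls α down.

```rust
// MLE estimate α = 1 + n / Σ ln(k_i / (k_min - 0.5)) over a raw
// degree sequence (assumed non-empty, all degrees > 0).
fn power_law_alpha(degrees: &[usize]) -> f64 {
    let k_min = *degrees.iter().min().expect("non-empty") as f64;
    let n = degrees.len() as f64;
    let sum_ln: f64 = degrees
        .iter()
        .map(|&k| (k as f64 / (k_min - 0.5)).ln())
        .sum();
    1.0 + n / sum_ln
}
```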
    /// Gini coefficient of the degree distribution.
    ///
    /// 0 = perfectly egalitarian (all nodes same degree)
    /// 1 = maximally unequal (one node has all edges)
    /// Measures hub concentration independent of distribution shape.
    pub fn degree_gini(&self) -> f32 {
        let mut degrees: Vec<f64> = self.keys.iter()
            .map(|k| self.degree(k) as f64)
            .collect();
        let n = degrees.len();
        if n < 2 { return 0.0; }

        degrees.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        let mean = degrees.iter().sum::<f64>() / n as f64;
        if mean < 1e-10 { return 0.0; }

        // Gini = (2 Σ i·x_i) / (n Σ x_i) - (n+1)/n
        let weighted_sum: f64 = degrees.iter().enumerate()
            .map(|(i, &d)| (i as f64 + 1.0) * d)
            .sum();
        let total = degrees.iter().sum::<f64>();

        let gini = (2.0 * weighted_sum) / (n as f64 * total) - (n as f64 + 1.0) / n as f64;
        gini.max(0.0) as f32
    }

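The sorted-rank identity used above can be verified on its own (standalone sketch, illustrative name `gini`): equal values give 0, and concentrating everything on a single value gives (n-1)/n.

```rust
// Gini via the sorted-rank identity:
// G = (2 Σ i·x_i) / (n Σ x_i) - (n + 1) / n, ranks i = 1..n over
// values sorted ascending.
fn gini(values: &[f64]) -> f64 {
    let mut v = values.to_vec();
    v.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let n = v.len() as f64;
    let total: f64 = v.iter().sum();
    if n < 2.0 || total <= 0.0 {
        return 0.0;
    }
    let weighted: f64 = v
        .iter()
        .enumerate()
        .map(|(i, &x)| (i as f64 + 1.0) * x)
        .sum();
    (2.0 * weighted) / (n * total) - (n + 1.0) / n
}
```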
    /// Small-world coefficient σ = (C/C_rand) / (L/L_rand)
    /// C_rand ≈ <k>/n, L_rand ≈ ln(n)/ln(<k>)
    pub fn small_world_sigma(&self) -> f32 {
        let n = self.keys.len() as f32;
        if n < 10.0 { return 0.0; }

        let avg_degree = self.adj.values()
            .map(|e| e.len() as f32)
            .sum::<f32>() / n;
        if avg_degree < 1.0 { return 0.0; }

        let c = self.avg_clustering_coefficient();
        let l = self.avg_path_length();

        let c_rand = avg_degree / n;
        let l_rand = n.ln() / avg_degree.ln();

        if c_rand < 1e-10 || l_rand < 1e-10 || l < 1e-10 {
            return 0.0;
        }

        (c / c_rand) / (l / l_rand)
    }
}

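The σ baselines can be checked with plain arithmetic. In this standalone sketch (illustrative free function, not the method above), feeding in exactly the random-graph values C_rand = <k>/n and L_rand = ln(n)/ln(<k>) gives σ = 1, and raising clustering above that baseline pushes σ above 1.

```rust
// σ = (C / C_rand) / (L / L_rand) with random-graph baselines
// C_rand = <k>/n and L_rand = ln(n)/ln(<k>).
fn small_world_sigma(c: f32, l: f32, n: f32, avg_k: f32) -> f32 {
    let c_rand = avg_k / n;
    let l_rand = n.ln() / avg_k.ln();
    (c / c_rand) / (l / l_rand)
}
```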
/// Impact of adding a hypothetical edge
#[derive(Debug)]
pub struct LinkImpact {
    pub source: String,
    pub target: String,
    pub source_deg: usize,
    pub target_deg: usize,
    /// Is this a hub link? (either endpoint in top 5% by degree)
    pub is_hub_link: bool,
    /// Are both endpoints in the same community?
    pub same_community: bool,
    /// Change in clustering coefficient for source
    pub delta_cc_source: f32,
    /// Change in clustering coefficient for target
    pub delta_cc_target: f32,
    /// Change in degree Gini (positive = more hub-dominated)
    pub delta_gini: f32,
    /// Qualitative assessment
    pub assessment: &'static str,
}

impl Graph {
    /// Simulate adding an edge and report impact on topology metrics.
    ///
    /// Doesn't modify the graph — computes what would change if the
    /// edge were added.
    pub fn link_impact(&self, source: &str, target: &str) -> LinkImpact {
        let source_deg = self.degree(source);
        let target_deg = self.degree(target);

        // Hub threshold: top 5% by degree
        let mut all_degrees: Vec<usize> = self.keys.iter()
            .map(|k| self.degree(k))
            .collect();
        all_degrees.sort_unstable();
        let hub_threshold = if all_degrees.len() >= 20 {
            all_degrees[all_degrees.len() * 95 / 100]
        } else {
            usize::MAX // can't define hubs with <20 nodes
        };
        let is_hub_link = source_deg >= hub_threshold || target_deg >= hub_threshold;

        // Community check
        let sc = self.communities.get(source);
        let tc = self.communities.get(target);
        let same_community = match (sc, tc) {
            (Some(a), Some(b)) => a == b,
            _ => false,
        };

        // CC change for source: adding target as neighbor changes the
        // triangle count. New triangles form for each node that's a
        // neighbor of BOTH source and target.
        let source_neighbors = self.neighbor_keys(source);
        let target_neighbors = self.neighbor_keys(target);
        let shared_neighbors = source_neighbors.intersection(&target_neighbors).count();

        let cc_before_source = self.clustering_coefficient(source);
        let cc_before_target = self.clustering_coefficient(target);

        // Estimate new CC for source after adding edge
        let new_source_deg = source_deg + 1;
        let new_source_triangles = if source_deg >= 2 {
            // Current triangles + new ones from shared neighbors
            let current_triangles = (cc_before_source
                * source_deg as f32 * (source_deg as f32 - 1.0) / 2.0) as u32;
            current_triangles + shared_neighbors as u32
        } else {
            shared_neighbors as u32
        };
        let cc_after_source = if new_source_deg >= 2 {
            (2.0 * new_source_triangles as f32)
                / (new_source_deg as f32 * (new_source_deg as f32 - 1.0))
        } else {
            0.0
        };

        let new_target_deg = target_deg + 1;
        let new_target_triangles = if target_deg >= 2 {
            let current_triangles = (cc_before_target
                * target_deg as f32 * (target_deg as f32 - 1.0) / 2.0) as u32;
            current_triangles + shared_neighbors as u32
        } else {
            shared_neighbors as u32
        };
        let cc_after_target = if new_target_deg >= 2 {
            (2.0 * new_target_triangles as f32)
                / (new_target_deg as f32 * (new_target_deg as f32 - 1.0))
        } else {
            0.0
        };

        // Gini change via influence function:
        // IF(x; Gini, F) = (2F(x) - 1) * x/μ - Gini - 1
        // Adding an edge increments two degrees. The net ΔGini is the sum
        // of influence contributions from both endpoints shifting up by 1.
        let gini_before = self.degree_gini();
        let n = self.keys.len();
        let total_degree: f64 = self.keys.iter()
            .map(|k| self.degree(k) as f64)
            .sum();
        let mean_deg = if n > 0 { total_degree / n as f64 } else { 1.0 };

        // CDF at each endpoint's degree: fraction of nodes with degree ≤ d
        let delta_gini = if mean_deg > 1e-10 && n >= 2 {
            // Count nodes with degree ≤ source_deg and ≤ target_deg
            let f_source = self.keys.iter()
                .filter(|k| self.degree(k) <= source_deg)
                .count() as f64 / n as f64;
            let f_target = self.keys.iter()
                .filter(|k| self.degree(k) <= target_deg)
                .count() as f64 / n as f64;

            // Influence of incrementing source's degree by 1
            let new_source = (source_deg + 1) as f64;
            let if_source = (2.0 * f_source - 1.0) * new_source / mean_deg
                - gini_before as f64 - 1.0;
            // Influence of incrementing target's degree by 1
            let new_target = (target_deg + 1) as f64;
            let if_target = (2.0 * f_target - 1.0) * new_target / mean_deg
                - gini_before as f64 - 1.0;

            // Scale: each point contributes 1/n to the distribution
            ((if_source + if_target) / n as f64) as f32
        } else {
            0.0f32
        };

        // Qualitative assessment
        let assessment = if is_hub_link && same_community {
            "hub-reinforcing: strengthens existing star topology"
        } else if is_hub_link && !same_community {
            "hub-bridging: cross-community but through a hub"
        } else if !is_hub_link && same_community && shared_neighbors > 0 {
            "lateral-clustering: strengthens local mesh topology"
        } else if !is_hub_link && !same_community {
            "lateral-bridging: best kind — cross-community lateral link"
        } else if !is_hub_link && same_community {
            "lateral-local: connects peripheral nodes in same community"
        } else {
            "neutral"
        };

        LinkImpact {
            source: source.to_string(),
            target: target.to_string(),
            source_deg,
            target_deg,
            is_hub_link,
            same_community,
            delta_cc_source: cc_after_source - cc_before_source,
            delta_cc_target: cc_after_target - cc_before_target,
            delta_gini,
            assessment,
        }
    }
}

/// Build graph from store data (with community detection)
pub fn build_graph(store: &impl StoreView) -> Graph {
    let (adj, keys) = build_adjacency(store);
    let communities = label_propagation(&keys, &adj, 20);
    Graph { adj, keys, communities }
}

/// Build graph without community detection — for spreading activation
/// searches where we only need the adjacency list.
pub fn build_graph_fast(store: &impl StoreView) -> Graph {
    let (adj, keys) = build_adjacency(store);
    Graph { adj, keys, communities: HashMap::new() }
}

fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {
    let mut adj: HashMap<String, Vec<Edge>> = HashMap::new();
    let mut keys: HashSet<String> = HashSet::new();

    store.for_each_node(|key, _, _| {
        keys.insert(key.to_owned());
    });

    store.for_each_relation(|source_key, target_key, strength, rel_type| {
        if !keys.contains(source_key) || !keys.contains(target_key) {
            return;
        }

        adj.entry(source_key.to_owned()).or_default().push(Edge {
            target: target_key.to_owned(),
            strength,
            rel_type,
        });
        adj.entry(target_key.to_owned()).or_default().push(Edge {
            target: source_key.to_owned(),
            strength,
            rel_type,
        });
    });

    (adj, keys)
}

|
|
|
|
|
|
|
|
|
|
|
|
/// Label propagation community detection.
///
/// Each node starts with its own label. Each iteration: adopt the most
/// common label among neighbors (weighted by edge strength). Iterate
/// until stable or max_iterations.
fn label_propagation(
    keys: &HashSet<String>,
    adj: &HashMap<String, Vec<Edge>>,
    max_iterations: u32,
) -> HashMap<String, u32> {
    // Only consider edges above this strength for community votes.
    // Weak auto-links from triangle closure (0.15-0.35) bridge
    // unrelated clusters — filtering them lets natural communities emerge.
    let min_strength: f32 = 0.3;

    // Initialize: each node gets its own label
    let key_vec: Vec<String> = keys.iter().cloned().collect();
    let mut labels: HashMap<String, u32> = key_vec.iter()
        .enumerate()
        .map(|(i, k)| (k.clone(), i as u32))
        .collect();

    for _iter in 0..max_iterations {
        let mut changed = false;

        for key in &key_vec {
            let edges = match adj.get(key) {
                Some(e) => e,
                None => continue,
            };
            if edges.is_empty() { continue; }

            // Count weighted votes for each label (skip weak edges)
            let mut votes: HashMap<u32, f32> = HashMap::new();
            for edge in edges {
                if edge.strength < min_strength { continue; }
                if let Some(&label) = labels.get(&edge.target) {
                    *votes.entry(label).or_default() += edge.strength;
                }
            }

            // Adopt the label with most votes
            if let Some((&best_label, _)) = votes.iter()
                .max_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
            {
                let current = labels[key];
                if best_label != current {
                    labels.insert(key.clone(), best_label);
                    changed = true;
                }
            }
        }

        if !changed { break; }
    }

    // Compact labels to 0..n
    let mut label_map: HashMap<u32, u32> = HashMap::new();
    let mut next_id = 0;
    for label in labels.values_mut() {
        let new_label = *label_map.entry(*label).or_insert_with(|| {
            let id = next_id;
            next_id += 1;
            id
        });
        *label = new_label;
    }

    labels
}

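The weighted-vote step is the core of the algorithm and can be exercised standalone. The sketch below reimplements just that step, assuming a stand-in `Edge` struct with the `target`/`strength` fields used above; it is an illustration under those assumptions, not the crate's actual API.

```rust
use std::collections::HashMap;

// Stand-in for the crate's edge type (assumed fields: target, strength).
struct Edge { target: String, strength: f32 }

// One node's vote: sum edge strengths per neighbor label, skipping weak edges,
// and return the label with the largest total.
fn best_label(edges: &[Edge], labels: &HashMap<String, u32>, min_strength: f32) -> Option<u32> {
    let mut votes: HashMap<u32, f32> = HashMap::new();
    for edge in edges {
        if edge.strength < min_strength { continue; } // drop weak auto-links
        if let Some(&label) = labels.get(&edge.target) {
            *votes.entry(label).or_default() += edge.strength; // weighted vote
        }
    }
    votes.iter()
        .max_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
        .map(|(&label, _)| label)
}

fn main() {
    let labels: HashMap<String, u32> = [("a", 1), ("b", 1), ("c", 2), ("d", 3)]
        .into_iter()
        .map(|(k, v)| (k.to_string(), v))
        .collect();
    let edges = vec![
        Edge { target: "a".into(), strength: 0.9 },
        Edge { target: "b".into(), strength: 0.4 },
        Edge { target: "c".into(), strength: 0.2 }, // below the 0.3 cutoff: ignored
        Edge { target: "d".into(), strength: 0.5 },
    ];
    // Label 1 wins: 0.9 + 0.4 = 1.3 beats label 3's 0.5; label 2 never votes.
    assert_eq!(best_label(&edges, &labels, 0.3), Some(1));
    println!("winning label: {:?}", best_label(&edges, &labels, 0.3));
}
```

Note how the strength filter changes the outcome: without the cutoff, label 2 would still lose here, but a weak bridge edge can tip ties between otherwise unrelated clusters.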
/// Schema fit: for a node, measure how well-connected its neighbors are
/// to each other. High density + high CC among neighbors = good schema fit.
pub fn schema_fit(graph: &Graph, key: &str) -> f32 {
    let neighbors = graph.neighbor_keys(key);
    let n = neighbors.len();
    if n < 2 {
        return 0.0; // isolated or leaf — no schema context
    }

    // Count edges among neighbors
    let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
    let mut inter_edges = 0u32;
    for i in 0..neighbor_vec.len() {
        // Look up node i's neighbor set once, reused across all j comparisons.
        let ni_neighbors = graph.neighbor_keys(neighbor_vec[i]);
        for j in (i + 1)..neighbor_vec.len() {
            if ni_neighbors.contains(neighbor_vec[j]) {
                inter_edges += 1;
            }
        }
    }

    let max_edges = (n * (n - 1)) / 2;
    let density = if max_edges == 0 { 0.0 } else {
        inter_edges as f32 / max_edges as f32
    };

    // Combine neighborhood density with own CC
    let cc = graph.clustering_coefficient(key);
    (density + cc) / 2.0
}

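The scoring arithmetic is easy to check with made-up numbers. This sketch isolates the density formula from `schema_fit` as a free function (a hypothetical helper for illustration, not part of the crate):

```rust
// Density of the neighborhood plus the node's own clustering coefficient,
// averaged — mirrors the final expression in schema_fit.
fn schema_fit_score(inter_edges: u32, n: usize, cc: f32) -> f32 {
    let max_edges = (n * (n - 1)) / 2;
    let density = if max_edges == 0 { 0.0 } else { inter_edges as f32 / max_edges as f32 };
    (density + cc) / 2.0
}

fn main() {
    // 4 neighbors, 3 of the 6 possible edges among them present, own CC 0.5:
    // density = 3/6 = 0.5, score = (0.5 + 0.5) / 2 = 0.5.
    assert_eq!(schema_fit_score(3, 4, 0.5), 0.5);
    println!("{}", schema_fit_score(3, 4, 0.5));
}
```

In the real function, nodes with fewer than two neighbors short-circuit to 0.0 before any of this runs, since a single neighbor gives no schema context.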
/// Compute schema fit for all nodes
pub fn schema_fit_all(graph: &Graph) -> HashMap<String, f32> {
    graph.nodes().iter()
        .map(|key| (key.clone(), schema_fit(graph, key)))
        .collect()
}

/// A snapshot of graph topology metrics, for tracking evolution over time
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MetricsSnapshot {
    pub timestamp: f64,
    pub date: String,
    pub nodes: usize,
    pub edges: usize,
    pub communities: usize,
    pub sigma: f32,
    pub alpha: f32,
    pub gini: f32,
    pub avg_cc: f32,
    pub avg_path_length: f32,
    pub avg_schema_fit: f32,
}

fn metrics_log_path() -> std::path::PathBuf {
    let home = std::env::var("HOME").unwrap_or_default();
    std::path::PathBuf::from(home).join(".claude/memory/metrics.jsonl")
}

/// Load previous metrics snapshots
pub fn load_metrics_history() -> Vec<MetricsSnapshot> {
    let path = metrics_log_path();
    let content = match std::fs::read_to_string(&path) {
        Ok(c) => c,
        Err(_) => return Vec::new(),
    };
    content.lines()
        .filter_map(|line| serde_json::from_str(line).ok())
        .collect()
}

/// Append a metrics snapshot to the log
pub fn save_metrics_snapshot(snap: &MetricsSnapshot) {
    let path = metrics_log_path();
    if let Ok(json) = serde_json::to_string(snap) {
        use std::io::Write;
        if let Ok(mut f) = std::fs::OpenOptions::new()
            .create(true).append(true).open(&path)
        {
            let _ = writeln!(f, "{}", json);
        }
    }
}

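The JSONL pattern here (append one JSON object per line, skip unparseable lines on load) can be demonstrated with std alone. This sketch writes hand-built JSON lines to a temp file instead of serializing with serde_json, so the file name and the line format are illustrative assumptions:

```rust
use std::fs::OpenOptions;
use std::io::Write;

fn main() -> std::io::Result<()> {
    // Hypothetical demo path; the real log lives at ~/.claude/memory/metrics.jsonl.
    let path = std::env::temp_dir().join("metrics-demo.jsonl");
    let _ = std::fs::remove_file(&path);

    // Each snapshot is appended as one self-contained JSON line.
    for (nodes, edges) in [(10usize, 14usize), (12, 19)] {
        let mut f = OpenOptions::new().create(true).append(true).open(&path)?;
        writeln!(f, "{{\"nodes\":{},\"edges\":{}}}", nodes, edges)?;
    }

    // Loading reads line by line; a malformed line would simply be skipped,
    // mirroring filter_map(|line| serde_json::from_str(line).ok()).
    let content = std::fs::read_to_string(&path)?;
    let snapshots: Vec<&str> = content.lines().filter(|l| l.starts_with('{')).collect();
    assert_eq!(snapshots.len(), 2);
    println!("{} snapshots", snapshots.len());
    Ok(())
}
```

Append-only JSONL keeps every write a single `O_APPEND` syscall and makes partial writes recoverable: a torn final line fails to parse and is dropped, while all earlier snapshots load fine.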
/// Health report: summary of graph metrics
pub fn health_report(graph: &Graph, store: &Store) -> String {
    let n = graph.nodes().len();
    let e = graph.edge_count();
    let avg_cc = graph.avg_clustering_coefficient();
    let avg_pl = graph.avg_path_length();
    let sigma = graph.small_world_sigma();
    let communities = graph.community_count();

    // Community sizes
    let mut comm_sizes: HashMap<u32, usize> = HashMap::new();
    for label in graph.communities().values() {
        *comm_sizes.entry(*label).or_default() += 1;
    }
    let mut sizes: Vec<usize> = comm_sizes.values().copied().collect();
    sizes.sort_unstable_by(|a, b| b.cmp(a));

    // Degree distribution
    let mut degrees: Vec<usize> = graph.nodes().iter()
        .map(|k| graph.degree(k))
        .collect();
    degrees.sort_unstable();
    let max_deg = degrees.last().copied().unwrap_or(0);
    let median_deg = if degrees.is_empty() { 0 } else { degrees[degrees.len() / 2] };
    let avg_deg = if n == 0 { 0.0 } else {
        degrees.iter().sum::<usize>() as f64 / n as f64
    };

    // Topology metrics
    let alpha = graph.degree_power_law_exponent();
    let gini = graph.degree_gini();

    // Schema fit distribution
    let fits = schema_fit_all(graph);
    let avg_fit = if fits.is_empty() { 0.0 } else {
        fits.values().sum::<f32>() / fits.len() as f32
    };
    let low_fit = fits.values().filter(|&&f| f < 0.1).count();

    // Category breakdown
    let cats = store.category_counts();

    // Snapshot current metrics and log
    let now = crate::capnp_store::now_epoch();
    let date = crate::capnp_store::format_datetime_space(now);
    let snap = MetricsSnapshot {
        timestamp: now,
        date: date.clone(),
        nodes: n, edges: e, communities,
        sigma, alpha, gini, avg_cc,
        avg_path_length: avg_pl,
        avg_schema_fit: avg_fit,
    };
    save_metrics_snapshot(&snap);

    // Load history for deltas
    let history = load_metrics_history();
    let prev = if history.len() >= 2 {
        Some(&history[history.len() - 2]) // second-to-last (last is the one we just wrote)
    } else {
        None
    };

    fn delta(current: f32, prev: Option<f32>) -> String {
        match prev {
            Some(p) => {
                let d = current - p;
                if d.abs() < 0.001 { String::new() }
                else { format!(" (Δ{:+.3})", d) }
            }
            None => String::new(),
        }
    }

    let sigma_d = delta(sigma, prev.map(|p| p.sigma));
    let alpha_d = delta(alpha, prev.map(|p| p.alpha));
    let gini_d = delta(gini, prev.map(|p| p.gini));
    let cc_d = delta(avg_cc, prev.map(|p| p.avg_cc));
    let fit_d = delta(avg_fit, prev.map(|p| p.avg_schema_fit));

    let mut report = format!(
        "Memory Health Report
====================
Nodes: {n} Relations: {e} Communities: {communities}

Degree: max={max_deg} median={median_deg} avg={avg_deg:.1}
Clustering coefficient (avg): {avg_cc:.4}{cc_d}
Average path length: {avg_pl:.2}
Small-world σ: {sigma:.3}{sigma_d} (>1 = small-world)
Power-law α: {alpha:.2}{alpha_d} (2=hub-dominated, 3=healthy, >3=egalitarian)
Degree Gini: {gini:.3}{gini_d} (0=equal, 1=one-hub)

Community sizes (top 5): {top5}
Schema fit: avg={avg_fit:.3}{fit_d} low-fit (<0.1): {low_fit} nodes

Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}",
        top5 = sizes.iter().take(5)
            .map(|s| s.to_string())
            .collect::<Vec<_>>()
            .join(", "),
        core = cats.get("core").unwrap_or(&0),
        tech = cats.get("tech").unwrap_or(&0),
        gen = cats.get("gen").unwrap_or(&0),
        obs = cats.get("obs").unwrap_or(&0),
        task = cats.get("task").unwrap_or(&0),
    );

    // Show history trend if we have enough data points
    if history.len() >= 3 {
        report.push_str("\n\nMetrics history (last 5):\n");
        for snap in &history[history.len().saturating_sub(5)..] {
            report.push_str(&format!(" {} — σ={:.1} α={:.2} gini={:.3} cc={:.4} fit={:.3}\n",
                snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc, snap.avg_schema_fit));
        }
    }

    report
}
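The `delta` helper's output format is worth pinning down, since the report's readability depends on it. This standalone copy shows the three cases (no history, no meaningful change, real change):

```rust
// Same shape as the nested delta() helper in health_report: empty string when
// there is no previous snapshot or the change is below the 0.001 noise floor,
// otherwise a signed, 3-decimal delta in parentheses.
fn delta(current: f32, prev: Option<f32>) -> String {
    match prev {
        Some(p) => {
            let d = current - p;
            if d.abs() < 0.001 { String::new() }
            else { format!(" (Δ{:+.3})", d) }
        }
        None => String::new(),
    }
}

fn main() {
    assert_eq!(delta(1.5, Some(1.0)), " (Δ+0.500)"); // {:+} forces the sign
    assert_eq!(delta(1.0, Some(1.0)), "");           // below noise floor: omitted
    assert_eq!(delta(1.0, None), "");                // first run: no baseline
    println!("σ: 2.1{}", delta(2.1, Some(1.9)));
}
```

The noise floor matters because the deltas compare float metrics recomputed on slightly different graphs; without it, every report would show a cosmetic ±0.000 wobble.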