Add jaccard() and jaccard_strengths() to Graph. Jaccard similarity measures neighborhood overlap between linked nodes — nodes sharing many neighbors get stronger links, nodes with no shared neighbors get weak links. New subcommand: `poc-memory graph normalize-strengths [--apply]` Scales raw Jaccard (typically 0.0-0.3) to useful range via j*3 clamped to [0.1, 1.0]. Skips implicit temporal edges (strength=1.0). Applied to 64,969 edges. Distribution is bimodal: large cluster at 0.1-0.2 (weak) and spike at 0.9-1.0 (strong), with smooth gradient between. Replaces the meaningless 0.3/0.8 split from manual/agent creation methods. Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
916 lines
32 KiB
Rust
916 lines
32 KiB
Rust
// Graph algorithms: clustering coefficient, community detection (label
|
||
// propagation), schema fit scoring, small-world metrics, consolidation
|
||
// priority scoring.
|
||
//
|
||
// The Graph is built from the Store's nodes + relations. Edges are
|
||
// undirected for clustering/community (even causal edges count as
|
||
// connections), but relation type and direction are preserved for
|
||
// specific queries.
|
||
|
||
use crate::store::{Store, RelationType, StoreView};
|
||
|
||
use serde::{Deserialize, Serialize};
|
||
use std::collections::{HashMap, HashSet, VecDeque};
|
||
|
||
/// Weighted edge in the graph
#[derive(Clone, Debug)]
pub struct Edge {
    /// Key of the node this edge points to.
    pub target: String,
    /// Edge weight; implicit temporal edges are inserted with 1.0,
    /// stored relations carry whatever strength the store recorded.
    pub strength: f32,
    /// Relation type preserved from the store (direction is not
    /// represented here — the adjacency list is undirected).
    pub rel_type: RelationType,
}
|
||
|
||
/// The in-memory graph built from store nodes + relations
pub struct Graph {
    /// Adjacency list: node key → list of edges.
    /// Undirected: every edge is stored once in each endpoint's list
    /// (see `build_adjacency`), so list lengths double-count edges.
    adj: HashMap<String, Vec<Edge>>,
    /// All node keys
    keys: HashSet<String>,
    /// Community labels (from label propagation).
    /// Empty when the graph was built with `build_graph_fast`.
    communities: HashMap<String, u32>,
}
|
||
|
||
impl Graph {
|
||
    /// All node keys in the graph.
    pub fn nodes(&self) -> &HashSet<String> {
        &self.keys
    }
|
||
|
||
pub fn degree(&self, key: &str) -> usize {
|
||
self.adj.get(key).map(|e| e.len()).unwrap_or(0)
|
||
}
|
||
|
||
pub fn edge_count(&self) -> usize {
|
||
self.adj.values().map(|e| e.len()).sum::<usize>() / 2
|
||
}
|
||
|
||
/// All edges for a node (full Edge data including rel_type)
|
||
pub fn edges_of(&self, key: &str) -> &[Edge] {
|
||
self.adj.get(key)
|
||
.map(|v| v.as_slice())
|
||
.unwrap_or(&[])
|
||
}
|
||
|
||
/// All neighbor keys with strengths
|
||
pub fn neighbors(&self, key: &str) -> Vec<(&String, f32)> {
|
||
self.adj.get(key)
|
||
.map(|edges| edges.iter().map(|e| (&e.target, e.strength)).collect())
|
||
.unwrap_or_default()
|
||
}
|
||
|
||
/// Just neighbor keys
|
||
pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
|
||
self.adj.get(key)
|
||
.map(|edges| edges.iter().map(|e| e.target.as_str()).collect())
|
||
.unwrap_or_default()
|
||
}
|
||
|
||
/// Jaccard similarity between two nodes' neighborhoods.
|
||
/// Measures overlap: |intersection| / |union| of their neighbor sets.
|
||
pub fn jaccard(&self, a: &str, b: &str) -> f32 {
|
||
let na = self.neighbor_keys(a);
|
||
let nb = self.neighbor_keys(b);
|
||
let intersection = na.intersection(&nb).count();
|
||
let union = na.union(&nb).count();
|
||
if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
|
||
}
|
||
|
||
/// Compute Jaccard-based strength for every edge in the graph.
|
||
/// Returns (source_key, target_key, jaccard_strength) triples.
|
||
/// Scales raw Jaccard (typically 0.0-0.3) to a useful range.
|
||
pub fn jaccard_strengths(&self) -> Vec<(String, String, f32)> {
|
||
let mut result = Vec::new();
|
||
let mut seen = HashSet::new();
|
||
for (key, edges) in &self.adj {
|
||
for edge in edges {
|
||
// Deduplicate undirected edges
|
||
let pair = if key < &edge.target {
|
||
(key.as_str(), edge.target.as_str())
|
||
} else {
|
||
(edge.target.as_str(), key.as_str())
|
||
};
|
||
if !seen.insert((pair.0.to_string(), pair.1.to_string())) {
|
||
continue;
|
||
}
|
||
let j = self.jaccard(key, &edge.target);
|
||
// Scale: raw Jaccard 0.05 → 0.15, 0.15 → 0.45, 0.30 → 0.90
|
||
// Formula: clamp(j * 3, 0.1, 1.0)
|
||
let strength = (j * 3.0).clamp(0.1, 1.0);
|
||
result.push((key.clone(), edge.target.clone(), strength));
|
||
}
|
||
}
|
||
result
|
||
}
|
||
|
||
pub fn community_count(&self) -> usize {
|
||
let labels: HashSet<_> = self.communities.values().collect();
|
||
labels.len()
|
||
}
|
||
|
||
    /// Community label for every node (from label propagation).
    /// Empty when the graph was built with `build_graph_fast`.
    pub fn communities(&self) -> &HashMap<String, u32> {
        &self.communities
    }
|
||
|
||
/// Hub degree threshold: top 5% by degree
|
||
pub fn hub_threshold(&self) -> usize {
|
||
let mut degrees: Vec<usize> = self.keys.iter()
|
||
.map(|k| self.degree(k))
|
||
.collect();
|
||
degrees.sort_unstable();
|
||
if degrees.len() >= 20 {
|
||
degrees[degrees.len() * 95 / 100]
|
||
} else {
|
||
usize::MAX
|
||
}
|
||
}
|
||
|
||
/// Local clustering coefficient: fraction of a node's neighbors
|
||
/// that are also neighbors of each other.
|
||
/// cc(v) = 2E / (deg * (deg - 1))
|
||
pub fn clustering_coefficient(&self, key: &str) -> f32 {
|
||
let neighbors = self.neighbor_keys(key);
|
||
let deg = neighbors.len();
|
||
if deg < 2 {
|
||
return 0.0;
|
||
}
|
||
|
||
let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
|
||
let mut triangles = 0u32;
|
||
for i in 0..neighbor_vec.len() {
|
||
for j in (i + 1)..neighbor_vec.len() {
|
||
let ni_neighbors = self.neighbor_keys(neighbor_vec[i]);
|
||
if ni_neighbors.contains(neighbor_vec[j]) {
|
||
triangles += 1;
|
||
}
|
||
}
|
||
}
|
||
|
||
(2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
|
||
}
|
||
|
||
/// Average clustering coefficient across all nodes with deg >= 2
|
||
pub fn avg_clustering_coefficient(&self) -> f32 {
|
||
let mut sum = 0.0f32;
|
||
let mut count = 0u32;
|
||
for key in &self.keys {
|
||
if self.degree(key) >= 2 {
|
||
sum += self.clustering_coefficient(key);
|
||
count += 1;
|
||
}
|
||
}
|
||
if count == 0 { 0.0 } else { sum / count as f32 }
|
||
}
|
||
|
||
/// Average shortest path length (sampled BFS from up to 100 nodes)
|
||
pub fn avg_path_length(&self) -> f32 {
|
||
let sample: Vec<&String> = self.keys.iter().take(100).collect();
|
||
if sample.is_empty() { return 0.0; }
|
||
|
||
let mut total_dist = 0u64;
|
||
let mut total_pairs = 0u64;
|
||
|
||
for &start in &sample {
|
||
let dists = self.bfs_distances(start);
|
||
for d in dists.values() {
|
||
if *d > 0 {
|
||
total_dist += *d as u64;
|
||
total_pairs += 1;
|
||
}
|
||
}
|
||
}
|
||
|
||
if total_pairs == 0 { 0.0 } else { total_dist as f32 / total_pairs as f32 }
|
||
}
|
||
|
||
fn bfs_distances(&self, start: &str) -> HashMap<String, u32> {
|
||
let mut dist = HashMap::new();
|
||
let mut queue = VecDeque::new();
|
||
dist.insert(start.to_string(), 0u32);
|
||
queue.push_back(start.to_string());
|
||
|
||
while let Some(node) = queue.pop_front() {
|
||
let d = dist[&node];
|
||
for neighbor in self.neighbor_keys(&node) {
|
||
if !dist.contains_key(neighbor) {
|
||
dist.insert(neighbor.to_string(), d + 1);
|
||
queue.push_back(neighbor.to_string());
|
||
}
|
||
}
|
||
}
|
||
dist
|
||
}
|
||
|
||
/// Power-law exponent α of the degree distribution.
|
||
///
|
||
/// Estimated via MLE: α = 1 + n / Σ ln(k_i / (k_min - 0.5))
|
||
/// α ≈ 2: extreme hub dominance (fragile)
|
||
/// α ≈ 3: healthy scale-free
|
||
/// α > 3: approaching random graph (egalitarian)
|
||
pub fn degree_power_law_exponent(&self) -> f32 {
|
||
let mut degrees: Vec<usize> = self.keys.iter()
|
||
.map(|k| self.degree(k))
|
||
.filter(|&d| d > 0) // exclude isolates
|
||
.collect();
|
||
if degrees.len() < 10 { return 0.0; } // not enough data
|
||
|
||
degrees.sort_unstable();
|
||
let k_min = degrees[0] as f64;
|
||
if k_min < 1.0 { return 0.0; }
|
||
|
||
let n = degrees.len() as f64;
|
||
let sum_ln: f64 = degrees.iter()
|
||
.map(|&k| (k as f64 / (k_min - 0.5)).ln())
|
||
.sum();
|
||
|
||
if sum_ln <= 0.0 { return 0.0; }
|
||
(1.0 + n / sum_ln) as f32
|
||
}
|
||
|
||
/// Gini coefficient of the degree distribution.
|
||
///
|
||
/// 0 = perfectly egalitarian (all nodes same degree)
|
||
/// 1 = maximally unequal (one node has all edges)
|
||
/// Measures hub concentration independent of distribution shape.
|
||
pub fn degree_gini(&self) -> f32 {
|
||
let mut degrees: Vec<f64> = self.keys.iter()
|
||
.map(|k| self.degree(k) as f64)
|
||
.collect();
|
||
let n = degrees.len();
|
||
if n < 2 { return 0.0; }
|
||
|
||
degrees.sort_by(|a, b| a.total_cmp(b));
|
||
let mean = degrees.iter().sum::<f64>() / n as f64;
|
||
if mean < 1e-10 { return 0.0; }
|
||
|
||
// Gini = (2 Σ i·x_i) / (n Σ x_i) - (n+1)/n
|
||
let weighted_sum: f64 = degrees.iter().enumerate()
|
||
.map(|(i, &d)| (i as f64 + 1.0) * d)
|
||
.sum();
|
||
let total = degrees.iter().sum::<f64>();
|
||
|
||
let gini = (2.0 * weighted_sum) / (n as f64 * total) - (n as f64 + 1.0) / n as f64;
|
||
gini.max(0.0) as f32
|
||
}
|
||
|
||
/// Small-world coefficient σ = (C/C_rand) / (L/L_rand)
|
||
/// C_rand ≈ <k>/n, L_rand ≈ ln(n)/ln(<k>)
|
||
pub fn small_world_sigma(&self) -> f32 {
|
||
let n = self.keys.len() as f32;
|
||
if n < 10.0 { return 0.0; }
|
||
|
||
let avg_degree = self.adj.values()
|
||
.map(|e| e.len() as f32)
|
||
.sum::<f32>() / n;
|
||
if avg_degree < 1.0 { return 0.0; }
|
||
|
||
let c = self.avg_clustering_coefficient();
|
||
let l = self.avg_path_length();
|
||
|
||
let c_rand = avg_degree / n;
|
||
let l_rand = n.ln() / avg_degree.ln();
|
||
|
||
if c_rand < 1e-10 || l_rand < 1e-10 || l < 1e-10 {
|
||
return 0.0;
|
||
}
|
||
|
||
(c / c_rand) / (l / l_rand)
|
||
}
|
||
}
|
||
|
||
/// Impact of adding a hypothetical edge.
///
/// Produced by `Graph::link_impact`; all deltas are estimates computed
/// without mutating the graph.
#[derive(Debug)]
pub struct LinkImpact {
    /// Proposed edge source key.
    pub source: String,
    /// Proposed edge target key.
    pub target: String,
    /// Current degree of the source (before adding the edge).
    pub source_deg: usize,
    /// Current degree of the target (before adding the edge).
    pub target_deg: usize,
    /// Is this a hub link? (either endpoint in top 5% by degree)
    pub is_hub_link: bool,
    /// Are both endpoints in the same community?
    pub same_community: bool,
    /// Change in clustering coefficient for source
    pub delta_cc_source: f32,
    /// Change in clustering coefficient for target
    pub delta_cc_target: f32,
    /// Change in degree Gini (positive = more hub-dominated)
    pub delta_gini: f32,
    /// Qualitative assessment
    pub assessment: &'static str,
}
|
||
|
||
impl Graph {
    /// Simulate adding an edge and report impact on topology metrics.
    ///
    /// Doesn't modify the graph — computes what would change if the
    /// edge were added.
    pub fn link_impact(&self, source: &str, target: &str) -> LinkImpact {
        let source_deg = self.degree(source);
        let target_deg = self.degree(target);
        let hub_threshold = self.hub_threshold();
        // Hub if EITHER endpoint is in the top 5% by degree.
        let is_hub_link = source_deg >= hub_threshold || target_deg >= hub_threshold;

        // Community check — nodes missing a label never count as same-community.
        let sc = self.communities.get(source);
        let tc = self.communities.get(target);
        let same_community = match (sc, tc) {
            (Some(a), Some(b)) => a == b,
            _ => false,
        };

        // CC change for source: adding target as neighbor changes the
        // triangle count. New triangles form for each node that's a
        // neighbor of BOTH source and target.
        let source_neighbors = self.neighbor_keys(source);
        let target_neighbors = self.neighbor_keys(target);
        let shared_neighbors = source_neighbors.intersection(&target_neighbors).count();

        let cc_before_source = self.clustering_coefficient(source);
        let cc_before_target = self.clustering_coefficient(target);

        // Estimate new CC for source after adding edge.
        // NOTE(review): the triangle count is recovered from the float CC
        // by inverting cc = 2E/(d(d-1)); the `as u32` cast truncates, so a
        // tiny float error can undercount by one triangle.
        let new_source_deg = source_deg + 1;
        let new_source_triangles = if source_deg >= 2 {
            // Current triangles + new ones from shared neighbors
            let current_triangles = (cc_before_source
                * source_deg as f32 * (source_deg as f32 - 1.0) / 2.0) as u32;
            current_triangles + shared_neighbors as u32
        } else {
            // CC was 0 by definition (deg < 2); only the new triangles count.
            shared_neighbors as u32
        };
        let cc_after_source = if new_source_deg >= 2 {
            (2.0 * new_source_triangles as f32)
                / (new_source_deg as f32 * (new_source_deg as f32 - 1.0))
        } else {
            0.0
        };

        // Same estimate, mirrored for the target endpoint.
        let new_target_deg = target_deg + 1;
        let new_target_triangles = if target_deg >= 2 {
            let current_triangles = (cc_before_target
                * target_deg as f32 * (target_deg as f32 - 1.0) / 2.0) as u32;
            current_triangles + shared_neighbors as u32
        } else {
            shared_neighbors as u32
        };
        let cc_after_target = if new_target_deg >= 2 {
            (2.0 * new_target_triangles as f32)
                / (new_target_deg as f32 * (new_target_deg as f32 - 1.0))
        } else {
            0.0
        };

        // Gini change via influence function:
        // IF(x; Gini, F) = (2F(x) - 1) * x/μ - Gini - 1
        // Adding an edge increments two degrees. The net ΔGini is the sum
        // of influence contributions from both endpoints shifting up by 1.
        let gini_before = self.degree_gini();
        let n = self.keys.len();
        let total_degree: f64 = self.keys.iter()
            .map(|k| self.degree(k) as f64)
            .sum();
        let mean_deg = if n > 0 { total_degree / n as f64 } else { 1.0 };

        // CDF at each endpoint's degree: fraction of nodes with degree ≤ d
        let delta_gini = if mean_deg > 1e-10 && n >= 2 {
            // Count nodes with degree ≤ source_deg and ≤ target_deg
            let f_source = self.keys.iter()
                .filter(|k| self.degree(k) <= source_deg)
                .count() as f64 / n as f64;
            let f_target = self.keys.iter()
                .filter(|k| self.degree(k) <= target_deg)
                .count() as f64 / n as f64;

            // Influence of incrementing source's degree by 1
            let new_source = (source_deg + 1) as f64;
            let if_source = (2.0 * f_source - 1.0) * new_source / mean_deg
                - gini_before as f64 - 1.0;
            // Influence of incrementing target's degree by 1
            let new_target = (target_deg + 1) as f64;
            let if_target = (2.0 * f_target - 1.0) * new_target / mean_deg
                - gini_before as f64 - 1.0;

            // Scale: each point contributes 1/n to the distribution
            ((if_source + if_target) / n as f64) as f32
        } else {
            0.0f32
        };

        // Qualitative assessment. The five explicit arms cover every
        // combination of (is_hub_link, same_community, shared_neighbors),
        // so the final "neutral" arm is an unreachable safety default.
        let assessment = if is_hub_link && same_community {
            "hub-reinforcing: strengthens existing star topology"
        } else if is_hub_link && !same_community {
            "hub-bridging: cross-community but through a hub"
        } else if !is_hub_link && same_community && shared_neighbors > 0 {
            "lateral-clustering: strengthens local mesh topology"
        } else if !is_hub_link && !same_community {
            "lateral-bridging: best kind — cross-community lateral link"
        } else if !is_hub_link && same_community {
            "lateral-local: connects peripheral nodes in same community"
        } else {
            "neutral"
        };

        LinkImpact {
            source: source.to_string(),
            target: target.to_string(),
            source_deg,
            target_deg,
            is_hub_link,
            same_community,
            delta_cc_source: cc_after_source - cc_before_source,
            delta_cc_target: cc_after_target - cc_before_target,
            delta_gini,
            assessment,
        }
    }
}
|
||
|
||
/// Build graph from store data (with community detection)
|
||
pub fn build_graph(store: &impl StoreView) -> Graph {
|
||
let (adj, keys) = build_adjacency(store);
|
||
let communities = label_propagation(&keys, &adj, 20);
|
||
Graph { adj, keys, communities }
|
||
}
|
||
|
||
/// Build graph without community detection — for spreading activation
|
||
/// searches where we only need the adjacency list.
|
||
pub fn build_graph_fast(store: &impl StoreView) -> Graph {
|
||
let (adj, keys) = build_adjacency(store);
|
||
Graph { adj, keys, communities: HashMap::new() }
|
||
}
|
||
|
||
fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {
|
||
let mut adj: HashMap<String, Vec<Edge>> = HashMap::new();
|
||
let mut keys: HashSet<String> = HashSet::new();
|
||
|
||
store.for_each_node(|key, _, _| {
|
||
keys.insert(key.to_owned());
|
||
});
|
||
|
||
store.for_each_relation(|source_key, target_key, strength, rel_type| {
|
||
if !keys.contains(source_key) || !keys.contains(target_key) {
|
||
return;
|
||
}
|
||
|
||
adj.entry(source_key.to_owned()).or_default().push(Edge {
|
||
target: target_key.to_owned(),
|
||
strength,
|
||
rel_type,
|
||
});
|
||
adj.entry(target_key.to_owned()).or_default().push(Edge {
|
||
target: source_key.to_owned(),
|
||
strength,
|
||
rel_type,
|
||
});
|
||
});
|
||
|
||
add_implicit_temporal_edges(store, &keys, &mut adj);
|
||
|
||
(adj, keys)
|
||
}
|
||
|
||
/// Add implicit edges for the temporal/digest hierarchy.
///
/// These edges are derived from node types and dates — they don't
/// need to be stored. Two kinds:
/// - parent/child: session→daily→weekly→monthly (by date containment)
/// - prev/next: chronological ordering within each level
///
/// Sessions use their timestamp for date. Digest nodes (daily/weekly/monthly)
/// extract the date they *cover* from the key name, since their timestamp
/// is when the digest was created, not what period it covers.
///
/// All edges added here get strength 1.0 and `RelationType::Auto`.
fn add_implicit_temporal_edges(
    store: &impl StoreView,
    keys: &HashSet<String>,
    adj: &mut HashMap<String, Vec<Edge>>,
) {
    use crate::store::NodeType::*;
    use chrono::{Datelike, DateTime, NaiveDate};

    // Extract the covered date from a key name.
    // Patterns: "daily-2026-03-06", "daily-2026-03-06-identity",
    // "weekly-2026-W09", "monthly-2026-02"
    // "journal#j-2026-03-13t...", "journal#2026-03-13-..."
    fn date_from_key(key: &str) -> Option<NaiveDate> {
        // Try extracting YYYY-MM-DD after known prefixes.
        // "journal#j-" must be tried before "journal#" so the "j-" form
        // isn't mis-parsed by the shorter prefix.
        for prefix in ["daily-", "journal#j-", "journal#"] {
            if let Some(rest) = key.strip_prefix(prefix) {
                if rest.len() >= 10 {
                    if let Ok(d) = NaiveDate::parse_from_str(&rest[..10], "%Y-%m-%d") {
                        return Some(d);
                    }
                }
            }
        }
        None
    }

    fn week_from_key(key: &str) -> Option<(i32, u32)> {
        // "weekly-2026-W09" → (2026, 9)
        let rest = key.strip_prefix("weekly-")?;
        let (year_str, w_str) = rest.split_once("-W")?;
        let year: i32 = year_str.parse().ok()?;
        // Week string might have a suffix like "-foo"
        let week_str = w_str.split('-').next()?;
        let week: u32 = week_str.parse().ok()?;
        Some((year, week))
    }

    fn month_from_key(key: &str) -> Option<(i32, u32)> {
        // "monthly-2026-02" → (2026, 2)
        let rest = key.strip_prefix("monthly-")?;
        let (year_str, month_str) = rest.split_once('-')?;
        let year: i32 = year_str.parse().ok()?;
        let month_str = month_str.split('-').next()?;
        let month: u32 = month_str.parse().ok()?;
        Some((year, month))
    }

    // Collect episodic nodes by type
    struct Dated { key: String, ts: i64, date: NaiveDate }

    let mut sessions: Vec<Dated> = Vec::new();
    let mut dailies: Vec<(String, NaiveDate)> = Vec::new();
    let mut weeklies: Vec<(String, (i32, u32))> = Vec::new();
    let mut monthlies: Vec<(String, (i32, u32))> = Vec::new();

    store.for_each_node_meta(|key, node_type, ts| {
        // Only nodes that made it into the graph's key set.
        if !keys.contains(key) { return; }
        match node_type {
            EpisodicSession => {
                // Prefer date from key (local time) over timestamp (UTC)
                // to avoid timezone mismatches
                let date = date_from_key(key).or_else(|| {
                    DateTime::from_timestamp(ts, 0).map(|dt| dt.date_naive())
                });
                if let Some(date) = date {
                    sessions.push(Dated { key: key.to_owned(), ts, date });
                }
            }
            EpisodicDaily => {
                if let Some(date) = date_from_key(key) {
                    dailies.push((key.to_owned(), date));
                }
            }
            EpisodicWeekly => {
                if let Some(yw) = week_from_key(key) {
                    weeklies.push((key.to_owned(), yw));
                }
            }
            EpisodicMonthly => {
                if let Some(ym) = month_from_key(key) {
                    monthlies.push((key.to_owned(), ym));
                }
            }
            _ => {}
        }
    });

    // Chronological order within each level (needed for prev/next links).
    sessions.sort_by_key(|d| d.ts);
    dailies.sort_by_key(|(_, d)| *d);
    weeklies.sort_by_key(|(_, yw)| *yw);
    monthlies.sort_by_key(|(_, ym)| *ym);

    // Insert an undirected edge a↔b unless a already links to b.
    // (Only a's list is checked; edges are always added symmetrically
    // here, so a one-sided check suffices.)
    let add_edge = |adj: &mut HashMap<String, Vec<Edge>>, a: &str, b: &str| {
        if let Some(edges) = adj.get(a) {
            if edges.iter().any(|e| e.target == b) { return; }
        }
        adj.entry(a.to_owned()).or_default().push(Edge {
            target: b.to_owned(),
            strength: 1.0,
            rel_type: RelationType::Auto,
        });
        adj.entry(b.to_owned()).or_default().push(Edge {
            target: a.to_owned(),
            strength: 1.0,
            rel_type: RelationType::Auto,
        });
    };

    // Build indexes: date→dailies, (year,week)→weekly, (year,month)→monthly
    // Note: multiple dailies can share a date (e.g. daily-2026-03-06-identity,
    // daily-2026-03-06-technical), so we collect all of them.
    let mut date_to_dailies: HashMap<NaiveDate, Vec<String>> = HashMap::new();
    for (key, date) in &dailies {
        date_to_dailies.entry(*date).or_default().push(key.clone());
    }

    let mut yw_to_weekly: HashMap<(i32, u32), String> = HashMap::new();
    for (key, yw) in &weeklies {
        yw_to_weekly.insert(*yw, key.clone());
    }

    let mut ym_to_monthly: HashMap<(i32, u32), String> = HashMap::new();
    for (key, ym) in &monthlies {
        ym_to_monthly.insert(*ym, key.clone());
    }

    // Session → Daily (parent): each session links to all dailies for its date
    for sess in &sessions {
        if let Some(daily_keys) = date_to_dailies.get(&sess.date) {
            for daily in daily_keys {
                add_edge(adj, &sess.key, daily);
            }
        }
    }

    // Daily → Weekly (parent)
    for (key, date) in &dailies {
        let yw = (date.iso_week().year(), date.iso_week().week());
        if let Some(weekly) = yw_to_weekly.get(&yw) {
            add_edge(adj, key, weekly);
        }
    }

    // Weekly → Monthly (parent)
    for (key, yw) in &weeklies {
        // A week can span two months; use the Thursday date (ISO week convention)
        let thursday = NaiveDate::from_isoywd_opt(yw.0, yw.1, chrono::Weekday::Thu);
        if let Some(d) = thursday {
            let ym = (d.year(), d.month());
            if let Some(monthly) = ym_to_monthly.get(&ym) {
                add_edge(adj, key, monthly);
            }
        }
    }

    // Prev/next within each level
    for pair in sessions.windows(2) {
        add_edge(adj, &pair[0].key, &pair[1].key);
    }
    for pair in dailies.windows(2) {
        add_edge(adj, &pair[0].0, &pair[1].0);
    }
    for pair in weeklies.windows(2) {
        add_edge(adj, &pair[0].0, &pair[1].0);
    }
    for pair in monthlies.windows(2) {
        add_edge(adj, &pair[0].0, &pair[1].0);
    }

}
|
||
|
||
/// Label propagation community detection.
///
/// Each node starts with its own label. Each iteration: adopt the most
/// common label among neighbors (weighted by edge strength). Iterate
/// until stable or max_iterations.
///
/// NOTE(review): node visit order comes from a HashSet and vote ties are
/// broken by HashMap iteration order, so results can differ between runs
/// on the same graph — confirm whether determinism matters to callers.
fn label_propagation(
    keys: &HashSet<String>,
    adj: &HashMap<String, Vec<Edge>>,
    max_iterations: u32,
) -> HashMap<String, u32> {
    // Only consider edges above this strength for community votes.
    // Weak auto-links from triangle closure (0.15-0.35) bridge
    // unrelated clusters — filtering them lets natural communities emerge.
    let min_strength: f32 = 0.3;

    // Initialize: each node gets its own label
    let key_vec: Vec<String> = keys.iter().cloned().collect();
    let mut labels: HashMap<String, u32> = key_vec.iter()
        .enumerate()
        .map(|(i, k)| (k.clone(), i as u32))
        .collect();

    for _iter in 0..max_iterations {
        let mut changed = false;

        for key in &key_vec {
            let edges = match adj.get(key) {
                Some(e) => e,
                None => continue,
            };
            if edges.is_empty() { continue; }

            // Count weighted votes for each label (skip weak edges)
            let mut votes: HashMap<u32, f32> = HashMap::new();
            for edge in edges {
                if edge.strength < min_strength { continue; }
                if let Some(&label) = labels.get(&edge.target) {
                    *votes.entry(label).or_default() += edge.strength;
                }
            }

            // Adopt the label with most votes
            if let Some((&best_label, _)) = votes.iter()
                .max_by(|a, b| a.1.total_cmp(b.1))
            {
                let current = labels[key];
                if best_label != current {
                    labels.insert(key.clone(), best_label);
                    changed = true;
                }
            }
        }

        // Converged: a full pass with no label changes.
        if !changed { break; }
    }

    // Compact labels to 0..n
    let mut label_map: HashMap<u32, u32> = HashMap::new();
    let mut next_id = 0;
    for label in labels.values_mut() {
        let new_label = *label_map.entry(*label).or_insert_with(|| {
            let id = next_id;
            next_id += 1;
            id
        });
        *label = new_label;
    }

    labels
}
|
||
|
||
|
||
/// A snapshot of graph topology metrics, for tracking evolution over time
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MetricsSnapshot {
    /// Epoch seconds when the snapshot was taken.
    pub timestamp: i64,
    /// Human-readable rendering of `timestamp`.
    pub date: String,
    /// Total node count.
    pub nodes: usize,
    /// Total undirected edge count.
    pub edges: usize,
    /// Number of distinct community labels.
    pub communities: usize,
    /// Small-world coefficient σ (>1 suggests small-world structure).
    pub sigma: f32,
    /// Power-law exponent α of the degree distribution.
    pub alpha: f32,
    /// Degree Gini coefficient (0 = equal, 1 = one hub owns all edges).
    pub gini: f32,
    /// Average local clustering coefficient.
    pub avg_cc: f32,
    /// Average shortest path length (sampled BFS).
    pub avg_path_length: f32,
    // Removed: avg_schema_fit was identical to avg_cc.
    // Old snapshots with the field still deserialize (serde ignores unknown fields by default).
}
|
||
|
||
fn metrics_log_path() -> std::path::PathBuf {
|
||
crate::store::memory_dir().join("metrics.jsonl")
|
||
}
|
||
|
||
/// Load previous metrics snapshots
|
||
pub fn load_metrics_history() -> Vec<MetricsSnapshot> {
|
||
crate::util::jsonl_load(&metrics_log_path())
|
||
}
|
||
|
||
/// Append a metrics snapshot to the log.
///
/// Best-effort: write errors are deliberately discarded (`let _ =`) so
/// that metrics logging can never fail the caller.
pub fn save_metrics_snapshot(snap: &MetricsSnapshot) {
    let _ = crate::util::jsonl_append(&metrics_log_path(), snap);
}
|
||
|
||
/// Compute current graph metrics as a snapshot (no side effects).
|
||
pub fn current_metrics(graph: &Graph) -> MetricsSnapshot {
|
||
let now = crate::store::now_epoch();
|
||
let date = crate::store::format_datetime_space(now);
|
||
MetricsSnapshot {
|
||
timestamp: now,
|
||
date,
|
||
nodes: graph.nodes().len(),
|
||
edges: graph.edge_count(),
|
||
communities: graph.community_count(),
|
||
sigma: graph.small_world_sigma(),
|
||
alpha: graph.degree_power_law_exponent(),
|
||
gini: graph.degree_gini(),
|
||
avg_cc: graph.avg_clustering_coefficient(),
|
||
avg_path_length: graph.avg_path_length(),
|
||
}
|
||
}
|
||
|
||
/// Health report: summary of graph metrics.
/// Saves a metrics snapshot as a side effect (callers who want pure
/// computation should use `current_metrics` + `save_metrics_snapshot`).
pub fn health_report(graph: &Graph, store: &Store) -> String {
    let snap = current_metrics(graph);
    save_metrics_snapshot(&snap);

    let n = snap.nodes;
    let e = snap.edges;
    let avg_cc = snap.avg_cc;
    let avg_pl = snap.avg_path_length;
    let sigma = snap.sigma;
    let alpha = snap.alpha;
    let gini = snap.gini;
    let communities = snap.communities;

    // Community sizes
    let mut comm_sizes: HashMap<u32, usize> = HashMap::new();
    for label in graph.communities().values() {
        *comm_sizes.entry(*label).or_default() += 1;
    }
    // Largest communities first.
    let mut sizes: Vec<usize> = comm_sizes.values().copied().collect();
    sizes.sort_unstable_by(|a, b| b.cmp(a));

    // Degree distribution
    let mut degrees: Vec<usize> = graph.nodes().iter()
        .map(|k| graph.degree(k))
        .collect();
    degrees.sort_unstable();
    let max_deg = degrees.last().copied().unwrap_or(0);
    // Upper median for even-length lists.
    let median_deg = if degrees.is_empty() { 0 } else { degrees[degrees.len() / 2] };
    let avg_deg = if n == 0 { 0.0 } else {
        degrees.iter().sum::<usize>() as f64 / n as f64
    };

    // Low-CC nodes: poorly integrated
    let low_cc = graph.nodes().iter()
        .filter(|k| graph.clustering_coefficient(k) < 0.1)
        .count();

    // Orphan edges: relations referencing non-existent nodes
    let mut orphan_edges = 0usize;
    let mut missing_nodes: HashSet<String> = HashSet::new();
    for rel in &store.relations {
        if rel.deleted { continue; }
        let s_missing = !store.nodes.contains_key(&rel.source_key);
        let t_missing = !store.nodes.contains_key(&rel.target_key);
        if s_missing || t_missing {
            orphan_edges += 1;
            if s_missing { missing_nodes.insert(rel.source_key.clone()); }
            if t_missing { missing_nodes.insert(rel.target_key.clone()); }
        }
    }

    // NodeType breakdown
    let mut type_counts: HashMap<&str, usize> = HashMap::new();
    for node in store.nodes.values() {
        let label = match node.node_type {
            crate::store::NodeType::EpisodicSession => "episodic",
            crate::store::NodeType::EpisodicDaily => "daily",
            crate::store::NodeType::EpisodicWeekly => "weekly",
            crate::store::NodeType::EpisodicMonthly => "monthly",
            crate::store::NodeType::Semantic => "semantic",
        };
        *type_counts.entry(label).or_default() += 1;
    }

    // Load history for deltas
    let history = load_metrics_history();
    let prev = if history.len() >= 2 {
        Some(&history[history.len() - 2]) // second-to-last (last is the one we just wrote)
    } else {
        None
    };

    // Render " (Δ±x.xxx)" against the previous snapshot; empty when there
    // is no previous value or the change is below display precision.
    fn delta(current: f32, prev: Option<f32>) -> String {
        match prev {
            Some(p) => {
                let d = current - p;
                if d.abs() < 0.001 { String::new() }
                else { format!(" (Δ{:+.3})", d) }
            }
            None => String::new(),
        }
    }

    let sigma_d = delta(sigma, prev.map(|p| p.sigma));
    let alpha_d = delta(alpha, prev.map(|p| p.alpha));
    let gini_d = delta(gini, prev.map(|p| p.gini));
    let cc_d = delta(avg_cc, prev.map(|p| p.avg_cc));

    let mut report = format!(
        "Memory Health Report
====================
Nodes: {n} Relations: {e} Communities: {communities}

Degree: max={max_deg} median={median_deg} avg={avg_deg:.1}
Clustering coefficient (avg): {avg_cc:.4}{cc_d} low-CC (<0.1): {low_cc} nodes
Average path length: {avg_pl:.2}
Small-world σ: {sigma:.3}{sigma_d} (>1 = small-world)
Power-law α: {alpha:.2}{alpha_d} (2=hub-dominated, 3=healthy, >3=egalitarian)
Degree Gini: {gini:.3}{gini_d} (0=equal, 1=one-hub)

Community sizes (top 5): {top5}
Types: semantic={semantic} episodic={episodic} daily={daily} weekly={weekly} monthly={monthly}",
        top5 = sizes.iter().take(5)
            .map(|s| s.to_string())
            .collect::<Vec<_>>()
            .join(", "),
        semantic = type_counts.get("semantic").unwrap_or(&0),
        episodic = type_counts.get("episodic").unwrap_or(&0),
        daily = type_counts.get("daily").unwrap_or(&0),
        weekly = type_counts.get("weekly").unwrap_or(&0),
        monthly = type_counts.get("monthly").unwrap_or(&0),
    );

    // Orphan edges
    if orphan_edges == 0 {
        report.push_str("\n\nBroken links: 0");
    } else {
        report.push_str(&format!(
            "\n\nBroken links: {} edges reference {} missing nodes",
            orphan_edges, missing_nodes.len()));
        // Sorted for stable output; cap the listing at 10 keys.
        let mut sorted: Vec<_> = missing_nodes.iter().collect();
        sorted.sort();
        for key in sorted.iter().take(10) {
            report.push_str(&format!("\n - {}", key));
        }
        if sorted.len() > 10 {
            report.push_str(&format!("\n ... and {} more", sorted.len() - 10));
        }
    }

    // Show history trend if we have enough data points
    if history.len() >= 3 {
        report.push_str("\n\nMetrics history (last 5):\n");
        for snap in &history[history.len().saturating_sub(5)..] {
            report.push_str(&format!(" {} — σ={:.1} α={:.2} gini={:.3} cc={:.4}\n",
                snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc));
        }
    }

    report
}
|