training: per-node scoring with graph weight updates

Memory scoring now uses the graph as source of truth:
- last_scored timestamp on each node (new capnp field @22)
- Nodes scored when older than scoring_interval_secs (default 1hr)
- Oldest-scored-first ordering
- Window: scoring_response_window assistant responses (default 100)
- First-quarter memories scored even without full window
- Per-response normalization (raw divergence / response count)
- Asymmetric weight update: alpha=0.5 up, alpha=0.1 down
  (responds fast to importance, decays slowly — memories stay
  surfaced even if only useful 1/4 of the time)

Graph writes disabled pending normalization calibration.

Also: configurable scoring_interval_secs and scoring_response_window.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
ProofOfConcept 2026-04-04 05:01:49 -04:00 committed by Kent Overstreet
parent b0603fd1ef
commit fcd77fb79e
8 changed files with 109 additions and 64 deletions

View file

@@ -253,6 +253,19 @@ pub async fn score_memories(
})
}
/// Find the entry index after `start` that contains the Nth assistant response.
/// Returns (end_index, true) if N responses were found, (entries.len(), false) if not.
fn nth_response_end(entries: &[ConversationEntry], start: usize, n: usize) -> (usize, bool) {
    // Locate the index of the Nth assistant-role entry at or after `start`.
    // `saturating_sub` makes n == 0 behave like n == 1 (stop at the first
    // assistant response), matching the original counting-loop formulation.
    let nth_assistant = entries
        .iter()
        .enumerate()
        .skip(start)
        .filter(|(_, entry)| entry.message().role == Role::Assistant)
        .map(|(i, _)| i)
        .nth(n.saturating_sub(1));
    match nth_assistant {
        // Window ends one past the entry holding the Nth response.
        Some(i) => (i + 1, true),
        // Ran out of entries before seeing N responses.
        None => (entries.len(), false),
    }
}
// ── Single memory scoring ───────────────────────────────────────
/// Score how important a single memory is to the conversation.
@@ -266,7 +279,7 @@ pub async fn score_memory(
client: &ApiClient,
ui_tx: &UiSender,
) -> anyhow::Result<f64> {
const WINDOW: usize = 50;
const RESPONSE_WINDOW: usize = 50;
let first_pos = match context.entries.iter().position(|e| {
matches!(e, ConversationEntry::Memory { key: k, .. } if k == key)
@@ -275,7 +288,8 @@ pub async fn score_memory(
None => return Ok(0.0),
};
let range = first_pos..(first_pos + WINDOW).min(context.entries.len());
let (end, _) = nth_response_end(&context.entries, first_pos, RESPONSE_WINDOW);
let range = first_pos..end;
if !context.entries[range.clone()].iter().any(|e| e.message().role == Role::Assistant) {
return Ok(0.0);
}
@@ -290,63 +304,71 @@ pub async fn score_memory(
// ── Background memory scoring ───────────────────────────────────
/// Incrementally score memories through the conversation.
/// Score memories in the conversation that are due for re-scoring.
///
/// Walks memory entries in conversation order starting from `cursor`.
/// For each memory with a full WINDOW after it, calls score_memory()
/// and yields the result. Stops at the first memory that doesn't have
/// enough messages yet — the conversation needs to grow before we can
/// score it.
/// Checks the graph for each memory's last_scored timestamp. Scores
/// nodes that haven't been scored within `max_age_secs`, oldest first.
/// Updates the graph weight (EWMA) and last_scored after each.
///
/// Returns the updated cursor (entry index to resume from next time)
/// and the scores for each memory that was scored this round.
/// Returns the number of nodes scored and their (key, score) pairs.
pub async fn score_memories_incremental(
context: &ContextState,
cursor: usize,
max_age_secs: i64,
response_window: usize,
client: &ApiClient,
ui_tx: &UiSender,
) -> anyhow::Result<(usize, Vec<(String, f64)>)> {
const WINDOW: usize = 50;
) -> anyhow::Result<Vec<(String, f64)>> {
let now = chrono::Utc::now().timestamp();
// Collect unique memory keys with their first position, starting from cursor
// Collect unique memory keys with their first position
let mut seen = std::collections::HashSet::new();
let mut to_score: Vec<(usize, String)> = Vec::new();
let mut candidates: Vec<(usize, String, i64)> = Vec::new(); // (pos, key, last_scored)
for (i, entry) in context.entries.iter().enumerate().skip(cursor) {
let store = crate::hippocampus::store::Store::load().unwrap_or_default();
for (i, entry) in context.entries.iter().enumerate() {
if let ConversationEntry::Memory { key, .. } = entry {
if seen.insert(key.clone()) {
to_score.push((i, key.clone()));
if !seen.insert(key.clone()) { continue; }
let last_scored = store.nodes.get(key.as_str())
.map(|n| n.last_scored)
.unwrap_or(0);
if now - last_scored >= max_age_secs {
candidates.push((i, key.clone(), last_scored));
}
}
}
// Score oldest-first
candidates.sort_by_key(|&(_, _, last)| last);
let http = http_client();
let mut new_cursor = cursor;
let mut results = Vec::new();
for (pos, key) in &to_score {
let end = pos + WINDOW;
let total_entries = context.entries.len();
let first_quarter = total_entries / 4;
// Not enough conversation after this memory yet — stop here
if end > context.entries.len() {
break;
for (pos, key, _) in &candidates {
let (end, full_window) = nth_response_end(&context.entries, *pos, response_window);
// Skip memories without a full window, unless they're in the
// first quarter of the conversation (always score those).
if !full_window && *pos >= first_quarter {
continue;
}
// Need at least one assistant response in the window
let range = *pos..end;
if !context.entries[range.clone()].iter().any(|e| e.message().role == Role::Assistant) {
new_cursor = end;
continue;
}
let _ = ui_tx.send(UiMessage::Activity(format!("scoring memory: {}...", key)));
match score_divergence(&http, client, context, range, Filter::SkipKey(key)).await {
Ok((divs, _)) => {
let importance: f64 = divs.iter().sum();
let n_responses = divs.len();
let max_div = divs.iter().cloned().fold(0.0f64, f64::max);
let _ = ui_tx.send(UiMessage::Debug(format!(
"[scoring] {} → {:.2}", key, importance,
"[scoring] {} max:{:.3} ({} responses)", key, max_div, n_responses,
)));
results.push((key.clone(), importance));
// TODO: update graph weight once normalization is figured out
results.push((key.clone(), max_div));
}
Err(e) => {
let _ = ui_tx.send(UiMessage::Debug(format!(
@@ -354,11 +376,10 @@ pub async fn score_memories_incremental(
)));
}
}
new_cursor = end;
}
let _ = ui_tx.send(UiMessage::Activity(String::new()));
Ok((new_cursor, results))
Ok(results)
}
// ── Fine-tuning scoring ─────────────────────────────────────────