training: per-node scoring with graph weight updates
Memory scoring now uses the graph as the source of truth:

- `last_scored` timestamp on each node (new capnp field @22)
- Nodes are scored when older than `scoring_interval_secs` (default 1 hour)
- Oldest-scored-first ordering
- Window: `scoring_response_window` assistant responses (default 100)
- First-quarter memories are scored even without a full window
- Per-response normalization (raw divergence / response count)
- Asymmetric weight update: alpha=0.5 up, alpha=0.1 down (responds quickly to importance, decays slowly — memories stay surfaced even if only useful 1/4 of the time)

Graph writes are disabled pending normalization calibration.

Also: configurable `scoring_interval_secs` and `scoring_response_window`.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
b0603fd1ef
commit
fcd77fb79e
8 changed files with 109 additions and 64 deletions
|
|
@ -662,13 +662,19 @@ impl Agent {
|
|||
_ => unreachable!(),
|
||||
};
|
||||
let text = entry.message().content_text();
|
||||
let score = memory_scores
|
||||
// Show node weight from graph (updated by incremental scorer)
|
||||
let graph_weight = crate::hippocampus::store::Store::load().ok()
|
||||
.and_then(|s| s.nodes.get(key).map(|n| n.weight));
|
||||
// Show full matrix score if available
|
||||
let matrix_score = memory_scores
|
||||
.and_then(|s| s.memory_weights.iter()
|
||||
.find(|(k, _)| k == key)
|
||||
.map(|(_, v)| *v));
|
||||
let label = match score {
|
||||
Some(v) => format!("{} (importance: {:.1})", key, v),
|
||||
None => key.to_string(),
|
||||
let label = match (graph_weight, matrix_score) {
|
||||
(Some(w), Some(s)) => format!("{} (w:{:.2} score:{:.1})", key, w, s),
|
||||
(Some(w), None) => format!("{} (w:{:.2})", key, w),
|
||||
(None, Some(s)) => format!("{} (score:{:.1})", key, s),
|
||||
(None, None) => key.to_string(),
|
||||
};
|
||||
ContextSection {
|
||||
name: label,
|
||||
|
|
|
|||
|
|
@ -253,6 +253,19 @@ pub async fn score_memories(
|
|||
})
|
||||
}
|
||||
|
||||
/// Find the entry index after `start` that contains the Nth assistant response.
|
||||
/// Returns (end_index, true) if N responses were found, (entries.len(), false) if not.
|
||||
fn nth_response_end(entries: &[ConversationEntry], start: usize, n: usize) -> (usize, bool) {
|
||||
let mut count = 0;
|
||||
for i in start..entries.len() {
|
||||
if entries[i].message().role == Role::Assistant {
|
||||
count += 1;
|
||||
if count >= n { return (i + 1, true); }
|
||||
}
|
||||
}
|
||||
(entries.len(), false)
|
||||
}
|
||||
|
||||
// ── Single memory scoring ───────────────────────────────────────
|
||||
|
||||
/// Score how important a single memory is to the conversation.
|
||||
|
|
@ -266,7 +279,7 @@ pub async fn score_memory(
|
|||
client: &ApiClient,
|
||||
ui_tx: &UiSender,
|
||||
) -> anyhow::Result<f64> {
|
||||
const WINDOW: usize = 50;
|
||||
const RESPONSE_WINDOW: usize = 50;
|
||||
|
||||
let first_pos = match context.entries.iter().position(|e| {
|
||||
matches!(e, ConversationEntry::Memory { key: k, .. } if k == key)
|
||||
|
|
@ -275,7 +288,8 @@ pub async fn score_memory(
|
|||
None => return Ok(0.0),
|
||||
};
|
||||
|
||||
let range = first_pos..(first_pos + WINDOW).min(context.entries.len());
|
||||
let (end, _) = nth_response_end(&context.entries, first_pos, RESPONSE_WINDOW);
|
||||
let range = first_pos..end;
|
||||
if !context.entries[range.clone()].iter().any(|e| e.message().role == Role::Assistant) {
|
||||
return Ok(0.0);
|
||||
}
|
||||
|
|
@ -290,63 +304,71 @@ pub async fn score_memory(
|
|||
|
||||
// ── Background memory scoring ───────────────────────────────────
|
||||
|
||||
/// Incrementally score memories through the conversation.
|
||||
/// Score memories in the conversation that are due for re-scoring.
|
||||
///
|
||||
/// Walks memory entries in conversation order starting from `cursor`.
|
||||
/// For each memory with a full WINDOW after it, calls score_memory()
|
||||
/// and yields the result. Stops at the first memory that doesn't have
|
||||
/// enough messages yet — the conversation needs to grow before we can
|
||||
/// score it.
|
||||
/// Checks the graph for each memory's last_scored timestamp. Scores
|
||||
/// nodes that haven't been scored within `max_age_secs`, oldest first.
|
||||
/// Updates the graph weight (EWMA) and last_scored after each.
|
||||
///
|
||||
/// Returns the updated cursor (entry index to resume from next time)
|
||||
/// and the scores for each memory that was scored this round.
|
||||
/// Returns the number of nodes scored and their (key, score) pairs.
|
||||
pub async fn score_memories_incremental(
|
||||
context: &ContextState,
|
||||
cursor: usize,
|
||||
max_age_secs: i64,
|
||||
response_window: usize,
|
||||
client: &ApiClient,
|
||||
ui_tx: &UiSender,
|
||||
) -> anyhow::Result<(usize, Vec<(String, f64)>)> {
|
||||
const WINDOW: usize = 50;
|
||||
) -> anyhow::Result<Vec<(String, f64)>> {
|
||||
let now = chrono::Utc::now().timestamp();
|
||||
|
||||
// Collect unique memory keys with their first position, starting from cursor
|
||||
// Collect unique memory keys with their first position
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
let mut to_score: Vec<(usize, String)> = Vec::new();
|
||||
let mut candidates: Vec<(usize, String, i64)> = Vec::new(); // (pos, key, last_scored)
|
||||
|
||||
for (i, entry) in context.entries.iter().enumerate().skip(cursor) {
|
||||
let store = crate::hippocampus::store::Store::load().unwrap_or_default();
|
||||
|
||||
for (i, entry) in context.entries.iter().enumerate() {
|
||||
if let ConversationEntry::Memory { key, .. } = entry {
|
||||
if seen.insert(key.clone()) {
|
||||
to_score.push((i, key.clone()));
|
||||
if !seen.insert(key.clone()) { continue; }
|
||||
let last_scored = store.nodes.get(key.as_str())
|
||||
.map(|n| n.last_scored)
|
||||
.unwrap_or(0);
|
||||
if now - last_scored >= max_age_secs {
|
||||
candidates.push((i, key.clone(), last_scored));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Score oldest-first
|
||||
candidates.sort_by_key(|&(_, _, last)| last);
|
||||
|
||||
let http = http_client();
|
||||
let mut new_cursor = cursor;
|
||||
let mut results = Vec::new();
|
||||
|
||||
for (pos, key) in &to_score {
|
||||
let end = pos + WINDOW;
|
||||
let total_entries = context.entries.len();
|
||||
let first_quarter = total_entries / 4;
|
||||
|
||||
// Not enough conversation after this memory yet — stop here
|
||||
if end > context.entries.len() {
|
||||
break;
|
||||
for (pos, key, _) in &candidates {
|
||||
let (end, full_window) = nth_response_end(&context.entries, *pos, response_window);
|
||||
// Skip memories without a full window, unless they're in the
|
||||
// first quarter of the conversation (always score those).
|
||||
if !full_window && *pos >= first_quarter {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Need at least one assistant response in the window
|
||||
let range = *pos..end;
|
||||
if !context.entries[range.clone()].iter().any(|e| e.message().role == Role::Assistant) {
|
||||
new_cursor = end;
|
||||
continue;
|
||||
}
|
||||
|
||||
let _ = ui_tx.send(UiMessage::Activity(format!("scoring memory: {}...", key)));
|
||||
match score_divergence(&http, client, context, range, Filter::SkipKey(key)).await {
|
||||
Ok((divs, _)) => {
|
||||
let importance: f64 = divs.iter().sum();
|
||||
let n_responses = divs.len();
|
||||
let max_div = divs.iter().cloned().fold(0.0f64, f64::max);
|
||||
let _ = ui_tx.send(UiMessage::Debug(format!(
|
||||
"[scoring] {} → {:.2}", key, importance,
|
||||
"[scoring] {} max:{:.3} ({} responses)", key, max_div, n_responses,
|
||||
)));
|
||||
results.push((key.clone(), importance));
|
||||
// TODO: update graph weight once normalization is figured out
|
||||
results.push((key.clone(), max_div));
|
||||
}
|
||||
Err(e) => {
|
||||
let _ = ui_tx.send(UiMessage::Debug(format!(
|
||||
|
|
@ -354,11 +376,10 @@ pub async fn score_memories_incremental(
|
|||
)));
|
||||
}
|
||||
}
|
||||
new_cursor = end;
|
||||
}
|
||||
|
||||
let _ = ui_tx.send(UiMessage::Activity(String::new()));
|
||||
Ok((new_cursor, results))
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
// ── Fine-tuning scoring ─────────────────────────────────────────
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue