2026-04-03 00:31:57 -04:00
|
|
|
|
// training.rs — Memory importance scoring via /v1/score
|
2026-04-02 22:13:55 -04:00
|
|
|
|
//
|
2026-04-04 01:33:31 -04:00
|
|
|
|
// Three scoring modes, all built on the same call_score() primitive:
|
2026-04-02 22:13:55 -04:00
|
|
|
|
//
|
2026-04-04 01:33:31 -04:00
|
|
|
|
// score_memories() — Full N×M matrix (memories × responses) for the
|
|
|
|
|
|
// debug screen. Expensive: N+1 API calls.
|
|
|
|
|
|
//
|
|
|
|
|
|
// score_memory() — Single memory importance. Scores the 50 messages
|
|
|
|
|
|
// after it was surfaced, with/without that memory.
|
|
|
|
|
|
// 2 API calls.
|
|
|
|
|
|
//
|
|
|
|
|
|
// score_finetune() — Identifies training candidates. Scores recent
|
|
|
|
|
|
// messages with all memories stripped. Responses
|
|
|
|
|
|
// with high divergence depend on memories the model
|
|
|
|
|
|
// hasn't internalized. 2 API calls.
|
2026-04-04 00:29:11 -04:00
|
|
|
|
|
2026-04-05 01:48:11 -04:00
|
|
|
|
use crate::agent::api::ApiClient;
|
2026-04-08 15:29:52 -04:00
|
|
|
|
use crate::agent::context::{AstNode, Ast, NodeBody, ContextState, Role};
|
2026-04-02 22:13:55 -04:00
|
|
|
|
|
2026-04-03 01:07:47 -04:00
|
|
|
|
const SCORE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(120);
|
|
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
// ── Message building ────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
/// What to filter when building the message array for scoring.
///
/// Each variant names one ablation: which conversation entries to drop
/// before asking the scoring API to re-score the transcript.
#[allow(dead_code)]
enum Filter<'a> {
    /// Drop nothing — the baseline scoring pass.
    None,
    /// Drop the single conversation entry at this index.
    SkipIndex(usize),
    /// Drop every entry whose memory key equals this key.
    SkipKey(&'a str),
    /// Drop all memory entries (used for fine-tune candidate scoring).
    SkipAllMemories,
}
|
|
|
|
|
|
|
2026-04-08 15:29:52 -04:00
|
|
|
|
fn is_memory(node: &AstNode) -> bool {
|
|
|
|
|
|
matches!(node, AstNode::Leaf(leaf) if matches!(leaf.body(), NodeBody::Memory { .. }))
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn memory_key(node: &AstNode) -> Option<&str> {
|
|
|
|
|
|
match node {
|
|
|
|
|
|
AstNode::Leaf(leaf) => match leaf.body() {
|
|
|
|
|
|
NodeBody::Memory { key, .. } => Some(key),
|
|
|
|
|
|
_ => None,
|
|
|
|
|
|
},
|
|
|
|
|
|
_ => None,
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn is_assistant(node: &AstNode) -> bool {
|
|
|
|
|
|
matches!(node, AstNode::Branch { role: Role::Assistant, .. })
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Push an AstNode as one or more JSON messages for the scoring API.
|
|
|
|
|
|
fn push_api_message(node: &AstNode, msgs: &mut Vec<serde_json::Value>) {
|
|
|
|
|
|
match node {
|
|
|
|
|
|
AstNode::Branch { role, children } => {
|
|
|
|
|
|
let content: String = children.iter().map(|c| c.render()).collect();
|
|
|
|
|
|
msgs.push(serde_json::json!({
|
|
|
|
|
|
"role": role.as_str(),
|
|
|
|
|
|
"content": content,
|
|
|
|
|
|
}));
|
|
|
|
|
|
}
|
|
|
|
|
|
AstNode::Leaf(leaf) => {
|
|
|
|
|
|
let role = match leaf.body() {
|
|
|
|
|
|
NodeBody::ToolResult(_) => "tool",
|
|
|
|
|
|
_ => "user",
|
|
|
|
|
|
};
|
|
|
|
|
|
msgs.push(serde_json::json!({
|
|
|
|
|
|
"role": role,
|
|
|
|
|
|
"content": leaf.body().text(),
|
|
|
|
|
|
}));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
/// Build the messages array for a scoring call.
|
|
|
|
|
|
///
|
2026-04-08 15:29:52 -04:00
|
|
|
|
/// Always includes system prompt as prefix, then entries from `range`
|
|
|
|
|
|
/// filtered by `filter`.
|
2026-04-04 01:33:31 -04:00
|
|
|
|
fn build_messages(
|
|
|
|
|
|
context: &ContextState,
|
|
|
|
|
|
range: std::ops::Range<usize>,
|
|
|
|
|
|
filter: Filter,
|
|
|
|
|
|
) -> Vec<serde_json::Value> {
|
2026-04-07 20:15:31 -04:00
|
|
|
|
let mut msgs = Vec::new();
|
2026-04-08 15:29:52 -04:00
|
|
|
|
for node in context.system() {
|
|
|
|
|
|
push_api_message(node, &mut msgs);
|
2026-04-04 01:33:31 -04:00
|
|
|
|
}
|
2026-04-08 15:29:52 -04:00
|
|
|
|
let entries = context.conversation();
|
2026-04-04 01:33:31 -04:00
|
|
|
|
for i in range {
|
2026-04-08 15:29:52 -04:00
|
|
|
|
let node = &entries[i];
|
2026-04-04 01:33:31 -04:00
|
|
|
|
let skip = match &filter {
|
|
|
|
|
|
Filter::None => false,
|
|
|
|
|
|
Filter::SkipIndex(idx) => i == *idx,
|
2026-04-08 15:29:52 -04:00
|
|
|
|
Filter::SkipKey(key) => memory_key(node) == Some(*key),
|
|
|
|
|
|
Filter::SkipAllMemories => is_memory(node),
|
2026-04-04 01:33:31 -04:00
|
|
|
|
};
|
|
|
|
|
|
if skip { continue; }
|
2026-04-08 15:29:52 -04:00
|
|
|
|
push_api_message(node, &mut msgs);
|
2026-04-04 01:33:31 -04:00
|
|
|
|
}
|
|
|
|
|
|
msgs
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ── Score API ───────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
/// One score entry from the /score API response.
#[derive(serde::Deserialize)]
struct ScoreResult {
    // Sum of token logprobs for one scored span; higher (less negative)
    // means the model found the text more likely. NOTE(review): callers
    // treat entries as positional per response — confirm against the API.
    total_logprob: f64,
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Top-level body of a successful /score API response.
#[derive(serde::Deserialize)]
struct ScoreResponse {
    // Scores in request order; callers index these positionally.
    scores: Vec<ScoreResult>,
}
|
|
|
|
|
|
|
2026-04-07 12:50:40 -04:00
|
|
|
|
fn http_client() -> crate::agent::api::http::HttpClient {
|
|
|
|
|
|
crate::agent::api::http::HttpClient::builder()
|
2026-04-04 01:33:31 -04:00
|
|
|
|
.timeout(SCORE_TIMEOUT)
|
|
|
|
|
|
.build()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
async fn call_score(
|
2026-04-07 12:50:40 -04:00
|
|
|
|
http: &crate::agent::api::http::HttpClient,
|
2026-04-04 01:33:31 -04:00
|
|
|
|
client: &ApiClient,
|
|
|
|
|
|
messages: &[serde_json::Value],
|
2026-04-09 20:42:38 -04:00
|
|
|
|
priority: Option<i32>,
|
2026-04-04 01:33:31 -04:00
|
|
|
|
) -> anyhow::Result<Vec<ScoreResult>> {
|
2026-04-07 12:50:40 -04:00
|
|
|
|
let url = format!("{}/score", client.base_url());
|
|
|
|
|
|
let auth = format!("Bearer {}", client.api_key());
|
2026-04-09 20:42:38 -04:00
|
|
|
|
let mut body = serde_json::json!({
|
2026-04-07 12:50:40 -04:00
|
|
|
|
"model": client.model,
|
|
|
|
|
|
"messages": messages,
|
|
|
|
|
|
"logprobs": 1,
|
|
|
|
|
|
});
|
2026-04-09 20:42:38 -04:00
|
|
|
|
if let Some(p) = priority {
|
|
|
|
|
|
body["priority"] = serde_json::json!(p);
|
|
|
|
|
|
}
|
2026-04-04 01:33:31 -04:00
|
|
|
|
let response = http
|
2026-04-07 12:50:40 -04:00
|
|
|
|
.send_json("POST", &url, &[
|
|
|
|
|
|
("authorization", &auth),
|
|
|
|
|
|
], &body)
|
|
|
|
|
|
.await?;
|
2026-04-04 01:33:31 -04:00
|
|
|
|
|
|
|
|
|
|
let status = response.status();
|
|
|
|
|
|
let body: serde_json::Value = response.json().await?;
|
|
|
|
|
|
|
|
|
|
|
|
if !status.is_success() {
|
|
|
|
|
|
let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("unknown error");
|
|
|
|
|
|
anyhow::bail!("score API HTTP {}: {}", status, msg);
|
|
|
|
|
|
}
|
|
|
|
|
|
if let Some(err) = body.get("error").and_then(|e| e.as_str()) {
|
|
|
|
|
|
anyhow::bail!("score API error: {}", err);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let result: ScoreResponse = serde_json::from_value(body)
|
|
|
|
|
|
.map_err(|e| anyhow::anyhow!("failed to parse score response: {}", e))?;
|
|
|
|
|
|
Ok(result.scores)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Compute per-position logprob divergence: how much worse the model
|
|
|
|
|
|
/// scores each response without something vs with it.
|
|
|
|
|
|
fn divergence(baseline: &[ScoreResult], without: &[ScoreResult]) -> Vec<f64> {
|
|
|
|
|
|
baseline.iter().enumerate()
|
|
|
|
|
|
.map(|(i, base)| {
|
|
|
|
|
|
let without_lp = without.get(i).map(|s| s.total_logprob).unwrap_or(base.total_logprob);
|
|
|
|
|
|
(base.total_logprob - without_lp).max(0.0)
|
|
|
|
|
|
})
|
|
|
|
|
|
.collect()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Score two message sets and return total divergence.
|
|
|
|
|
|
async fn score_divergence(
|
2026-04-07 12:50:40 -04:00
|
|
|
|
http: &crate::agent::api::http::HttpClient,
|
2026-04-04 01:33:31 -04:00
|
|
|
|
client: &ApiClient,
|
|
|
|
|
|
context: &ContextState,
|
|
|
|
|
|
range: std::ops::Range<usize>,
|
|
|
|
|
|
filter: Filter<'_>,
|
2026-04-09 20:42:38 -04:00
|
|
|
|
priority: Option<i32>,
|
2026-04-04 01:33:31 -04:00
|
|
|
|
) -> anyhow::Result<(Vec<f64>, Vec<ScoreResult>)> {
|
2026-04-09 20:42:38 -04:00
|
|
|
|
let baseline = call_score(http, client, &build_messages(context, range.clone(), Filter::None), priority).await?;
|
|
|
|
|
|
let without = call_score(http, client, &build_messages(context, range, filter), priority).await?;
|
2026-04-04 01:33:31 -04:00
|
|
|
|
let divs = divergence(&baseline, &without);
|
|
|
|
|
|
Ok((divs, baseline))
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ── Full matrix scoring (debug screen) ──────────────────────────
|
|
|
|
|
|
|
2026-04-02 22:13:55 -04:00
|
|
|
|
/// Result of scoring one conversation's memory usage.
pub struct MemoryScore {
    // Per-memory total divergence: (key, sum of its matrix row).
    pub memory_weights: Vec<(String, f64)>,
    // Per-response total divergence, summed over all memories.
    pub response_scores: Vec<f64>,
    /// Full matrix: divergence[memory_idx][response_idx]
    // Rows parallel `memory_keys`. NOTE(review): row length comes from the
    // baseline score count, presumed to match response count — score_memories
    // guards against a mismatch when summing.
    pub matrix: Vec<Vec<f64>>,
    // Row labels for `matrix`, in first-appearance order.
    pub memory_keys: Vec<String>,
    // Conversation entry index of each assistant response, parallel to
    // `response_scores` and to the matrix columns.
    pub response_entry_indices: Vec<usize>,
}
|
|
|
|
|
|
|
|
|
|
|
|
impl MemoryScore {
|
|
|
|
|
|
pub fn important_memories_for_entry(&self, entry_idx: usize) -> Vec<(&str, f64)> {
|
|
|
|
|
|
let Some(resp_idx) = self.response_entry_indices.iter().position(|&i| i == entry_idx)
|
|
|
|
|
|
else { return Vec::new() };
|
|
|
|
|
|
|
|
|
|
|
|
let mut result: Vec<(&str, f64)> = self.memory_keys.iter()
|
|
|
|
|
|
.zip(self.matrix.iter())
|
|
|
|
|
|
.filter_map(|(key, row)| {
|
|
|
|
|
|
let score = row.get(resp_idx).copied().unwrap_or(0.0);
|
|
|
|
|
|
if score > 0.01 { Some((key.as_str(), score)) } else { None }
|
|
|
|
|
|
})
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
result.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
|
|
|
|
|
result
|
|
|
|
|
|
}
|
2026-04-02 22:13:55 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
/// Score how important each memory is to the conversation (full matrix).
|
2026-04-02 22:13:55 -04:00
|
|
|
|
pub async fn score_memories(
|
|
|
|
|
|
context: &ContextState,
|
|
|
|
|
|
client: &ApiClient,
|
|
|
|
|
|
) -> anyhow::Result<MemoryScore> {
|
2026-04-08 15:29:52 -04:00
|
|
|
|
let mut memory_keys: Vec<String> = context.conversation().iter()
|
|
|
|
|
|
.filter_map(|node| memory_key(node).map(String::from))
|
2026-04-02 22:13:55 -04:00
|
|
|
|
.collect();
|
2026-04-04 01:33:31 -04:00
|
|
|
|
memory_keys.dedup();
|
2026-04-02 22:13:55 -04:00
|
|
|
|
|
2026-04-08 15:29:52 -04:00
|
|
|
|
let response_indices: Vec<usize> = context.conversation().iter().enumerate()
|
|
|
|
|
|
.filter(|(_, node)| is_assistant(node))
|
2026-04-02 22:13:55 -04:00
|
|
|
|
.map(|(i, _)| i)
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
if memory_keys.is_empty() || response_indices.is_empty() {
|
2026-04-02 22:13:55 -04:00
|
|
|
|
return Ok(MemoryScore {
|
2026-04-04 01:33:31 -04:00
|
|
|
|
memory_weights: Vec::new(), response_scores: Vec::new(),
|
|
|
|
|
|
matrix: Vec::new(), memory_keys: Vec::new(),
|
2026-04-02 22:27:43 -04:00
|
|
|
|
response_entry_indices: Vec::new(),
|
2026-04-02 22:13:55 -04:00
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
let http = http_client();
|
2026-04-08 15:29:52 -04:00
|
|
|
|
let range = 0..context.conversation().len();
|
2026-04-03 00:31:57 -04:00
|
|
|
|
|
2026-04-09 20:42:38 -04:00
|
|
|
|
let baseline = call_score(&http, client, &build_messages(context, range.clone(), Filter::None), Some(5)).await?;
|
2026-04-02 22:13:55 -04:00
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
let total = memory_keys.len();
|
2026-04-02 22:13:55 -04:00
|
|
|
|
let mut matrix: Vec<Vec<f64>> = Vec::new();
|
|
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
for (mem_idx, key) in memory_keys.iter().enumerate() {
|
2026-04-05 21:45:55 -04:00
|
|
|
|
dbglog!(
|
2026-04-03 01:07:47 -04:00
|
|
|
|
"scoring {}/{}: {}...", mem_idx + 1, total, key,
|
2026-04-05 21:45:55 -04:00
|
|
|
|
);
|
2026-04-04 01:33:31 -04:00
|
|
|
|
let msgs = build_messages(context, range.clone(), Filter::SkipKey(key));
|
2026-04-09 20:42:38 -04:00
|
|
|
|
match call_score(&http, client, &msgs, Some(5)).await {
|
2026-04-04 01:33:31 -04:00
|
|
|
|
Ok(without) => matrix.push(divergence(&baseline, &without)),
|
2026-04-03 01:07:47 -04:00
|
|
|
|
Err(e) => {
|
2026-04-05 21:45:55 -04:00
|
|
|
|
dbglog!(
|
2026-04-04 01:33:31 -04:00
|
|
|
|
"[training] {} FAILED: {:#}", key, e,
|
2026-04-05 21:45:55 -04:00
|
|
|
|
);
|
2026-04-03 01:07:47 -04:00
|
|
|
|
matrix.push(vec![0.0; baseline.len()]);
|
|
|
|
|
|
}
|
2026-04-02 22:13:55 -04:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-03 00:31:57 -04:00
|
|
|
|
|
2026-04-02 22:13:55 -04:00
|
|
|
|
let memory_weights: Vec<(String, f64)> = memory_keys.iter()
|
|
|
|
|
|
.zip(matrix.iter())
|
|
|
|
|
|
.map(|(key, row)| (key.clone(), row.iter().sum()))
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
let mut response_scores = vec![0.0; response_indices.len()];
|
2026-04-02 22:13:55 -04:00
|
|
|
|
for row in &matrix {
|
|
|
|
|
|
for (j, &v) in row.iter().enumerate() {
|
2026-04-04 01:33:31 -04:00
|
|
|
|
if j < response_scores.len() { response_scores[j] += v; }
|
2026-04-02 22:13:55 -04:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Ok(MemoryScore {
|
2026-04-04 01:33:31 -04:00
|
|
|
|
memory_weights, response_scores, matrix, memory_keys,
|
2026-04-02 22:27:43 -04:00
|
|
|
|
response_entry_indices: response_indices,
|
2026-04-02 22:13:55 -04:00
|
|
|
|
})
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-04 05:01:49 -04:00
|
|
|
|
/// Find the entry index after `start` that contains the Nth assistant response.
|
|
|
|
|
|
/// Returns (end_index, true) if N responses were found, (entries.len(), false) if not.
|
2026-04-08 15:29:52 -04:00
|
|
|
|
fn nth_response_end(entries: &[AstNode], start: usize, n: usize) -> (usize, bool) {
|
2026-04-04 05:01:49 -04:00
|
|
|
|
let mut count = 0;
|
|
|
|
|
|
for i in start..entries.len() {
|
2026-04-08 15:29:52 -04:00
|
|
|
|
if is_assistant(&entries[i]) {
|
2026-04-04 05:01:49 -04:00
|
|
|
|
count += 1;
|
|
|
|
|
|
if count >= n { return (i + 1, true); }
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
(entries.len(), false)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
// ── Single memory scoring ───────────────────────────────────────
|
2026-04-02 22:35:29 -04:00
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
/// Score how important a single memory is to the conversation.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Scores the 50 messages after the memory was surfaced — the window
|
|
|
|
|
|
/// where it could have influenced responses. Returns the sum of
|
|
|
|
|
|
/// divergence, or 0.0 if the memory isn't in the conversation.
|
|
|
|
|
|
pub async fn score_memory(
|
|
|
|
|
|
context: &ContextState,
|
|
|
|
|
|
key: &str,
|
|
|
|
|
|
client: &ApiClient,
|
|
|
|
|
|
) -> anyhow::Result<f64> {
|
2026-04-04 05:01:49 -04:00
|
|
|
|
const RESPONSE_WINDOW: usize = 50;
|
2026-04-04 01:33:31 -04:00
|
|
|
|
|
2026-04-08 15:29:52 -04:00
|
|
|
|
let entries = context.conversation();
|
|
|
|
|
|
let first_pos = match entries.iter().position(|node| memory_key(node) == Some(key)) {
|
2026-04-04 01:33:31 -04:00
|
|
|
|
Some(p) => p,
|
|
|
|
|
|
None => return Ok(0.0),
|
|
|
|
|
|
};
|
|
|
|
|
|
|
2026-04-07 20:15:31 -04:00
|
|
|
|
let (end, _) = nth_response_end(entries, first_pos, RESPONSE_WINDOW);
|
2026-04-04 05:01:49 -04:00
|
|
|
|
let range = first_pos..end;
|
2026-04-08 15:29:52 -04:00
|
|
|
|
if !entries[range.clone()].iter().any(|node| is_assistant(node)) {
|
2026-04-04 01:33:31 -04:00
|
|
|
|
return Ok(0.0);
|
2026-04-02 22:35:29 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
let http = http_client();
|
2026-04-09 20:42:38 -04:00
|
|
|
|
let (divs, _) = score_divergence(&http, client, context, range, Filter::SkipKey(key), Some(5)).await?;
|
2026-04-02 22:35:29 -04:00
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
Ok(divs.iter().sum())
|
|
|
|
|
|
}
|
2026-04-02 22:13:55 -04:00
|
|
|
|
|
2026-04-04 02:46:32 -04:00
|
|
|
|
// ── Background memory scoring ───────────────────────────────────
|
|
|
|
|
|
|
2026-04-04 05:01:49 -04:00
|
|
|
|
/// Score memories in the conversation that are due for re-scoring.
///
/// Checks the graph for each memory's last_scored timestamp and scores
/// nodes that haven't been scored within `max_age_secs`, oldest first.
/// Each scored memory's (key, max divergence) is passed to `on_score`,
/// which is expected to persist the updated weight and last_scored.
///
/// Returns the number of nodes scored.
pub async fn score_memories_incremental<F, Fut>(
    context: &ContextState,
    max_age_secs: i64,
    response_window: usize,
    client: &ApiClient,
    agent: &std::sync::Arc<crate::agent::Agent>,
    mut on_score: F,
) -> anyhow::Result<usize>
where
    F: FnMut(String, f64) -> Fut,
    Fut: std::future::Future<Output = ()>,
{
    let now = chrono::Utc::now().timestamp();

    // Collect unique memory keys with their first position in the conversation.
    let mut seen = std::collections::HashSet::new();
    let mut candidates: Vec<(usize, String, i64)> = Vec::new(); // (pos, key, last_scored)

    // Unknown keys get last_scored = 0, so never-scored memories rank oldest.
    let store = crate::hippocampus::store::Store::load().unwrap_or_default();

    for (i, node) in context.conversation().iter().enumerate() {
        if let Some(key) = memory_key(node) {
            if !seen.insert(key.to_owned()) { continue; }
            let last_scored = store.nodes.get(key)
                .map(|n| n.last_scored)
                .unwrap_or(0);
            if now - last_scored >= max_age_secs {
                candidates.push((i, key.to_owned(), last_scored));
            }
        }
    }

    // Score oldest-first
    candidates.sort_by_key(|&(_, _, last)| last);

    let http = http_client();
    let mut scored = 0;

    let entries = context.conversation();
    let total_tokens: usize = entries.iter().map(|n| n.tokens()).sum();
    // Cutoff at 60% of total conversation tokens (see loop comment below).
    let token_cutoff = total_tokens * 60 / 100;

    // Precompute cumulative token position for each entry
    let mut cumulative: Vec<usize> = Vec::with_capacity(entries.len());
    let mut running = 0;
    for e in entries {
        running += e.tokens();
        cumulative.push(running);
    }

    for (pos, key, _) in &candidates {
        // Only score memories in the first 60% of the conversation by tokens —
        // recent memories don't have enough responses to evaluate yet.
        if cumulative.get(*pos).copied().unwrap_or(total_tokens) > token_cutoff {
            continue;
        }
        let (end, _) = nth_response_end(context.conversation(), *pos, response_window);
        let range = *pos..end;
        // No assistant responses after the memory → nothing to measure.
        if !context.conversation()[range.clone()].iter().any(|node| is_assistant(node)) {
            continue;
        }

        // RAII guard: activity is shown while scoring, cleared on drop.
        let _scoring = crate::agent::start_activity(agent, format!("scoring: {}", key)).await;
        match score_divergence(&http, client, context, range, Filter::SkipKey(key), Some(5)).await {
            Ok((divs, _)) => {
                let n_responses = divs.len();
                // Report the single strongest influence, not the sum.
                let max_div = divs.iter().cloned().fold(0.0f64, f64::max);
                dbglog!(
                    "[scoring] {} max:{:.3} ({} responses)", key, max_div, n_responses,
                );
                on_score(key.clone(), max_div).await;
                scored += 1;
            }
            Err(e) => {
                // Best-effort: a failed score is logged and skipped, not fatal.
                dbglog!(
                    "[scoring] {} FAILED: {:#}", key, e,
                );
            }
        }
    }

    Ok(scored)
}
|
|
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
// ── Fine-tuning scoring ─────────────────────────────────────────
|
2026-04-02 22:13:55 -04:00
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
/// Score which recent responses are candidates for fine-tuning.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Removes all memories and scores the most recent `count` messages.
|
|
|
|
|
|
/// Responses with high divergence depend on memories the model hasn't
|
|
|
|
|
|
/// internalized — these are fine-tuning candidates.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Returns (entry_index, divergence) pairs, sorted by divergence descending.
|
|
|
|
|
|
pub async fn score_finetune(
|
|
|
|
|
|
context: &ContextState,
|
|
|
|
|
|
count: usize,
|
|
|
|
|
|
client: &ApiClient,
|
|
|
|
|
|
) -> anyhow::Result<Vec<(usize, f64)>> {
|
2026-04-08 15:29:52 -04:00
|
|
|
|
let entries = context.conversation();
|
|
|
|
|
|
let range = entries.len().saturating_sub(count)..entries.len();
|
2026-04-02 22:13:55 -04:00
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
let response_positions: Vec<usize> = range.clone()
|
2026-04-08 15:29:52 -04:00
|
|
|
|
.filter(|&i| is_assistant(&entries[i]))
|
2026-04-04 01:33:31 -04:00
|
|
|
|
.collect();
|
|
|
|
|
|
if response_positions.is_empty() {
|
|
|
|
|
|
return Ok(Vec::new());
|
2026-04-03 01:07:47 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
let http = http_client();
|
2026-04-09 20:42:38 -04:00
|
|
|
|
let (divs, _) = score_divergence(&http, client, context, range, Filter::SkipAllMemories, Some(5)).await?;
|
2026-04-02 22:13:55 -04:00
|
|
|
|
|
2026-04-04 01:33:31 -04:00
|
|
|
|
let mut results: Vec<(usize, f64)> = response_positions.iter()
|
|
|
|
|
|
.enumerate()
|
|
|
|
|
|
.map(|(i, &entry_idx)| (entry_idx, divs.get(i).copied().unwrap_or(0.0)))
|
|
|
|
|
|
.collect();
|
|
|
|
|
|
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
|
|
|
|
|
Ok(results)
|
2026-04-02 22:13:55 -04:00
|
|
|
|
}
|