Compare commits

..

4 commits

Author SHA1 Message Date
Kent Overstreet
d2c0ef61a1 Re-enable memory scoring 2026-04-09 21:15:32 -04:00
ProofOfConcept
b116b3536e Widen name column on F2 conscious screen
Memory node keys were running into the token count column. Bump the
name column from 40 to 70 characters.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-09 21:13:56 -04:00
ProofOfConcept
be65399710 Switch memory scoring from chat messages to raw token IDs
The /score endpoint was receiving chat-format messages which had to go
through the chat template tokenizer — this was failing with "System
message must be first" errors because the AST structure doesn't map
cleanly to chat message format.

Send raw token IDs via the new `prompt` field instead, matching what
the /completions endpoint already does. The vLLM score endpoint finds
assistant boundaries by scanning for <|im_start|>assistant token
patterns, so no message-level metadata is needed.

Also includes identity and journal sections in the scored context,
matching what the model actually sees during inference.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-09 21:07:00 -04:00
ProofOfConcept
67332eb55e Add vLLM priority to memory scoring requests
Scoring calls the /score endpoint directly via HTTP, bypassing the
stream_completion path. These requests had no priority field, so they
could preempt interactive work. Set priority=5 (between subconscious
agents at 2 and unconscious at 10).

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-09 20:42:38 -04:00
4 changed files with 43 additions and 52 deletions

View file

@ -527,12 +527,9 @@ impl Mind {
} }
cmds.push(MindCommand::Compact); cmds.push(MindCommand::Compact);
/*
* Broken since the AST context window conversion:
if !self.config.no_agents { if !self.config.no_agents {
cmds.push(MindCommand::Score); cmds.push(MindCommand::Score);
} }
*/
} }
_ = tokio::time::sleep(timeout), if !has_input => dmn_expired = true, _ = tokio::time::sleep(timeout), if !has_input => dmn_expired = true,

View file

@ -48,41 +48,25 @@ fn is_assistant(node: &AstNode) -> bool {
matches!(node, AstNode::Branch { role: Role::Assistant, .. }) matches!(node, AstNode::Branch { role: Role::Assistant, .. })
} }
/// Push an AstNode as one or more JSON messages for the scoring API. /// Build a token ID array for a scoring call.
fn push_api_message(node: &AstNode, msgs: &mut Vec<serde_json::Value>) {
match node {
AstNode::Branch { role, children } => {
let content: String = children.iter().map(|c| c.render()).collect();
msgs.push(serde_json::json!({
"role": role.as_str(),
"content": content,
}));
}
AstNode::Leaf(leaf) => {
let role = match leaf.body() {
NodeBody::ToolResult(_) => "tool",
_ => "user",
};
msgs.push(serde_json::json!({
"role": role,
"content": leaf.body().text(),
}));
}
}
}
/// Build the messages array for a scoring call.
/// ///
/// Always includes system prompt as prefix, then entries from `range` /// Includes all sections up to and including conversation entries in
/// filtered by `filter`. /// `range`, with `filter` applied to conversation entries.
fn build_messages( fn build_token_ids(
context: &ContextState, context: &ContextState,
range: std::ops::Range<usize>, range: std::ops::Range<usize>,
filter: Filter, filter: Filter,
) -> Vec<serde_json::Value> { ) -> Vec<u32> {
let mut msgs = Vec::new(); use crate::agent::context::Ast;
let mut ids = Vec::new();
for node in context.system() { for node in context.system() {
push_api_message(node, &mut msgs); ids.extend(node.token_ids());
}
for node in context.identity() {
ids.extend(node.token_ids());
}
for node in context.journal() {
ids.extend(node.token_ids());
} }
let entries = context.conversation(); let entries = context.conversation();
for i in range { for i in range {
@ -94,9 +78,9 @@ fn build_messages(
Filter::SkipAllMemories => is_memory(node), Filter::SkipAllMemories => is_memory(node),
}; };
if skip { continue; } if skip { continue; }
push_api_message(node, &mut msgs); ids.extend(node.token_ids());
} }
msgs ids
} }
// ── Score API ─────────────────────────────────────────────────── // ── Score API ───────────────────────────────────────────────────
@ -120,15 +104,19 @@ fn http_client() -> crate::agent::api::http::HttpClient {
async fn call_score( async fn call_score(
http: &crate::agent::api::http::HttpClient, http: &crate::agent::api::http::HttpClient,
client: &ApiClient, client: &ApiClient,
messages: &[serde_json::Value], prompt: &[u32],
priority: Option<i32>,
) -> anyhow::Result<Vec<ScoreResult>> { ) -> anyhow::Result<Vec<ScoreResult>> {
let url = format!("{}/score", client.base_url()); let url = format!("{}/score", client.base_url());
let auth = format!("Bearer {}", client.api_key()); let auth = format!("Bearer {}", client.api_key());
let body = serde_json::json!({ let mut body = serde_json::json!({
"model": client.model, "model": client.model,
"messages": messages, "prompt": prompt,
"logprobs": 1, "logprobs": 1,
}); });
if let Some(p) = priority {
body["priority"] = serde_json::json!(p);
}
let response = http let response = http
.send_json("POST", &url, &[ .send_json("POST", &url, &[
("authorization", &auth), ("authorization", &auth),
@ -169,9 +157,10 @@ async fn score_divergence(
context: &ContextState, context: &ContextState,
range: std::ops::Range<usize>, range: std::ops::Range<usize>,
filter: Filter<'_>, filter: Filter<'_>,
priority: Option<i32>,
) -> anyhow::Result<(Vec<f64>, Vec<ScoreResult>)> { ) -> anyhow::Result<(Vec<f64>, Vec<ScoreResult>)> {
let baseline = call_score(http, client, &build_messages(context, range.clone(), Filter::None)).await?; let baseline = call_score(http, client, &build_token_ids(context, range.clone(), Filter::None), priority).await?;
let without = call_score(http, client, &build_messages(context, range, filter)).await?; let without = call_score(http, client, &build_token_ids(context, range, filter), priority).await?;
let divs = divergence(&baseline, &without); let divs = divergence(&baseline, &without);
Ok((divs, baseline)) Ok((divs, baseline))
} }
@ -232,7 +221,7 @@ pub async fn score_memories(
let http = http_client(); let http = http_client();
let range = 0..context.conversation().len(); let range = 0..context.conversation().len();
let baseline = call_score(&http, client, &build_messages(context, range.clone(), Filter::None)).await?; let baseline = call_score(&http, client, &build_token_ids(context, range.clone(), Filter::None), Some(5)).await?;
let total = memory_keys.len(); let total = memory_keys.len();
let mut matrix: Vec<Vec<f64>> = Vec::new(); let mut matrix: Vec<Vec<f64>> = Vec::new();
@ -241,8 +230,8 @@ pub async fn score_memories(
dbglog!( dbglog!(
"scoring {}/{}: {}...", mem_idx + 1, total, key, "scoring {}/{}: {}...", mem_idx + 1, total, key,
); );
let msgs = build_messages(context, range.clone(), Filter::SkipKey(key)); let msgs = build_token_ids(context, range.clone(), Filter::SkipKey(key));
match call_score(&http, client, &msgs).await { match call_score(&http, client, &msgs, Some(5)).await {
Ok(without) => matrix.push(divergence(&baseline, &without)), Ok(without) => matrix.push(divergence(&baseline, &without)),
Err(e) => { Err(e) => {
dbglog!( dbglog!(
@ -312,7 +301,7 @@ pub async fn score_memory(
} }
let http = http_client(); let http = http_client();
let (divs, _) = score_divergence(&http, client, context, range, Filter::SkipKey(key)).await?; let (divs, _) = score_divergence(&http, client, context, range, Filter::SkipKey(key), Some(5)).await?;
Ok(divs.iter().sum()) Ok(divs.iter().sum())
} }
@ -376,20 +365,25 @@ where
cumulative.push(running); cumulative.push(running);
} }
dbglog!("[scoring] total_tokens={}, cutoff={}, {} candidates", total_tokens, token_cutoff, candidates.len());
for (pos, key, _) in &candidates { for (pos, key, _) in &candidates {
// Only score memories in the first 70% of the conversation by tokens — // Only score memories in the first 60% of the conversation by tokens —
// recent memories don't have enough responses to evaluate yet. // recent memories don't have enough responses to evaluate yet.
if cumulative.get(*pos).copied().unwrap_or(total_tokens) > token_cutoff { let cum = cumulative.get(*pos).copied().unwrap_or(total_tokens);
if cum > token_cutoff {
dbglog!("[scoring] skip {} (tokens {}/{} past cutoff)", key, cum, token_cutoff);
continue; continue;
} }
let (end, _) = nth_response_end(context.conversation(), *pos, response_window); let (end, _) = nth_response_end(context.conversation(), *pos, response_window);
let range = *pos..end; let range = *pos..end;
if !context.conversation()[range.clone()].iter().any(|node| is_assistant(node)) { if !context.conversation()[range.clone()].iter().any(|node| is_assistant(node)) {
dbglog!("[scoring] skip {} (no assistant response in range {}..{})", key, pos, end);
continue; continue;
} }
let _scoring = crate::agent::start_activity(agent, format!("scoring: {}", key)).await; let _scoring = crate::agent::start_activity(agent, format!("scoring: {}", key)).await;
match score_divergence(&http, client, context, range, Filter::SkipKey(key)).await { match score_divergence(&http, client, context, range, Filter::SkipKey(key), Some(5)).await {
Ok((divs, _)) => { Ok((divs, _)) => {
let n_responses = divs.len(); let n_responses = divs.len();
let max_div = divs.iter().cloned().fold(0.0f64, f64::max); let max_div = divs.iter().cloned().fold(0.0f64, f64::max);
@ -435,7 +429,7 @@ pub async fn score_finetune(
} }
let http = http_client(); let http = http_client();
let (divs, _) = score_divergence(&http, client, context, range, Filter::SkipAllMemories).await?; let (divs, _) = score_divergence(&http, client, context, range, Filter::SkipAllMemories, Some(5)).await?;
let mut results: Vec<(usize, f64)> = response_positions.iter() let mut results: Vec<(usize, f64)> = response_positions.iter()
.enumerate() .enumerate()

View file

@ -110,8 +110,8 @@ impl ScreenView for ConsciousScreen {
self.tree.render_sections(&context_state, &mut lines); self.tree.render_sections(&context_state, &mut lines);
lines.push(Line::raw(format!(" {:23} {:>6} tokens", "────────", "──────"))); lines.push(Line::raw(format!(" {:53} {:>6} tokens", "────────", "──────")));
lines.push(Line::raw(format!(" {:23} {:>6} tokens", "Total", total))); lines.push(Line::raw(format!(" {:53} {:>6} tokens", "Total", total)));
} else if let Some(ref info) = app.context_info { } else if let Some(ref info) = app.context_info {
lines.push(Line::raw(format!(" System prompt: {:>6} chars", info.system_prompt_chars))); lines.push(Line::raw(format!(" System prompt: {:>6} chars", info.system_prompt_chars)));
lines.push(Line::raw(format!(" Context message: {:>6} chars", info.context_message_chars))); lines.push(Line::raw(format!(" Context message: {:>6} chars", info.context_message_chars)));

View file

@ -259,9 +259,9 @@ impl SectionTree {
let name_col = format!("{}{} {}", indent, marker, section.name); let name_col = format!("{}{} {}", indent, marker, section.name);
let tokens_col = format!("{:>6} tokens", section.tokens); let tokens_col = format!("{:>6} tokens", section.tokens);
let label = if section.status.is_empty() { let label = if section.status.is_empty() {
format!("{:40} {}", name_col, tokens_col) format!("{:70} {}", name_col, tokens_col)
} else { } else {
format!("{:40} {:16} {}", name_col, tokens_col, section.status) format!("{:70} {:16} {}", name_col, tokens_col, section.status)
}; };
let style = if selected { let style = if selected {
Style::default().fg(Color::Yellow).add_modifier(Modifier::BOLD) Style::default().fg(Color::Yellow).add_modifier(Modifier::BOLD)