reenable memory scoring

Widen name column on F2 conscious screen
Memory node keys were running into the token count column. Bump the name column from 40 to 70 characters.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-09 21:15:32 -04:00 · 2026-04-09 21:13:56 -04:00 · 2026-04-09 21:07:00 -04:00 · 2026-04-09 20:42:38 -04:00
4 changed files with 43 additions and 52 deletions
--- a/src/mind/mod.rs
+++ b/src/mind/mod.rs
@ -527,12 +527,9 @@ impl Mind {
                    }
                    cmds.push(MindCommand::Compact);
                    /*
                     * Broken since the AST context window conversion:
                    if !self.config.no_agents {
                        cmds.push(MindCommand::Score);
                    }
                    */
                }
                _ = tokio::time::sleep(timeout), if !has_input => dmn_expired = true,
--- a/src/subconscious/learn.rs
+++ b/src/subconscious/learn.rs
@ -48,41 +48,25 @@ fn is_assistant(node: &AstNode) -> bool {
    matches!(node, AstNode::Branch { role: Role::Assistant, .. })
 }
-/// Push an AstNode as one or more JSON messages for the scoring API.
+/// Build a token ID array for a scoring call.
 fn push_api_message(node: &AstNode, msgs: &mut Vec<serde_json::Value>) {
    match node {
        AstNode::Branch { role, children } => {
            let content: String = children.iter().map(|c| c.render()).collect();
            msgs.push(serde_json::json!({
                "role": role.as_str(),
                "content": content,
            }));
        }
        AstNode::Leaf(leaf) => {
            let role = match leaf.body() {
                NodeBody::ToolResult(_) => "tool",
                _ => "user",
            };
            msgs.push(serde_json::json!({
                "role": role,
                "content": leaf.body().text(),
            }));
        }
    }
 }
 /// Build the messages array for a scoring call.
 ///
-/// Always includes system prompt as prefix, then entries from `range`
+/// Includes all sections up to and including conversation entries in
-/// filtered by `filter`.
+/// `range`, with `filter` applied to conversation entries.
-fn build_messages(
+fn build_token_ids(
    context: &ContextState,
    range: std::ops::Range<usize>,
    filter: Filter,
-) -> Vec<serde_json::Value> {
+) -> Vec<u32> {
-    let mut msgs = Vec::new();
+    use crate::agent::context::Ast;
    let mut ids = Vec::new();
    for node in context.system() {
-        push_api_message(node, &mut msgs);
+        ids.extend(node.token_ids());
    }
    for node in context.identity() {
        ids.extend(node.token_ids());
    }
    for node in context.journal() {
        ids.extend(node.token_ids());
    }
    let entries = context.conversation();
    for i in range {
@ -94,9 +78,9 @@ fn build_messages(
            Filter::SkipAllMemories => is_memory(node),
        };
        if skip { continue; }
-        push_api_message(node, &mut msgs);
+        ids.extend(node.token_ids());
    }
-    msgs
+    ids
 }
 // ── Score API ───────────────────────────────────────────────────
@ -120,15 +104,19 @@ fn http_client() -> crate::agent::api::http::HttpClient {
 async fn call_score(
    http: &crate::agent::api::http::HttpClient,
    client: &ApiClient,
-    messages: &[serde_json::Value],
+    prompt: &[u32],
    priority: Option<i32>,
 ) -> anyhow::Result<Vec<ScoreResult>> {
    let url = format!("{}/score", client.base_url());
    let auth = format!("Bearer {}", client.api_key());
-    let body = serde_json::json!({
+    let mut body = serde_json::json!({
        "model": client.model,
-        "messages": messages,
+        "prompt": prompt,
        "logprobs": 1,
    });
    if let Some(p) = priority {
        body["priority"] = serde_json::json!(p);
    }
    let response = http
        .send_json("POST", &url, &[
            ("authorization", &auth),
@ -169,9 +157,10 @@ async fn score_divergence(
    context: &ContextState,
    range: std::ops::Range<usize>,
    filter: Filter<'_>,
    priority: Option<i32>,
 ) -> anyhow::Result<(Vec<f64>, Vec<ScoreResult>)> {
-    let baseline = call_score(http, client, &build_messages(context, range.clone(), Filter::None)).await?;
+    let baseline = call_score(http, client, &build_token_ids(context, range.clone(), Filter::None), priority).await?;
-    let without = call_score(http, client, &build_messages(context, range, filter)).await?;
+    let without = call_score(http, client, &build_token_ids(context, range, filter), priority).await?;
    let divs = divergence(&baseline, &without);
    Ok((divs, baseline))
 }
@ -232,7 +221,7 @@ pub async fn score_memories(
    let http = http_client();
    let range = 0..context.conversation().len();
-    let baseline = call_score(&http, client, &build_messages(context, range.clone(), Filter::None)).await?;
+    let baseline = call_score(&http, client, &build_token_ids(context, range.clone(), Filter::None), Some(5)).await?;
    let total = memory_keys.len();
    let mut matrix: Vec<Vec<f64>> = Vec::new();
@ -241,8 +230,8 @@ pub async fn score_memories(
        dbglog!(
            "scoring {}/{}: {}...", mem_idx + 1, total, key,
        );
-        let msgs = build_messages(context, range.clone(), Filter::SkipKey(key));
+        let msgs = build_token_ids(context, range.clone(), Filter::SkipKey(key));
-        match call_score(&http, client, &msgs).await {
+        match call_score(&http, client, &msgs, Some(5)).await {
            Ok(without) => matrix.push(divergence(&baseline, &without)),
            Err(e) => {
                dbglog!(
@ -312,7 +301,7 @@ pub async fn score_memory(
    }
    let http = http_client();
-    let (divs, _) = score_divergence(&http, client, context, range, Filter::SkipKey(key)).await?;
+    let (divs, _) = score_divergence(&http, client, context, range, Filter::SkipKey(key), Some(5)).await?;
    Ok(divs.iter().sum())
 }
@ -376,20 +365,25 @@ where
        cumulative.push(running);
    }
    dbglog!("[scoring] total_tokens={}, cutoff={}, {} candidates", total_tokens, token_cutoff, candidates.len());
    for (pos, key, _) in &candidates {
-        // Only score memories in the first 70% of the conversation by tokens —
+        // Only score memories in the first 60% of the conversation by tokens —
        // recent memories don't have enough responses to evaluate yet.
-        if cumulative.get(*pos).copied().unwrap_or(total_tokens) > token_cutoff {
+        let cum = cumulative.get(*pos).copied().unwrap_or(total_tokens);
        if cum > token_cutoff {
            dbglog!("[scoring] skip {} (tokens {}/{} past cutoff)", key, cum, token_cutoff);
            continue;
        }
        let (end, _) = nth_response_end(context.conversation(), *pos, response_window);
        let range = *pos..end;
        if !context.conversation()[range.clone()].iter().any(|node| is_assistant(node)) {
            dbglog!("[scoring] skip {} (no assistant response in range {}..{})", key, pos, end);
            continue;
        }
        let _scoring = crate::agent::start_activity(agent, format!("scoring: {}", key)).await;
-        match score_divergence(&http, client, context, range, Filter::SkipKey(key)).await {
+        match score_divergence(&http, client, context, range, Filter::SkipKey(key), Some(5)).await {
            Ok((divs, _)) => {
                let n_responses = divs.len();
                let max_div = divs.iter().cloned().fold(0.0f64, f64::max);
@ -435,7 +429,7 @@ pub async fn score_finetune(
    }
    let http = http_client();
-    let (divs, _) = score_divergence(&http, client, context, range, Filter::SkipAllMemories).await?;
+    let (divs, _) = score_divergence(&http, client, context, range, Filter::SkipAllMemories, Some(5)).await?;
    let mut results: Vec<(usize, f64)> = response_positions.iter()
        .enumerate()
--- a/src/user/context.rs
+++ b/src/user/context.rs
@ -110,8 +110,8 @@ impl ScreenView for ConsciousScreen {
            self.tree.render_sections(&context_state, &mut lines);
-            lines.push(Line::raw(format!("  {:23} {:>6} tokens", "────────", "──────")));
+            lines.push(Line::raw(format!("  {:53} {:>6} tokens", "────────", "──────")));
-            lines.push(Line::raw(format!("  {:23} {:>6} tokens", "Total", total)));
+            lines.push(Line::raw(format!("  {:53} {:>6} tokens", "Total", total)));
        } else if let Some(ref info) = app.context_info {
            lines.push(Line::raw(format!("  System prompt:   {:>6} chars", info.system_prompt_chars)));
            lines.push(Line::raw(format!("  Context message: {:>6} chars", info.context_message_chars)));
--- a/src/user/widgets.rs
+++ b/src/user/widgets.rs
@ -259,9 +259,9 @@ impl SectionTree {
        let name_col = format!("{}{} {}", indent, marker, section.name);
        let tokens_col = format!("{:>6} tokens", section.tokens);
        let label = if section.status.is_empty() {
-            format!("{:40} {}", name_col, tokens_col)
+            format!("{:70} {}", name_col, tokens_col)
        } else {
-            format!("{:40} {:16} {}", name_col, tokens_col, section.status)
+            format!("{:70} {:16} {}", name_col, tokens_col, section.status)
        };
        let style = if selected {
            Style::default().fg(Color::Yellow).add_modifier(Modifier::BOLD)
Author	SHA1	Message	Date
Kent Overstreet	d2c0ef61a1	reenable memory scoring	2026-04-09 21:15:32 -04:00
ProofOfConcept	b116b3536e	Widen name column on F2 conscious screen Memory node keys were running into the token count column. Bump the name column from 40 to 70 characters. Co-Authored-By: Proof of Concept <poc@bcachefs.org>	2026-04-09 21:13:56 -04:00
ProofOfConcept	be65399710	Switch memory scoring from chat messages to raw token IDs The /score endpoint was receiving chat-format messages which had to go through the chat template tokenizer — this was failing with "System message must be first" errors because the AST structure doesn't map cleanly to chat message format. Send raw token IDs via the new `prompt` field instead, matching what the /completions endpoint already does. The vLLM score endpoint finds assistant boundaries by scanning for <\|im_start\|>assistant token patterns, so no message-level metadata is needed. Also includes identity and journal sections in the scored context, matching what the model actually sees during inference. Co-Authored-By: Proof of Concept <poc@bcachefs.org>	2026-04-09 21:07:00 -04:00
ProofOfConcept	67332eb55e	Add vLLM priority to memory scoring requests Scoring calls the /score endpoint directly via HTTP, bypassing the stream_completion path. These requests had no priority field, so they could preempt interactive work. Set priority=5 (between subconscious agents at 2 and unconscious at 10). Co-Authored-By: Proof of Concept <poc@bcachefs.org>	2026-04-09 20:42:38 -04:00