From be6539971005271180098bdf1ff03fe00e601c24 Mon Sep 17 00:00:00 2001
From: ProofOfConcept
Date: Thu, 9 Apr 2026 21:07:00 -0400
Subject: [PATCH] Switch memory scoring from chat messages to raw token IDs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The /score endpoint was receiving chat-format messages which had to go
through the chat template tokenizer — this was failing with "System
message must be first" errors because the AST structure doesn't map
cleanly to chat message format. Send raw token IDs via the new `prompt`
field instead, matching what the /completions endpoint already does.
The vLLM score endpoint finds assistant boundaries by scanning for
<|im_start|>assistant token patterns, so no message-level metadata is
needed.

Also includes identity and journal sections in the scored context,
matching what the model actually sees during inference.

Co-Authored-By: Proof of Concept
---
 src/subconscious/learn.rs | 69 ++++++++++++++++-----------------------
 1 file changed, 29 insertions(+), 40 deletions(-)

diff --git a/src/subconscious/learn.rs b/src/subconscious/learn.rs
index a81f0a4..80aa31e 100644
--- a/src/subconscious/learn.rs
+++ b/src/subconscious/learn.rs
@@ -48,41 +48,25 @@ fn is_assistant(node: &AstNode) -> bool {
     matches!(node, AstNode::Branch { role: Role::Assistant, .. })
 }
 
-/// Push an AstNode as one or more JSON messages for the scoring API.
-fn push_api_message(node: &AstNode, msgs: &mut Vec<serde_json::Value>) {
-    match node {
-        AstNode::Branch { role, children } => {
-            let content: String = children.iter().map(|c| c.render()).collect();
-            msgs.push(serde_json::json!({
-                "role": role.as_str(),
-                "content": content,
-            }));
-        }
-        AstNode::Leaf(leaf) => {
-            let role = match leaf.body() {
-                NodeBody::ToolResult(_) => "tool",
-                _ => "user",
-            };
-            msgs.push(serde_json::json!({
-                "role": role,
-                "content": leaf.body().text(),
-            }));
-        }
-    }
-}
-
-/// Build the messages array for a scoring call.
+/// Build a token ID array for a scoring call.
 ///
-/// Always includes system prompt as prefix, then entries from `range`
-/// filtered by `filter`.
-fn build_messages(
+/// Includes all sections up to and including conversation entries in
+/// `range`, with `filter` applied to conversation entries.
+fn build_token_ids(
     context: &ContextState,
     range: std::ops::Range<usize>,
     filter: Filter,
-) -> Vec<serde_json::Value> {
-    let mut msgs = Vec::new();
+) -> Vec<u32> {
+    use crate::agent::context::Ast;
+    let mut ids = Vec::new();
     for node in context.system() {
-        push_api_message(node, &mut msgs);
+        ids.extend(node.token_ids());
+    }
+    for node in context.identity() {
+        ids.extend(node.token_ids());
+    }
+    for node in context.journal() {
+        ids.extend(node.token_ids());
     }
     let entries = context.conversation();
     for i in range {
@@ -94,9 +78,9 @@
             Filter::SkipAllMemories => is_memory(node),
         };
         if skip { continue; }
-        push_api_message(node, &mut msgs);
+        ids.extend(node.token_ids());
     }
-    msgs
+    ids
 }
 
 // ── Score API ───────────────────────────────────────────────────
@@ -120,14 +104,14 @@ fn http_client() -> crate::agent::api::http::HttpClient {
 async fn call_score(
     http: &crate::agent::api::http::HttpClient,
     client: &ApiClient,
-    messages: &[serde_json::Value],
+    prompt: &[u32],
     priority: Option,
 ) -> anyhow::Result> {
     let url = format!("{}/score", client.base_url());
     let auth = format!("Bearer {}", client.api_key());
     let mut body = serde_json::json!({
         "model": client.model,
-        "messages": messages,
+        "prompt": prompt,
         "logprobs": 1,
     });
     if let Some(p) = priority {
@@ -175,8 +159,8 @@ async fn score_divergence(
     filter: Filter<'_>,
     priority: Option,
 ) -> anyhow::Result<(Vec, Vec)> {
-    let baseline = call_score(http, client, &build_messages(context, range.clone(), Filter::None), priority).await?;
-    let without = call_score(http, client, &build_messages(context, range, filter), priority).await?;
+    let baseline = call_score(http, client, &build_token_ids(context, range.clone(), Filter::None), priority).await?;
+    let without = call_score(http, client, &build_token_ids(context, range, filter), priority).await?;
     let divs = divergence(&baseline, &without);
     Ok((divs, baseline))
 }
@@ -237,7 +221,7 @@ pub async fn score_memories(
     let http = http_client();
     let range = 0..context.conversation().len();
-    let baseline = call_score(&http, client, &build_messages(context, range.clone(), Filter::None), Some(5)).await?;
+    let baseline = call_score(&http, client, &build_token_ids(context, range.clone(), Filter::None), Some(5)).await?;
 
     let total = memory_keys.len();
     let mut matrix: Vec> = Vec::new();
 
@@ -246,7 +230,7 @@
         dbglog!(
             "scoring {}/{}: {}...", mem_idx + 1, total, key,
         );
-        let msgs = build_messages(context, range.clone(), Filter::SkipKey(key));
+        let msgs = build_token_ids(context, range.clone(), Filter::SkipKey(key));
         match call_score(&http, client, &msgs, Some(5)).await {
             Ok(without) => matrix.push(divergence(&baseline, &without)),
             Err(e) => {
@@ -381,15 +365,20 @@ where
         cumulative.push(running);
     }
 
+    dbglog!("[scoring] total_tokens={}, cutoff={}, {} candidates", total_tokens, token_cutoff, candidates.len());
+
     for (pos, key, _) in &candidates {
-        // Only score memories in the first 70% of the conversation by tokens —
+        // Only score memories in the first 60% of the conversation by tokens —
         // recent memories don't have enough responses to evaluate yet.
-        if cumulative.get(*pos).copied().unwrap_or(total_tokens) > token_cutoff {
+        let cum = cumulative.get(*pos).copied().unwrap_or(total_tokens);
+        if cum > token_cutoff {
+            dbglog!("[scoring] skip {} (tokens {}/{} past cutoff)", key, cum, token_cutoff);
             continue;
         }
         let (end, _) = nth_response_end(context.conversation(), *pos, response_window);
         let range = *pos..end;
         if !context.conversation()[range.clone()].iter().any(|node| is_assistant(node)) {
+            dbglog!("[scoring] skip {} (no assistant response in range {}..{})", key, pos, end);
             continue;
         }