learn: skip empty responses; show prior conversation context on F6

Two fixes to the F6 candidate display:

1. Turns where the assistant produced nothing human-visible (an
   interrupted generation, or a turn consisting only of a tool call
   that the renderer folds to the tool name) were landing as
   candidates with an empty response_text. They rendered as blank
   cards and, worse, we still burned a full alternate generation on
   each one. Filter them out before they reach the candidate list.

2. The detail pane showed only the scored response + alternate, with
   no hint of what the user had actually asked. Pre-compute the last
   two user/assistant exchanges on each candidate as a rendered
   prior_context string ([user]/[assistant] markers) and show them
   above the response, under a new "context & response" section
   heading.

render_branch_text and render_prior_context extracted as helpers —
the response-text rendering and prior-context rendering share the
same "flatten Branch children to text" pass.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-16 13:20:03 -04:00
parent 7ef02c97d1
commit 2eddf3b4cf
2 changed files with 76 additions and 16 deletions

View file

@ -474,12 +474,59 @@ pub async fn score_finetune(
Ok(results)
}
/// Concatenate the text of a Branch's Leaf children — what the model
/// actually produced on that turn (Content + Thinking + ToolCall name).
///
/// Only direct `AstNode::Leaf` children contribute; any other child is
/// skipped. Leaf texts are appended in order with no separator, so an
/// empty slice (or one with no leaves) yields an empty string.
fn render_branch_text(children: &[AstNode]) -> String {
    children
        .iter()
        .filter_map(|child| match child {
            AstNode::Leaf(leaf) => Some(leaf.body().text().to_string()),
            _ => None,
        })
        // Collect straight into one String instead of building a
        // Vec<String> only to join it with an empty separator.
        .collect()
}
/// Render the last `max_msgs` user/assistant branches before `idx` as a
/// review-friendly string with `[user]` / `[assistant]` markers.
///
/// Only `AstNode::Branch` entries whose role is `Role::User` or
/// `Role::Assistant` are considered; every other entry is skipped and
/// does not count toward `max_msgs`. Messages are emitted oldest-first,
/// each as its marker on one line followed by the trimmed branch text,
/// with a blank line between messages. Trailing whitespace is removed
/// from the result, and an empty string is returned when nothing
/// qualifies (including `max_msgs == 0`).
///
/// An `idx` past the end of `entries` is treated as `entries.len()`
/// rather than panicking.
fn render_prior_context(entries: &[AstNode], idx: usize, max_msgs: usize) -> String {
    use crate::agent::context::Role;

    // Walk backwards from just before `idx`, keeping the nearest
    // `max_msgs` user/assistant turns together with their marker.
    // `take(idx)` bounds the scan without indexing, so an oversized
    // `idx` cannot panic.
    let picked: Vec<_> = entries
        .iter()
        .take(idx)
        .rev()
        .filter_map(|node| match node {
            AstNode::Branch { role, children, .. } => {
                let marker = match role {
                    Role::User => "[user]",
                    Role::Assistant => "[assistant]",
                    _ => return None,
                };
                Some((marker, children))
            }
            _ => None,
        })
        .take(max_msgs)
        .collect();

    // The scan gathered newest-first; emit in chronological order.
    let mut out = String::new();
    for (marker, children) in picked.into_iter().rev() {
        out.push_str(marker);
        out.push('\n');
        out.push_str(render_branch_text(children).trim());
        out.push_str("\n\n");
    }

    // Drop the trailing separator (and any other trailing whitespace)
    // in place instead of copying the buffer into a new String.
    let kept = out.trim_end().len();
    out.truncate(kept);
    out
}
/// Enriched finetune candidate with context for review.
#[derive(Clone, Debug)]
pub struct FinetuneCandidate {
pub entry_idx: usize,
pub divergence: f64,
pub response_text: String,
/// Last couple of user/assistant messages before this response,
/// already rendered with role markers, for F6 display context.
pub prior_context: String,
/// Token IDs for context (everything before the response).
pub context_ids: Vec<u32>,
/// Token IDs for the response (what we're training on).
@ -529,20 +576,22 @@ pub async fn score_finetune_candidates(
continue;
}
// Extract response text.
// Extract response text — content of the assistant turn.
let response_text = match node {
AstNode::Branch { children, .. } => {
children.iter()
.filter_map(|c| match c {
AstNode::Leaf(leaf) => Some(leaf.body().text().to_string()),
_ => None,
})
.collect::<Vec<_>>()
.join("")
}
AstNode::Branch { children, .. } => render_branch_text(children),
_ => continue,
};
// Skip turns that produced nothing human-visible (e.g., a
// tool-only turn, or an interrupted generation). They'd show
// up as blank cards and we'd still burn alternate-gen on them.
if response_text.trim().is_empty() {
continue;
}
// Build the last couple of user/assistant exchanges for review.
let prior_context = render_prior_context(entries, entry_idx, 2);
// Build token IDs: context = everything before response, continuation = response.
let (context_ids, _) = build_token_ids(context, 0..entry_idx, Filter::None);
let continuation_ids: Vec<u32> = node.token_ids().into_iter().collect();
@ -551,6 +600,7 @@ pub async fn score_finetune_candidates(
entry_idx,
divergence,
response_text,
prior_context,
context_ids,
continuation_ids,
alternate_text: None,