user: F7 compare screen

Side-by-side model comparison against the current conversation context.
Built on the MindTriggered pattern — F7 drops in as one more
CompareScoring flow next to MemoryScoring / FinetuneScoring.

Motivation: we have the VRAM on the b200 to load two versions of the
same family simultaneously (e.g. Qwen3.5 27B bf16 and q8_k_xl). Rather
than trust perplexity/KLD numbers on a generic corpus, we can measure
divergence on our actual conversations: for each assistant response,
ask the test model what it would have said given the same prefix, and
eyeball the diffs.

 - config.compare.test_backend — names an entry in the existing
   backends map to use as the test model. Empty = F7 reports "(unset)"
   and does nothing.

 - subconscious::compare::{score_compare_candidates, CompareCandidate,
   CompareScoringStats, CompareScoring}. For each assistant response,
   gen_continuation runs with the test client against the same prefix
   the original response saw; pairs stream into
   shared.compare_candidates as they complete.

 - user::compare::CompareScreen — F7 in the screen list. c/Enter
   triggers a run; list/detail layout mirroring F6, detail shows
   prior context / original / test-model alternate.

No persistence yet — each F7 run regenerates. Caching via a context
manifest (so we can re-view without re-burning generation) is the
natural follow-up; for now light usage is fine.

Also reusable later for validating finetune checkpoints: same pattern,
swap the test backend for the new checkpoint, watch where it diverges
from the base.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-17 16:01:11 -04:00
parent 575325e855
commit 2b03dbb200
7 changed files with 301 additions and 11 deletions

142
src/user/compare.rs Normal file
View file

@ -0,0 +1,142 @@
// compare.rs — F7 compare screen: side-by-side test-model regen of
// every assistant response in the current context.
use ratatui::{
layout::{Constraint, Layout, Rect},
style::{Color, Modifier, Style},
text::{Line, Span},
widgets::{Block, Borders, List, ListItem, ListState, Paragraph, Wrap},
Frame,
};
use ratatui::crossterm::event::{Event, KeyCode, KeyEvent};
use super::{App, ScreenView, screen_legend, truncate};
pub use crate::subconscious::compare::CompareCandidate;
/// F7 compare screen state: the candidate-list selection plus the channel
/// used to send commands to the mind.
pub(crate) struct CompareScreen {
/// Selection state for the candidate list widget.
list_state: ListState,
/// Sender for `MindCommand`s; `tick` sends `MindCommand::Compare` on c/Enter.
mind_tx: tokio::sync::mpsc::UnboundedSender<crate::mind::MindCommand>,
}
impl CompareScreen {
    /// Create a compare screen with a default (empty) list selection.
    ///
    /// `mind_tx` is stored and used later to send commands to the mind.
    pub fn new(
        mind_tx: tokio::sync::mpsc::UnboundedSender<crate::mind::MindCommand>,
    ) -> Self {
        let list_state = ListState::default();
        Self { list_state, mind_tx }
    }
}
impl ScreenView for CompareScreen {
    /// Label for this screen.
    fn label(&self) -> &'static str { "compare" }

    /// Process pending input events, then draw the compare screen into `area`.
    ///
    /// Keys: Up/`k` and Down/`j` move the list selection; `c`/Enter sends
    /// `MindCommand::Compare` over `mind_tx`. The screen shows a one-line
    /// "test model" header, then either a hint (plus any compare error from
    /// `app.mind_state`) when there are no candidates, or a list/detail split.
    fn tick(&mut self, frame: &mut Frame, area: Rect,
            events: &[Event], app: &mut App) {
        let n = app.compare_candidates.len();

        for event in events {
            if let Event::Key(KeyEvent { code, .. }) = event {
                match code {
                    KeyCode::Up | KeyCode::Char('k') => {
                        let i = self.list_state.selected().unwrap_or(0);
                        self.list_state.select(Some(i.saturating_sub(1)));
                    }
                    KeyCode::Down | KeyCode::Char('j') => {
                        let i = self.list_state.selected().unwrap_or(0);
                        self.list_state.select(Some((i + 1).min(n.saturating_sub(1))));
                    }
                    KeyCode::Char('c') | KeyCode::Enter => {
                        // Best-effort: a failed send is ignored.
                        let _ = self.mind_tx.send(crate::mind::MindCommand::Compare);
                    }
                    _ => {}
                }
            }
        }

        // Keep the selection inside the current candidate list; this also
        // selects the first entry when nothing is selected yet.
        if n > 0 {
            let sel = self.list_state.selected().unwrap_or(0).min(n - 1);
            self.list_state.select(Some(sel));
        }

        let test_backend = crate::config::app().compare.test_backend.clone();

        let block = Block::default()
            .title_top(Line::from(screen_legend()).left_aligned())
            .title_top(Line::from(" compare ").right_aligned())
            .borders(Borders::ALL)
            .border_style(Style::default().fg(Color::Magenta));
        let inner = block.inner(area);
        frame.render_widget(block, area);

        let [settings_area, content_area] = Layout::vertical([
            Constraint::Length(1), Constraint::Min(0),
        ]).areas(inner);

        // Header line: configured test backend, or a red hint when unset.
        let backend_label = if test_backend.is_empty() {
            ("(unset — set compare.test_backend)", Color::Red)
        } else {
            (test_backend.as_str(), Color::Yellow)
        };
        frame.render_widget(Paragraph::new(Line::from(vec![
            Span::raw(" test model: "),
            Span::styled(backend_label.0.to_string(), Style::default().fg(backend_label.1)),
        ])), settings_area);

        let candidates = &app.compare_candidates;
        if candidates.is_empty() {
            // Nothing to show yet: usage hint plus the last compare error, if any.
            let err = app.mind_state.as_ref().and_then(|ms| ms.compare_error.as_deref());
            let mut lines = vec![Line::from(""),
                Line::styled(" Press c/Enter to compare against the configured test model.",
                    Style::default().fg(Color::DarkGray))];
            if let Some(e) = err {
                lines.push(Line::from(""));
                lines.push(Line::from(vec![
                    Span::raw(" "),
                    Span::styled(format!("error: {}", e), Style::default().fg(Color::Red)),
                ]));
            }
            frame.render_widget(Paragraph::new(lines), content_area);
        } else {
            let [list_area, detail_area] = Layout::horizontal([
                Constraint::Percentage(40), Constraint::Percentage(60),
            ]).areas(content_area);

            // Left pane: one row per candidate (entry index + truncated original).
            let items: Vec<ListItem> = candidates.iter().map(|c| ListItem::new(Line::from(vec![
                Span::styled(format!("#{:<3} ", c.entry_idx), Style::default().fg(Color::DarkGray)),
                Span::raw(truncate(&c.original_text, 30)),
            ]))).collect();
            frame.render_stateful_widget(
                List::new(items)
                    .block(Block::default().borders(Borders::RIGHT).title(" candidates "))
                    .highlight_style(Style::default().add_modifier(Modifier::REVERSED)),
                list_area, &mut self.list_state,
            );

            // Right pane: prior context (if any), original, then test-model text.
            if let Some(c) = self.list_state.selected().and_then(|i| candidates.get(i)) {
                let mut text = String::new();
                if !c.prior_context.is_empty() {
                    text.push_str(&c.prior_context);
                    text.push_str("\n\n─── original ───\n\n");
                }
                text.push_str(&c.original_text);
                text.push_str("\n\n─── test model ───\n\n");
                text.push_str(&c.alternate_text);
                frame.render_widget(
                    Paragraph::new(text)
                        .block(Block::default().borders(Borders::TOP)
                            .title(format!(" entry {} ", c.entry_idx)))
                        .wrap(Wrap { trim: false }),
                    detail_area,
                );
            }
        }

        // The help row is drawn on the last row of `area`. Skip it for a
        // zero-height area: `area.y + area.height - 1` would underflow (u16)
        // or name a row outside `area`.
        if area.height > 0 {
            let help = Line::from(vec![
                Span::styled(" j/k/\u{2191}\u{2193}", Style::default().fg(Color::Cyan)),
                Span::raw("=nav "),
                Span::styled("c/Enter", Style::default().fg(Color::Green)),
                Span::raw("=run "),
            ]);
            frame.render_widget(
                Paragraph::new(help),
                Rect { y: area.y + area.height - 1, height: 1, ..area },
            );
        }
    }
}

View file

@ -12,7 +12,7 @@ use ratatui::{
};
use ratatui::crossterm::event::{Event, KeyCode, KeyEvent};
use super::{App, ScreenView, screen_legend};
use super::{App, ScreenView, screen_legend, truncate};
/// A candidate response identified for fine-tuning.
#[derive(Clone, Debug)]
@ -331,11 +331,3 @@ fn render_detail(frame: &mut Frame, c: &FinetuneCandidate, area: Rect) {
frame.render_widget(content, content_area);
}
/// Return the first line of `s`, cut to at most `max` characters; a `...`
/// suffix is appended only when something was cut off.
///
/// The cut is made on a character boundary, so multi-byte UTF-8 text never
/// causes a slicing panic.
fn truncate(s: &str, max: usize) -> String {
    let first_line = s.lines().next().unwrap_or("");
    // Byte offset of the character at index `max`, if the line is that long.
    match first_line.char_indices().nth(max) {
        Some((cut, _)) => format!("{}...", &first_line[..cut]),
        None => first_line.to_string(),
    }
}

View file

@ -4,6 +4,7 @@
// machine, DMN, identity) lives in mind/.
pub(crate) mod chat;
pub(crate) mod compare;
mod context;
pub(crate) mod learn;
pub(crate) mod scroll_pane;
@ -64,6 +65,13 @@ fn screen_legend() -> String {
SCREEN_LEGEND.get().cloned().unwrap_or_default()
}
/// Return the first line of `s`, truncated to `max` chars with an
/// ellipsis suffix. Used by candidate-list screens.
///
/// Lines of `max` characters or fewer are returned unchanged (no suffix).
/// Truncation happens on a character boundary, so multi-byte UTF-8 text
/// never causes a slicing panic.
fn truncate(s: &str, max: usize) -> String {
    let first = s.lines().next().unwrap_or("");
    // Byte offset of the character at index `max`, if the line is that long.
    match first.char_indices().nth(max) {
        Some((cut, _)) => format!("{}...", &first[..cut]),
        None => first.to_string(),
    }
}
/// A screen that can draw itself and handle input.
trait ScreenView: Send {
fn tick(&mut self, frame: &mut ratatui::Frame, area: ratatui::layout::Rect,
@ -114,6 +122,8 @@ struct App {
idle_info: Option<IdleInfo>,
/// Fine-tuning candidates pending review.
finetune_candidates: Vec<learn::FinetuneCandidate>,
/// F7 compare candidates — response pairs from test-model comparison.
compare_candidates: Vec<compare::CompareCandidate>,
}
impl App {
@ -144,6 +154,7 @@ impl App {
walked_count: 0,
channel_status: Vec::new(), idle_info: None,
finetune_candidates: Vec::new(),
compare_candidates: Vec::new(),
}
}
@ -372,7 +383,7 @@ async fn run(
}
let notify_rx = crate::thalamus::channels::subscribe_all();
// F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus, F6=learn
// F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus, F6=learn, F7=compare
let mut screens: Vec<Box<dyn tui::ScreenView>> = vec![
Box::new(crate::user::chat::InteractScreen::new(
mind.agent.clone(), mind.shared.clone(), mind_tx.clone(),
@ -382,6 +393,7 @@ async fn run(
Box::new(crate::user::unconscious::UnconsciousScreen::new()),
Box::new(crate::user::thalamus::ThalamusScreen::new()),
Box::new(crate::user::learn::LearnScreen::new(mind_tx.clone())),
Box::new(crate::user::compare::CompareScreen::new(mind_tx.clone())),
];
let mut active_screen: usize = 1; // F-key number
tui::set_screen_legend(tui::screen_legend_from(&*screens));
@ -505,6 +517,9 @@ async fn run(
});
}
// Sync compare candidates — a fresh run clears, so take a snapshot.
app.compare_candidates = ms.compare_candidates.clone();
app.mind_state = Some(ms.clone());
}
app.walked_count = mind.subconscious_walked().await.len();