user: F7 compare screen

Side-by-side model comparison against the current conversation context. Built on the MindTriggered pattern — F7 drops in as one more CompareScoring flow next to MemoryScoring / FinetuneScoring.

Motivation: we have the VRAM on the b200 to load two versions of the same family simultaneously (e.g. Qwen3.5 27B bf16 and q8_k_xl). Rather than trust perplexity/KLD numbers on a generic corpus, we can measure divergence on our actual conversations: for each assistant response, ask the test model what it would have said given the same prefix, and eyeball the diffs.

- config.compare.test_backend — names an entry in the existing backends map to use as the test model. Empty = F7 reports "(unset)" and does nothing.
- subconscious::compare::{score_compare_candidates, CompareCandidate, CompareScoringStats, CompareScoring}. For each assistant response, gen_continuation runs with the test client against the same prefix the original response saw; pairs stream into shared.compare_candidates as they complete.
- user::compare::CompareScreen — F7 in the screen list. c/Enter triggers a run; list/detail layout mirroring F6, detail shows prior context / original / test-model alternate.

No persistence yet — each F7 run regenerates. Caching via a context manifest (so we can re-view without re-burning generation) is the natural follow-up; for now light usage is fine.

Also reusable later for validating finetune checkpoints: same pattern, swap the test backend for the new checkpoint, watch where it diverges from the base.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-17 16:01:11 -04:00 · 2026-04-17 16:01:11 -04:00 · 2b03dbb200
commit 2b03dbb200
parent 575325e855
7 changed files with 301 additions and 11 deletions
--- a/src/user/compare.rs
+++ b/src/user/compare.rs
@ -0,0 +1,142 @@
+// compare.rs — F7 compare screen: side-by-side test-model regen of
+// every assistant response in the current context.
+
+use ratatui::{
+    layout::{Constraint, Layout, Rect},
+    style::{Color, Modifier, Style},
+    text::{Line, Span},
+    widgets::{Block, Borders, List, ListItem, ListState, Paragraph, Wrap},
+    Frame,
+};
+use ratatui::crossterm::event::{Event, KeyCode, KeyEvent};
+
+use super::{App, ScreenView, screen_legend, truncate};
+
+pub use crate::subconscious::compare::CompareCandidate;
+
+/// State for the F7 "compare" screen.
+///
+/// Holds only UI-side state; the candidate list itself is read from
+/// `App::compare_candidates` on every tick.
+pub(crate) struct CompareScreen {
+    /// Selection state for the candidate list widget, kept across ticks.
+    list_state: ListState,
+    /// Channel used to send `MindCommand::Compare` when the user presses
+    /// `c` or Enter.
+    mind_tx: tokio::sync::mpsc::UnboundedSender<crate::mind::MindCommand>,
+}
+
+impl CompareScreen {
+    /// Creates the compare screen with no list entry selected.
+    ///
+    /// `mind_tx` is stored and later used to send `MindCommand::Compare`
+    /// when a run is triggered from this screen.
+    pub fn new(
+        mind_tx: tokio::sync::mpsc::UnboundedSender<crate::mind::MindCommand>,
+    ) -> Self {
+        let list_state = ListState::default();
+        CompareScreen { list_state, mind_tx }
+    }
+}
+
+impl ScreenView for CompareScreen {
+    /// Short static label for this screen.
+    fn label(&self) -> &'static str { "compare" }
+
+    /// Handles the pending input events, then draws the compare screen into
+    /// `area`.
+    ///
+    /// Input handling:
+    /// - `Up` / `k` and `Down` / `j` move the list selection, clamped to the
+    ///   current number of candidates.
+    /// - `c` / `Enter` sends `MindCommand::Compare` over `mind_tx`.
+    ///
+    /// Layout: a bordered block with a one-line "test model" header, then
+    /// either a hint (plus the last compare error, if any) when there are no
+    /// candidates, or a 40/60 list/detail split. A help line is drawn on the
+    /// last row of `area`, over the block's bottom border.
+    fn tick(&mut self, frame: &mut Frame, area: Rect,
+            events: &[Event], app: &mut App) {
+        let n = app.compare_candidates.len();
+        for event in events {
+            if let Event::Key(KeyEvent { code, .. }) = event {
+                match code {
+                    KeyCode::Up | KeyCode::Char('k') => {
+                        let i = self.list_state.selected().unwrap_or(0);
+                        self.list_state.select(Some(i.saturating_sub(1)));
+                    }
+                    KeyCode::Down | KeyCode::Char('j') => {
+                        let i = self.list_state.selected().unwrap_or(0);
+                        self.list_state.select(Some((i + 1).min(n.saturating_sub(1))));
+                    }
+                    KeyCode::Char('c') | KeyCode::Enter => {
+                        // Best-effort: a send error is deliberately ignored
+                        // here rather than surfaced from the draw path.
+                        let _ = self.mind_tx.send(crate::mind::MindCommand::Compare);
+                    }
+                    _ => {}
+                }
+            }
+        }
+        // Keep the selection inside the list (and default it to the first
+        // entry) so a stale index never points past the end.
+        if n > 0 {
+            let sel = self.list_state.selected().unwrap_or(0).min(n - 1);
+            self.list_state.select(Some(sel));
+        }
+
+        let test_backend = crate::config::app().compare.test_backend.clone();
+        let block = Block::default()
+            .title_top(Line::from(screen_legend()).left_aligned())
+            .title_top(Line::from(" compare ").right_aligned())
+            .borders(Borders::ALL)
+            .border_style(Style::default().fg(Color::Magenta));
+        let inner = block.inner(area);
+        frame.render_widget(block, area);
+
+        let [settings_area, content_area] = Layout::vertical([
+            Constraint::Length(1), Constraint::Min(0),
+        ]).areas(inner);
+
+        // An empty backend name is shown as a red reminder instead of a
+        // blank field.
+        let (backend_text, backend_color) = if test_backend.is_empty() {
+            ("(unset — set compare.test_backend)", Color::Red)
+        } else {
+            (test_backend.as_str(), Color::Yellow)
+        };
+        frame.render_widget(Paragraph::new(Line::from(vec![
+            Span::raw(" test model: "),
+            Span::styled(backend_text, Style::default().fg(backend_color)),
+        ])), settings_area);
+
+        let candidates = &app.compare_candidates;
+        if candidates.is_empty() {
+            // Nothing to show yet: print the hint and, if present, the
+            // compare error recorded in the mind state.
+            let err = app.mind_state.as_ref().and_then(|ms| ms.compare_error.as_deref());
+            let mut lines = vec![Line::from(""),
+                Line::styled("  Press c/Enter to compare against the configured test model.",
+                    Style::default().fg(Color::DarkGray))];
+            if let Some(e) = err {
+                lines.push(Line::from(""));
+                lines.push(Line::from(vec![
+                    Span::raw("  "),
+                    Span::styled(format!("error: {}", e), Style::default().fg(Color::Red)),
+                ]));
+            }
+            frame.render_widget(Paragraph::new(lines), content_area);
+        } else {
+            let [list_area, detail_area] = Layout::horizontal([
+                Constraint::Percentage(40), Constraint::Percentage(60),
+            ]).areas(content_area);
+
+            // One row per candidate: entry index plus a truncated preview of
+            // the original response.
+            let items: Vec<ListItem> = candidates.iter().map(|c| ListItem::new(Line::from(vec![
+                Span::styled(format!("#{:<3} ", c.entry_idx), Style::default().fg(Color::DarkGray)),
+                Span::raw(truncate(&c.original_text, 30)),
+            ]))).collect();
+            frame.render_stateful_widget(
+                List::new(items)
+                    .block(Block::default().borders(Borders::RIGHT).title(" candidates "))
+                    .highlight_style(Style::default().add_modifier(Modifier::REVERSED)),
+                list_area, &mut self.list_state,
+            );
+
+            if let Some(c) = self.list_state.selected().and_then(|i| candidates.get(i)) {
+                frame.render_widget(
+                    Paragraph::new(detail_text(c))
+                        .block(Block::default().borders(Borders::TOP)
+                            .title(format!(" entry {} ", c.entry_idx)))
+                        .wrap(Wrap { trim: false }),
+                    detail_area,
+                );
+            }
+        }
+
+        // A zero-height area has no last row: `area.height - 1` would
+        // underflow and the help line would land outside `area`, so skip it.
+        if area.height > 0 {
+            let help = Line::from(vec![
+                Span::styled(" j/k/\u{2191}\u{2193}", Style::default().fg(Color::Cyan)),
+                Span::raw("=nav  "),
+                Span::styled("c/Enter", Style::default().fg(Color::Green)),
+                Span::raw("=run "),
+            ]);
+            frame.render_widget(
+                Paragraph::new(help),
+                Rect { y: area.y + (area.height - 1), height: 1, ..area },
+            );
+        }
+    }
+}
+
+/// Builds the detail-pane text for one candidate: the prior context followed
+/// by an "original" separator (both omitted when the context is empty), the
+/// original response, a "test model" separator, and the alternate response.
+fn detail_text(c: &CompareCandidate) -> String {
+    let mut text = String::new();
+    if !c.prior_context.is_empty() {
+        text.push_str(&c.prior_context);
+        text.push_str("\n\n─── original ───\n\n");
+    }
+    text.push_str(&c.original_text);
+    text.push_str("\n\n─── test model ───\n\n");
+    text.push_str(&c.alternate_text);
+    text
+}