From 2b03dbb20006b15f19b96a2f911a8fd0de934b07 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Fri, 17 Apr 2026 16:01:11 -0400
Subject: [PATCH] user: F7 compare screen
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Side-by-side model comparison against the current conversation context.
Built on the MindTriggered pattern — F7 drops in as one more
CompareScoring flow next to MemoryScoring / FinetuneScoring.

Motivation: we have the VRAM on the b200 to load two versions of the
same family simultaneously (e.g. Qwen3.5 27B bf16 and q8_k_xl). Rather
than trust perplexity/KLD numbers on a generic corpus, we can measure
divergence on our actual conversations: for each assistant response,
ask the test model what it would have said given the same prefix, and
eyeball the diffs.

- config.compare.test_backend — names an entry in the existing
  backends map to use as the test model. Empty = F7 reports "(unset)"
  and does nothing.

- subconscious::compare::{CompareCandidate, CompareScoring}, driven by
  a private run() task. For each assistant response, gen_continuation
  runs with the test client against the same prefix the original
  response saw; pairs stream into shared.compare_candidates as they
  complete.

- user::compare::CompareScreen — F7 in the screen list. c/Enter
  triggers a run; list/detail layout mirroring F6, detail shows prior
  context / original / test-model alternate.

No persistence yet — each F7 run regenerates. Caching via a context
manifest (so we can re-view without re-burning generation) is the
natural follow-up; for now light usage is fine.

Also reusable later for validating finetune checkpoints: same pattern,
swap the test backend for the new checkpoint, watch where it diverges
from the base.

Co-Authored-By: Proof of Concept
---
 src/config.rs               |  13 ++++
 src/mind/mod.rs             |  20 ++++-
 src/subconscious/compare.rs | 109 +++++++++++++++++++++++++++
 src/subconscious/mod.rs     |   1 +
 src/user/compare.rs         | 142 ++++++++++++++++++++++++++++++++++++
 src/user/learn.rs           |  10 +--
 src/user/mod.rs             |  17 ++++-
 7 files changed, 301 insertions(+), 11 deletions(-)
 create mode 100644 src/subconscious/compare.rs
 create mode 100644 src/user/compare.rs

diff --git a/src/config.rs b/src/config.rs
index 6323aae..209bdc1 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -250,6 +250,8 @@ pub struct AppConfig {
     #[serde(default)]
     pub learn: LearnConfig,
     #[serde(default)]
+    pub compare: CompareConfig,
+    #[serde(default)]
     pub mcp_servers: Vec,
     #[serde(default)]
     pub lsp_servers: Vec,
@@ -323,6 +325,16 @@ impl Default for LearnConfig {
     }
 }
 
+/// Settings for the F7 compare screen — side-by-side generation with a
+/// test model against the current context.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct CompareConfig {
+    /// Backend name (looked up in `backends`) to use as the test model.
+    /// Empty = F7 shows the test model as unset and a run only records an error.
+    #[serde(default)]
+    pub test_backend: String,
+}
+
 fn default_user_name() -> String { "User".into() }
 fn default_assistant_name() -> String { "Assistant".into() }
 
@@ -340,6 +352,7 @@ impl Default for AppConfig {
             },
             dmn: DmnConfig { max_turns: 20 },
             learn: LearnConfig::default(),
+            compare: CompareConfig::default(),
             mcp_servers: Vec::new(),
             lsp_servers: Vec::new(),
         }
diff --git a/src/mind/mod.rs b/src/mind/mod.rs
index 4ca97ea..f526b10 100644
--- a/src/mind/mod.rs
+++ b/src/mind/mod.rs
@@ -63,7 +63,7 @@ use tokio::sync::mpsc;
 use crate::agent::{Agent, TurnResult};
 use crate::agent::api::ApiClient;
 use crate::config::{AppConfig, SessionConfig};
-use crate::subconscious::learn;
+use crate::subconscious::{compare, learn};
 use crate::hippocampus::access_local;
 
 pub use subconscious::{SubconsciousSnapshot, Subconscious};
@@ -193,6 +193,11 @@ pub struct MindState {
     pub finetune_candidates: Vec,
     /// Last scoring run stats for UI display.
     pub finetune_last_run: Option,
+    /// F7 compare candidates — one per response, showing what the test
+    /// model would say given the same context.
+    pub compare_candidates: Vec<compare::CompareCandidate>,
+    /// F7 compare error from the last run, if any.
+    pub compare_error: Option<String>,
 }
 
 impl Clone for MindState {
@@ -213,6 +218,8 @@ impl Clone for MindState {
             unc_idle_deadline: self.unc_idle_deadline,
             finetune_candidates: self.finetune_candidates.clone(),
             finetune_last_run: self.finetune_last_run.clone(),
+            compare_candidates: self.compare_candidates.clone(),
+            compare_error: self.compare_error.clone(),
         }
     }
 }
@@ -227,6 +234,9 @@ pub enum MindCommand {
     ScoreFull,
     /// Score for finetune candidates
     ScoreFinetune,
+    /// Run F7 compare: generate alternates with the configured test model
+    /// for every assistant response in the context.
+    Compare,
     /// Update the finetune divergence threshold and persist to config.
     SetLearnThreshold(f64),
     /// Toggle alternate-response generation during scoring; persist to config.
@@ -258,6 +268,8 @@ impl MindState {
             unc_idle_deadline: Instant::now() + std::time::Duration::from_secs(60),
             finetune_candidates: Vec::new(),
             finetune_last_run: None,
+            compare_candidates: Vec::new(),
+            compare_error: None,
         }
     }
 
@@ -359,6 +371,7 @@ pub struct Mind {
     conscious_active: tokio::sync::watch::Sender,
     memory_scoring: learn::MemoryScoring,
     finetune_scoring: learn::FinetuneScoring,
+    compare_scoring: compare::CompareScoring,
     _supervisor: crate::thalamus::supervisor::Supervisor,
 }
 
@@ -486,12 +499,14 @@ impl Mind {
         let memory_scoring = learn::MemoryScoring::new(
             agent.clone(), shared.clone(), scores_path);
         let finetune_scoring = learn::FinetuneScoring::new(agent.clone(), shared.clone());
+        let compare_scoring = compare::CompareScoring::new(agent.clone(), shared.clone());
 
         Self { agent, shared, config,
                subconscious, unconscious,
                turn_tx, turn_watch,
                conscious_active,
                memory_scoring, finetune_scoring,
+               compare_scoring,
                _supervisor: sup }
     }
 
@@ -593,6 +608,9 @@ impl Mind {
             MindCommand::ScoreFinetune => {
                 self.finetune_scoring.trigger();
             }
+            MindCommand::Compare => {
+                self.compare_scoring.trigger();
+            }
             MindCommand::SetLearnThreshold(value) => {
                 if let Err(e) = crate::config_writer::set_learn_threshold(value) {
                     dbglog!("[learn] failed to persist threshold {}: {:#}", value, e);
diff --git a/src/subconscious/compare.rs b/src/subconscious/compare.rs
new file mode 100644
index 0000000..f2652ce
--- /dev/null
+++ b/src/subconscious/compare.rs
@@ -0,0 +1,109 @@
+// compare.rs — F7 compare: for each assistant response in the current
+// context, regenerate with a configured test model and emit pairs for
+// side-by-side review.
+
+use std::sync::Arc;
+
+use crate::agent::api::ApiClient;
+use crate::agent::context::{
+    AstNode, Role, render_branch_text, render_prior_context,
+};
+use crate::mind::{MindState, MindTriggered, TaskHandle};
+use crate::subconscious::generate::gen_continuation;
+use crate::subconscious::learn::node_timestamp_ns;
+
+#[derive(Clone, Debug)]
+pub struct CompareCandidate {
+    pub entry_idx: usize,
+    pub original_text: String,
+    pub alternate_text: String,
+    pub prior_context: String,
+    pub timestamp_ns: i64,
+}
+
+pub struct CompareScoring {
+    agent: Arc<crate::agent::Agent>,
+    shared: Arc<std::sync::Mutex<MindState>>,
+    task: TaskHandle,
+}
+
+impl CompareScoring {
+    pub fn new(
+        agent: Arc<crate::agent::Agent>,
+        shared: Arc<std::sync::Mutex<MindState>>,
+    ) -> Self {
+        Self { agent, shared, task: TaskHandle::new() }
+    }
+}
+
+impl MindTriggered for CompareScoring {
+    fn trigger(&self) {
+        self.task.trigger(run(self.agent.clone(), self.shared.clone()));
+    }
+}
+
+fn resolve_test_client() -> Result<ApiClient, String> {
+    let cfg = crate::config::app();
+    let name = cfg.compare.test_backend.clone();
+    if name.is_empty() {
+        return Err("compare.test_backend not set in config".to_string());
+    }
+    let r = cfg.resolve_model(&name).map_err(|e| format!("{:#}", e))?;
+    Ok(ApiClient::new(&r.api_base, &r.api_key, &r.model_id))
+}
+
+async fn run(
+    agent: Arc<crate::agent::Agent>,
+    shared: Arc<std::sync::Mutex<MindState>>,
+) {
+    {
+        let mut s = shared.lock().unwrap();
+        s.compare_candidates.clear();
+        s.compare_error = None;
+    }
+    agent.state.lock().await.changed.notify_one();
+
+    let activity = crate::agent::start_activity(&agent, "compare: scoring...").await;
+
+    let test_client = match resolve_test_client() {
+        Ok(c) => c,
+        Err(e) => {
+            shared.lock().unwrap().compare_error = Some(e);
+            agent.state.lock().await.changed.notify_one();
+            return;
+        }
+    };
+
+    let context = agent.context.lock().await.clone();
+    let entries = context.conversation();
+    let responses: Vec<usize> = entries.iter().enumerate()
+        .filter(|(_, n)| matches!(n, AstNode::Branch { role: Role::Assistant, .. }))
+        .map(|(i, _)| i).collect();
+
+    for (i, entry_idx) in responses.iter().copied().enumerate() {
+        activity.update(format!("compare: {}/{}", i + 1, responses.len())).await;
+
+        let node = &entries[entry_idx];
+        let original_text = match node {
+            AstNode::Branch { children, .. } => render_branch_text(children),
+            _ => continue,
+        };
+        if original_text.trim().is_empty() { continue; }
+
+        let alternate_text = match
+            gen_continuation(&context, entry_idx, |_| false, &test_client).await
+        {
+            Ok(t) => t,
+            Err(e) => { dbglog!("[compare] gen failed at {}: {:#}", entry_idx, e); continue; }
+        };
+
+        shared.lock().unwrap().compare_candidates.push(CompareCandidate {
+            entry_idx,
+            original_text,
+            alternate_text,
+            prior_context: render_prior_context(entries, entry_idx, 2),
+            timestamp_ns: node_timestamp_ns(node),
+        });
+        if let Ok(st) = agent.state.try_lock() { st.changed.notify_one(); }
+    }
+}
diff --git a/src/subconscious/mod.rs b/src/subconscious/mod.rs
index d50f833..1abf25a 100644
--- a/src/subconscious/mod.rs
+++ b/src/subconscious/mod.rs
@@ -1,5 +1,6 @@
 // Agent layer: LLM-powered operations on the memory graph
 
+pub mod compare;
 pub mod daemon;
 pub mod defs;
 pub mod digest;
diff --git a/src/user/compare.rs b/src/user/compare.rs
new file mode 100644
index 0000000..74fb10d
--- /dev/null
+++ b/src/user/compare.rs
@@ -0,0 +1,142 @@
+// compare.rs — F7 compare screen: side-by-side test-model regen of
+// every assistant response in the current context.
+
+use ratatui::{
+    layout::{Constraint, Layout, Rect},
+    style::{Color, Modifier, Style},
+    text::{Line, Span},
+    widgets::{Block, Borders, List, ListItem, ListState, Paragraph, Wrap},
+    Frame,
+};
+use ratatui::crossterm::event::{Event, KeyCode, KeyEvent};
+
+use super::{App, ScreenView, screen_legend, truncate};
+
+pub use crate::subconscious::compare::CompareCandidate;
+
+pub(crate) struct CompareScreen {
+    list_state: ListState,
+    mind_tx: tokio::sync::mpsc::UnboundedSender<crate::mind::MindCommand>,
+}
+
+impl CompareScreen {
+    pub fn new(
+        mind_tx: tokio::sync::mpsc::UnboundedSender<crate::mind::MindCommand>,
+    ) -> Self {
+        Self { list_state: ListState::default(), mind_tx }
+    }
+}
+
+impl ScreenView for CompareScreen {
+    fn label(&self) -> &'static str { "compare" }
+
+    fn tick(&mut self, frame: &mut Frame, area: Rect,
+            events: &[Event], app: &mut App) {
+        let n = app.compare_candidates.len();
+        for event in events {
+            if let Event::Key(KeyEvent { code, .. }) = event {
+                match code {
+                    KeyCode::Up | KeyCode::Char('k') => {
+                        let i = self.list_state.selected().unwrap_or(0);
+                        self.list_state.select(Some(i.saturating_sub(1)));
+                    }
+                    KeyCode::Down | KeyCode::Char('j') => {
+                        let i = self.list_state.selected().unwrap_or(0);
+                        self.list_state.select(Some((i + 1).min(n.saturating_sub(1))));
+                    }
+                    KeyCode::Char('c') | KeyCode::Enter => {
+                        let _ = self.mind_tx.send(crate::mind::MindCommand::Compare);
+                    }
+                    _ => {}
+                }
+            }
+        }
+        if n > 0 {
+            let sel = self.list_state.selected().unwrap_or(0).min(n - 1);
+            self.list_state.select(Some(sel));
+        }
+
+        let test_backend = crate::config::app().compare.test_backend.clone();
+        let block = Block::default()
+            .title_top(Line::from(screen_legend()).left_aligned())
+            .title_top(Line::from(" compare ").right_aligned())
+            .borders(Borders::ALL)
+            .border_style(Style::default().fg(Color::Magenta));
+        let inner = block.inner(area);
+        frame.render_widget(block, area);
+
+        let [settings_area, content_area] = Layout::vertical([
+            Constraint::Length(1), Constraint::Min(0),
+        ]).areas(inner);
+
+        let backend_label = if test_backend.is_empty() {
+            ("(unset — set compare.test_backend)", Color::Red)
+        } else {
+            (test_backend.as_str(), Color::Yellow)
+        };
+        frame.render_widget(Paragraph::new(Line::from(vec![
+            Span::raw(" test model: "),
+            Span::styled(backend_label.0.to_string(), Style::default().fg(backend_label.1)),
+        ])), settings_area);
+
+        let candidates = &app.compare_candidates;
+        if candidates.is_empty() {
+            let err = app.mind_state.as_ref().and_then(|ms| ms.compare_error.as_deref());
+            let mut lines = vec![Line::from(""),
+                Line::styled(" Press c/Enter to compare against the configured test model.",
+                    Style::default().fg(Color::DarkGray))];
+            if let Some(e) = err {
+                lines.push(Line::from(""));
+                lines.push(Line::from(vec![
+                    Span::raw(" "),
+                    Span::styled(format!("error: {}", e), Style::default().fg(Color::Red)),
+                ]));
+            }
+            frame.render_widget(Paragraph::new(lines), content_area);
+        } else {
+            let [list_area, detail_area] = Layout::horizontal([
+                Constraint::Percentage(40), Constraint::Percentage(60),
+            ]).areas(content_area);
+
+            let items: Vec<ListItem> = candidates.iter().map(|c| ListItem::new(Line::from(vec![
+                Span::styled(format!("#{:<3} ", c.entry_idx), Style::default().fg(Color::DarkGray)),
+                Span::raw(truncate(&c.original_text, 30)),
+            ]))).collect();
+            frame.render_stateful_widget(
+                List::new(items)
+                    .block(Block::default().borders(Borders::RIGHT).title(" candidates "))
+                    .highlight_style(Style::default().add_modifier(Modifier::REVERSED)),
+                list_area, &mut self.list_state,
+            );
+
+            if let Some(c) = self.list_state.selected().and_then(|i| candidates.get(i)) {
+                let mut text = String::new();
+                if !c.prior_context.is_empty() {
+                    text.push_str(&c.prior_context);
+                    text.push_str("\n\n─── original ───\n\n");
+                }
+                text.push_str(&c.original_text);
+                text.push_str("\n\n─── test model ───\n\n");
+                text.push_str(&c.alternate_text);
+                frame.render_widget(
+                    Paragraph::new(text)
+                        .block(Block::default().borders(Borders::TOP)
+                            .title(format!(" entry {} ", c.entry_idx)))
+                        .wrap(Wrap { trim: false }),
+                    detail_area,
+                );
+            }
+        }
+
+        let help = Line::from(vec![
+            Span::styled(" j/k/\u{2191}\u{2193}", Style::default().fg(Color::Cyan)),
+            Span::raw("=nav "),
+            Span::styled("c/Enter", Style::default().fg(Color::Green)),
+            Span::raw("=run "),
+        ]);
+        frame.render_widget(
+            Paragraph::new(help),
+            Rect { y: area.y + area.height.saturating_sub(1), height: area.height.min(1), ..area }, // no u16 underflow on a zero-height area
+        );
+    }
+}
diff --git a/src/user/learn.rs b/src/user/learn.rs
index 0bd351f..78c16d0 100644
--- a/src/user/learn.rs
+++ b/src/user/learn.rs
@@ -12,7 +12,7 @@ use ratatui::{
 };
 use ratatui::crossterm::event::{Event, KeyCode, KeyEvent};
 
-use super::{App, ScreenView, screen_legend};
+use super::{App, ScreenView, screen_legend, truncate};
 
 /// A candidate response identified for fine-tuning.
 #[derive(Clone, Debug)]
@@ -331,11 +331,3 @@ fn render_detail(frame: &mut Frame, c: &FinetuneCandidate, area: Rect) {
     frame.render_widget(content, content_area);
 }
 
-fn truncate(s: &str, max: usize) -> String {
-    let first_line = s.lines().next().unwrap_or("");
-    if first_line.len() > max {
-        format!("{}...", &first_line[..max])
-    } else {
-        first_line.to_string()
-    }
-}
diff --git a/src/user/mod.rs b/src/user/mod.rs
index e077167..33008b7 100644
--- a/src/user/mod.rs
+++ b/src/user/mod.rs
@@ -4,6 +4,7 @@
 // machine, DMN, identity) lives in mind/.
 
 pub(crate) mod chat;
+pub(crate) mod compare;
 mod context;
 pub(crate) mod learn;
 pub(crate) mod scroll_pane;
@@ -64,6 +65,13 @@ fn screen_legend() -> String {
     SCREEN_LEGEND.get().cloned().unwrap_or_default()
 }
 
+/// Return the first line of `s`, cut to at most `max` characters (never
+/// inside a UTF-8 sequence) plus "..." when cut. Used by candidate-list screens.
+fn truncate(s: &str, max: usize) -> String {
+    let first = s.lines().next().unwrap_or("");
+    match first.char_indices().nth(max) { Some((end, _)) => format!("{}...", &first[..end]), None => first.to_string() }
+}
+
 /// A screen that can draw itself and handle input.
trait ScreenView: Send { fn tick(&mut self, frame: &mut ratatui::Frame, area: ratatui::layout::Rect, @@ -114,6 +122,8 @@ struct App { idle_info: Option, /// Fine-tuning candidates pending review. finetune_candidates: Vec, + /// F7 compare candidates — response pairs from test-model comparison. + compare_candidates: Vec, } impl App { @@ -144,6 +154,7 @@ impl App { walked_count: 0, channel_status: Vec::new(), idle_info: None, finetune_candidates: Vec::new(), + compare_candidates: Vec::new(), } } @@ -372,7 +383,7 @@ async fn run( } let notify_rx = crate::thalamus::channels::subscribe_all(); - // F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus, F6=learn + // F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus, F6=learn, F7=compare let mut screens: Vec> = vec![ Box::new(crate::user::chat::InteractScreen::new( mind.agent.clone(), mind.shared.clone(), mind_tx.clone(), @@ -382,6 +393,7 @@ async fn run( Box::new(crate::user::unconscious::UnconsciousScreen::new()), Box::new(crate::user::thalamus::ThalamusScreen::new()), Box::new(crate::user::learn::LearnScreen::new(mind_tx.clone())), + Box::new(crate::user::compare::CompareScreen::new(mind_tx.clone())), ]; let mut active_screen: usize = 1; // F-key number tui::set_screen_legend(tui::screen_legend_from(&*screens)); @@ -505,6 +517,9 @@ async fn run( }); } + // Sync compare candidates — a fresh run clears, so take a snapshot. + app.compare_candidates = ms.compare_candidates.clone(); + app.mind_state = Some(ms.clone()); } app.walked_count = mind.subconscious_walked().await.len();