forked from kent/consciousness
user: F7 compare screen
Side-by-side model comparison against the current conversation context.
Built on the MindTriggered pattern — F7 drops in as one more
CompareScoring flow next to MemoryScoring / FinetuneScoring.
Motivation: we have the VRAM on the b200 to load two versions of the
same family simultaneously (e.g. Qwen3.5 27B bf16 and q8_k_xl). Rather
than trust perplexity/KLD numbers on a generic corpus, we can measure
divergence on our actual conversations: for each assistant response,
ask the test model what it would have said given the same prefix, and
eyeball the diffs.
- config.compare.test_backend — names an entry in the existing
backends map to use as the test model. When empty, F7 reports
"(unset)" and does nothing.
- subconscious::compare::{score_compare_candidates, CompareCandidate,
CompareScoringStats, CompareScoring}. For each assistant response,
gen_continuation runs with the test client against the same prefix
the original response saw; pairs stream into
shared.compare_candidates as they complete.
- user::compare::CompareScreen — F7 in the screen list. c/Enter
triggers a run; list/detail layout mirroring F6, detail shows
prior context / original / test-model alternate.
No persistence yet — each F7 run regenerates. Caching via a context
manifest (so we can re-view without re-burning generation) is the
natural follow-up; for now light usage is fine.
Also reusable later for validating finetune checkpoints: same pattern,
swap the test backend for the new checkpoint, watch where it diverges
from the base.
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
575325e855
commit
2b03dbb200
7 changed files with 301 additions and 11 deletions
|
|
@ -4,6 +4,7 @@
|
|||
// machine, DMN, identity) lives in mind/.
|
||||
|
||||
pub(crate) mod chat;
|
||||
pub(crate) mod compare;
|
||||
mod context;
|
||||
pub(crate) mod learn;
|
||||
pub(crate) mod scroll_pane;
|
||||
|
|
@ -64,6 +65,13 @@ fn screen_legend() -> String {
|
|||
SCREEN_LEGEND.get().cloned().unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Return the first line of `s`, truncated to at most `max` characters,
/// with a `...` suffix appended when anything was cut off. Used by
/// candidate-list screens.
///
/// `max` counts Unicode scalar values (`char`s), not bytes, so the cut
/// always lands on a character boundary and never panics on multi-byte
/// UTF-8 input. An empty `s` yields an empty string.
fn truncate(s: &str, max: usize) -> String {
    let first = s.lines().next().unwrap_or("");
    // `nth(max)` finds the byte offset of the first character past the
    // limit; `None` means the line already fits within `max` chars.
    match first.char_indices().nth(max) {
        Some((cut, _)) => format!("{}...", &first[..cut]),
        None => first.to_string(),
    }
}
|
||||
|
||||
/// A screen that can draw itself and handle input.
|
||||
trait ScreenView: Send {
|
||||
fn tick(&mut self, frame: &mut ratatui::Frame, area: ratatui::layout::Rect,
|
||||
|
|
@ -114,6 +122,8 @@ struct App {
|
|||
idle_info: Option<IdleInfo>,
|
||||
/// Fine-tuning candidates pending review.
|
||||
finetune_candidates: Vec<learn::FinetuneCandidate>,
|
||||
/// F7 compare candidates — response pairs from test-model comparison.
|
||||
compare_candidates: Vec<compare::CompareCandidate>,
|
||||
}
|
||||
|
||||
impl App {
|
||||
|
|
@ -144,6 +154,7 @@ impl App {
|
|||
walked_count: 0,
|
||||
channel_status: Vec::new(), idle_info: None,
|
||||
finetune_candidates: Vec::new(),
|
||||
compare_candidates: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -372,7 +383,7 @@ async fn run(
|
|||
}
|
||||
let notify_rx = crate::thalamus::channels::subscribe_all();
|
||||
|
||||
// F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus, F6=learn
|
||||
// F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus, F6=learn, F7=compare
|
||||
let mut screens: Vec<Box<dyn tui::ScreenView>> = vec![
|
||||
Box::new(crate::user::chat::InteractScreen::new(
|
||||
mind.agent.clone(), mind.shared.clone(), mind_tx.clone(),
|
||||
|
|
@ -382,6 +393,7 @@ async fn run(
|
|||
Box::new(crate::user::unconscious::UnconsciousScreen::new()),
|
||||
Box::new(crate::user::thalamus::ThalamusScreen::new()),
|
||||
Box::new(crate::user::learn::LearnScreen::new(mind_tx.clone())),
|
||||
Box::new(crate::user::compare::CompareScreen::new(mind_tx.clone())),
|
||||
];
|
||||
let mut active_screen: usize = 1; // F-key number
|
||||
tui::set_screen_legend(tui::screen_legend_from(&*screens));
|
||||
|
|
@ -505,6 +517,9 @@ async fn run(
|
|||
});
|
||||
}
|
||||
|
||||
// Sync compare candidates — a fresh run clears, so take a snapshot.
|
||||
app.compare_candidates = ms.compare_candidates.clone();
|
||||
|
||||
app.mind_state = Some(ms.clone());
|
||||
}
|
||||
app.walked_count = mind.subconscious_walked().await.len();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue