F6 learn screen: fine-tuning candidate review
Wire up divergence scoring to identify responses that depend heavily on memories the model hasn't internalized. These are candidates for fine-tuning.

- Score finetune candidates automatically after each turn
- Track trained responses by timestamp to prevent overtraining
- F6 screen shows candidates with divergence scores
- j/k nav, a=approve, r=reject, g=toggle alternate gen, s=send
- Additive sync preserves approval status across ticks
- Keeps 10 most recent rejected, removes sent

The 's' key currently just marks as trained locally — actual /finetune endpoint call to follow.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
2c6a5c0f4a
commit
50b7b3a33a
4 changed files with 557 additions and 3 deletions
|
|
@ -5,11 +5,12 @@
|
|||
|
||||
pub(crate) mod chat;
|
||||
mod context;
|
||||
pub(crate) mod learn;
|
||||
pub(crate) mod scroll_pane;
|
||||
pub mod selectable;
|
||||
mod subconscious;
|
||||
mod unconscious;
|
||||
mod thalamus;
|
||||
mod unconscious;
|
||||
mod widgets;
|
||||
|
||||
use anyhow::Result;
|
||||
|
|
@ -121,6 +122,8 @@ struct App {
|
|||
walked_count: usize,
|
||||
channel_status: Vec<ChannelStatus>,
|
||||
idle_info: Option<IdleInfo>,
|
||||
/// Fine-tuning candidates pending review.
|
||||
finetune_candidates: Vec<learn::FinetuneCandidate>,
|
||||
}
|
||||
|
||||
impl App {
|
||||
|
|
@ -151,6 +154,24 @@ impl App {
|
|||
rebuild_tools_pending: false,
|
||||
walked_count: 0,
|
||||
channel_status: Vec::new(), idle_info: None,
|
||||
finetune_candidates: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply a review decision to the fine-tuning candidate at position `idx`.
///
/// `status` overwrites the candidate's current status. If `idx` does not
/// refer to an existing entry in `finetune_candidates`, nothing happens.
fn finetune_action(&mut self, idx: usize, status: learn::CandidateStatus) {
    // Borrow just the status slot; `get_mut` yields `None` for an
    // out-of-range index, in which case the call is a no-op.
    let slot = self
        .finetune_candidates
        .get_mut(idx)
        .map(|entry| &mut entry.status);
    if let Some(current) = slot {
        *current = status;
    }
}
|
||||
|
||||
fn finetune_send_approved(&mut self) {
|
||||
// TODO: Send approved candidates to /finetune endpoint
|
||||
// For now, just mark them as sent and record as trained
|
||||
for candidate in &mut self.finetune_candidates {
|
||||
if candidate.status == learn::CandidateStatus::Approved {
|
||||
crate::subconscious::learn::mark_trained(candidate.timestamp_ms);
|
||||
candidate.status = learn::CandidateStatus::Sent;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -334,7 +355,7 @@ async fn run(
|
|||
}
|
||||
let notify_rx = crate::thalamus::channels::subscribe_all();
|
||||
|
||||
// F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus
|
||||
// F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus, F6=learn
|
||||
let mut screens: Vec<Box<dyn tui::ScreenView>> = vec![
|
||||
Box::new(crate::user::chat::InteractScreen::new(
|
||||
mind.agent.clone(), mind.shared.clone(), mind_tx.clone(),
|
||||
|
|
@ -343,6 +364,7 @@ async fn run(
|
|||
Box::new(crate::user::subconscious::SubconsciousScreen::new()),
|
||||
Box::new(crate::user::unconscious::UnconsciousScreen::new()),
|
||||
Box::new(crate::user::thalamus::ThalamusScreen::new()),
|
||||
Box::new(crate::user::learn::LearnScreen::new()),
|
||||
];
|
||||
let mut active_screen: usize = 1; // F-key number
|
||||
tui::set_screen_legend(tui::screen_legend_from(&*screens));
|
||||
|
|
@ -433,7 +455,36 @@ async fn run(
|
|||
};
|
||||
app.unconscious_state = unc.snapshots(store_guard.as_deref());
|
||||
app.graph_health = unc.graph_health.clone();
|
||||
app.mind_state = Some(mind.shared.lock().unwrap().clone());
|
||||
let ms = mind.shared.lock().unwrap();
|
||||
// Sync finetune candidates: add new ones, keep existing (preserves approval status)
|
||||
// Remove sent candidates (already trained, no need to keep)
|
||||
// Keep only 10 most recent rejected candidates
|
||||
app.finetune_candidates.retain(|c| c.status != learn::CandidateStatus::Sent);
|
||||
for c in &ms.finetune_candidates {
|
||||
let exists = app.finetune_candidates.iter()
|
||||
.any(|existing| existing.timestamp_ms == c.timestamp_ms);
|
||||
if !exists {
|
||||
app.finetune_candidates.push(learn::FinetuneCandidate::from(c.clone()));
|
||||
}
|
||||
}
|
||||
// Limit rejected candidates to 10 most recent
|
||||
let mut rejected: Vec<_> = app.finetune_candidates.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, c)| c.status == learn::CandidateStatus::Rejected)
|
||||
.map(|(i, c)| (i, c.timestamp_ms))
|
||||
.collect();
|
||||
if rejected.len() > 10 {
|
||||
rejected.sort_by_key(|(_, ts)| std::cmp::Reverse(*ts));
|
||||
let to_remove: std::collections::HashSet<_> = rejected[10..]
|
||||
.iter().map(|(i, _)| *i).collect();
|
||||
let mut idx = 0;
|
||||
app.finetune_candidates.retain(|_| {
|
||||
let keep = !to_remove.contains(&idx);
|
||||
idx += 1;
|
||||
keep
|
||||
});
|
||||
}
|
||||
app.mind_state = Some(ms.clone());
|
||||
}
|
||||
app.walked_count = mind.subconscious_walked().await.len();
|
||||
if !startup_done {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue