forked from kent/consciousness
F6 learn screen: fine-tuning candidate review
Wire up divergence scoring to identify responses that depend heavily on memories the model hasn't internalized. These responses are candidates for fine-tuning.

- Score finetune candidates automatically after each turn
- Track trained responses by timestamp to prevent overtraining
- F6 screen shows candidates with divergence scores
- j/k nav, a=approve, r=reject, g=toggle alternate gen, s=send
- Additive sync preserves approval status across ticks
- Keeps the 10 most recent rejected candidates, removes sent ones

The 's' key currently just marks approved candidates as trained locally — the actual /finetune endpoint call is to follow.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
2c6a5c0f4a
commit
50b7b3a33a
4 changed files with 557 additions and 3 deletions
264
src/user/learn.rs
Normal file
264
src/user/learn.rs
Normal file
|
|
@ -0,0 +1,264 @@
|
|||
// learn.rs — F6: fine-tuning review screen
|
||||
//
|
||||
// Shows responses identified as training candidates (high divergence
|
||||
// when memories stripped). Queue for review before sending to /finetune.
|
||||
|
||||
use ratatui::{
|
||||
layout::{Constraint, Layout, Rect},
|
||||
style::{Color, Modifier, Style},
|
||||
text::{Line, Span},
|
||||
widgets::{Block, Borders, List, ListItem, ListState, Paragraph, Wrap},
|
||||
Frame,
|
||||
};
|
||||
use ratatui::crossterm::event::{Event, KeyCode, KeyEvent};
|
||||
|
||||
use super::{App, ScreenView, screen_legend};
|
||||
|
||||
/// A candidate response identified for fine-tuning.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct FinetuneCandidate {
|
||||
/// Index in conversation entries.
|
||||
pub entry_idx: usize,
|
||||
/// Divergence score (higher = more dependent on memories).
|
||||
pub divergence: f64,
|
||||
/// The assistant response text.
|
||||
pub response_text: String,
|
||||
/// Status: pending, approved, rejected, sent.
|
||||
pub status: CandidateStatus,
|
||||
/// Token IDs for context.
|
||||
pub context_ids: Vec<u32>,
|
||||
/// Token IDs for continuation (what we're training on).
|
||||
pub continuation_ids: Vec<u32>,
|
||||
/// What the model would have said without memories (if generated).
|
||||
pub alternate_text: Option<String>,
|
||||
/// Timestamp in millis for tracking trained status.
|
||||
pub timestamp_ms: i64,
|
||||
}
|
||||
|
||||
/// Review state of a fine-tuning candidate.
///
/// The enum carries no data, so it is `Copy` and `Eq` in addition to
/// `Clone`/`PartialEq`; callers can pass and compare it by value without
/// cloning.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CandidateStatus {
    /// Not reviewed yet.
    Pending,
    /// Accepted by the reviewer.
    Approved,
    /// Declined by the reviewer.
    Rejected,
    /// Already sent.
    Sent,
}
|
||||
|
||||
impl From<crate::subconscious::learn::FinetuneCandidate> for FinetuneCandidate {
|
||||
fn from(c: crate::subconscious::learn::FinetuneCandidate) -> Self {
|
||||
FinetuneCandidate {
|
||||
entry_idx: c.entry_idx,
|
||||
divergence: c.divergence,
|
||||
response_text: c.response_text,
|
||||
status: CandidateStatus::Pending,
|
||||
context_ids: c.context_ids,
|
||||
continuation_ids: c.continuation_ids,
|
||||
alternate_text: c.alternate_text,
|
||||
timestamp_ms: c.timestamp_ms,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct LearnScreen {
|
||||
list_state: ListState,
|
||||
}
|
||||
|
||||
impl LearnScreen {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
list_state: ListState::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn selected_idx(&self) -> Option<usize> {
|
||||
self.list_state.selected()
|
||||
}
|
||||
}
|
||||
|
||||
impl ScreenView for LearnScreen {
|
||||
fn label(&self) -> &'static str { "learn" }
|
||||
|
||||
fn tick(&mut self, frame: &mut Frame, area: Rect,
|
||||
events: &[Event], app: &mut App) {
|
||||
|
||||
// Handle input first (before borrowing candidates for rendering)
|
||||
let candidate_count = app.finetune_candidates.len();
|
||||
for event in events {
|
||||
if let Event::Key(KeyEvent { code, .. }) = event {
|
||||
match code {
|
||||
KeyCode::Up | KeyCode::Char('k') => {
|
||||
let i = self.list_state.selected().unwrap_or(0);
|
||||
self.list_state.select(Some(i.saturating_sub(1)));
|
||||
}
|
||||
KeyCode::Down | KeyCode::Char('j') => {
|
||||
let i = self.list_state.selected().unwrap_or(0);
|
||||
let max = candidate_count.saturating_sub(1);
|
||||
self.list_state.select(Some((i + 1).min(max)));
|
||||
}
|
||||
KeyCode::Char('a') => {
|
||||
if let Some(idx) = self.selected_idx() {
|
||||
app.finetune_action(idx, CandidateStatus::Approved);
|
||||
}
|
||||
}
|
||||
KeyCode::Char('r') => {
|
||||
if let Some(idx) = self.selected_idx() {
|
||||
app.finetune_action(idx, CandidateStatus::Rejected);
|
||||
}
|
||||
}
|
||||
KeyCode::Char('g') => {
|
||||
// Toggle alternate generation and persist
|
||||
let current = crate::subconscious::learn::alternates_enabled();
|
||||
crate::subconscious::learn::set_alternates(!current);
|
||||
}
|
||||
KeyCode::Char('s') => {
|
||||
app.finetune_send_approved();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure selection is valid
|
||||
if candidate_count > 0 {
|
||||
let sel = self.list_state.selected().unwrap_or(0).min(candidate_count - 1);
|
||||
self.list_state.select(Some(sel));
|
||||
}
|
||||
|
||||
// Get scoring progress from mind state
|
||||
let progress = app.mind_state.as_ref()
|
||||
.map(|ms| ms.finetune_progress.as_str())
|
||||
.unwrap_or("");
|
||||
|
||||
// Now render
|
||||
let gen_on = crate::subconscious::learn::alternates_enabled();
|
||||
let title_right = if !progress.is_empty() {
|
||||
format!(" {} ", progress)
|
||||
} else if gen_on {
|
||||
" learn [gen] ".to_string()
|
||||
} else {
|
||||
" learn ".to_string()
|
||||
};
|
||||
let block = Block::default()
|
||||
.title_top(Line::from(screen_legend()).left_aligned())
|
||||
.title_top(Line::from(title_right).right_aligned())
|
||||
.borders(Borders::ALL)
|
||||
.border_style(Style::default().fg(Color::Magenta));
|
||||
let inner = block.inner(area);
|
||||
frame.render_widget(block, area);
|
||||
|
||||
let candidates = &app.finetune_candidates;
|
||||
|
||||
if candidates.is_empty() {
|
||||
let msg = if progress.is_empty() {
|
||||
" No candidates yet — scoring runs after each turn."
|
||||
} else {
|
||||
" Scoring in progress..."
|
||||
};
|
||||
frame.render_widget(
|
||||
Paragraph::new(Line::styled(msg, Style::default().fg(Color::DarkGray))),
|
||||
inner,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Layout: list on left, detail on right
|
||||
let [list_area, detail_area] = Layout::horizontal([
|
||||
Constraint::Percentage(40),
|
||||
Constraint::Percentage(60),
|
||||
]).areas(inner);
|
||||
|
||||
// Render candidate list
|
||||
let items: Vec<ListItem> = candidates.iter().map(|c| {
|
||||
let status_char = match c.status {
|
||||
CandidateStatus::Pending => ' ',
|
||||
CandidateStatus::Approved => '+',
|
||||
CandidateStatus::Rejected => '-',
|
||||
CandidateStatus::Sent => '*',
|
||||
};
|
||||
let style = match c.status {
|
||||
CandidateStatus::Pending => Style::default(),
|
||||
CandidateStatus::Approved => Style::default().fg(Color::Green),
|
||||
CandidateStatus::Rejected => Style::default().fg(Color::DarkGray),
|
||||
CandidateStatus::Sent => Style::default().fg(Color::Cyan),
|
||||
};
|
||||
ListItem::new(Line::from(vec![
|
||||
Span::styled(format!("[{}] ", status_char), style),
|
||||
Span::styled(format!("{:.2} ", c.divergence), Style::default().fg(Color::Yellow)),
|
||||
Span::raw(truncate(&c.response_text, 30)),
|
||||
]))
|
||||
}).collect();
|
||||
|
||||
let list = List::new(items)
|
||||
.block(Block::default().borders(Borders::RIGHT).title(" candidates "))
|
||||
.highlight_style(Style::default().add_modifier(Modifier::REVERSED));
|
||||
frame.render_stateful_widget(list, list_area, &mut self.list_state);
|
||||
|
||||
// Render detail for selected candidate
|
||||
if let Some(idx) = self.selected_idx() {
|
||||
if let Some(candidate) = candidates.get(idx) {
|
||||
render_detail(frame, candidate, detail_area);
|
||||
}
|
||||
}
|
||||
|
||||
// Render help at bottom
|
||||
let help = Line::from(vec![
|
||||
Span::styled(" j/k/\u{2191}\u{2193}", Style::default().fg(Color::Cyan)),
|
||||
Span::raw("=nav "),
|
||||
Span::styled("a", Style::default().fg(Color::Green)),
|
||||
Span::raw("=approve "),
|
||||
Span::styled("r", Style::default().fg(Color::Red)),
|
||||
Span::raw("=reject "),
|
||||
Span::styled("g", Style::default().fg(Color::Yellow)),
|
||||
Span::raw("=gen "),
|
||||
Span::styled("s", Style::default().fg(Color::Magenta)),
|
||||
Span::raw("=send "),
|
||||
]);
|
||||
let help_area = Rect {
|
||||
y: area.y + area.height - 1,
|
||||
height: 1,
|
||||
..area
|
||||
};
|
||||
frame.render_widget(Paragraph::new(help), help_area);
|
||||
}
|
||||
}
|
||||
|
||||
fn render_detail(frame: &mut Frame, c: &FinetuneCandidate, area: Rect) {
|
||||
let [header_area, content_area] = Layout::vertical([
|
||||
Constraint::Length(3),
|
||||
Constraint::Min(1),
|
||||
]).areas(area);
|
||||
|
||||
// Header: divergence, status
|
||||
let alt_status = if c.alternate_text.is_some() { "yes" } else { "no" };
|
||||
let header = Paragraph::new(vec![
|
||||
Line::from(vec![
|
||||
Span::raw(" divergence: "),
|
||||
Span::styled(format!("{:.3}", c.divergence), Style::default().fg(Color::Yellow)),
|
||||
Span::raw(format!(" entry: {} alt: {}", c.entry_idx, alt_status)),
|
||||
]),
|
||||
]);
|
||||
frame.render_widget(header, header_area);
|
||||
|
||||
// Content: response and alternate (if available)
|
||||
let content_block = Block::default()
|
||||
.borders(Borders::TOP)
|
||||
.title(" response ");
|
||||
|
||||
let text = match &c.alternate_text {
|
||||
Some(alt) => format!(" {}\n\n─── without memories ───\n\n {}", c.response_text, alt),
|
||||
None => format!(" {}", c.response_text),
|
||||
};
|
||||
|
||||
let content = Paragraph::new(text)
|
||||
.block(content_block)
|
||||
.wrap(Wrap { trim: false });
|
||||
frame.render_widget(content, content_area);
|
||||
}
|
||||
|
||||
/// Return the first line of `s`, shortened to at most `max` characters.
///
/// When the first line is longer than `max` characters it is cut after the
/// `max`-th character and `"..."` is appended; otherwise it is returned
/// unchanged. An empty `s` yields an empty string.
///
/// The cut is made on a character boundary, so text containing multi-byte
/// UTF-8 characters never causes a slicing panic.
fn truncate(s: &str, max: usize) -> String {
    let first_line = s.lines().next().unwrap_or("");
    // `nth(max)` is the first character that does not fit; its byte offset
    // is a valid place to slice. `None` means the line is short enough.
    match first_line.char_indices().nth(max) {
        Some((cut, _)) => format!("{}...", &first_line[..cut]),
        None => first_line.to_string(),
    }
}
|
||||
|
|
@ -5,11 +5,12 @@
|
|||
|
||||
pub(crate) mod chat;
|
||||
mod context;
|
||||
pub(crate) mod learn;
|
||||
pub(crate) mod scroll_pane;
|
||||
pub mod selectable;
|
||||
mod subconscious;
|
||||
mod unconscious;
|
||||
mod thalamus;
|
||||
mod unconscious;
|
||||
mod widgets;
|
||||
|
||||
use anyhow::Result;
|
||||
|
|
@ -121,6 +122,8 @@ struct App {
|
|||
walked_count: usize,
|
||||
channel_status: Vec<ChannelStatus>,
|
||||
idle_info: Option<IdleInfo>,
|
||||
/// Fine-tuning candidates pending review.
|
||||
finetune_candidates: Vec<learn::FinetuneCandidate>,
|
||||
}
|
||||
|
||||
impl App {
|
||||
|
|
@ -151,6 +154,24 @@ impl App {
|
|||
rebuild_tools_pending: false,
|
||||
walked_count: 0,
|
||||
channel_status: Vec::new(), idle_info: None,
|
||||
finetune_candidates: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the review status of the candidate at `idx`.
///
/// An out-of-range `idx` is ignored.
fn finetune_action(&mut self, idx: usize, status: learn::CandidateStatus) {
    let Some(candidate) = self.finetune_candidates.get_mut(idx) else {
        return;
    };
    candidate.status = status;
}
|
||||
|
||||
fn finetune_send_approved(&mut self) {
|
||||
// TODO: Send approved candidates to /finetune endpoint
|
||||
// For now, just mark them as sent and record as trained
|
||||
for candidate in &mut self.finetune_candidates {
|
||||
if candidate.status == learn::CandidateStatus::Approved {
|
||||
crate::subconscious::learn::mark_trained(candidate.timestamp_ms);
|
||||
candidate.status = learn::CandidateStatus::Sent;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -334,7 +355,7 @@ async fn run(
|
|||
}
|
||||
let notify_rx = crate::thalamus::channels::subscribe_all();
|
||||
|
||||
// F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus
|
||||
// F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus, F6=learn
|
||||
let mut screens: Vec<Box<dyn tui::ScreenView>> = vec![
|
||||
Box::new(crate::user::chat::InteractScreen::new(
|
||||
mind.agent.clone(), mind.shared.clone(), mind_tx.clone(),
|
||||
|
|
@ -343,6 +364,7 @@ async fn run(
|
|||
Box::new(crate::user::subconscious::SubconsciousScreen::new()),
|
||||
Box::new(crate::user::unconscious::UnconsciousScreen::new()),
|
||||
Box::new(crate::user::thalamus::ThalamusScreen::new()),
|
||||
Box::new(crate::user::learn::LearnScreen::new()),
|
||||
];
|
||||
let mut active_screen: usize = 1; // F-key number
|
||||
tui::set_screen_legend(tui::screen_legend_from(&*screens));
|
||||
|
|
@ -433,7 +455,36 @@ async fn run(
|
|||
};
|
||||
app.unconscious_state = unc.snapshots(store_guard.as_deref());
|
||||
app.graph_health = unc.graph_health.clone();
|
||||
app.mind_state = Some(mind.shared.lock().unwrap().clone());
|
||||
let ms = mind.shared.lock().unwrap();
|
||||
// Sync finetune candidates: add new ones, keep existing (preserves approval status)
|
||||
// Remove sent candidates (already trained, no need to keep)
|
||||
// Keep only 10 most recent rejected candidates
|
||||
app.finetune_candidates.retain(|c| c.status != learn::CandidateStatus::Sent);
|
||||
for c in &ms.finetune_candidates {
|
||||
let exists = app.finetune_candidates.iter()
|
||||
.any(|existing| existing.timestamp_ms == c.timestamp_ms);
|
||||
if !exists {
|
||||
app.finetune_candidates.push(learn::FinetuneCandidate::from(c.clone()));
|
||||
}
|
||||
}
|
||||
// Limit rejected candidates to 10 most recent
|
||||
let mut rejected: Vec<_> = app.finetune_candidates.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, c)| c.status == learn::CandidateStatus::Rejected)
|
||||
.map(|(i, c)| (i, c.timestamp_ms))
|
||||
.collect();
|
||||
if rejected.len() > 10 {
|
||||
rejected.sort_by_key(|(_, ts)| std::cmp::Reverse(*ts));
|
||||
let to_remove: std::collections::HashSet<_> = rejected[10..]
|
||||
.iter().map(|(i, _)| *i).collect();
|
||||
let mut idx = 0;
|
||||
app.finetune_candidates.retain(|_| {
|
||||
let keep = !to_remove.contains(&idx);
|
||||
idx += 1;
|
||||
keep
|
||||
});
|
||||
}
|
||||
app.mind_state = Some(ms.clone());
|
||||
}
|
||||
app.walked_count = mind.subconscious_walked().await.len();
|
||||
if !startup_done {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue