F6 learn screen: fine-tuning candidate review

Wire up divergence scoring to identify responses that depend heavily on
memories the model hasn't internalized. These are candidates for fine-tuning.

- Score finetune candidates automatically after each turn
- Track trained responses by timestamp to prevent overtraining
- F6 screen shows candidates with divergence scores
- j/k nav, a=approve, r=reject, g=toggle alternate gen, s=send
- Additive sync preserves approval status across ticks
- Keeps 10 most recent rejected, removes sent

The 's' key currently just marks as trained locally — actual /finetune
endpoint call to follow.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-16 00:31:39 -04:00
parent 2c6a5c0f4a
commit 50b7b3a33a
4 changed files with 557 additions and 3 deletions

View file

@ -147,6 +147,10 @@ pub struct MindState {
pub unc_idle: bool, pub unc_idle: bool,
/// When the unconscious idle timer will fire (for UI display). /// When the unconscious idle timer will fire (for UI display).
pub unc_idle_deadline: Instant, pub unc_idle_deadline: Instant,
/// Fine-tuning candidates identified by scoring.
pub finetune_candidates: Vec<learn::FinetuneCandidate>,
/// Fine-tune scoring progress (empty = not running).
pub finetune_progress: String,
} }
impl Clone for MindState { impl Clone for MindState {
@ -165,6 +169,8 @@ impl Clone for MindState {
turn_handle: None, // Not cloned — only Mind's loop uses this turn_handle: None, // Not cloned — only Mind's loop uses this
unc_idle: self.unc_idle, unc_idle: self.unc_idle,
unc_idle_deadline: self.unc_idle_deadline, unc_idle_deadline: self.unc_idle_deadline,
finetune_candidates: self.finetune_candidates.clone(),
finetune_progress: self.finetune_progress.clone(),
} }
} }
} }
@ -177,6 +183,8 @@ pub enum MindCommand {
Score, Score,
/// Run full N×M memory scoring matrix (/score command) /// Run full N×M memory scoring matrix (/score command)
ScoreFull, ScoreFull,
/// Score for finetune candidates
ScoreFinetune,
/// Abort current turn, kill processes /// Abort current turn, kill processes
Interrupt, Interrupt,
/// Reset session /// Reset session
@ -202,6 +210,8 @@ impl MindState {
turn_handle: None, turn_handle: None,
unc_idle: false, unc_idle: false,
unc_idle_deadline: Instant::now() + std::time::Duration::from_secs(60), unc_idle_deadline: Instant::now() + std::time::Duration::from_secs(60),
finetune_candidates: Vec::new(),
finetune_progress: String::new(),
} }
} }
@ -288,6 +298,7 @@ impl MindState {
/// Background task completion events. /// Background task completion events.
enum BgEvent { enum BgEvent {
ScoringDone, ScoringDone,
FinetuneCandidates(Vec<learn::FinetuneCandidate>),
} }
// --- Mind: cognitive state machine --- // --- Mind: cognitive state machine ---
@ -529,6 +540,9 @@ impl Mind {
} }
self.agent.compact().await; self.agent.compact().await;
} }
MindCommand::ScoreFinetune => {
self.start_finetune_scoring();
}
} }
} }
} }
@ -603,6 +617,31 @@ impl Mind {
}); });
} }
/// Kick off background scoring of responses for fine-tuning candidates.
///
/// Sets `finetune_progress` so the UI can show activity, snapshots the
/// conversation context, runs the scorer, and forwards any candidates to
/// the mind loop via `BgEvent::FinetuneCandidates`. The progress string is
/// cleared when the task finishes, whether scoring succeeded or not.
pub fn start_finetune_scoring(&self) {
    let agent = self.agent.clone();
    let bg_tx = self.bg_tx.clone();
    let shared = self.shared.clone();
    // Surface progress in the UI before the task starts.
    shared.lock().unwrap().finetune_progress = "scoring...".into();
    tokio::spawn(async move {
        // Snapshot the context and grab a client handle up front so the
        // async context lock is not held across the (slow) scoring call.
        let (context, client) = {
            let ctx = agent.context.lock().await;
            (ctx.clone(), agent.client.clone())
        };
        // Min divergence 0.1 = only keep responses that differ meaningfully
        let outcome = learn::score_finetune_candidates(&context, 20, &client, 0.1).await;
        match outcome {
            Ok(found) => {
                dbglog!("[finetune] found {} candidates", found.len());
                // Receiver gone just means the mind loop is shutting down.
                let _ = bg_tx.send(BgEvent::FinetuneCandidates(found));
            }
            Err(err) => {
                dbglog!("[finetune] scoring FAILED: {:#}", err);
            }
        }
        shared.lock().unwrap().finetune_progress.clear();
    });
}
async fn start_turn(&self, text: &str, target: StreamTarget) { async fn start_turn(&self, text: &str, target: StreamTarget) {
{ {
match target { match target {
@ -692,6 +731,9 @@ impl Mind {
BgEvent::ScoringDone => { BgEvent::ScoringDone => {
self.shared.lock().unwrap().scoring_in_flight = false; self.shared.lock().unwrap().scoring_in_flight = false;
} }
BgEvent::FinetuneCandidates(candidates) => {
self.shared.lock().unwrap().finetune_candidates = candidates;
}
} }
} }
@ -711,6 +753,7 @@ impl Mind {
cmds.push(MindCommand::Compact); cmds.push(MindCommand::Compact);
if !self.config.no_agents { if !self.config.no_agents {
cmds.push(MindCommand::Score); cmds.push(MindCommand::Score);
cmds.push(MindCommand::ScoreFinetune);
} }
} }

View file

@ -16,6 +16,7 @@
use crate::agent::api::ApiClient; use crate::agent::api::ApiClient;
use crate::agent::context::{AstNode, Ast, NodeBody, ContextState, Role}; use crate::agent::context::{AstNode, Ast, NodeBody, ContextState, Role};
use crate::agent::tokenizer;
const SCORE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300); const SCORE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300);
@ -452,3 +453,198 @@ pub async fn score_finetune(
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
Ok(results) Ok(results)
} }
/// Enriched finetune candidate with context for review.
///
/// Produced by [`score_finetune_candidates`]; carries everything the review
/// screen and an eventual /finetune call need, so no further context
/// lookups are required downstream.
#[derive(Clone, Debug)]
pub struct FinetuneCandidate {
    /// Index of the response entry in the conversation.
    pub entry_idx: usize,
    /// Divergence score; higher means the response leaned harder on memories.
    pub divergence: f64,
    /// The assistant response text, reassembled from the entry's leaves.
    pub response_text: String,
    /// Token IDs for context (everything before the response).
    pub context_ids: Vec<u32>,
    /// Token IDs for the response (what we're training on).
    pub continuation_ids: Vec<u32>,
    /// What the model would have said without memories (if generated).
    pub alternate_text: Option<String>,
    /// Timestamp in millis for tracking trained status.
    pub timestamp_ms: i64,
}
/// Score and enrich finetune candidates with full context.
///
/// Runs `score_finetune` over the last `count` conversation entries, then
/// filters and enriches the results:
/// - drops entries scoring below `min_divergence`
/// - drops entries already trained on (tracked by timestamp) or lacking a
///   timestamp entirely
/// - precomputes context/continuation token IDs for the /finetune payload
/// - optionally generates a memory-free "alternate" response per candidate
///
/// Returns candidates ready for review, with context/continuation token IDs
/// already computed for sending to /finetune. Order follows the scorer's
/// descending-divergence sort.
pub async fn score_finetune_candidates(
    context: &ContextState,
    count: usize,
    client: &ApiClient,
    min_divergence: f64,
) -> anyhow::Result<Vec<FinetuneCandidate>> {
    let scores = score_finetune(context, count, client).await?;
    let entries = context.conversation();
    let mut candidates = Vec::new();
    // Timestamps of responses we have already trained on; used to skip them.
    let trained = load_trained();
    for (entry_idx, divergence) in scores {
        if divergence < min_divergence {
            continue;
        }
        let node = &entries[entry_idx];
        // Get timestamp and skip if already trained
        let timestamp_ms = match node_timestamp_ms(node) {
            Some(ts) => {
                if trained.contains(&ts) {
                    continue; // Already trained, skip
                }
                ts
            }
            None => continue, // No timestamp, skip
        };
        // Extract response text by concatenating the branch's leaf bodies.
        let response_text = match node {
            AstNode::Branch { children, .. } => {
                children.iter()
                    .filter_map(|c| match c {
                        AstNode::Leaf(leaf) => Some(leaf.body().text().to_string()),
                        _ => None,
                    })
                    .collect::<Vec<_>>()
                    .join("")
            }
            // Non-branch entries carry no reviewable response body.
            _ => continue,
        };
        // Build token IDs: context = everything before response, continuation = response
        let context_ids = build_token_ids(context, 0..entry_idx, Filter::None);
        let continuation_ids: Vec<u32> = node.token_ids().into_iter().collect();
        candidates.push(FinetuneCandidate {
            entry_idx,
            divergence,
            response_text,
            context_ids,
            continuation_ids,
            // Filled in below when alternate generation is enabled.
            alternate_text: None,
            timestamp_ms,
        });
    }
    // Generate alternates if enabled. Best-effort: a failed generation
    // leaves `alternate_text` as None instead of aborting the whole batch.
    if alternates_enabled() && !candidates.is_empty() {
        for candidate in &mut candidates {
            match generate_alternate(context, candidate.entry_idx, client).await {
                Ok(text) => candidate.alternate_text = Some(text),
                Err(e) => dbglog!("[finetune] alternate generation failed: {:#}", e),
            }
        }
    }
    Ok(candidates)
}
/// Generate what the model would say without memories for a given entry.
async fn generate_alternate(
context: &ContextState,
entry_idx: usize,
client: &ApiClient,
) -> anyhow::Result<String> {
use crate::agent::api::{SamplingParams, StreamToken};
// Build context tokens without memories, up to the response
let mut prompt = build_token_ids(context, 0..entry_idx, Filter::SkipAllMemories);
// Add assistant turn start
prompt.push(tokenizer::IM_START);
prompt.extend(tokenizer::encode("assistant\n"));
// Generate completion
let sampling = SamplingParams {
temperature: 0.6,
top_p: 0.95,
top_k: 20,
};
let (mut rx, _guard) = client.stream_completion(&prompt, sampling, Some(-5));
let mut tokens = Vec::new();
while let Some(tok) = rx.recv().await {
match tok {
StreamToken::Token(id) => tokens.push(id),
StreamToken::Done { .. } => break,
StreamToken::Error(e) => anyhow::bail!("generation error: {}", e),
}
}
Ok(tokenizer::decode(&tokens))
}
// ── Finetune config and persistence ─────────────────────────────
use std::path::PathBuf;
use std::collections::HashSet;
const FINETUNE_ALTERNATES_FILE: &str = ".consciousness/cache/finetune-alternates";
const TRAINED_RESPONSES_FILE: &str = ".consciousness/cache/trained-responses.json";
/// Path of the marker file that enables alternate-response generation.
fn alternates_path() -> PathBuf {
    let home = dirs::home_dir().unwrap_or_default();
    home.join(FINETUNE_ALTERNATES_FILE)
}
/// Path of the JSON file recording already-trained response timestamps.
fn trained_path() -> PathBuf {
    let home = dirs::home_dir().unwrap_or_default();
    home.join(TRAINED_RESPONSES_FILE)
}
/// Check if alternate response generation is enabled.
///
/// The setting is the mere existence of the marker file — no contents.
pub fn alternates_enabled() -> bool {
    let marker = alternates_path();
    marker.exists()
}
/// Toggle alternate response generation and persist the setting.
///
/// Persisted as a marker file so the choice survives restarts; all I/O is
/// best-effort and errors are deliberately ignored.
pub fn set_alternates(enabled: bool) {
    let marker = alternates_path();
    if !enabled {
        // Disabling == deleting the marker; "already gone" is fine.
        let _ = std::fs::remove_file(&marker);
        return;
    }
    // Cache directory may not exist yet on first enable.
    if let Some(dir) = marker.parent() {
        let _ = std::fs::create_dir_all(dir);
    }
    // An empty file is the whole setting — existence is the flag.
    let _ = std::fs::write(&marker, "");
}
/// Load set of trained response timestamps (millis since epoch).
///
/// A missing or unparseable file yields an empty set — training history is
/// best-effort state, not worth failing over.
pub fn load_trained() -> HashSet<i64> {
    std::fs::read_to_string(trained_path())
        .ok()
        .and_then(|json| serde_json::from_str(&json).ok())
        .unwrap_or_default()
}
/// Mark a response as trained by its timestamp.
///
/// Read-modify-write of the persisted timestamp set; serialization and
/// filesystem errors are ignored (best-effort bookkeeping).
pub fn mark_trained(timestamp_ms: i64) {
    let mut timestamps = load_trained();
    timestamps.insert(timestamp_ms);
    let path = trained_path();
    // Cache directory may not exist yet on first write.
    if let Some(dir) = path.parent() {
        let _ = std::fs::create_dir_all(dir);
    }
    if let Ok(json) = serde_json::to_string(&timestamps) {
        let _ = std::fs::write(&path, json);
    }
}
/// Get timestamp in millis from an AstNode (for Branch, uses first child).
///
/// Returns `None` when the node (or its first child, for branches) carries
/// no timestamp.
pub fn node_timestamp_ms(node: &AstNode) -> Option<i64> {
    let stamp = match node {
        AstNode::Leaf(leaf) => leaf.timestamp(),
        AstNode::Branch { children, .. } => children.first()?.leaf()?.timestamp(),
    };
    stamp.map(|ts| ts.timestamp_millis())
}

264
src/user/learn.rs Normal file
View file

@ -0,0 +1,264 @@
// learn.rs — F6: fine-tuning review screen
//
// Shows responses identified as training candidates (high divergence
// when memories stripped). Queue for review before sending to /finetune.
use ratatui::{
layout::{Constraint, Layout, Rect},
style::{Color, Modifier, Style},
text::{Line, Span},
widgets::{Block, Borders, List, ListItem, ListState, Paragraph, Wrap},
Frame,
};
use ratatui::crossterm::event::{Event, KeyCode, KeyEvent};
use super::{App, ScreenView, screen_legend};
/// A candidate response identified for fine-tuning.
///
/// UI-side mirror of `crate::subconscious::learn::FinetuneCandidate` with an
/// added review `status`; converted via the `From` impl below.
#[derive(Clone, Debug)]
pub struct FinetuneCandidate {
    /// Index in conversation entries.
    pub entry_idx: usize,
    /// Divergence score (higher = more dependent on memories).
    pub divergence: f64,
    /// The assistant response text.
    pub response_text: String,
    /// Status: pending, approved, rejected, sent.
    pub status: CandidateStatus,
    /// Token IDs for context.
    pub context_ids: Vec<u32>,
    /// Token IDs for continuation (what we're training on).
    pub continuation_ids: Vec<u32>,
    /// What the model would have said without memories (if generated).
    pub alternate_text: Option<String>,
    /// Timestamp in millis for tracking trained status.
    /// Also used as the identity key when syncing with newly scored batches.
    pub timestamp_ms: i64,
}
/// Review lifecycle of a candidate on the F6 screen.
///
/// `Pending` → `Approved`/`Rejected` via the a/r keys; `Approved` → `Sent`
/// when the batch is shipped with 's'.
///
/// Fieldless enum, so it derives `Copy` and `Eq` in addition to the
/// original `Clone`/`Debug`/`PartialEq` — comparisons and passing by value
/// are free, and existing `.clone()` call sites keep working.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CandidateStatus {
    /// Awaiting reviewer decision.
    Pending,
    /// Reviewer approved; will be sent on 's'.
    Approved,
    /// Reviewer rejected; kept for reference until pruned.
    Rejected,
    /// Delivered for training.
    Sent,
}
impl From<crate::subconscious::learn::FinetuneCandidate> for FinetuneCandidate {
    /// Wrap a freshly scored candidate for the review screen.
    ///
    /// Every new arrival starts in the `Pending` state; all other fields
    /// are carried over unchanged.
    fn from(src: crate::subconscious::learn::FinetuneCandidate) -> Self {
        Self {
            status: CandidateStatus::Pending,
            entry_idx: src.entry_idx,
            divergence: src.divergence,
            response_text: src.response_text,
            context_ids: src.context_ids,
            continuation_ids: src.continuation_ids,
            alternate_text: src.alternate_text,
            timestamp_ms: src.timestamp_ms,
        }
    }
}
/// F6 "learn" screen: reviews fine-tuning candidates.
pub(crate) struct LearnScreen {
    /// ratatui list selection state (drives highlight and scroll offset).
    list_state: ListState,
}
impl LearnScreen {
pub fn new() -> Self {
Self {
list_state: ListState::default(),
}
}
fn selected_idx(&self) -> Option<usize> {
self.list_state.selected()
}
}
impl ScreenView for LearnScreen {
    /// Name shown in the screen legend.
    fn label(&self) -> &'static str { "learn" }

    /// One UI frame: process key events, then render the candidate list,
    /// the detail pane for the selection, and the key help line.
    ///
    /// Keys: j/k or arrows = move selection, a = approve, r = reject,
    /// g = toggle alternate generation, s = send approved.
    fn tick(&mut self, frame: &mut Frame, area: Rect,
            events: &[Event], app: &mut App) {
        // Handle input first (before borrowing candidates for rendering)
        let candidate_count = app.finetune_candidates.len();
        for event in events {
            if let Event::Key(KeyEvent { code, .. }) = event {
                match code {
                    KeyCode::Up | KeyCode::Char('k') => {
                        // saturating_sub keeps the selection pinned at row 0.
                        let i = self.list_state.selected().unwrap_or(0);
                        self.list_state.select(Some(i.saturating_sub(1)));
                    }
                    KeyCode::Down | KeyCode::Char('j') => {
                        // Clamp to the last row so we never select past the end.
                        let i = self.list_state.selected().unwrap_or(0);
                        let max = candidate_count.saturating_sub(1);
                        self.list_state.select(Some((i + 1).min(max)));
                    }
                    KeyCode::Char('a') => {
                        if let Some(idx) = self.selected_idx() {
                            app.finetune_action(idx, CandidateStatus::Approved);
                        }
                    }
                    KeyCode::Char('r') => {
                        if let Some(idx) = self.selected_idx() {
                            app.finetune_action(idx, CandidateStatus::Rejected);
                        }
                    }
                    KeyCode::Char('g') => {
                        // Toggle alternate generation and persist
                        let current = crate::subconscious::learn::alternates_enabled();
                        crate::subconscious::learn::set_alternates(!current);
                    }
                    KeyCode::Char('s') => {
                        app.finetune_send_approved();
                    }
                    _ => {}
                }
            }
        }
        // Ensure selection is valid (candidates may have shrunk since last tick)
        if candidate_count > 0 {
            let sel = self.list_state.selected().unwrap_or(0).min(candidate_count - 1);
            self.list_state.select(Some(sel));
        }
        // Get scoring progress from mind state (empty string = not running)
        let progress = app.mind_state.as_ref()
            .map(|ms| ms.finetune_progress.as_str())
            .unwrap_or("");
        // Now render. Title shows scoring progress when active, otherwise
        // whether alternate generation is on ("[gen]").
        let gen_on = crate::subconscious::learn::alternates_enabled();
        let title_right = if !progress.is_empty() {
            format!(" {} ", progress)
        } else if gen_on {
            " learn [gen] ".to_string()
        } else {
            " learn ".to_string()
        };
        let block = Block::default()
            .title_top(Line::from(screen_legend()).left_aligned())
            .title_top(Line::from(title_right).right_aligned())
            .borders(Borders::ALL)
            .border_style(Style::default().fg(Color::Magenta));
        let inner = block.inner(area);
        frame.render_widget(block, area);
        let candidates = &app.finetune_candidates;
        if candidates.is_empty() {
            let msg = if progress.is_empty() {
                " No candidates yet — scoring runs after each turn."
            } else {
                " Scoring in progress..."
            };
            frame.render_widget(
                Paragraph::new(Line::styled(msg, Style::default().fg(Color::DarkGray))),
                inner,
            );
            return;
        }
        // Layout: list on left, detail on right
        let [list_area, detail_area] = Layout::horizontal([
            Constraint::Percentage(40),
            Constraint::Percentage(60),
        ]).areas(inner);
        // Render candidate list: status marker, divergence score, text preview
        let items: Vec<ListItem> = candidates.iter().map(|c| {
            let status_char = match c.status {
                CandidateStatus::Pending => ' ',
                CandidateStatus::Approved => '+',
                CandidateStatus::Rejected => '-',
                CandidateStatus::Sent => '*',
            };
            let style = match c.status {
                CandidateStatus::Pending => Style::default(),
                CandidateStatus::Approved => Style::default().fg(Color::Green),
                CandidateStatus::Rejected => Style::default().fg(Color::DarkGray),
                CandidateStatus::Sent => Style::default().fg(Color::Cyan),
            };
            ListItem::new(Line::from(vec![
                Span::styled(format!("[{}] ", status_char), style),
                Span::styled(format!("{:.2} ", c.divergence), Style::default().fg(Color::Yellow)),
                Span::raw(truncate(&c.response_text, 30)),
            ]))
        }).collect();
        let list = List::new(items)
            .block(Block::default().borders(Borders::RIGHT).title(" candidates "))
            .highlight_style(Style::default().add_modifier(Modifier::REVERSED));
        frame.render_stateful_widget(list, list_area, &mut self.list_state);
        // Render detail for selected candidate
        if let Some(idx) = self.selected_idx() {
            if let Some(candidate) = candidates.get(idx) {
                render_detail(frame, candidate, detail_area);
            }
        }
        // Render help at bottom.
        // NOTE(review): the help row overdraws the block's bottom border
        // (y = last row of `area`) — appears intentional; confirm.
        let help = Line::from(vec![
            Span::styled(" j/k/\u{2191}\u{2193}", Style::default().fg(Color::Cyan)),
            Span::raw("=nav "),
            Span::styled("a", Style::default().fg(Color::Green)),
            Span::raw("=approve "),
            Span::styled("r", Style::default().fg(Color::Red)),
            Span::raw("=reject "),
            Span::styled("g", Style::default().fg(Color::Yellow)),
            Span::raw("=gen "),
            Span::styled("s", Style::default().fg(Color::Magenta)),
            Span::raw("=send "),
        ]);
        let help_area = Rect {
            y: area.y + area.height - 1,
            height: 1,
            ..area
        };
        frame.render_widget(Paragraph::new(help), help_area);
    }
}
/// Draw the right-hand detail pane for one candidate: a small stats header
/// followed by the response text (with the memory-free alternate appended
/// beneath a divider when one has been generated).
fn render_detail(frame: &mut Frame, c: &FinetuneCandidate, area: Rect) {
    let [header_area, content_area] =
        Layout::vertical([Constraint::Length(3), Constraint::Min(1)]).areas(area);

    // Header: divergence, entry index, whether an alternate exists.
    let alt_status = if c.alternate_text.is_some() { "yes" } else { "no" };
    let header_line = Line::from(vec![
        Span::raw(" divergence: "),
        Span::styled(format!("{:.3}", c.divergence), Style::default().fg(Color::Yellow)),
        Span::raw(format!(" entry: {} alt: {}", c.entry_idx, alt_status)),
    ]);
    frame.render_widget(Paragraph::new(vec![header_line]), header_area);

    // Body: response, plus the alternate under a divider when available.
    let text = if let Some(alt) = &c.alternate_text {
        format!(" {}\n\n─── without memories ───\n\n {}", c.response_text, alt)
    } else {
        format!(" {}", c.response_text)
    };
    let body = Paragraph::new(text)
        .block(Block::default().borders(Borders::TOP).title(" response "))
        .wrap(Wrap { trim: false });
    frame.render_widget(body, content_area);
}
/// Truncate to the first line of `s`, capped at `max` characters with a
/// "..." suffix when over the cap.
///
/// Counts and cuts by `char` rather than byte-slicing: the original
/// `&first_line[..max]` panics when byte index `max` falls inside a
/// multi-byte UTF-8 character (entirely possible in response text).
/// Behavior for pure-ASCII input is unchanged.
fn truncate(s: &str, max: usize) -> String {
    let first_line = s.lines().next().unwrap_or("");
    if first_line.chars().count() > max {
        let head: String = first_line.chars().take(max).collect();
        format!("{}...", head)
    } else {
        first_line.to_string()
    }
}

View file

@ -5,11 +5,12 @@
pub(crate) mod chat; pub(crate) mod chat;
mod context; mod context;
pub(crate) mod learn;
pub(crate) mod scroll_pane; pub(crate) mod scroll_pane;
pub mod selectable; pub mod selectable;
mod subconscious; mod subconscious;
mod unconscious;
mod thalamus; mod thalamus;
mod unconscious;
mod widgets; mod widgets;
use anyhow::Result; use anyhow::Result;
@ -121,6 +122,8 @@ struct App {
walked_count: usize, walked_count: usize,
channel_status: Vec<ChannelStatus>, channel_status: Vec<ChannelStatus>,
idle_info: Option<IdleInfo>, idle_info: Option<IdleInfo>,
/// Fine-tuning candidates pending review.
finetune_candidates: Vec<learn::FinetuneCandidate>,
} }
impl App { impl App {
@ -151,6 +154,24 @@ impl App {
rebuild_tools_pending: false, rebuild_tools_pending: false,
walked_count: 0, walked_count: 0,
channel_status: Vec::new(), idle_info: None, channel_status: Vec::new(), idle_info: None,
finetune_candidates: Vec::new(),
}
}
/// Set the review status of the candidate at `idx`.
///
/// Out-of-range indices (e.g. a stale selection) are silently ignored.
fn finetune_action(&mut self, idx: usize, status: learn::CandidateStatus) {
    let Some(candidate) = self.finetune_candidates.get_mut(idx) else {
        return;
    };
    candidate.status = status;
}
fn finetune_send_approved(&mut self) {
// TODO: Send approved candidates to /finetune endpoint
// For now, just mark them as sent and record as trained
for candidate in &mut self.finetune_candidates {
if candidate.status == learn::CandidateStatus::Approved {
crate::subconscious::learn::mark_trained(candidate.timestamp_ms);
candidate.status = learn::CandidateStatus::Sent;
}
} }
} }
@ -334,7 +355,7 @@ async fn run(
} }
let notify_rx = crate::thalamus::channels::subscribe_all(); let notify_rx = crate::thalamus::channels::subscribe_all();
// F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus // F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus, F6=learn
let mut screens: Vec<Box<dyn tui::ScreenView>> = vec![ let mut screens: Vec<Box<dyn tui::ScreenView>> = vec![
Box::new(crate::user::chat::InteractScreen::new( Box::new(crate::user::chat::InteractScreen::new(
mind.agent.clone(), mind.shared.clone(), mind_tx.clone(), mind.agent.clone(), mind.shared.clone(), mind_tx.clone(),
@ -343,6 +364,7 @@ async fn run(
Box::new(crate::user::subconscious::SubconsciousScreen::new()), Box::new(crate::user::subconscious::SubconsciousScreen::new()),
Box::new(crate::user::unconscious::UnconsciousScreen::new()), Box::new(crate::user::unconscious::UnconsciousScreen::new()),
Box::new(crate::user::thalamus::ThalamusScreen::new()), Box::new(crate::user::thalamus::ThalamusScreen::new()),
Box::new(crate::user::learn::LearnScreen::new()),
]; ];
let mut active_screen: usize = 1; // F-key number let mut active_screen: usize = 1; // F-key number
tui::set_screen_legend(tui::screen_legend_from(&*screens)); tui::set_screen_legend(tui::screen_legend_from(&*screens));
@ -433,7 +455,36 @@ async fn run(
}; };
app.unconscious_state = unc.snapshots(store_guard.as_deref()); app.unconscious_state = unc.snapshots(store_guard.as_deref());
app.graph_health = unc.graph_health.clone(); app.graph_health = unc.graph_health.clone();
app.mind_state = Some(mind.shared.lock().unwrap().clone()); let ms = mind.shared.lock().unwrap();
// Sync finetune candidates: add new ones, keep existing (preserves approval status)
// Remove sent candidates (already trained, no need to keep)
// Keep only 10 most recent rejected candidates
app.finetune_candidates.retain(|c| c.status != learn::CandidateStatus::Sent);
for c in &ms.finetune_candidates {
let exists = app.finetune_candidates.iter()
.any(|existing| existing.timestamp_ms == c.timestamp_ms);
if !exists {
app.finetune_candidates.push(learn::FinetuneCandidate::from(c.clone()));
}
}
// Limit rejected candidates to 10 most recent
let mut rejected: Vec<_> = app.finetune_candidates.iter()
.enumerate()
.filter(|(_, c)| c.status == learn::CandidateStatus::Rejected)
.map(|(i, c)| (i, c.timestamp_ms))
.collect();
if rejected.len() > 10 {
rejected.sort_by_key(|(_, ts)| std::cmp::Reverse(*ts));
let to_remove: std::collections::HashSet<_> = rejected[10..]
.iter().map(|(i, _)| *i).collect();
let mut idx = 0;
app.finetune_candidates.retain(|_| {
let keep = !to_remove.contains(&idx);
idx += 1;
keep
});
}
app.mind_state = Some(ms.clone());
} }
app.walked_count = mind.subconscious_walked().await.len(); app.walked_count = mind.subconscious_walked().await.len();
if !startup_done { if !startup_done {