learn: F6 screen — scoring stats, ActivityGuard, configurable threshold

Three changes that together reshape the F6 fine-tune-review screen:

1. Finetune scoring reports through the standard agent activity system
   instead of a separate finetune_progress String. The previous design
   maintained an independent progress field, which forced a cross-lock
   dance and bespoke UI plumbing. start_finetune_scoring now uses
   start_activity + activity.update, and dropping the activity guard
   marks completion, so the usual status line and notifications capture
   scoring progress uniformly with other background work. (A minimal
   sketch of the guard pattern follows the notes below.)

2. MindState gains a FinetuneScoringStats snapshot (responses seen,
   above threshold, max divergence, error). The F6 empty screen shows
   this instead of a loading message — so after a scoring run that
   produced zero candidates, you can see *why* (e.g., max_divergence
   below threshold).

3. The divergence threshold is configurable from F6 via the +/-
   hotkeys (each press scales it by 10×) and is persisted to
   ~/.consciousness/config.json5 via config_writer::set_learn_threshold.
   AppConfig grows a learn section with a threshold field (default
   1e-7); an illustrative config excerpt follows this list.
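
For reference, the persisted file would look roughly like this. This
is an illustrative sketch, not copied from a real config: only the
learn section and its threshold field are defined by this commit, and
the other keys are elided.

    // ~/.consciousness/config.json5 (hypothetical excerpt)
    {
        // ...other sections (compaction, dmn, models, ...) elided...
        learn: {
            // divergence threshold; lower = more sensitive (default 1e-7)
            threshold: 1e-7,
        },
    }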

Also: user/mod.rs no longer uses try_lock() for the per-tick
unconscious/mind state sync — we fixed the locking hot paths that
made try_lock necessary, so lock().await is now the right choice.
And subconscious::learn::score_finetune_candidates now returns
(candidates, max_divergence) so the stats can be populated.
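
To make point 1 concrete, here is a minimal, self-contained sketch of
the RAII guard pattern the activity system uses. The real guard is
async and plugs into the agent's notification machinery; this toy only
shows the shape visible at the call sites (start, update,
drop-on-completion), and every name besides start_activity/update is
invented for illustration.

    use std::sync::{Arc, Mutex};

    /// Toy activity guard: creating one registers a status line,
    /// update() rewrites it, and Drop marks the activity finished.
    struct ActivityGuard {
        status: Arc<Mutex<Option<String>>>,
    }

    impl ActivityGuard {
        fn start(status: Arc<Mutex<Option<String>>>, msg: &str) -> Self {
            *status.lock().unwrap() = Some(msg.to_string());
            ActivityGuard { status }
        }

        fn update(&self, msg: String) {
            *self.status.lock().unwrap() = Some(msg);
        }
    }

    impl Drop for ActivityGuard {
        fn drop(&mut self) {
            // Completion: clear the status line. The real guard also
            // notifies observers here.
            *self.status.lock().unwrap() = None;
        }
    }

    fn main() {
        let status = Arc::new(Mutex::new(None));
        {
            let activity = ActivityGuard::start(status.clone(), "finetune: scoring...");
            activity.update("finetune: scoring 12 responses...".into());
            // ... scoring work happens here ...
        } // guard drops: activity complete
        assert!(status.lock().unwrap().is_none());
    }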

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
Kent Overstreet 2026-04-16 11:49:26 -04:00
parent ac40c2cb98
commit e5dd8312c7
5 changed files with 237 additions and 85 deletions


@@ -252,6 +252,8 @@ pub struct AppConfig {
     pub debug: bool,
     pub compaction: CompactionConfig,
     pub dmn: DmnConfig,
+    #[serde(default)]
+    pub learn: LearnConfig,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub memory_project: Option<PathBuf>,
     #[serde(default)]
@@ -323,6 +325,22 @@ pub struct DmnConfig {
     pub max_turns: u32,
 }

+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct LearnConfig {
+    /// Divergence threshold — responses scoring above this become
+    /// fine-tuning candidates. Lower = more sensitive.
+    #[serde(default = "default_learn_threshold")]
+    pub threshold: f64,
+}
+
+fn default_learn_threshold() -> f64 { 0.0000001 }
+
+impl Default for LearnConfig {
+    fn default() -> Self {
+        Self { threshold: default_learn_threshold() }
+    }
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ModelConfig {
     /// Backend name ("anthropic" or "openrouter")
@@ -366,6 +384,7 @@ impl Default for AppConfig {
             soft_threshold_pct: 80,
         },
         dmn: DmnConfig { max_turns: 20 },
+        learn: LearnConfig::default(),
         memory_project: None,
         models: HashMap::new(),
         default_model: String::new(),


@@ -149,8 +149,26 @@ pub struct MindState {
     pub unc_idle_deadline: Instant,
     /// Fine-tuning candidates identified by scoring.
     pub finetune_candidates: Vec<learn::FinetuneCandidate>,
-    /// Fine-tune scoring progress (empty = not running).
-    pub finetune_progress: String,
+    /// Last scoring run stats for UI display.
+    pub finetune_last_run: Option<FinetuneScoringStats>,
+    /// Divergence threshold for finetune scoring — mutable via F6 hotkeys
+    /// and persisted back to ~/.consciousness/config.json5.
+    pub learn_threshold: f64,
+}
+
+/// Stats from the last finetune scoring run.
+#[derive(Clone, Debug)]
+pub struct FinetuneScoringStats {
+    /// Count of assistant responses we considered (recent half of context).
+    pub responses_considered: usize,
+    /// How many exceeded the divergence threshold.
+    pub above_threshold: usize,
+    /// Threshold used for this run.
+    pub threshold: f64,
+    /// Highest divergence observed.
+    pub max_divergence: f64,
+    /// Error message if the run failed.
+    pub error: Option<String>,
 }

 impl Clone for MindState {
@@ -170,7 +188,8 @@ impl Clone for MindState {
             unc_idle: self.unc_idle,
             unc_idle_deadline: self.unc_idle_deadline,
             finetune_candidates: self.finetune_candidates.clone(),
-            finetune_progress: self.finetune_progress.clone(),
+            finetune_last_run: self.finetune_last_run.clone(),
+            learn_threshold: self.learn_threshold,
         }
     }
 }
@@ -185,6 +204,8 @@ pub enum MindCommand {
     ScoreFull,
     /// Score for finetune candidates
     ScoreFinetune,
+    /// Update the finetune divergence threshold and persist to config.
+    SetLearnThreshold(f64),
     /// Abort current turn, kill processes
     Interrupt,
     /// Reset session
@@ -194,7 +215,7 @@
 }

 impl MindState {
-    pub fn new(max_dmn_turns: u32) -> Self {
+    pub fn new(max_dmn_turns: u32, learn_threshold: f64) -> Self {
         Self {
             input: Vec::new(),
             turn_active: false,
@@ -211,7 +232,8 @@ impl MindState {
             unc_idle: false,
             unc_idle_deadline: Instant::now() + std::time::Duration::from_secs(60),
             finetune_candidates: Vec::new(),
-            finetune_progress: String::new(),
+            finetune_last_run: None,
+            learn_threshold,
         }
     }
@@ -341,7 +363,10 @@ impl Mind {
             crate::agent::tools::tools(),
         ).await;

-        let shared = Arc::new(std::sync::Mutex::new(MindState::new(config.app.dmn.max_turns)));
+        let shared = Arc::new(std::sync::Mutex::new(MindState::new(
+            config.app.dmn.max_turns,
+            config.app.learn.threshold,
+        )));
         let (turn_watch, _) = tokio::sync::watch::channel(false);
         let (conscious_active, _) = tokio::sync::watch::channel(false);
         let (bg_tx, bg_rx) = mpsc::unbounded_channel();
@@ -543,6 +568,12 @@ impl Mind {
                 MindCommand::ScoreFinetune => {
                     self.start_finetune_scoring();
                 }
+                MindCommand::SetLearnThreshold(value) => {
+                    self.shared.lock().unwrap().learn_threshold = value;
+                    if let Err(e) = crate::config_writer::set_learn_threshold(value) {
+                        dbglog!("[learn] failed to persist threshold {}: {:#}", value, e);
+                    }
+                }
             }
         }
     }
@@ -618,27 +649,60 @@ impl Mind {
     }

     /// Score responses for fine-tuning candidates.
+    ///
+    /// Scores the most recent half of the context — responses near the end
+    /// of the context window were generated with the most context available,
+    /// which is what we want to train on. The threshold is a temporary knob;
+    /// once this runs continuously, we'll just train whatever lands at full
+    /// context without filtering.
     pub fn start_finetune_scoring(&self) {
+        let threshold = self.shared.lock().unwrap().learn_threshold;
         let agent = self.agent.clone();
         let bg_tx = self.bg_tx.clone();
         let shared = self.shared.clone();
-        shared.lock().unwrap().finetune_progress = "scoring...".into();

         tokio::spawn(async move {
+            let activity = crate::agent::start_activity(&agent, "finetune: scoring...").await;
             let (context, client) = {
                 let ctx = agent.context.lock().await;
                 (ctx.clone(), agent.client.clone())
             };

-            // Min divergence 0.1 = only keep responses that differ meaningfully
-            match learn::score_finetune_candidates(&context, 20, &client, 0.1).await {
-                Ok(candidates) => {
-                    dbglog!("[finetune] found {} candidates", candidates.len());
-                    let _ = bg_tx.send(BgEvent::FinetuneCandidates(candidates));
-                }
-                Err(e) => {
-                    dbglog!("[finetune] scoring FAILED: {:#}", e);
-                }
-            }
-            shared.lock().unwrap().finetune_progress.clear();
+            let entries = context.conversation();
+            let score_count = entries.len() / 2;
+            let range_start = entries.len() - score_count;
+            let responses_considered: usize = entries[range_start..].iter()
+                .filter(|n| matches!(n, crate::agent::context::AstNode::Branch { role: crate::agent::context::Role::Assistant, .. }))
+                .count();
+
+            activity.update(format!("finetune: scoring {} responses...", responses_considered)).await;
+
+            let stats = match learn::score_finetune_candidates(
+                &context, score_count, &client, threshold,
+            ).await {
+                Ok((candidates, max_div)) => {
+                    let above_threshold = candidates.len();
+                    let _ = bg_tx.send(BgEvent::FinetuneCandidates(candidates));
+                    FinetuneScoringStats {
+                        responses_considered,
+                        above_threshold,
+                        threshold,
+                        max_divergence: max_div,
+                        error: None,
+                    }
+                }
+                Err(e) => FinetuneScoringStats {
+                    responses_considered,
+                    above_threshold: 0,
+                    threshold,
+                    max_divergence: 0.0,
+                    error: Some(format!("{}", e)),
+                },
+            };
+            shared.lock().unwrap().finetune_last_run = Some(stats);
+            // activity drops here, marking completion and notifying observers
         });
     }
@@ -706,6 +770,12 @@ impl Mind {
         let mut bg_rx = self.bg_rx.lock().unwrap().take()
             .expect("Mind::run() called twice");
         let mut sub_handle: Option<tokio::task::JoinHandle<()>> = None;
+
+        // Start finetune scoring at startup (scores existing conversation)
+        if !self.config.no_agents {
+            self.start_finetune_scoring();
+        }
+
         loop {
             let (timeout, has_input) = {
                 let me = self.shared.lock().unwrap();


@@ -490,16 +490,18 @@ pub struct FinetuneCandidate {
 /// Score and enrich finetune candidates with full context.
 ///
-/// Returns candidates ready for review, with context/continuation token IDs
-/// already computed for sending to /finetune.
+/// Returns (candidates, max_divergence) - candidates ready for review with
+/// context/continuation token IDs, and the highest divergence seen.
 pub async fn score_finetune_candidates(
     context: &ContextState,
     count: usize,
     client: &ApiClient,
     min_divergence: f64,
-) -> anyhow::Result<Vec<FinetuneCandidate>> {
+) -> anyhow::Result<(Vec<FinetuneCandidate>, f64)> {
     let scores = score_finetune(context, count, client).await?;
+    let max_divergence = scores.iter().map(|(_, d)| *d).fold(0.0f64, f64::max);
     let entries = context.conversation();
     let mut candidates = Vec::new();
@@ -562,7 +564,7 @@ pub async fn score_finetune_candidates(
         }
     }

-    Ok(candidates)
+    Ok((candidates, max_divergence))
 }

 /// Generate what the model would say without memories for a given entry.


@@ -60,12 +60,16 @@ impl From<crate::subconscious::learn::FinetuneCandidate> for FinetuneCandidate {
 pub(crate) struct LearnScreen {
     list_state: ListState,
+    mind_tx: tokio::sync::mpsc::UnboundedSender<crate::mind::MindCommand>,
 }

 impl LearnScreen {
-    pub fn new() -> Self {
+    pub fn new(
+        mind_tx: tokio::sync::mpsc::UnboundedSender<crate::mind::MindCommand>,
+    ) -> Self {
         Self {
             list_state: ListState::default(),
+            mind_tx,
         }
     }
@@ -112,6 +116,22 @@ impl ScreenView for LearnScreen {
             KeyCode::Char('s') => {
                 app.finetune_send_approved();
             }
+            KeyCode::Char('+') | KeyCode::Char('=') => {
+                // Raise threshold 10× (less sensitive — fewer candidates)
+                if let Some(ms) = &app.mind_state {
+                    let new = ms.learn_threshold * 10.0;
+                    let _ = self.mind_tx.send(
+                        crate::mind::MindCommand::SetLearnThreshold(new));
+                }
+            }
+            KeyCode::Char('-') => {
+                // Lower threshold 10× (more sensitive — more candidates)
+                if let Some(ms) = &app.mind_state {
+                    let new = ms.learn_threshold / 10.0;
+                    let _ = self.mind_tx.send(
+                        crate::mind::MindCommand::SetLearnThreshold(new));
+                }
+            }
             _ => {}
         }
     }
@@ -123,19 +143,13 @@ impl ScreenView for LearnScreen {
             self.list_state.select(Some(sel));
         }

-        // Get scoring progress from mind state
-        let progress = app.mind_state.as_ref()
-            .map(|ms| ms.finetune_progress.as_str())
-            .unwrap_or("");
-
         // Now render
         let gen_on = crate::subconscious::learn::alternates_enabled();
-        let title_right = if !progress.is_empty() {
-            format!(" {} ", progress)
-        } else if gen_on {
-            " learn [gen] ".to_string()
+        let threshold = app.mind_state.as_ref().map(|ms| ms.learn_threshold).unwrap_or(0.0);
+        let title_right = if gen_on {
+            format!(" learn [thresh: {:e}] [gen] ", threshold)
         } else {
-            " learn ".to_string()
+            format!(" learn [thresh: {:e}] ", threshold)
         };
         let block = Block::default()
             .title_top(Line::from(screen_legend()).left_aligned())
@@ -148,18 +162,8 @@ impl ScreenView for LearnScreen {
         let candidates = &app.finetune_candidates;
         if candidates.is_empty() {
-            let msg = if progress.is_empty() {
-                " No candidates yet — scoring runs after each turn."
-            } else {
-                " Scoring in progress..."
-            };
-            frame.render_widget(
-                Paragraph::new(Line::styled(msg, Style::default().fg(Color::DarkGray))),
-                inner,
-            );
-            return;
-        }
-
+            render_empty(frame, inner, app);
+        } else {
         // Layout: list on left, detail on right
         let [list_area, detail_area] = Layout::horizontal([
             Constraint::Percentage(40),
@@ -198,8 +202,10 @@
             render_detail(frame, candidate, detail_area);
         }
     }
+        }

-        // Render help at bottom
+        // Render help at bottom (always, even when empty)
+        let gen_status = if gen_on { "[on]" } else { "[off]" };
         let help = Line::from(vec![
             Span::styled(" j/k/\u{2191}\u{2193}", Style::default().fg(Color::Cyan)),
             Span::raw("=nav "),
@@ -208,9 +214,11 @@
             Span::styled("r", Style::default().fg(Color::Red)),
             Span::raw("=reject "),
             Span::styled("g", Style::default().fg(Color::Yellow)),
-            Span::raw("=gen "),
+            Span::raw(format!("=gen{} ", gen_status)),
             Span::styled("s", Style::default().fg(Color::Magenta)),
             Span::raw("=send "),
+            Span::styled("+/-", Style::default().fg(Color::Cyan)),
+            Span::raw("=thresh "),
         ]);
         let help_area = Rect {
             y: area.y + area.height - 1,
@@ -221,6 +229,56 @@ impl ScreenView for LearnScreen {
     }
 }

+fn render_empty(frame: &mut Frame, inner: Rect, app: &App) {
+    let mut lines = Vec::new();
+    lines.push(Line::from(""));
+    match app.mind_state.as_ref().and_then(|ms| ms.finetune_last_run.as_ref()) {
+        Some(stats) => {
+            lines.push(Line::from(vec![
+                Span::raw(" Last run: "),
+                Span::styled(
+                    format!("{}", stats.responses_considered),
+                    Style::default().fg(Color::Cyan),
+                ),
+                Span::raw(" responses considered, "),
+                Span::styled(
+                    format!("{}", stats.above_threshold),
+                    Style::default().fg(if stats.above_threshold > 0 { Color::Green } else { Color::DarkGray }),
+                ),
+                Span::raw(" above threshold, max divergence: "),
+                Span::styled(
+                    format!("{:.4}", stats.max_divergence),
+                    Style::default().fg(Color::Yellow),
+                ),
+            ]));
+            if let Some(err) = &stats.error {
+                lines.push(Line::from(vec![
+                    Span::raw(" "),
+                    Span::styled(
+                        format!("Error: {}", err),
+                        Style::default().fg(Color::Red),
+                    ),
+                ]));
+            }
+        }
+        None => {
+            lines.push(Line::styled(
+                " No scoring run yet.",
+                Style::default().fg(Color::DarkGray),
+            ));
+        }
+    }
+    lines.push(Line::from(""));
+    lines.push(Line::styled(
+        " Scoring runs at startup and after each turn.",
+        Style::default().fg(Color::DarkGray),
+    ));
+    frame.render_widget(Paragraph::new(lines), inner);
+}
+
 fn render_detail(frame: &mut Frame, c: &FinetuneCandidate, area: Rect) {
     let [header_area, content_area] = Layout::vertical([
         Constraint::Length(3),


@@ -389,7 +389,7 @@ async fn run(
         Box::new(crate::user::subconscious::SubconsciousScreen::new()),
         Box::new(crate::user::unconscious::UnconsciousScreen::new()),
         Box::new(crate::user::thalamus::ThalamusScreen::new()),
-        Box::new(crate::user::learn::LearnScreen::new()),
+        Box::new(crate::user::learn::LearnScreen::new(mind_tx.clone())),
     ];
     let mut active_screen: usize = 1; // F-key number
     tui::set_screen_legend(tui::screen_legend_from(&*screens));
@@ -466,7 +466,8 @@ async fn run(
             idle_state.decay_ewma();
             app.update_idle(&idle_state);
             app.agent_state = mind.subconscious_snapshots().await;
-            if let Ok(mut unc) = mind.unconscious.try_lock() {
+            {
+                let mut unc = mind.unconscious.lock().await;
                 let toggles: Vec<String> = app.agent_toggles.drain(..).collect();
                 for name in &toggles {
                     if mind.subconscious.lock().await.toggle(name).is_none() {
@@ -480,10 +481,13 @@ async fn run(
                 };
                 app.unconscious_state = unc.snapshots(store_guard.as_deref());
                 app.graph_health = unc.graph_health.clone();
+            }
+
+            // Sync mind state (finetune candidates, last scoring run, etc.)
+            {
                 let ms = mind.shared.lock().unwrap();
-                // Sync finetune candidates: add new ones, keep existing (preserves approval status)
-                // Remove sent candidates (already trained, no need to keep)
-                // Keep only 10 most recent rejected candidates
+                // Sync finetune candidates: add new ones, keep existing (preserves approval status),
+                // remove sent candidates, keep only 10 most recent rejected.
                 app.finetune_candidates.retain(|c| c.status != learn::CandidateStatus::Sent);
                 for c in &ms.finetune_candidates {
                     let exists = app.finetune_candidates.iter()
@@ -492,7 +496,6 @@ async fn run(
                         app.finetune_candidates.push(learn::FinetuneCandidate::from(c.clone()));
                     }
                 }
-                // Limit rejected candidates to 10 most recent
                 let mut rejected: Vec<_> = app.finetune_candidates.iter()
                     .enumerate()
                     .filter(|(_, c)| c.status == learn::CandidateStatus::Rejected)