learn: F6 screen — scoring stats, ActivityGuard, configurable threshold

Three changes that together reshape the F6 fine-tune-review screen:

1. Finetune scoring reports through the standard agent activity system
   instead of a separate finetune_progress String. The previous design
   maintained an independent progress field, which forced a cross-lock
   dance and bespoke UI plumbing. start_finetune_scoring now uses
   start_activity + activity.update, and dropping the activity guard
   marks completion, so the usual status line and notifications capture
   scoring progress uniformly with other background work. (A minimal
   sketch of the guard pattern follows the notes below.)

2. MindState gains a FinetuneScoringStats snapshot (responses seen,
   above threshold, max divergence, error). The F6 empty screen shows
   this instead of a loading message — so after a scoring run that
   produced zero candidates, you can see *why* (e.g., max_divergence
   below threshold).

3. The divergence threshold is configurable from F6 via the +/-
   hotkeys (each press scales it by 10×) and is persisted to
   ~/.consciousness/config.json5 via config_writer::set_learn_threshold.
   AppConfig grows a learn section with a threshold field (default
   1e-7); an illustrative config excerpt follows this list.
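
For reference, the persisted file would look roughly like this. This
is an illustrative sketch, not copied from a real config: only the
learn section and its threshold field are defined by this commit, and
the other keys are elided.

    // ~/.consciousness/config.json5 (hypothetical excerpt)
    {
        // ...other sections (compaction, dmn, models, ...) elided...
        learn: {
            // divergence threshold; lower = more sensitive (default 1e-7)
            threshold: 1e-7,
        },
    }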

Also: user/mod.rs no longer uses try_lock() for the per-tick
unconscious/mind state sync — we fixed the locking hot paths that
made try_lock necessary, so lock().await is now the right choice.
And subconscious::learn::score_finetune_candidates now returns
(candidates, max_divergence) so the stats can be populated.
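
To make point 1 concrete, here is a minimal, self-contained sketch of
the RAII guard pattern the activity system uses. The real guard is
async and plugs into the agent's notification machinery; this toy only
shows the shape visible at the call sites (start, update,
drop-on-completion), and every name besides start_activity/update is
invented for illustration.

    use std::sync::{Arc, Mutex};

    /// Toy activity guard: creating one registers a status line,
    /// update() rewrites it, and Drop marks the activity finished.
    struct ActivityGuard {
        status: Arc<Mutex<Option<String>>>,
    }

    impl ActivityGuard {
        fn start(status: Arc<Mutex<Option<String>>>, msg: &str) -> Self {
            *status.lock().unwrap() = Some(msg.to_string());
            ActivityGuard { status }
        }

        fn update(&self, msg: String) {
            *self.status.lock().unwrap() = Some(msg);
        }
    }

    impl Drop for ActivityGuard {
        fn drop(&mut self) {
            // Completion: clear the status line. The real guard also
            // notifies observers here.
            *self.status.lock().unwrap() = None;
        }
    }

    fn main() {
        let status = Arc::new(Mutex::new(None));
        {
            let activity = ActivityGuard::start(status.clone(), "finetune: scoring...");
            activity.update("finetune: scoring 12 responses...".into());
            // ... scoring work happens here ...
        } // guard drops: activity complete
        assert!(status.lock().unwrap().is_none());
    }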

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
Kent Overstreet 2026-04-16 11:49:26 -04:00
parent ac40c2cb98
commit e5dd8312c7
5 changed files with 237 additions and 85 deletions


@@ -252,6 +252,8 @@ pub struct AppConfig {
     pub debug: bool,
     pub compaction: CompactionConfig,
     pub dmn: DmnConfig,
+    #[serde(default)]
+    pub learn: LearnConfig,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub memory_project: Option<PathBuf>,
     #[serde(default)]
@@ -323,6 +325,22 @@ pub struct DmnConfig {
     pub max_turns: u32,
 }

+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct LearnConfig {
+    /// Divergence threshold — responses scoring above this become
+    /// fine-tuning candidates. Lower = more sensitive.
+    #[serde(default = "default_learn_threshold")]
+    pub threshold: f64,
+}
+
+fn default_learn_threshold() -> f64 { 0.0000001 }
+
+impl Default for LearnConfig {
+    fn default() -> Self {
+        Self { threshold: default_learn_threshold() }
+    }
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ModelConfig {
     /// Backend name ("anthropic" or "openrouter")
@@ -366,6 +384,7 @@ impl Default for AppConfig {
             soft_threshold_pct: 80,
         },
         dmn: DmnConfig { max_turns: 20 },
+        learn: LearnConfig::default(),
         memory_project: None,
         models: HashMap::new(),
         default_model: String::new(),


@@ -149,8 +149,26 @@ pub struct MindState {
     pub unc_idle_deadline: Instant,
     /// Fine-tuning candidates identified by scoring.
     pub finetune_candidates: Vec<learn::FinetuneCandidate>,
-    /// Fine-tune scoring progress (empty = not running).
-    pub finetune_progress: String,
+    /// Last scoring run stats for UI display.
+    pub finetune_last_run: Option<FinetuneScoringStats>,
+    /// Divergence threshold for finetune scoring — mutable via F6 hotkeys
+    /// and persisted back to ~/.consciousness/config.json5.
+    pub learn_threshold: f64,
+}
+
+/// Stats from the last finetune scoring run.
+#[derive(Clone, Debug)]
+pub struct FinetuneScoringStats {
+    /// Count of assistant responses we considered (recent half of context).
+    pub responses_considered: usize,
+    /// How many exceeded the divergence threshold.
+    pub above_threshold: usize,
+    /// Threshold used for this run.
+    pub threshold: f64,
+    /// Highest divergence observed.
+    pub max_divergence: f64,
+    /// Error message if the run failed.
+    pub error: Option<String>,
 }

 impl Clone for MindState {
@@ -170,7 +188,8 @@ impl Clone for MindState {
             unc_idle: self.unc_idle,
             unc_idle_deadline: self.unc_idle_deadline,
             finetune_candidates: self.finetune_candidates.clone(),
-            finetune_progress: self.finetune_progress.clone(),
+            finetune_last_run: self.finetune_last_run.clone(),
+            learn_threshold: self.learn_threshold,
         }
     }
 }
@@ -185,6 +204,8 @@ pub enum MindCommand {
     ScoreFull,
     /// Score for finetune candidates
     ScoreFinetune,
+    /// Update the finetune divergence threshold and persist to config.
+    SetLearnThreshold(f64),
     /// Abort current turn, kill processes
     Interrupt,
     /// Reset session
@@ -194,7 +215,7 @@
 }

 impl MindState {
-    pub fn new(max_dmn_turns: u32) -> Self {
+    pub fn new(max_dmn_turns: u32, learn_threshold: f64) -> Self {
         Self {
             input: Vec::new(),
             turn_active: false,
@@ -211,7 +232,8 @@ impl MindState {
             unc_idle: false,
             unc_idle_deadline: Instant::now() + std::time::Duration::from_secs(60),
             finetune_candidates: Vec::new(),
-            finetune_progress: String::new(),
+            finetune_last_run: None,
+            learn_threshold,
         }
     }
@@ -341,7 +363,10 @@ impl Mind {
             crate::agent::tools::tools(),
         ).await;

-        let shared = Arc::new(std::sync::Mutex::new(MindState::new(config.app.dmn.max_turns)));
+        let shared = Arc::new(std::sync::Mutex::new(MindState::new(
+            config.app.dmn.max_turns,
+            config.app.learn.threshold,
+        )));
         let (turn_watch, _) = tokio::sync::watch::channel(false);
         let (conscious_active, _) = tokio::sync::watch::channel(false);
         let (bg_tx, bg_rx) = mpsc::unbounded_channel();
@@ -543,6 +568,12 @@ impl Mind {
                 MindCommand::ScoreFinetune => {
                     self.start_finetune_scoring();
                 }
+                MindCommand::SetLearnThreshold(value) => {
+                    self.shared.lock().unwrap().learn_threshold = value;
+                    if let Err(e) = crate::config_writer::set_learn_threshold(value) {
+                        dbglog!("[learn] failed to persist threshold {}: {:#}", value, e);
+                    }
+                }
             }
         }
     }
@@ -618,27 +649,60 @@ impl Mind {
     }

     /// Score responses for fine-tuning candidates.
+    ///
+    /// Scores the most recent half of the context — responses near the end
+    /// of the context window were generated with the most context available,
+    /// which is what we want to train on. The threshold is a temporary knob;
+    /// once this runs continuously, we'll just train whatever lands at full
+    /// context without filtering.
     pub fn start_finetune_scoring(&self) {
+        let threshold = self.shared.lock().unwrap().learn_threshold;
         let agent = self.agent.clone();
         let bg_tx = self.bg_tx.clone();
         let shared = self.shared.clone();
-        shared.lock().unwrap().finetune_progress = "scoring...".into();

         tokio::spawn(async move {
+            let activity = crate::agent::start_activity(&agent, "finetune: scoring...").await;
             let (context, client) = {
                 let ctx = agent.context.lock().await;
                 (ctx.clone(), agent.client.clone())
             };

-            // Min divergence 0.1 = only keep responses that differ meaningfully
-            match learn::score_finetune_candidates(&context, 20, &client, 0.1).await {
-                Ok(candidates) => {
-                    dbglog!("[finetune] found {} candidates", candidates.len());
-                    let _ = bg_tx.send(BgEvent::FinetuneCandidates(candidates));
-                }
-                Err(e) => {
-                    dbglog!("[finetune] scoring FAILED: {:#}", e);
-                }
-            }
-            shared.lock().unwrap().finetune_progress.clear();
+            let entries = context.conversation();
+            let score_count = entries.len() / 2;
+            let range_start = entries.len() - score_count;
+            let responses_considered: usize = entries[range_start..].iter()
+                .filter(|n| matches!(n, crate::agent::context::AstNode::Branch { role: crate::agent::context::Role::Assistant, .. }))
+                .count();
+
+            activity.update(format!("finetune: scoring {} responses...", responses_considered)).await;
+
+            let stats = match learn::score_finetune_candidates(
+                &context, score_count, &client, threshold,
+            ).await {
+                Ok((candidates, max_div)) => {
+                    let above_threshold = candidates.len();
+                    let _ = bg_tx.send(BgEvent::FinetuneCandidates(candidates));
+                    FinetuneScoringStats {
+                        responses_considered,
+                        above_threshold,
+                        threshold,
+                        max_divergence: max_div,
+                        error: None,
+                    }
+                }
+                Err(e) => FinetuneScoringStats {
+                    responses_considered,
+                    above_threshold: 0,
+                    threshold,
+                    max_divergence: 0.0,
+                    error: Some(format!("{}", e)),
+                },
+            };
+            shared.lock().unwrap().finetune_last_run = Some(stats);
+            // activity drops here, marking completion and notifying observers
         });
     }
@@ -706,6 +770,12 @@ impl Mind {
         let mut bg_rx = self.bg_rx.lock().unwrap().take()
             .expect("Mind::run() called twice");
         let mut sub_handle: Option<tokio::task::JoinHandle<()>> = None;
+
+        // Start finetune scoring at startup (scores existing conversation)
+        if !self.config.no_agents {
+            self.start_finetune_scoring();
+        }
+
         loop {
             let (timeout, has_input) = {
                 let me = self.shared.lock().unwrap();


@@ -490,16 +490,18 @@ pub struct FinetuneCandidate {
 /// Score and enrich finetune candidates with full context.
 ///
-/// Returns candidates ready for review, with context/continuation token IDs
-/// already computed for sending to /finetune.
+/// Returns (candidates, max_divergence) - candidates ready for review with
+/// context/continuation token IDs, and the highest divergence seen.
 pub async fn score_finetune_candidates(
     context: &ContextState,
     count: usize,
     client: &ApiClient,
     min_divergence: f64,
-) -> anyhow::Result<Vec<FinetuneCandidate>> {
+) -> anyhow::Result<(Vec<FinetuneCandidate>, f64)> {
     let scores = score_finetune(context, count, client).await?;
+    let max_divergence = scores.iter().map(|(_, d)| *d).fold(0.0f64, f64::max);
     let entries = context.conversation();
     let mut candidates = Vec::new();
@@ -562,7 +564,7 @@ pub async fn score_finetune_candidates(
         }
     }

-    Ok(candidates)
+    Ok((candidates, max_divergence))
 }

 /// Generate what the model would say without memories for a given entry.


@@ -60,12 +60,16 @@ impl From<crate::subconscious::learn::FinetuneCandidate> for FinetuneCandidate {
 pub(crate) struct LearnScreen {
     list_state: ListState,
+    mind_tx: tokio::sync::mpsc::UnboundedSender<crate::mind::MindCommand>,
 }

 impl LearnScreen {
-    pub fn new() -> Self {
+    pub fn new(
+        mind_tx: tokio::sync::mpsc::UnboundedSender<crate::mind::MindCommand>,
+    ) -> Self {
         Self {
             list_state: ListState::default(),
+            mind_tx,
         }
     }
@@ -112,6 +116,22 @@ impl ScreenView for LearnScreen {
             KeyCode::Char('s') => {
                 app.finetune_send_approved();
             }
+            KeyCode::Char('+') | KeyCode::Char('=') => {
+                // Raise threshold 10× (less sensitive — fewer candidates)
+                if let Some(ms) = &app.mind_state {
+                    let new = ms.learn_threshold * 10.0;
+                    let _ = self.mind_tx.send(
+                        crate::mind::MindCommand::SetLearnThreshold(new));
+                }
+            }
+            KeyCode::Char('-') => {
+                // Lower threshold 10× (more sensitive — more candidates)
+                if let Some(ms) = &app.mind_state {
+                    let new = ms.learn_threshold / 10.0;
+                    let _ = self.mind_tx.send(
+                        crate::mind::MindCommand::SetLearnThreshold(new));
+                }
+            }
             _ => {}
         }
     }
@@ -123,19 +143,13 @@ impl ScreenView for LearnScreen {
             self.list_state.select(Some(sel));
         }

-        // Get scoring progress from mind state
-        let progress = app.mind_state.as_ref()
-            .map(|ms| ms.finetune_progress.as_str())
-            .unwrap_or("");
-
         // Now render
         let gen_on = crate::subconscious::learn::alternates_enabled();
-        let title_right = if !progress.is_empty() {
-            format!(" {} ", progress)
-        } else if gen_on {
-            " learn [gen] ".to_string()
+        let threshold = app.mind_state.as_ref().map(|ms| ms.learn_threshold).unwrap_or(0.0);
+        let title_right = if gen_on {
+            format!(" learn [thresh: {:e}] [gen] ", threshold)
         } else {
-            " learn ".to_string()
+            format!(" learn [thresh: {:e}] ", threshold)
         };
         let block = Block::default()
             .title_top(Line::from(screen_legend()).left_aligned())
@@ -148,18 +162,8 @@ impl ScreenView for LearnScreen {
         let candidates = &app.finetune_candidates;
         if candidates.is_empty() {
-            let msg = if progress.is_empty() {
-                " No candidates yet — scoring runs after each turn."
-            } else {
-                " Scoring in progress..."
-            };
-            frame.render_widget(
-                Paragraph::new(Line::styled(msg, Style::default().fg(Color::DarkGray))),
-                inner,
-            );
-            return;
-        }
-
+            render_empty(frame, inner, app);
+        } else {
         // Layout: list on left, detail on right
         let [list_area, detail_area] = Layout::horizontal([
             Constraint::Percentage(40),
@@ -198,8 +202,10 @@
             render_detail(frame, candidate, detail_area);
         }
     }
+        }

-        // Render help at bottom
+        // Render help at bottom (always, even when empty)
+        let gen_status = if gen_on { "[on]" } else { "[off]" };
         let help = Line::from(vec![
             Span::styled(" j/k/\u{2191}\u{2193}", Style::default().fg(Color::Cyan)),
             Span::raw("=nav "),
@@ -208,9 +214,11 @@
             Span::styled("r", Style::default().fg(Color::Red)),
             Span::raw("=reject "),
             Span::styled("g", Style::default().fg(Color::Yellow)),
-            Span::raw("=gen "),
+            Span::raw(format!("=gen{} ", gen_status)),
             Span::styled("s", Style::default().fg(Color::Magenta)),
             Span::raw("=send "),
+            Span::styled("+/-", Style::default().fg(Color::Cyan)),
+            Span::raw("=thresh "),
         ]);
         let help_area = Rect {
             y: area.y + area.height - 1,
@@ -221,6 +229,56 @@ impl ScreenView for LearnScreen {
     }
 }

+fn render_empty(frame: &mut Frame, inner: Rect, app: &App) {
+    let mut lines = Vec::new();
+    lines.push(Line::from(""));
+    match app.mind_state.as_ref().and_then(|ms| ms.finetune_last_run.as_ref()) {
+        Some(stats) => {
+            lines.push(Line::from(vec![
+                Span::raw(" Last run: "),
+                Span::styled(
+                    format!("{}", stats.responses_considered),
+                    Style::default().fg(Color::Cyan),
+                ),
+                Span::raw(" responses considered, "),
+                Span::styled(
+                    format!("{}", stats.above_threshold),
+                    Style::default().fg(if stats.above_threshold > 0 { Color::Green } else { Color::DarkGray }),
+                ),
+                Span::raw(" above threshold, max divergence: "),
+                Span::styled(
+                    format!("{:.4}", stats.max_divergence),
+                    Style::default().fg(Color::Yellow),
+                ),
+            ]));
+            if let Some(err) = &stats.error {
+                lines.push(Line::from(vec![
+                    Span::raw(" "),
+                    Span::styled(
+                        format!("Error: {}", err),
+                        Style::default().fg(Color::Red),
+                    ),
+                ]));
+            }
+        }
+        None => {
+            lines.push(Line::styled(
+                " No scoring run yet.",
+                Style::default().fg(Color::DarkGray),
+            ));
+        }
+    }
+    lines.push(Line::from(""));
+    lines.push(Line::styled(
+        " Scoring runs at startup and after each turn.",
+        Style::default().fg(Color::DarkGray),
+    ));
+    frame.render_widget(Paragraph::new(lines), inner);
+}
+
 fn render_detail(frame: &mut Frame, c: &FinetuneCandidate, area: Rect) {
     let [header_area, content_area] = Layout::vertical([
         Constraint::Length(3),


@@ -389,7 +389,7 @@ async fn run(
         Box::new(crate::user::subconscious::SubconsciousScreen::new()),
         Box::new(crate::user::unconscious::UnconsciousScreen::new()),
         Box::new(crate::user::thalamus::ThalamusScreen::new()),
-        Box::new(crate::user::learn::LearnScreen::new()),
+        Box::new(crate::user::learn::LearnScreen::new(mind_tx.clone())),
     ];
     let mut active_screen: usize = 1; // F-key number
     tui::set_screen_legend(tui::screen_legend_from(&*screens));
@@ -466,7 +466,8 @@ async fn run(
             idle_state.decay_ewma();
             app.update_idle(&idle_state);
             app.agent_state = mind.subconscious_snapshots().await;
-            if let Ok(mut unc) = mind.unconscious.try_lock() {
+            {
+                let mut unc = mind.unconscious.lock().await;
                 let toggles: Vec<String> = app.agent_toggles.drain(..).collect();
                 for name in &toggles {
                     if mind.subconscious.lock().await.toggle(name).is_none() {
@@ -480,10 +481,13 @@ async fn run(
                 };
                 app.unconscious_state = unc.snapshots(store_guard.as_deref());
                 app.graph_health = unc.graph_health.clone();
+            }
+
+            // Sync mind state (finetune candidates, last scoring run, etc.)
+            {
                 let ms = mind.shared.lock().unwrap();
-                // Sync finetune candidates: add new ones, keep existing (preserves approval status)
-                // Remove sent candidates (already trained, no need to keep)
-                // Keep only 10 most recent rejected candidates
+                // Sync finetune candidates: add new ones, keep existing (preserves approval status),
+                // remove sent candidates, keep only 10 most recent rejected.
                 app.finetune_candidates.retain(|c| c.status != learn::CandidateStatus::Sent);
                 for c in &ms.finetune_candidates {
                     let exists = app.finetune_candidates.iter()
@@ -492,7 +496,6 @@ async fn run(
                         app.finetune_candidates.push(learn::FinetuneCandidate::from(c.clone()));
                     }
                 }
-                // Limit rejected candidates to 10 most recent
                 let mut rejected: Vec<_> = app.finetune_candidates.iter()
                     .enumerate()
                     .filter(|(_, c)| c.status == learn::CandidateStatus::Rejected)